doc/3.1/latex/whamman.tex

   1 \documentclass[12pt]{article}
   2 %\usepackage{latex2html}
   3 \usepackage{enumerate}
   4 \usepackage{longtable}
   5 \usepackage{hyperref}
   6 \usepackage{amsmath}
   7 \usepackage{color}
   8 \parindent=0pt
   9 \parskip=12pt
  10 \textheight=24cm
  11 \textwidth=18cm
  12 \topmargin=-2.5cm
  13 \oddsidemargin=-0.5cm
  14 \setcounter{secnumdepth}{5}
  15 \setcounter{tocdepth}{5}
  16 \begin{document}
  17 \sloppy
  18
  19 \title{WHAM \\ (Weighted Histogram Analysis Method)\\
  20         Processing results of UNRES/MREMD simulations}
  21
  22 \author{Department of Molecular Modeling\\ Faculty of Chemistry\\ University of Gdansk\\ Sobieskiego 18\\ 80-952 Gdansk, Poland\\
  23 \\
  24 \\
  25 Scheraga Group\\ Baker Laboratory of Chemistry \\
  26 and Chemical Biology\\ Cornell University\\ Ithaca, NY 14853-1303, USA}
  27
  28 \maketitle
  29
  30 \newpage
  31
  32 \tableofcontents
  33
  34 %1. License terms
  35 %2. References
  36 %3. Functions of the program
  37 %4. Installation
  38 %5. Running the program
  39 %6. Input and output files
  40 %6.1. Summary of files
  41 %6.2. The main input file
  42 %6.2.1. General data
  43 %6.2.2. Molecule and energy parameter data
  44 %6.2.2.1. General information
  45 %6.2.2.2. Sequence information
  46 %6.2.2.3. Dihedral angle restraint information
  47 %6.2.2.4. Disulfide-bridge data
  48 %6.2.3. Energy-term weights and parameter files
  49 %6.2.4. (M)REMD/Hamiltonian (M)REMD setting specification
  50 %6.2.5. Information of files from which to read conformations
  51 %6.2.6. Information of reference structure and comparing scheme
  52 %6.3. The structure of the main output file (out)
  53 %6.4. The thermodynamic quantity and ensemble average (stat) files
  54 %6.5. The conformation summary with classification (stat) files
  55 %6.6. The histogram files
  56 %6.7. The rmsd-radius of gyration potential of mean force files
  57 %6.8. The PDB files
  58 %6.9. The compresses Cartesian coordinates (cx) file
  59 %7. Support
  60
  61 \newpage
  62
  63 \section{LICENSE TERMS}
  64 \label{sect:license}
  65
  66 \begin{itemize}
  67
  68 \item
  69                 This software is provided free of charge to academic users, subject to the condition that no part of it be sold or used otherwise for commercial purposes, including, but not limited to its incorporation into commercial software packages, without written consent from the authors. For permission contact Prof. H. A. Scheraga, Cornell University.
  70
  71 \item
  72                 This software package is provided on an ``as is'' basis. We in no way warrant either this software or results it may produce.
  73
  74 \item
  75                 Reports or publications using this software package must contain an acknowledgment to the authors and the NIH Resource in the form commonly used in academic research.
  76
  77 \end{itemize}
  78
  79 \newpage
  80
  81 \section{REFERENCES}
  82 \label{sect:references}
  83
  84 Citing the following references in your work that makes use of the WHAM software is gratefully
  85 acknowledged:
  86
  87 \begingroup
  88 \renewcommand{\section}[2]{}%
  89 \begin{thebibliography}{10}
  90
  91 \bibitem{kumar_1992}
  92 S. Kumar, D. Bouzida, R.H. Swendsen, P.A. Kollman, J.M. Rosenberg.
  93 The weighted histogram analysis method for free-energy calculations on biomolecules. I. The method.
  94 {\it J. Comput. Chem.}, {\bf 1992}, 13, 1011-1021.
  95
  96 \bibitem{liwo_2007}
  97 A. Liwo, M. Khalili, C. Czaplewski, S. Kalinowski, S. Oldziej, K. Wachucik, H.A. Scheraga.
  98 Modification and optimization of the united-residue (UNRES) potential energy function for canonical simulations. I. Temperature dependence of the effective energy function and tests of the optimization method with single training proteins.
  99 {\it J. Phys. Chem. B}, {\bf 2007}, 111, 260-285.
 100
 101 \bibitem{oldziej_2004}
 102 S. Oldziej, A. Liwo, C. Czaplewski, J. Pillardy, H.A. Scheraga.
 103 Optimization of the UNRES force field by hierarchical design of the potential-energy landscape. 2. Off-lattice tests of the method with single proteins.
 104 {\it J. Phys. Chem. B}, {\bf 2004}, 108, 16934-16949.
 105
 106 \end{thebibliography}
 107
 108 \endgroup
 109
 110 \newpage
 111
 112 \section{FUNCTIONS OF THE PROGRAM}
 113 \label{sect:func}
 114
 115 The program processes the results of replica exchange (REMD) or multiplexed replica exchange molecular
 116 dynamics (MREMD) simulations with UNRES to compute the probabilities of the obtained conformations to
 117 occur at particular temperatures. The program is based on the variant of the weighted histogram analysis
 118 (WHAM) method \cite{kumar_1992} described in ref \cite{liwo_2007}.
 119
 120 The program outputs the following information:
 121
 122 \begin{enumerate}[(a)]
 123
 124 \item
 125 Temperature profiles of thermodynamic and structural ensemble-averaged quantities.
 126
 127 \item
 128 Histograms of native-likeness measure q (defined by eqs 8-11 of ref [\cite{liwo_2007}]).
 129
 130 \item
 131 Optionally the most probable conformations at REMD temperatures.
 132
 133 \item
 134 Optionally the coordinates with information to compute probabilities for the conformations to occur at any temperature.
 135
 136 \end{enumerate}
 137
 138 The program takes usually UNRES compressed coordinate files (cx files) from MREMD obtained by using the TRAJ1FILE option. The user can request to partition the whole run into equal slices (or windows), each starting from, say, snapshot n (for each trajectory) and ending at snapshot n+1.
 139 Alternatively, the UNRES Cartesian coordinate (x files) can be input; however, they must contain only the analyzed portion of the trajectories; they are usually prepared from single trajectories by using xdrf2x.
 140
 141 Two versions of the program are provided:
 142
 143 \begin{enumerate}[(a)]
 144
 145 \item
 146 Canonical version which treats single polypeptide chains; the source code is in WHAM/src directory.
 147
 148 \item
 149 Version for oligomeric proteins; multiple chains are handled by inserting dummy residues in the sequence; the source code is in WHAM/src-M directory.
 150
 151 \end{enumerate}
 152
 153 \newpage
 154
 155 \section{INSTALLATION}
 156 \label{sect:install}
 157
 158 Customize Makefile to your system. See section 7 of the description of UNRES for compiler flags that are used to created executables for a particular force field. There are already several Makefiles prepared for various systems and force fields.
 159
 160 Run make in the WHAM/src directory WHAM/src-M directory for multichain version. Make sure that MPI is installed on your system; the present program runs only in parallel mode.
 161
 162 \newpage
 163
 164 \section{RUNNING THE PROGRAM}
 165 \label{sect:running}
 166
 167 The program requires a parallel system to run. Depending on system, either the wham.csh C-shell script (in WHAM/bin directory) can be started using mpirun or the binary in the C-shell script must be executed through mpirun. See the wham.csh C-shell script and section 6 for the files processed by the program.
 168
 169 \newpage
 170
 171 \section{INPUT AND OUTPUT FILES}
 172 \label{sect:inoutfiles}
 173
 174 \subsection{Summary of the files}
 175 \label{sect:inoutfiles:summary}
 176
 177 The C-shell script wham.csh is used to run the program (see the WHAM/bin directory). The data files that the script needs are mostly the same as for UNRES (see section 6 of UNRES description). In addition, the environmental variable CONTFUN specifies the method to assess whether two side chains are at contact; if CONTFUN=GB, the criterion defined by eq 8 of ref 4 is used to assess whether two side chains are at contact. Also, the parameter files from the C-shell scripts are overridden if the data from Hamiltonian MREMD are processed; if so, the parameter files are defined in the main input file.
 178
 179 The main input file must have inp extension. If it is INPUT.inp, the output files are as follows:
 180
 181 \begin{description}
 182
 183 \item{INPUT.out\_POTxxx} -- output files from different processors (INPUT.out\_000 is the main output file). POT is the identifier of the sidechain-sidechain potential.
 184
 185 \item{INPUT\_POT\_GB\_xxx.stat} or INPUT\_POT\_slice\_YYXXX.stat -- the summary conformation-classification file from processor xxx (each processor handles part of conformations); the second occurs if the run is partitioned into slices.
 186
 187 \item{INPUT.thermal} or INPUT\_slice\_yy.thermal -- thermodynamic functions and temperature profiles of the ensemble averages (the second form if the run is partitioned into slices).
 188
 189 \item{INPUT\_T\_xxx.pdb} or INPUT\_slice\_yy\_T\_xxx.pdb -- top conformations the number of these conformations is selected by the user) in PDB format.
 190
 191 \item{INPUT.cx} -- the compressed UNRES coordinate file with information to compute the probability of a given conformation at any temperature.
 192
 193 \item{INPUT.hist}, INPUT\_slice\_xx.hist, INPUT\_par\_yy.hist, INPUT\_par\_yy\_slice\_zz.x -- histograms of q at MREMD temperatures.
 194
 195 \item{INPUT.ent}, INPUT\_slice\_xx.ent, INPUT\_par\_yy.ent, INPUT\_par\_yy\_slice\_xx.ent -- the histogram(s) of energy density.
 196
 197 \item{INPUT.rmsrgy}, INPUT\_par\_yy.rmsrgy, INPUT\_slice\_xx.rmsrgy or INPUT\_par\_yy\_slice\_xx.rmsrgy -- the 2D histogram(s) of rmsd from the experimental structure and radius of gyration.
 198
 199 \end{description}
 200
 201 \subsection{Main input file}
 202 \label{sect:inoutfiles:main}
 203
 204 This file has the same structure as the UNRES input file; most of the data are input in a keyword-based form (see section 7.1 of UNRES description). The data are grouped into records, referred to as lines. Each record, except for the records that are input in non-keyword based form, can be continued by placing an ampersand (\&) in column 80. Such a format is referred to as the data list format.
 205
 206 In the following description, the default values are given in parentheses.
 207
 208 \subsubsection{General data (data list format)}
 209 \label{sect:inoutfiles:main:general}
 210
 211 \begin{description}
 212
 213 \item{N\_ENE} (N\_ENE\_MAX) -- the number of energy components.
 214
 215 \item{SYM} (1) -- number of chains with same sequence (for oligomeric proteins only).
 216
 217 \item{HAMIL\_REP} -- if present, Hamiltonian process the results of replica exchange runs (replicas with different parameters of the energy function).
 218
 219 \item{NPARMSET} (1) -- number of energy parameter sets ($>$1 only for Hamiltonian replica exchange simulations).
 220
 221 \item{SEPARATE\_PARSET} -- if present, HREMD was run in a mode such that only temperature but not energy-function parameters was exchanged.
 222
 223 \item{IPARMPRINT} (1) -- number of parameter set with which to construct conformational ensembles; important only when HREMD runs are processed.
 224
 225 \item{ENE\_ONLY} -- if present, only conformational energies will be calculated and printed; no WHAM iteration.
 226
 227 \item{EINICHECK} (2) -- $>0$ compare the conformational energies against those stored in the coordinate file(s); 1: compare but print only a warning message if different; 2: compare and terminate the program if different; 0: don't compare.
 228
 229 \item{MAXIT} (5000) -- maximum number of iterations in solving WHAM equations.
 230
 231 \item{ISAMPL} (1) -- input conformation sampling frequency (e.g., if ISAMPL=5, only each 5th conformation will be read).
 232
 233 \item{NSLICE} (1) -- number of ``slices'' or ``windows'' into which each trajectory will be partitioned; each slice will be analyzed independently.
 234
 235 \item{FIMIN} (0.001) -- maximum average difference between window free energies between the current and the previous iteration.
 236
 237 \item{ENSEMBLES} (0) -- number of conformations (ranked according to probabilities) to be output to PDB file at each MREMD temperature; 0 means that no conformations will be output. Non-zero values should not be used when NSLICE$>$1.
 238
 239 \item{CLASSIFY} -- if present, each conformation will be assigned a class, according to the scheme described in
 240 ref \cite{oldziej_2004}.
 241
 242 \item{DELTA} (0.01) -- one dimension bin size of the histogram in q.
 243
 244 \item{DELTRMS} (0.05) -- rms dimension bin size in rms-radius of gyration histograms.
 245
 246 \item{DELTRGY} (0.05) - radius of gyration bin size in rms-radius of gyration histograms.
 247
 248 \item{NQ} (1) -- number of q's (can be for entire molecule, fragments, and pairs of fragments).
 249
 250 \item{CXFILE} -- produce the compressed coordinate file with information necessary to compute the probabilities of conformations at any temperature.
 251
 252 \item{HISTOUT} -- if present, the histograms of q at MREMD temperatures are constructed and printed to main output file.
 253
 254 \item{HISTFILE} -- if present, the histograms are also printed to separate files.
 255
 256 \item{ENTFILE} -- if present, histogram of density of states (entropy) is constructed and printed.
 257
 258 \item{RMSRGYMAP} -- if present, 2D histograms of radius of rmsd and radius of gyration at MREMD temperatures are constructed and printed.
 259
 260 \item{WITH\_DIHED\_CONSTR} -- if present, dihedral-angle restraints were imposed in the processed MREMD simulations.
 261
 262 \item{RESCALE} (1) -- Choice of the type of temperature dependence of the force field.
 263
 264 \begin{description}
 265
 266 \item{$>0$} -- no temperature dependence.
 267
 268 \item{1} -- homographic dependence (not implemented yet with any force field).
 269
 270 \item{2} -- hyperbolic tangent dependence \cite{liwo_2007}.
 271
 272 \end{description}
 273
 274 \end{description}
 275
 276 \subsubsection{Molecule data}
 277 \label{sect:inoutfiles:main:molecule}
 278
 279 \paragraph{General information}
 280 \label{sect:inoutfiles:main:molecule:geninfo}
 281
 282 \begin{description}
 283
 284 \item{SCAL14} (0.4) -- scale factor of backbone-electrostatic 1,4-interactions.
 285
 286 \item{SCALSCP} (1.0) -- scale factor of SC-p interactions.
 287
 288 \item{CUTOFF} (7.0) -- cut-off on backbone-electrostatic interactions to compute 4- and higher-order correlations.
 289
 290 \item{DELT\_CORR} (0.5) -- thickness of the distance range in which the energy is decreased to zero.
 291
 292 \item{ONE\_LETTER} -- if present, the sequence is to be read in 1-letter code, otherwise 3-letter code.
 293
 294 \end{description}
 295
 296 \paragraph{Sequence information \\ \\}
 297 \label{sect:inoutfiles:main:molecule:sequence}
 298
 299
 300 1st record (keyword-based input):
 301
 302 NRES -- number of residues, including the UNRES dummy terminal residues, if present
 303
 304 Next records: amino-acid sequence
 305
 306 3-letter code: Sequence is input in format 20(1X,A3)
 307
 308 1-letter code: Sequence is input in format 80A1
 309
 310 \paragraph{Dihedral angle restraint information \\ \\}
 311 \label{sect:inoutfiles:main:molecule:restraints}
 312
 313
 314 This is the information about dihedral-angle restraints, if any are present.
 315 It is specified only when WITH\_DIHED\_CONSTR is present in the first record.
 316
 317 1st line: ndih\_constr -- number of restraints (free format).
 318
 319 2nd line: ftors -- force constant (free format).
 320
 321 Each of the following ndih\_constr lines:
 322
 323 idih\_constr(i),phi0(i),drange(i) (free format)
 324
 325 \begin{description}
 326
 327 \item{idih\_constr(i)} -- the number of the dihedral angle gamma corresponding to the ith restraint.
 328
 329 \item{phi0(i)} -- center of dihedral-angle restraint.
 330
 331 \item{drange(i)} -- range of flat well (no restraints for phi0(i) +/- drange(i)).
 332
 333 \end{description}
 334
 335 \paragraph{Disulfide-bridge data\\ \\}
 336 \label{sect:inoutfiles:main:molecule:disulfide}
 337
 338
 339 1st line: NS, (ISS(I),I=1,NS) (free format)
 340
 341 \begin{description}
 342
 343 \item{NS} -- number of cystine residues forming disulfide bridges.
 344
 345 \item{ISS(I)} -- the number of the Ith disulfide-bonding cystine in the sequence.
 346
 347 \end{description}
 348
 349 nd line: NSS, (IHPB(I),JHPB(I),I=1,NSS) (free format)
 350
 351 \begin{description}
 352
 353 \item{NSS} -- number of disulfide bridges
 354
 355 \item{IHPB(I),JHPB(I)} - the first and the second residue of ith disulfide link
 356
 357 \end{description}
 358
 359 Because the input is in free format, each line can be split.
 360
 361 \subsubsection{Energy-term weights and parameter files}
 362 \label{sect:inoutfiles:main:weights}
 363
 364 There are NPARMSET records specified below.
 365 All items described in this section are input in keyword-based mode.
 366
 367 1st record: Weights for the following energy terms:
 368
 369 \begin{description}
 370
 371 \item{WSC} (1.0) -- side-chain-side-chain interaction energy.
 372 \item{WSCP} (1.0) -- side chain-peptide group interaction energy.
 373 \item{WELEC} (1.0) -- peptide-group-peptide group interaction energy.
 374 \item{WEL\_LOC (1.0)} -- third-order backbone-local correlation energy.
 375 \item{WCORR} (1.0) -- fourth-order backbone-local correlation energy.
 376 \item{WCORR5} (1.0) -- fifth-order backbone-local correlation energy.
 377 \item{WCORR6} (1.0) -- sixth-order backbone-local correlation energy.
 378 \item{WTURN3} (1.0) -- third-order backbone-local correlation energy of pairs of peptide groups separated by a single peptide group.
 379 \item{WTURN4} (1.0) -- fourth-order backbone-local correlation energy of pairs of peptide groups separated by two peptide groups.
 380 \item{WTURN6} (1.0) -- sixth-order backbone-local correlation energy for pairs of peptide groups separated by four peptide groups.
 381 \item{WBOND} (1.0) -- virtual-bond-stretching energy.
 382 \item{WANG} (1.0) -- virtual-bond-angle-bending energy.
 383 \item{WTOR} (1.0) -- virtual-bond-torsional energy.
 384 \item{WTORD} (1.0) -- virtual-bond-double-torsional energy.
 385 \item{WSCCOR} (1.0) -- sequence-specific virtual-bond-torsional energy.
 386 \item{WDIHC} (0.0) -- dihedral-angle-restraint energy.
 387 \item{WHPB} (1.0) -- distance-restraint energy.
 388
 389 \end{description}
 390
 391 2nd record: Parameter files. If filename is not specified that corresponds to particular parameters, the respective name from the C-shell script will be assigned. If no files are to be specified, an empty line must be inserted.
 392
 393 \begin{description}
 394 \item{BONDPAR} -- bond-stretching parameters.
 395 \item{THETPAR} -- backbone virtual-bond-angle-bending parameters.
 396 \item{ROTPAR} -- side-chain-rotamer parameters.
 397 \item{TORPAR} -- backbone-torsional parameters.
 398 \item{TORDPAR} -- backbone-double-torsional parameters.
 399 \item{FOURIER} -- backbone-local -- backbone-electrostatic correlation parameters.
 400 \item{SCCORAR} -- sequence-specific backbone-torsional parameters (not used at present).
 401 \item{SIDEPAR} -- side-chain-side-chain-interaction parameters.
 402 \item{ELEPAR} -- backbone-electrostatic-interaction parameters.
 403 \item{SCPPAR} -- backbone-side-chain-interaction parameters.
 404 \end{description}
 405
 406 \subsubsection{(M)REMD/Hamiltonian (M)REMD setting specification}
 407 \label{sect:inoutfiles:main:MREMD}
 408
 409 If HAMIL\_REP is present in general data, read the following group of records only once; otherwise, read for each parameter set (NPARSET times total).
 410
 411 \begin{description}
 412
 413 \item{NT} (1) -- number of temperatures.
 414
 415 \item{REPLICA} -- if present, replicas in temperatures were specified with this parameter set.
 416
 417 \item{UMBRELLA} -- if present, umbrella-sampling was run with this parameter set.
 418
 419 \item{READ\_ISET} -- if present, umbrella-sampling-window number is read from the compressed Cartesian coordinate (cx) file even if the data are not from umbrella-sampling run(s). ISET is present in the cx files from the present version of UNRES.
 420
 421 \end{description}
 422
 423 Following NT records are for consecutive temperature replicas; each record is
 424 organized as keyword-based input:
 425
 426 \begin{description}
 427
 428 \item{TEMP} (298.0) - initial temperature of this replica (replicas in MREMD).
 429
 430 \item{FI} (0.0) - initial values of the dimensionless free energies for all q-restraint windows for this replica (NR values).
 431
 432 \item{KH} (100.0) - force constants of q restraints (NR values).
 433 Q0 (0.0d0) - q-restraint centers (NR values)</p>
 434
 435 \end{description}
 436
 437 \subsubsection{Information of files from which to read conformations}
 438 \label{sect:inoutfile:main:conffiles}
 439
 440 If HAMIL\_REP is present in general data, read the following two records only once; otherwise, read for each parameter set (NPARSET times total).
 441
 442 1st record (keyword-based input):.
 443
 444 For temperature replica only ONE record is read; for non-(M)REMD runs, NT records must be supplied. The records are in keyword-based format.
 445
 446 \begin{description}
 447 \item{NFILE\_ASC} -- number of files in ASCII format (UNRES Cartesian coordinate (x) files) for current parameter set.
 448
 449 \item{NFILE\_CX} -- number of compressed coordinate files (cx files) for current parameter set.
 450
 451 \item{NFILE\_BINi} -- number of binary coordinate files (now obsolete because it requires initial conversion of ASCII format trajectories into binary format).
 452
 453 \end{description}
 454
 455 It is strongly recommended to use cx files from (M)REMD runs with TRAJ1FILE option. Multitude of trajectory files which are opened and closed by different processors might impair file system accessibility. Should you wish to process trajectories each one of which is stored in a separate file, better collate the required slices of them first to an x file by using the xdrf2x program piped to the UNIX cat command.
 456
 457 coordinate file name(s) without extension.
 458
 459 \subsubsection{Information of reference structure and comparing scheme}
 460 \label{sect:inoutfile:main:reference}
 461
 462 The following records pertain to setting up the classification of conformation aimed ultimately at obtaining a class numbers. Fragments and pairs of fragments are specified and compared against those of reference structure in terms of secondary structure, number of contacts, rmsd, virtual-bond-valence and dihedral angles, etc. Then the class number is constructed as described in ref 3. A brief description of comparison procedure is as follows:
 463
 464 \begin{enumerate}
 465
 466 \item
 467 Elementary fragments usually corresponding to elements of secondary or supersecondary structure are selected. Based on division into fragments, levels of structural hierarchy are defined.
 468
 469 \item
 470 At level 1, each fragment is checked for agreement with the corresponding fragment in the native structure. Comparison is carried out at two levels: the secondary structure agreement and the contact-pattern agreement level.
 471
 472 At the secondary structure level the secondary structure (helix, strand or undefined) in the fragment is compared with that in the native fragment in a residue-wise manner. Score 0 is assigned if the structure is different in more than 1/3 of the fragment, 1 is assigned otherwise.
 473
 474 The contact-pattern agreement level compares the contacts between the peptide groups of the backbone of the fragment and the native fragment and also compares their virtual-bond dihedral angles gamma. It is allowed to shift the sequence by up to 3 residues to obtain contact pattern match. A score of 0 is assigned if more than 1/3 of native contacts do not occur or there is more than 60 deg (usually, but this cutoff can be changed) maximum difference in gamma. Otherwise score 1 is assigned.
 475
 476 The total score of a fragment is an octal number consisting of bits hereafter referred to S (secondary structure) C (contact match) and H (sHift) (they are in the order HCS). Their values are as follows:
 477
 478 \begin{description}
 479 \item{S} -- 1 native secondary structure; 0 otherwise,
 480 \item{C} -- 1 native contact pattern; 0 otherwise,
 481 \item{H} -- 1 contact match obtained without sequence shift 0 otherwise.
 482 \end{description}
 483
 484 For example,
 485 octal 7 (111) corresponds to native secondary structure, native contact pattern, and no need to shift the sequence for contact match;
 486 octal 1 (001) corresponds to native secondary structure only (i.e., nonnative contact pattern).
 487
 488 \item
 489 At level 2, contacts between (i) the peptide groups or (ii) the side chains within pairs of fragments are compared. Case (i) holds when we seek contacts between the strands of a larger beta-sheet formed by two fragments, case (ii) when we seek the interhelix or helix-beta sheet contacts. Additionally, the pairs of fragments are compared with their native counterparts by rmsd.
 490
 491 Score 0 is assigned to a pair of fragments, if it has less than 2/3 native contacts and too large rmsd (a cut-off of 0.1 A/residue is set), score 1 if it has enough native contacts and sufficiently low rmsd, but the sequence has to be shifted to obtain a match, and score 2, if sufficient match is obtained without shift.
 492
 493 \item
 494 At level 3 and higher, triads, quadruplets,..., etc. of fragments are compared in terms of rmsd from their native counterparts (the last level corresponds to comparing whole molecules). The score (0, 1, or 2) is assigned to each composite fragment as in the case of level 2.
 495
 496 \item
 497 The TOTAL class number of a structure is a binary number composed of parts of scores of fragments, fragment pairs, etc. It is illustrated on the following example; it is assumed that the molecule has three fragment as in the case of 1igd.
 498
 499 \end{enumerate}
 500
 501 \begin{verbatim}
 502 level 1      level 2                   level 3
 503 123 123 123||1-2 1-3 2-3 1-2 1-3 2-3 || 1-2-3 | 1-2-3 ||
 504 sss|ccc|hhh|| c   c   c | h   h   h  ||   r   |   h   ||
 505 \end{verbatim}
 506
 507 Bits s, c, and h of level 1 are explained in point 2; bits c and h of level 2 pertain to contact-pattern match and shift; bits r and h of level 3 pertain to rmsd match and shift for level 3.
 508
 509 The input is specified as follows:
 510
 511 1st record (keyword-based input):
 512
 513 \begin{description}
 514
 515 \item{VERBOSE} -- if present, detailed output in classification (use if you want to fill up the disk).
 516
 517 \item{PDBREF} -- if present, the reference structure is read from the pdb.
 518
 519 \item{BINARY} -- if present, the class will be output in octal/quaternary/binary format for levels 1, 2, and 3, respectively.
 520
 521 \item{DONT\_MERGE\_HELICES} -- if present, the pieces of helices that contain only small breaks of hydrogen-bonding contacts (e.g., a kink) are not merged in a larger helix.
 522
 523 \item{NLEVEL=n} -- number of classification levels.
 524
 525 \item{n$>$0} -- the fragments for n levels will be defined manually.
 526
 527 \item{n$<$0} -- the number of levels is -n and the fragments will be detected automatically.
 528
 529 \item{START=n} -- the number of conformation at which to start.
 530
 531 \item{END=n} -- the number of conformation at which to end.
 532
 533 \item{FREQ=n} (1) - sampling frequency of conformations; e.g. FREQ=2 means that every second conformation will be considered.
 534
 535 \item{CUTOFF\_UP=x} - upper boundary of rmsd cutoff (the value is per 50 residues).
 536
 537 \item{CUTOFF\_LOW=x} -- lower boundary of rmsd cutoff (per 50 residues).
 538
 539 \item{RMSUP\_LIM=x} -- lower absolute boundary of rmsd cutoff (regardless of fragment length).
 540
 541 \item{RMSUPUP\_LIM=x} -- upper absolute boundary of rmsd cutoff (regardless of fragment length).
 542
 543 \item{FRAC\_SEC=x} (0.66666) the fraction of native secondary structure to consider a fragment native in secondary structure.
 544
 545 \end{description}
 546
 547 2nd record:
 548
 549 For nlevel$<$0 (automatic fragment assignment):
 550
 551 \begin{description}
 552
 553 \item{SPLIT\_BET=n} (0) : if 1, the hairpins are split into strands and strands are considered elementary fragment.
 554
 555 \item{ANGCUT\_HEL=x} (50): cutoff on gamma angle differences from the native for a helical fragment.
 556
 557 MAXANG\_HEL=x (60) : as above but maximum cutoff
 558
 559 \item{ANGCUT\_BET=x} (90), MAXANG\_BET=x (360), ANGCUT\_STRAND=x (90), MAXANG\_STRAND=x (360) -- same but for a hairpin or sheet fragment.
 560
 561 \item{FRAC\_MIN=x} (0.6666) -- minimum fraction of native secondary structure.
 562
 563 \item{NC\_FRAC\_HEL=x (0.5)} -- fraction of native contacts for a helical fragment.
 564
 565 \item{NC\_REQ\_HEL=x} (0) -- minimum required number of contacts.
 566
 567 \item{NC\_FRAC\_BET=x} (0.5), NC\_REQ\_BET=x (0) -- same for beta sheet fragments.
 568
 569 \item{NC\_FRAC\_PAIR=x} (0.3), NC\_REQ\_PAIR=x (0) : same for pairs of segments.
 570
 571 \item{NSHIFT\_HEL=n} (3), NSHIFT\_BET=n (3), NSHIFT\_STRAND=n (3), NSHIFT\_PAIR=n (3) -- allowed sequence shift to match native and compared structure for the respective types of secondary structure.
 572
 573 \item{RMS\_SINGLE=n} (0), CONT\_SINGLE=n (1), LOCAL\_SINGLE=n (1), RMS\_PAIR=n (0).
 574
 575 \item{CONT\_PAIR=n} (1) -- types of criteria in considering the geometry of a fragment or pair native; 1 means that the criterion is turned on.
 576
 577 \end{description}
 578
 579 For nlevel$>$0 (manual assignment):
 580
 581 Level 1:
 582
 583 1st line:
 584
 585 \begin{description}
 586
 587 \item{NFRAG=n} -- number of elementary fragments.
 588
 589 \end{description}
 590
 591 Next lines (one group of lines per each fragment):
 592
 593 1st line:
 594
 595 \begin{description}
 596
 597 \item{NPIECE=n} -- number of segments constituting the fragment.
 598
 599 \item{ANGCUT}, MAXANG, FRAC\_MIN, NC\_FRAC, NC\_REQ -- criterial numbers of native-likeness as for automatic classification.
 600
 601 \item{LOCAL}, ELCONT, SCCONT, RMS : types of criteria implemented, as for automatic classification except that ELECONT and SCCONT mean that electrostatic or side-chain contacts are considered, respectively.
 602
 603 \end{description}
 604
 605 NPIECE following lines:
 606
 607 IFRAG1=n, IFRAG2=n -- the start and end residue of a continuous segment constituting a fragment.
 608
 609 Level 2 and higher:
 610
 611 1st line:
 612
 613 \begin{description}
 614
 615 \item{NFRAG=n} -- number of fragments considered at this level.
 616
 617 \end{description}
 618
 619 For each fragment the following line is read:
 620
 621 \begin{description}
 622
 623 \item{NPIECE=n} -- number of elementary fragments (as defined at level 1) constituting this composite fragment.
 624
 625 \item{IPIECE=i1 i2 ... in} -- the numbers of these fragments.
 626
 627 \item{NC\_FRAC}, NC\_REQ : contact criteria (valid only for level 2).
 628
 629 \item{ELCONT}, SCCONT, RMS : as for level 1; note, that for level 3 and higher the only criterion of nativelikeness is rms.
 630
 631 \end{description}
 632
 633 3rd (for nlevel$<$0) or following (for n$>$0) line:
 634
 635 Name of the file with reference structure (e.g., the pdb file with the experimental structure)
 636
 637 \subsection{The structure of the main output file (out)}
 638 \label{sect:inoutfiles:output:main}
 639
 640 The initial portion of the main output file, named INPUT.out\_POT\_000 contains information of parameter files specified in the C-shell script, compilation info, and the UNRES numeric code of the amino-acid sequence.
 641 Subsequently, actual energy-term weights and parameter files are printed. If lprint was set at .true. in parmread.F, all energy-function parameters are printed. If REFSTR was specified in the control-data list, the program then outputs the read reference-structure coordinates and partition of structure into fragments.
 642 Subsequently, the information about the number of structures read in and those that were rejected is printed followed by succinct information form the iteration process. Finally, the histograms (also output separately to specific histogram files; see section 6.6) and the data of the dependence of free energy, energy, heat capacity, and conformational averages on temperature are printed (these are also output separately to file described in section \ref{sect:inoutfiles:histograms}).
 643
 644 The output files corresponding to non-master processors (INPUT.out\_POT\_xxx where xxx$>$0 contain only the information up to the iteration protocol. These files can be deleted right after the run.
 645
 646 \subsection{The thermodynamic quantity and ensemble average (thermal) files}
 647 \label{sect:inoutfiles:outpput:thermo}
 648
 649 The files INPUT.thermal or INPUT\_slice\_yy.thermal contain thermodynamic, ensemble-averaged conformation-dependent quantities and their temperature derivatives. The structure of a record is as follows:
 650
 651 \begin{tabular}{p{2cm}p{2cm}p{2cm}p{2cm}p{2cm}p{2cm}p{2cm}}
 652    T&           F&             E&      $q_1...q_n$&  rmsd&    Rgy&     Cv\\
 653   298.0&     -83.91454&    -305.28112&  0.30647&  6.28347& 11.61204&0.70886E+01\\
 654 \end{tabular}
 655
 656 \begin{tabular}{p{2.5cm}p{2.5cm}p{2.5cm}p{2.5cm}p{2.5cm}p{2.5cm}}
 657        $var(q_1) ...$         &  var(rmsd)&    var(Rgy)&  $cov(q_1,E) ...$           & cov(rmsd,E)& cov(Rgy,E)\\
 658                     $var(q_n)$&           &            &                 $cov(q_n,E)$&            &           \\
 659     0.35393E-02&    0.51539E+01&    0.57012E+00&    0.43802E+00& 0.62384E+01&    0.33912E+01\\
 660 \end{tabular}
 661
 662 where:
 663
 664 \begin{description}
 665
 666 \item{T} -- absolute temperature (in K),
 667
 668 \item{F} -- free energy at T,
 669
 670 \item{E} -- average energy at T,
 671
 672 \item{$q_1..q_n$}: ensemble-averaged q values at T (usually only the total q corresponding to whole molecule is requested, as in the example above, but the user can specify more than one fragment or pair of fragments for which the q's are calculated, If there is no reference structure, this entry contains a 0,
 673
 674 \item{rmsd} -- ensemble-averaged root mean square deviation at T,
 675
 676 \item{Rgy} -- ensemble-averaged radius of gyration computed from Calpha coordinates at T,
 677
 678 \item{$C_v$} -- heat capacity at T,
 679
 680 \item{$var(q_1)...var(q_n)$} -- variances of q's at T,
 681
 682 \item{var(rmsd)} -- variance of rmsd at T,
 683
 684 \item{var(Rgy)} -- variance of radius of gyration at T,
 685
 686 \item{$cov(q_1,E)...cov(q_n,E)$} -- covariances of q's and energy at T,
 687
 688 \item{cov(rmsd,E)} -- covariance of rmsd and energy at T,
 689
 690 \item{cov(Rgy,E)} -- covariance of radius of gyration and energy at T.
 691
 692 \end{description}
 693
 694 According to Camacho and Thirumalali (Europhys. Lett., 35, 627, 1996), the maximum of the variance of the radius of gyration corresponds to the collapse point of a polypeptide chain and the maximum variance of q or rmsd corresponds to the midpoint of the transition to the native structure. More precisely, these points are inflection points in the plots of the respective quantities which, with temperature-independent force field, are proportional to their covariances with energy.
 695
 696 \subsection{The conformation summary with classification (stat) files}
 697 \label{sect:inoutfiles:class}
 698
 699 The stat files (with names INPUT\_POT\_xxx.stat or INPUT\_POT\_sliceyyxxx.stat; where yy is the number of a slice and xxx is the rank of a processor) contain the output of the classification of subsequent conformations (equally partitioned between processors). The files can be concatenated by processor rank to get a summary file. Each line has the following structure (example values are also provided):
 700
 701
 702 \begin{tabular}{|c|cccc|}\hline
 703 &&\multicolumn{3}{c|}{whole molecule}\\
 704 \cline{2-5}
 705 No&energy&rmsd&q&ang\\ \hline
 706  9999&  -122.42&  4.285&0.3751& 47.8\\ \hline
 707 \end{tabular}
 708
 709 \begin{tabular}{|cccccccccccc|c|}\hline
 710 \multicolumn{13}{|c|}{level 1}\\ \hline
 711 \multicolumn{6}{|c}{frag 1}&\multicolumn{3}{c}{frag 2}&\multicolumn{3}{c|}{frag 3}&class 1\\ \cline{1-12}
 712 n1&n2&n3&rmsd&q&ang&rmsd&q&ang&rmsd&q&ang&\\ \hline
 713  4&10&21 & 0.6&0.33& 16.7& 3.6&0.42& 56.3& 0.7&0.12& 16.5&737 \\ \hline
 714 \end{tabular}
 715
 716 \begin{tabular}{|cccccc|c|cc|c|c|} \hline
 717 \multicolumn{7}{|c|}{level 2}&\multicolumn{3}{c|}{level 3}&\\
 718 \cline{1-10}
 719 nc1&nc2&rmsd&q&rmsd&q&class 2&rmsd&q&class 3&class\\ \hline
 720 9& 0&  1.6&0.20& 4.3&0.20&20& 0&  4.0&2&737.20.2\\ \hline
 721 \end{tabular}
 722
 723 %                                      |                           level 1                           |       level 2                |    level3 |
 724 %                                      |                                                             |                              |           |
 725 %                      whole mol       |            frag1           frag2            frag3       cl1 |                              |           |
 726 %No      energy    rmsd  q      ang dif|n1n2 n3   rms  q    ang   rms  q    ang   rms  q    ang      | nc1nc2 rms q     rms q    cl2|    rms cl3|class
 727 % 9999   -122.42   4.285 0.3751  47.8  |4 10 21   0.6 0.33  16.7  3.6 0.42  56.3  0.7 0.12  16.5 737 | 9  0   1.6 0.20  4.3 0.20 20 | 0   4.0 2 |737.20.2
 728
 729 where
 730
 731 \begin{description}
 732
 733 \item{No} -- the number of the conformation.
 734
 735 \item{``whole molecule''} denotes the characteristics of the whole molecule q = 1-Wolynes'q.
 736
 737 \item{level 1, 2, and 3} denote the characteristics computed for the respective fragments as these levels.
 738
 739 \item{n1, n2, n3} -- number of native contacts for a given segment.
 740
 741 \item{cl1, cl2, cl3} -- group of segment classes for segments at level 1, 2, and 3, respectively.
 742
 743 \item{class} -- total class of the conformation.
 744
 745 \end{description}
 746
 747 The octal/quaternary/binary numbers denoting the class for a fragment at level 1, 2, and 3, respectively, are described in ref. \cite{oldziej_2004}.
 748
 749 \subsection{The histogram files}
 750 \label{sect:inoutfiles:histograms}
 751
 752 The histogram file with names INPUT\_[par\_yy][\_slice\_xx].hist where xx denotes the number of the slice and yy denotes the number of the parameter if SEPARATE\_PARSET was specified in input contain histograms of q at replica temperatures and energy-parameter sets; with SEPARATE\_PARSET histograms corresponding to subsequent parameter sets are saved in files with par\_yy infixes. The histograms are multidimensional if q is a vector (usually, however, q corresponds to the entire molecule and, consequently, the histograms are one-dimensional). The histogram files are printed if histfile and histout was specified in the control data record.
 753
 754 Each line of a histogram file corresponds to a given (multidimensional) bin in q contains the following:
 755
 756
 757 \begin{itemize}
 758
 759 \item
 760 $q_1,...,q_n$ at a given bin (format f6.3 for each)
 761
 762 \item
 763 histogram values for subsequent replica temperatures (format e20.10 for each)
 764
 765 \item
 766 iparm (the number of parameter set; format i5)
 767
 768 \item
 769 If SEPARATE\_PARSET was not specified, the entries corresponding to each parameter follow one another.
 770
 771 \end{itemize}
 772
 773 The state density is printed to file(s) INPUT[\_slice\_xx].ent. Each line contains the left boundary of the energy bin and ln(state density) followed by ``ent'' string. At present, the state density is calculated correctly only if one energy-parameter set is used.</p>
 774
 775 \subsection{The rmsd-radius of gyration potential of mean force files}
 776 \label{sect:inoutfiles:rmsd-rgy}
 777
 778 These files with names INPUT[\_par\_yy][\_slice\_xx].rmsrgy contain the two-dimensional potentials of mean force in rmsd and radius of gyration at all replica-exchange temperatures and for all energy-parameter sets.
 779 A line contains the left boundaries of the radius of gyration -- rmsd bin (radius of gyration first) (format 2f8.2) and the PMF values at all replica-exchange temperatures (e14.5), followed by the number of the parameter set.
 780 With SEPARATE\_PARSET, the PMFs corresponding to different parameter sets are printed to separate files.
 781
 782 \subsection{The PDB files}
 783 \label{sect:inoutfiles:PDB}
 784
 785 The PDB files with names INPUT\_[slice\_xx\_]Tyyy.pdb, where Tyyy specifies a given replica temperature contain the conformations whose probabilities at replica temperature T sum to 0.99, after sorting the conformations
 786 by probabilities in descending order. The PDB files follow the standard format; see \href{ftp://ftp.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf}{\textcolor{blue}{ftp://ftp.wwpdb.org/pub/pdb/doc/format\_descriptions}}.
 787 %/Format_v33_Letter.pdf">ftp://ftp.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf</a>.
 788 For single-chain proteins, an example is as follows:
 789
 790 \begin{verbatim}
 791 REMARK CONF    9059 TEMPERATURE  330.0 RMS   8.86
 792 REMARK DIMENSIONLESS FREE ENERGY   -1.12726E+02
 793 REMARK ENERGY   -2.22574E+01 ENTROPY   -7.87818E+01
 794 ATOM      1  CA  VAL     1       8.480   5.714 -34.044
 795 ATOM      2  CB  VAL     1       9.803   5.201 -33.968
 796 ATOM      3  CA  ASP     2       8.284   2.028 -34.925
 797 ATOM      4  CB  ASP     2       7.460   0.983 -33.832
 798 .
 799 .
 800 .
 801 ATOM    115  CA  LYS    58      28.446  -3.448 -12.936
 802 ATOM    116  CB  LYS    58      26.613  -4.175 -14.514
 803 TER
 804 CONECT    1    3    2
 805 .
 806 .
 807 .
 808 CONECT  113  115  114
 809 CONECT  115  116
 810 \end{verbatim}
 811
 812 where
 813
 814 \begin{description}
 815
 816 \item{CONF} is the number of the conformation from the processed slice of MREMD trajectories.
 817
 818 \item{TEMPERATURE} is the replica temperature.
 819
 820 \item{RMS} is the Calpha rmsd from the reference (experimental) structure.
 821
 822 \item{DIMENSIONLESS FREE ENERGY} is -log(probability) (equation 14 of ref 2) for the conformation at this replica temperature calculated by WHAM.
 823
 824 \item{ENERGY} is the UNRES energy of the conformation at the replica temperature (note that UNRES energy is in general temperature dependent).
 825
 826 \item{ENTROPY} is the omega of equation 15 of ref 2 of the conformation.
 827
 828 \end{description}
 829
 830 In the ATOM entries, CA denotes a Calpha atom and CB denotes UNRES side-chain atom. The CONECT entries specify the C$^\alpha_i\cdots$C$^\alpha_{i-1}$, C$^\alpha_i\cdots$C$^\alpha_{i+1}$ and C$^\alpha_i\cdots$SC$_i$ links.
 831
 832 The PDB files generated for oligomeric proteins are similar except that chains are separated with TER and molecules with ENDMDL records and chain identifiers are included. An example is as follows:
 833
 834 \begin{verbatim}
 835 REMARK CONF     765 TEMPERATURE  301.0 RMS  11.89
 836 REMARK DIMENSIONLESS FREE ENERGY   -4.48514E+02
 837 REMARK ENERGY   -3.58633E+02 ENTROPY    1.51120E+02
 838 ATOM      1  CA  GLY A   1      -0.736  11.305  24.600
 839 ATOM      2  CA  TYR A   2      -3.184   9.928  21.998
 840 ATOM      3  CB  TYR A   2      -1.474  10.815  20.433
 841 .
 842 .
 843 .
 844 ATOM     40  CB  MET A  21      -4.033  -2.913  27.189
 845 ATOM     41  CA  GLY A  22      -5.795 -10.240  27.249
 846 TER
 847 ATOM     42  CA  GLY B   1       6.750  -6.905  19.263
 848 ATOM     43  CA  TYR B   2       5.667  -4.681  16.362
 849 .
 850 .
 851 .
 852 ATOM    163  CB  MET D  21       4.439  12.326  -4.950
 853 ATOM    164  CA  GLY D  22      10.096  14.370  -9.301
 854 TER
 855 CONECT    1    2
 856 CONECT    2    4    3
 857 .
 858 .
 859 .
 860 CONECT   39   41   40
 861 CONECT   42   43
 862 .
 863 .
 864 .
 865 CONECT  162  164  163
 866 ENDMDL
 867 \end{verbatim}
 868
 869 \subsection{The compressed Cartesian coordinates (cx) files}
 870 \label{sect:inoutfiles:cx}
 871
 872 These files contain compressed data in the Europort Data Compression XDRF library format written by Dr. F. van Hoesel, Groeningen University (\href{http://hpcv100.rc.rug.nl/xdrfman.html}{http://hpcv100.rc.rug.nl/xdrfman.html}.
 873 The files are written by the cxwrite subroutine. The resulting cx file contains the omega factors to compute probabilities of conformations at any temperature and any energy-function parameters if Hamiltonian replica
 874 exchange was performed in the preceding UNRES run. The files have general names INPUT[\_par\_yy][\_slice\_xx].cx where xx is slice number and yy is parameter-set.
 875
 876 The items written to the cx file are as follows (the precision is 5 significant digits):
 877
 878 \begin{enumerate}
 879 \item
 880 Cartesian coordinates of Calpha and SC sites</p>
 881 \item
 882 nss (number of disulfide bonds)
 883 \item
 884 if nss$>$0:
 885 \begin{enumerate}
 886 \item
 887 ihpb (first residue of a disulfide link)
 888 \item
 889 jhpb (second residue of a disulfide link)
 890 \item
 891 UNRES energy at that replica temperature that the conformation was at snapshot-recording time,
 892 \item
 893 ln(omega) of eq 15 of ref \cite{liwo_2007},
 894 \end{enumerate}
 895 \item
 896 C$^\alpha$ rmsd
 897 \item
 898 conformation class number (0 if CLASSIFY was not specified).
 899 \end{enumerate}
 900
 901 \newpage
 902
 903 \section{SUPPORT}
 904 \label{sect:support}
 905
 906         Dr. Adam Liwo\\
 907         Faculty of Chemistry, University of Gdansk\\
 908         ul. Sobieskiego 18, 80-952 Gdansk Poland.\\
 909         phone: +48 58 523 5430\\
 910         fax: +48 58 523 5472\\
 911         e-mail: \href{mailto:adam@chem.univ.gda.pl}{\textcolor{blue}{adam@chem.univ.gda.pl}}\\
 912
 913         Dr. Cezary Czaplewski\\
 914         Faculty of Chemistry, University of Gdansk\\
 915         ul. Sobieskiego 18, 80-952 Gdansk Poland.\\
 916         phone: +48 58 523 5430\\
 917         fax: +48 58 523 5472\\
 918         e-mail: \href{mailto:czarek@chem.univ.gda.pl}{\textcolor{blue}{czarek@chem.univ.gda.pl}}\\
 919 \\
 920
 921         Adam Sieradzan\\
 922         Faculty of Chemistry, University of Gdansk\\
 923         ul. Sobieskiego 18, 80-952 Gdansk Poland.\\
 924         phone: +48 58 523 5430\\
 925         fax: +48 58 523 5472\\
 926         e-mail: \href{mailto:adasko@chem.univ.gda.pl}{\textcolor{blue}{adasko@chem.univ.gda.pl}}\\
 927
 928 Prepared by Adam Liwo, 02/19/12.
 929
 930 \LaTeX version, 09/27/12.
 931
 932 \end{document}