************************************************************************* * This program is to compare two protein structures and identify the * best superposition that has the highest TM-score. Input structures * must be in the PDB format. By default, TM-score is normalized by * the second protein. Users can obtain a brief instruction by simply * running the program without arguments. For comments/suggestions, * please contact email: zhng@umich.edu. * * Reference: * Yang Zhang, Jeffrey Skolnick, Proteins, 2004 57:702-10. * * Permission to use, copy, modify, and distribute this program for * any purpose, with or without fee, is hereby granted, provided that * the notices on the head, the reference information, and this * copyright notice appear in all copies or substantial portions of * the Software. It is provided "as is" without express or implied * warranty. ******************* Updating history ************************************ * 2005/10/19: the program was reformed so that the score values. * are not dependent on the specific compilers. * 2006/06/20: selected 'A' if there is altLoc when reading PDB file. * 2007/02/05: fixed a bug with length<15 in TMscore_32. * 2007/02/27: rotation matrix from Chain-1 to Chain-2 was added. * 2007/12/06: GDT-HA score was added, fixed a bug for reading PDB. * 2010/08/02: A new RMSD matrix was used and obsolete statement removed. * 2011/01/03: The length of pdb file names were extended to 500. * 2011/01/30: An open source license is attached to the program. * 2012/05/07: Improved RMSD calculation subroutine which speeds up * TM-score program by 30%. * 2012/06/05: Added option '-l L' which calculates TM-score (and maxsub * and GDT scores) normalized by a specific length 'L'. ************************************************************************* c program TMscore subroutine TMscore_sub(rmsd,gdt_ts,gdt_ha,tmscore,cfname,lprint) include 'DIMENSIONS' PARAMETER(nmax=5000) include 'COMMON.CONTROL' include 'COMMON.IOUNITS' include 'COMMON.CHAIN' include 'COMMON.INTERACT' real*8 rmsd,gdt_ts,gdt_ha,tmscore common/stru/xt(nmax),yt(nmax),zt(nmax),xb(nmax),yb(nmax),zb(nmax) common/nres/nresA(nmax),nresB(nmax),nseqA,nseqB common/para/d,d0,d0_fix common/align/n_ali,iA(nmax),iB(nmax) common/nscore/i_ali(nmax),n_cut ![1,n_ali],align residues for the score dimension k_ali(nmax),k_ali0(nmax) character*500 fnam,pdb(100)!,outname character*80 cfname character*3 aa(-1:20),seqA(nmax),seqB(nmax) character*500 s,du character seq1A(nmax),seq1B(nmax),ali(nmax) character sequenceA(nmax),sequenceB(nmax),sequenceM(nmax) dimension L_ini(100),iq(nmax) common/scores/score,score_maxsub,score_fix,score10 common/GDT/n_GDT05,n_GDT1,n_GDT2,n_GDT4,n_GDT8 double precision score,score_max,score_fix,score_fix_max double precision score_maxsub,score10 dimension xa(nmax),ya(nmax),za(nmax) ccc RMSD: double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) double precision u(3,3),tt(3),rms,drms !armsd is real data w /nmax*1.0/ logical lprint ccc data aa/ 'BCK','GLY','ALA','SER','CYS', & 'VAL','THR','ILE','PRO','MET', & 'ASP','ASN','LEU','LYS','GLU', & 'GLN','ARG','HIS','PHE','TYR', & 'TRP','CYX'/ character*1 slc(-1:20) data slc/'X','G','A','S','C', & 'V','T','I','P','M', & 'D','N','L','K','E', & 'Q','R','H','F','Y', & 'W','C'/ *****instructions -----------------> c call getarg(1,fnam) c if(fnam.eq.' '.or.fnam.eq.'?'.or.fnam.eq.'-h')then c write(*,*) c write(*,*)'Brief instruction for running TM-score program:' c write(*,*)'(For detail: Zhang & Skolnick, Proteins, 2004', c & ' 57:702-10)' c write(*,*) c write(*,*)'1. Run TM-score to compare ''model'' and ', c & '''native'':' c write(*,*)' >TMscore model native' c write(*,*) c write(*,*)'2. TM-score normalized with an assigned scale d0', c & ' e.g. 5 A:' c write(*,*)' >TMscore model native -d 5' c write(*,*) c write(*,*)'3. TM-score normalized by a specific length, ', c & 'e.g. 120 AA:' c write(*,*)' >TMscore model native -l 120' c write(*,*) c write(*,*)'4. TM-score with superposition output, e.g. ', c & '''TM.sup'':' c write(*,*)' >TMscore model native -o TM.sup' c write(*,*)' To view the superimposed structures by rasmol:' c write(*,*)' >rasmol -script TM.sup' c write(*,*) c goto 9999 c endif pdb(1)=cfname pdb(2)=pdbfile ******* options -----------> m_out=-1 m_fix=-1 m_len=-1 c narg=iargc() c i=0 c j=0 c 115 continue c i=i+1 c call getarg(i,fnam) c if(fnam.eq.'-o')then c m_out=1 c i=i+1 c call getarg(i,outname) c elseif(fnam.eq.'-d')then c m_fix=1 c i=i+1 c call getarg(i,fnam) c read(fnam,*)d0_fix c elseif(fnam.eq.'-l')then c m_len=1 c i=i+1 c call getarg(i,fnam) c read(fnam,*)l0_fix c else c j=j+1 c pdb(j)=fnam c endif c if(i.lt.narg)goto 115 c ccccccccc read data from first CA file: c open(unit=10,file=pdb(1),status='old') c i=0 c 101 read(10,104,end=102) s c if(s(1:3).eq.'TER') goto 102 c if(s(1:4).eq.'ATOM')then c if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '.or.s(13:16). c & eq.' CA')then c if(s(17:17).eq.' '.or.s(17:17).eq.'A')then c i=i+1 c read(s,103)du,seqA(i),du,nresA(i),du,xa(i),ya(i),za(i) c do j=-1,20 c if(seqA(i).eq.aa(j))then c seq1A(i)=slc(j) c goto 21 c endif c enddo c seq1A(i)=slc(-1) c 21 continue c endif c endif c endif c goto 101 c 102 continue c 103 format(A17,A3,A2,i4,A4,3F8.3) c 104 format(A100) c close(10) c nseqA=i c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ c ccccccccc read data from first CA file: c open(unit=10,file=pdb(2),status='old') c i=0 c 201 read(10,204,end=202) s c if(s(1:3).eq.'TER') goto 202 c if(s(1:4).eq.'ATOM')then c if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '.or.s(13:16). c & eq.' CA')then c if(s(17:17).eq.' '.or.s(17:17).eq.'A')then c i=i+1 c read(s,203)du,seqB(i),du,nresB(i),du,xb(i),yb(i),zb(i) c do j=-1,20 c if(seqB(i).eq.aa(j))then c seq1B(i)=slc(j) c goto 22 c endif c enddo c seq1B(i)=slc(-1) c 22 continue c endif c endif c endif c goto 201 c 202 continue c 203 format(A17,A3,A2,i4,A4,3F8.3) c 204 format(A100) c close(10) c nseqB=i c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ****************************************************************** * pickup the aligned residues: ****************************************************************** c k=0 c do i=1,nseqA c do j=1,nseqB c if(nresA(i).eq.nresB(j))then c k=k+1 c iA(k)=i c iB(k)=j c goto 205 c endif c enddo c 205 continue c enddo c n_ali=k !number of aligned residues c if(n_ali.lt.1)then c write(*,*)'There is no common residues in the input structures' c goto 9999 c endif c ************///// * parameters: ***************** noverlap=nres if (nres.gt.nsup+nnt-1) noverlap=nsup+nnt-1 nnsup=0 do i=1,noverlap if (itype(i).ne.ntyp1) then nnsup=nnsup+1 iA(nnsup)=nnsup iB(nnsup)=nnsup endif enddo nseqA=nnsup nseqB=nnsup n_ali=nnsup *** d0-------------> if(nseqB.gt.15)then d0=1.24*(nseqB-15)**(1.0/3.0)-1.8 else d0=0.5 endif if(m_len.eq.1)then d0=1.24*(l0_fix-15)**(1.0/3.0)-1.8 endif if(d0.lt.0.5)d0=0.5 if(m_fix.eq.1)d0=d0_fix *** d0_search -----> d0_search=d0 if(d0_search.gt.8)d0_search=8 if(d0_search.lt.4.5)d0_search=4.5 *** iterative parameters -----> n_it=20 !maximum number of iterations d_output=5 !for output alignment if(m_fix.eq.1)d_output=d0_fix n_init_max=6 !maximum number of L_init n_init=0 L_ini_min=4 if(n_ali.lt.4)L_ini_min=n_ali do i=1,n_init_max-1 n_init=n_init+1 L_ini(n_init)=n_ali/2**(n_init-1) if(L_ini(n_init).le.L_ini_min)then L_ini(n_init)=L_ini_min goto 402 endif enddo n_init=n_init+1 L_ini(n_init)=L_ini_min 402 continue ****************************************************************** * find the maximum score starting from local structures superposition ****************************************************************** score_max=-1 !TM-score score_maxsub_max=-1 !MaxSub-score score10_max=-1 !TM-score10 n_GDT05_max=-1 !number of residues<0.5 n_GDT1_max=-1 !number of residues<1 n_GDT2_max=-1 !number of residues<2 n_GDT4_max=-1 !number of residues<4 n_GDT8_max=-1 !number of residues<8 #ifdef DEBUG write (iout,*) "cref and ccref" #endif noverlap=nres if (nres.gt.nsup+nnt-1) noverlap=nsup+nnt-1 nnsup=0 do i=1,noverlap if (itype(i).ne.ntyp1) then nnsup=nnsup+1 xa(nnsup)=c(1,i) ya(nnsup)=c(2,i) za(nnsup)=c(3,i) xb(nnsup)=cref_pdb(1,i,1) yb(nnsup)=cref_pdb(2,i,1) zb(nnsup)=cref_pdb(3,i,1) c do j=1,3 c cc(j,nnsup)=c(j,i) c ccref(j,nnsup)=cref_pdb(j,i,1) c enddo #ifdef DEBUG write (iout,'(i5,3f10.5,5x,3f10.5)') nnsup, & xa(nnsup),ya(nnsup),za(nnsup),xb(nnsup),yb(nnsup),zb(nnsup) #endif endif enddo do 333 i_init=1,n_init L_init=L_ini(i_init) iL_max=n_ali-L_init+1 do 300 iL=1,iL_max !on aligned residues, [1,nseqA] LL=0 ka=0 do i=1,L_init k=iL+i-1 ![1,n_ali] common aligned r_1(1,i)=xa(iA(k)) r_1(2,i)=ya(iA(k)) r_1(3,i)=za(iA(k)) r_2(1,i)=xb(iB(k)) r_2(2,i)=yb(iB(k)) r_2(3,i)=zb(iB(k)) ka=ka+1 k_ali(ka)=k LL=LL+1 enddo if(i_init.eq.1)then !global superposition call u3b(w,r_1,r_2,LL,2,rms,u,tt,ier) !0:rmsd; 1:u,t; 2:rmsd,u,t armsd=dsqrt(rms/LL) rmsd_ali=armsd else call u3b(w,r_1,r_2,LL,1,rms,u,tt,ier) !u rotate r_1 to r_2 endif do j=1,nseqA xt(j)=tt(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=tt(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=tt(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo d=d0_search-1 call score_fun !init, get scores, n_cut+i_ali(i) for iteration if(score_max.lt.score)then score_max=score ka0=ka do i=1,ka0 k_ali0(i)=k_ali(i) enddo endif if(score10_max.lt.score10)score10_max=score10 if(score_maxsub_max.lt.score_maxsub)score_maxsub_max= & score_maxsub if(n_GDT05_max.lt.n_GDT05)n_GDT05_max=n_GDT05 if(n_GDT1_max.lt.n_GDT1)n_GDT1_max=n_GDT1 if(n_GDT2_max.lt.n_GDT2)n_GDT2_max=n_GDT2 if(n_GDT4_max.lt.n_GDT4)n_GDT4_max=n_GDT4 if(n_GDT8_max.lt.n_GDT8)n_GDT8_max=n_GDT8 *** iteration for extending ----------------------------------> d=d0_search+1 do 301 it=1,n_it LL=0 ka=0 do i=1,n_cut m=i_ali(i) ![1,n_ali] r_1(1,i)=xa(iA(m)) r_1(2,i)=ya(iA(m)) r_1(3,i)=za(iA(m)) r_2(1,i)=xb(iB(m)) r_2(2,i)=yb(iB(m)) r_2(3,i)=zb(iB(m)) ka=ka+1 k_ali(ka)=m LL=LL+1 enddo call u3b(w,r_1,r_2,LL,1,rms,u,tt,ier) !u rotate r_1 to r_2 do j=1,nseqA xt(j)=tt(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=tt(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=tt(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo call score_fun !get scores, n_cut+i_ali(i) for iteration if(score_max.lt.score)then score_max=score ka0=ka do i=1,ka k_ali0(i)=k_ali(i) enddo endif if(score10_max.lt.score10)score10_max=score10 if(score_maxsub_max.lt.score_maxsub)score_maxsub_max & =score_maxsub if(n_GDT05_max.lt.n_GDT05)n_GDT05_max=n_GDT05 if(n_GDT1_max.lt.n_GDT1)n_GDT1_max=n_GDT1 if(n_GDT2_max.lt.n_GDT2)n_GDT2_max=n_GDT2 if(n_GDT4_max.lt.n_GDT4)n_GDT4_max=n_GDT4 if(n_GDT8_max.lt.n_GDT8)n_GDT8_max=n_GDT8 if(it.eq.n_it)goto 302 if(n_cut.eq.ka)then neq=0 do i=1,n_cut if(i_ali(i).eq.k_ali(i))neq=neq+1 enddo if(n_cut.eq.neq)goto 302 endif 301 continue !for iteration 302 continue 300 continue !for shift 333 continue !for initial length, L_ali/M ratio=1 if(m_len.gt.0)then ratio=float(nseqB)/float(l0_fix) endif if(m_len.eq.1)then score_max=score_max*float(nseqB)/float(l0_fix) endif score_GDT=(n_GDT1_max+n_GDT2_max+n_GDT4_max+n_GDT8_max) & /float(4*nseqB) score_GDT_HA=(n_GDT05_max+n_GDT1_max+n_GDT2_max+n_GDT4_max) & /float(4*nseqB) tmscore=score_max gdt_ts=score_GDT*ratio gdt_ha=score_GDT_HA*ratio rmsd=rmsd_ali ****************************************************************** * Output ****************************************************************** *** output TM-scale ----------------------------> if (lprint) then write(iout,*) write(iout,*)'**************************************************', & '***************************' write(iout,*)'* TM-SCORE ', & ' *' write(iout,*)'* A scoring function to assess the similarity of p', & 'rotein structures *' write(iout,*)'* Based on statistics: ', & ' *' write(iout,*)'* 0.0 < TM-score < 0.17, random structural s', & 'imilarity *' write(iout,*)'* 0.5 < TM-score < 1.00, in about the same f', & 'old *' write(iout,*)'* Reference: Yang Zhang and Jeffrey Skolnick, ', & 'Proteins 2004 57: 702-710 *' write(iout,*)'* For comments, please email to: zhng@umich.edu ', & ' *' write(iout,*)'**************************************************', & '***************************' write(iout,*) write(iout,501)pdb(1),nseqA 501 format('Structure1: ',A10,' Length= ',I4) if(m_len.eq.1)then write(iout,411)pdb(2),nseqB write(iout,412)l0_fix else write(iout,502)pdb(2),nseqB endif 411 format('Structure2: ',A10,' Length= ',I4) 412 format('TM-score is notmalized by ',I4) 502 format('Structure2: ',A10,' Length= ',I4, & ' (by which all scores are normalized)') write(iout,503)n_ali 503 format('Number of residues in common= ',I4) write(iout,513)rmsd_ali 513 format('RMSD of the common residues= ',F8.3) write(iout,*) write(iout,504)score_max,d0 504 format('TM-score = ',f6.4,' (d0=',f5.2,')') write(iout,505)score_maxsub_max*ratio 505 format('MaxSub-score= ',f6.4,' (d0= 3.50)') write(iout,506)score_GDT*ratio,n_GDT1_max/float(nseqB)*ratio, & n_GDT2_max/float(nseqB)*ratio,n_GDT4_max/float(nseqB)*ratio, & n_GDT8_max/float(nseqB)*ratio 506 format('GDT-TS-score= ',f6.4,' %(d<1)=',f6.4,' %(d<2)=',f6.4, $ ' %(d<4)=',f6.4,' %(d<8)=',f6.4) write(iout,507)score_GDT_HA*ratio,n_GDT05_max/float(nseqB)*ratio, & n_GDT1_max/float(nseqB)*ratio,n_GDT2_max/float(nseqB)*ratio, & n_GDT4_max/float(nseqB)*ratio 507 format('GDT-HA-score= ',f6.4,' %(d<0.5)=',f6.4,' %(d<1)=',f6.4, $ ' %(d<2)=',f6.4,' %(d<4)=',f6.4) write(iout,*) endif return end c------------------------------------------------------------------------ *** recall and output the superposition of maxiumum TM-score: c LL=0 c do i=1,ka0 c m=k_ali0(i) !record of the best alignment c r_1(1,i)=xa(iA(m)) c r_1(2,i)=ya(iA(m)) c r_1(3,i)=za(iA(m)) c r_2(1,i)=xb(iB(m)) c r_2(2,i)=yb(iB(m)) c r_2(3,i)=zb(iB(m)) c LL=LL+1 c enddo c call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 c do j=1,nseqA c xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) c yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) c zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) c enddo c c********* extract rotation matrix ------------> c write(*,*)'-------- rotation matrix to rotate Chain-1 to ', c & 'Chain-2 ------' c write(*,*)'i t(i) u(i,1) u(i,2) ', c & ' u(i,3)' c do i=1,3 c write(*,304)i,t(i),u(i,1),u(i,2),u(i,3) c enddo cc do j=1,nseqA cc xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) cc yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) cc zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) cc write(*,*)j,xt(j),yt(j),zt(j) cc enddo c write(*,*) c 304 format(I2,f18.10,f15.10,f15.10,f15.10) c c********* rmsd in superposed regions ---------------> c d=d_output !for output c call score_fun() !give i_ali(i), score_max=score now c LL=0 c do i=1,n_cut c m=i_ali(i) ![1,nseqA] c r_1(1,i)=xa(iA(m)) c r_1(2,i)=ya(iA(m)) c r_1(3,i)=za(iA(m)) c r_2(1,i)=xb(iB(m)) c r_2(2,i)=yb(iB(m)) c r_2(3,i)=zb(iB(m)) c LL=LL+1 c enddo c call u3b(w,r_1,r_2,LL,0,rms,u,t,ier) c armsd=dsqrt(rms/LL) c rmsd=armsd c c*** output rotated chain1 + chain2-----> c if(m_out.ne.1)goto 999 c OPEN(unit=7,file=outname,status='unknown') !pdb1.aln + pdb2.aln c 900 format(A) c 901 format('select ',I4) c write(7,900)'load inline' c write(7,900)'select atomno<1000' cc write(7,900)'color [255,20,147]' c write(7,900)'wireframe .45' c write(7,900)'select none' c write(7,900)'select atomno>1000' cc write(7,900)'color [100,149,237]' c write(7,900)'wireframe .15' c write(7,900)'color white' c do i=1,n_cut c write(7,901)nresA(iA(i_ali(i))) c write(7,900)'color red' c enddo c write(7,900)'select all' c write(7,900)'exit' c write(7,514)rmsd_ali c 514 format('REMARK RMSD of the common residues=',F8.3) c write(7,515)score_max,d0 c 515 format('REMARK TM-score=',f6.4,' (d0=',f5.2,')') c do i=1,nseqA c write(7,1237)nresA(i),seqA(i),nresA(i),xt(i),yt(i),zt(i) c enddo c write(7,1238) c do i=2,nseqA c write(7,1239)nresA(i-1),nresA(i) c enddo c do i=1,nseqB c write(7,1237)2000+nresB(i),seqB(i),nresB(i),xb(i),yb(i),zb(i) c enddo c write(7,1238) c do i=2,nseqB c write(7,1239)2000+nresB(i-1),2000+nresB(i) c enddo c 1237 format('ATOM ',i5,' CA ',A3,I6,4X,3F8.3) c 1238 format('TER') c 1239 format('CONECT',I5,I5) c 999 continue c c*** record aligned residues by i=[1,nseqA], for sequenceM()------------> c do i=1,nseqA c iq(i)=0 c enddo c do i=1,n_cut c j=iA(i_ali(i)) ![1,nseqA] c k=iB(i_ali(i)) ![1,nseqB] c dis=sqrt((xt(j)-xb(k))**2+(yt(j)-yb(k))**2+(zt(j)-zb(k))**2) c if(dis.lt.d_output)then c iq(j)=1 c endif c enddo c******************************************************************* c*** output aligned sequences c k=0 c i=1 c j=1 c 800 continue c if(i.gt.nseqA.and.j.gt.nseqB)goto 802 c if(i.gt.nseqA.and.j.le.nseqB)then c k=k+1 c sequenceA(k)='-' c sequenceB(k)=seq1B(j) c sequenceM(k)=' ' c j=j+1 c goto 800 c endif c if(i.le.nseqA.and.j.gt.nseqB)then c k=k+1 c sequenceA(k)=seq1A(i) c sequenceB(k)='-' c sequenceM(k)=' ' c i=i+1 c goto 800 c endif c if(nresA(i).eq.nresB(j))then c k=k+1 c sequenceA(k)=seq1A(i) c sequenceB(k)=seq1B(j) c if(iq(i).eq.1)then c sequenceM(k)=':' c else c sequenceM(k)=' ' c endif c i=i+1 c j=j+1 c goto 800 c elseif(nresA(i).lt.nresB(j))then c k=k+1 c sequenceA(k)=seq1A(i) c sequenceB(k)='-' c sequenceM(k)=' ' c i=i+1 c goto 800 c elseif(nresB(j).lt.nresA(i))then c k=k+1 c sequenceA(k)='-' c sequenceB(k)=seq1B(j) c sequenceM(k)=' ' c j=j+1 c goto 800 c endif c 802 continue c c write(*,600)d_output,n_cut,rmsd c 600 format('Superposition in the TM-score: Length(d<',f3.1, c $ ')=',i3,' RMSD=',f6.2) c write(*,603)d_output c 603 format('(":" denotes the residue pairs of distance < ',f3.1, c & ' Angstrom)') c write(*,601)(sequenceA(i),i=1,k) c write(*,601)(sequenceM(i),i=1,k) c write(*,601)(sequenceB(i),i=1,k) c write(*,602)(mod(i,10),i=1,k) c 601 format(2000A1) c 602 format(2000I1) c write(*,*) c c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ c 9999 END ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c 1, collect those residues with dis