5 from Bio import BiopythonWarning
6 warnings.simplefilter('ignore', BiopythonWarning)
12 from Bio.SVDSuperimposer import SVDSuperimposer
14 from argparse import ArgumentParser
18 def parse_fnat(fnat_out):
26 for line in fnat_out.split("\n"):
28 line=line.rstrip('\n')
29 match=re.search(r'NATIVE: (\d+)(\w) (\d+)(\w)',line)
30 if(re.search(r'^Fnat',line)):
33 nat_correct=int(list[1])
34 nat_total=int(list[2])
35 elif(re.search(r'^Fnonnat',line)):
37 fnonnat=float(list[3])
38 nonnat_count=int(list[1])
39 model_total=int(list[2])
46 # print res1 + ' ' + chain1 + ' ' + res2 + ' ' + chain2
47 inter.append(res1 + chain1)
48 inter.append(res2 + chain2)
49 return (fnat,nat_correct,nat_total,fnonnat,nonnat_count,model_total,inter)
51 def capri_class(fnat,iRMS,LRMS,capri_peptide=False):
56 if(fnat < 0.2 or (LRMS > 5.0 and iRMS > 2.0)):
58 elif((fnat >= 0.2 and fnat < 0.5) and (LRMS <= 5.0 or iRMS <= 2.0) or (fnat >= 0.5 and LRMS > 2.0 and iRMS > 1.0)):
60 elif((fnat >= 0.5 and fnat < 0.8) and (LRMS <= 2.0 or iRMS <= 1.0) or (fnat >= 0.8 and LRMS > 1.0 and iRMS > 0.5)):
62 elif(fnat >= 0.8 and (LRMS <= 1.0 or iRMS <= 0.5)):
68 if(fnat < 0.1 or (LRMS > 10.0 and iRMS > 4.0)):
70 elif((fnat >= 0.1 and fnat < 0.3) and (LRMS <= 10.0 or iRMS <= 4.0) or (fnat >= 0.3 and LRMS > 5.0 and iRMS > 2.0)):
72 elif((fnat >= 0.3 and fnat < 0.5) and (LRMS <= 5.0 or iRMS <= 2.0) or (fnat >= 0.5 and LRMS > 1.0 and iRMS > 1.0)):
74 elif(fnat >= 0.5 and (LRMS <= 1.0 or iRMS <= 1.0)):
80 def capri_class_DockQ(DockQ,capri_peptide=False):
83 return 'Undef for capri_peptides'
85 (c1,c2,c3)=(0.23,0.49,0.80)
88 elif(DockQ >= c1 and DockQ < c2):
90 elif(DockQ >= c2 and DockQ < c3):
98 def calc_DockQ(model,native,use_CA_only=False,capri_peptide=False):
100 exec_path=os.path.dirname(os.path.abspath(sys.argv[0]))
101 atom_for_sup=['CA','C','N','O']
105 cmd_fnat=exec_path + '/fnat ' + model + ' ' + native + ' 5 -all'
106 cmd_interface=exec_path + '/fnat ' + model + ' ' + native + ' 10 -all'
109 cmd_fnat=exec_path + '/fnat ' + model + ' ' + native + ' 4 -all'
110 cmd_interface=exec_path + '/fnat ' + model + ' ' + native + ' 8 -cb'
112 fnat_out = os.popen(cmd_fnat).read()
114 #fnat_out = subprocess.getoutput(cmd_fnat)
117 (fnat,nat_correct,nat_total,fnonnat,nonnat_count,model_total,interface5A)=parse_fnat(fnat_out)
118 assert fnat!=-1, "Error running cmd: %s\n" % (cmd_fnat)
119 inter_out = os.popen(cmd_interface).read()
120 # inter_out = subprocess.getoutput(cmd_interface)
122 (fnat_bb,nat_correct_bb,nat_total_bb,fnonnat_bb,nonnat_count_bb,model_total_bb,interface)=parse_fnat(inter_out)
123 assert fnat_bb!=-1, "Error running cmd: %s\n" % (cmd_interface)
126 #Use same interface as for fnat for iRMS
127 #interface=interface5A
131 pdb_parser = Bio.PDB.PDBParser(QUIET = True)
134 ref_structure = pdb_parser.get_structure("reference", native)
135 sample_structure = pdb_parser.get_structure("model", model)
137 # Use the first model in the pdb-files for alignment
138 # Change the number 0 if you want to align to another structure
139 ref_model = ref_structure[0]
140 sample_model = sample_structure[0]
142 # Make a list of the atoms (in the structures) you wish to align.
143 # In this case we use CA atoms whose index is in the specified range
152 #find atoms common in both sample and native
155 #first read in sample
156 for sample_chain in sample_model:
158 chain=sample_chain.id
160 for sample_res in sample_chain:
162 if sample_res.get_id()[0] != ' ': #Skip hetatm.
164 resname=sample_res.get_id()[1]
165 key=str(resname) + chain
166 for a in atom_for_sup:
167 atom_key=key + '.' + a
169 if atom_key in atoms_def_sample:
170 print(atom_key + ' already added (MODEL)!!!')
171 atoms_def_sample.append(atom_key)
173 #then read in native also present in sample
174 for ref_chain in ref_model:
176 for ref_res in ref_chain:
178 if ref_res.get_id()[0] != ' ': #Skip hetatm.
179 # print ref_res.get_id()
181 resname=ref_res.get_id()[1]
182 key=str(resname) + chain
183 for a in atom_for_sup:
184 atom_key=key + '.' + a
185 if a in ref_res and atom_key in atoms_def_sample:
186 if atom_key in atoms_def_in_both:
187 print(atom_key + ' already added (Native)!!!')
188 atoms_def_in_both.append(atom_key)
191 # print atoms_def_in_both
192 for sample_chain in sample_model:
193 chain=sample_chain.id
194 if chain not in list(chain_res.keys()):
196 for sample_res in sample_chain:
197 if sample_res.get_id()[0] != ' ': #Skip hetatm.
199 resname=sample_res.get_id()[1]
200 key=str(resname) + chain
201 chain_res[chain].append(key)
203 for a in atom_for_sup:
204 atom_key=key + '.' + a
205 if a in sample_res and atom_key in atoms_def_in_both:
206 sample_atoms.append(sample_res[a])
207 common_interface.append(key)
216 # Iterate of all chains in the model in order to find all residues
217 for ref_chain in ref_model:
218 # Iterate of all residues in each model in order to find proper atoms
219 # print dir(ref_chain)
221 if chain not in list(chain_ref.keys()):
223 for ref_res in ref_chain:
224 if ref_res.get_id()[0] != ' ': #Skip hetatm.
226 resname=ref_res.get_id()[1]
227 key=str(resname) + chain
231 # print chain_res.values()
232 if key in chain_res[chain]: # if key is present in sample
234 for a in atom_for_sup:
235 atom_key=key + '.' + a
236 if a in ref_res and atom_key in atoms_def_in_both:
237 chain_ref[chain].append(ref_res[a])
238 common_residues.append(key)
239 #chain_sample.append((ref_res['CA'])
240 if key in common_interface:
241 # Check if residue number ( .get_id() ) is in the list
242 # Append CA atom to list
244 for a in atom_for_sup:
245 atom_key=key + '.' + a
247 if a in ref_res and atom_key in atoms_def_in_both:
248 ref_atoms.append(ref_res[a])
252 #get the ones that are present in native
254 for sample_chain in sample_model:
255 chain=sample_chain.id
256 if chain not in list(chain_sample.keys()):
257 chain_sample[chain]=[]
258 for sample_res in sample_chain:
259 if sample_res.get_id()[0] != ' ': #Skip hetatm.
261 resname=sample_res.get_id()[1]
262 key=str(resname) + chain
263 if key in common_residues:
264 for a in atom_for_sup:
265 atom_key=key + '.' + a
266 if a in sample_res and atom_key in atoms_def_in_both:
267 chain_sample[chain].append(sample_res[a])
269 #if key in common_residues:
271 #sample_atoms.append(sample_res['CA'])
272 #common_interface.append(key)
275 assert len(ref_atoms)!=0, "length of native is zero"
276 assert len(sample_atoms)!=0, "length of model is zero"
277 assert len(ref_atoms)==len(sample_atoms), "Different number of atoms in native and model %d %d\n" % (len(ref_atoms),len(sample_atoms))
279 super_imposer = Bio.PDB.Superimposer()
280 super_imposer.set_atoms(ref_atoms, sample_atoms)
281 super_imposer.apply(sample_model.get_atoms())
284 irms=super_imposer.rms
286 (chain1,chain2)=list(chain_sample.keys())
289 receptor_chain=chain2
290 len1=len(chain_res[chain1])
291 len2=len(chain_res[chain2])
293 assert len1!=0, "%s chain has zero length!\n" % chain1
294 assert len2!=0, "%s chain has zero length!\n" % chain2
298 if(len(chain_sample[chain1]) > len(chain_sample[chain2])):
299 receptor_chain=chain1
308 #print chain_sample.keys()
310 #Set to align on receptor
311 assert len(chain_ref[receptor_chain])==len(chain_sample[receptor_chain]), "Different number of atoms in native and model receptor (chain %c) %d %d\n" % (receptor_chain,len(chain_ref[receptor_chain]),len(chain_sample[receptor_chain]))
313 super_imposer.set_atoms(chain_ref[receptor_chain], chain_sample[receptor_chain])
314 super_imposer.apply(sample_model.get_atoms())
315 receptor_chain_rms=super_imposer.rms
316 #print receptor_chain_rms
317 #print dir(super_imposer)
320 #Grep out the transformed ligand coords
324 #print chain_ref[ligand_chain]
325 #print chain_sample[ligand_chain]
326 #l1=len(chain_ref[ligand_chain])
327 #l2=len(chain_sample[ligand_chain])
332 assert len(chain_ref[ligand_chain])!=0 or len(chain_sample[ligand_chain])!=0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (ligand_chain,len(chain_ref[ligand_chain]),len(chain_sample[ligand_chain]))
335 assert len(chain_ref[ligand_chain])==len(chain_sample[ligand_chain]), "Different number of atoms in native and model ligand (chain %c) %d %d\n" % (ligand_chain,len(chain_ref[ligand_chain]),len(chain_sample[ligand_chain]))
337 coord1=np.array([atom.coord for atom in chain_ref[ligand_chain]])
338 coord2=np.array([atom.coord for atom in chain_sample[ligand_chain]])
340 #coord1=np.array([atom.coord for atom in chain_ref[receptor_chain]])
341 #coord2=np.array([atom.coord for atom in chain_sample[receptor_chain]])
346 sup=SVDSuperimposer()
347 Lrms = sup._rms(coord1,coord2) #using the private _rms function which does not superimpose
350 #super_imposer.set_atoms(chain_ref[ligand_chain], chain_sample[ligand_chain])
351 #super_imposer.apply(sample_model.get_atoms())
352 #coord1=np.array([atom.coord for atom in chain_ref[receptor_chain]])
353 #coord2=np.array([atom.coord for atom in chain_sample[receptor_chain]])
354 #Rrms= sup._rms(coord1,coord2)
355 #should give same result as above line
356 #diff = coord1-coord2
357 #l = len(diff) #number of atoms
358 #from math import sqrt
359 #print sqrt(sum(sum(diff*diff))/l)
360 #print np.sqrt(np.sum(diff**2)/l)
361 DockQ=(float(fnat) + 1/(1+(irms/1.5)*(irms/1.5)) + 1/(1+(Lrms/8.5)*(Lrms/8.5)))/3
367 info['nat_correct']=nat_correct
368 info['nat_total']=nat_total
370 info['fnonnat']=fnonnat
371 info['nonnat_count']=nonnat_count
372 info['model_total']=model_total
374 info['chain1']=chain1
375 info['chain2']=chain2
378 info['class1']=class1
379 info['class2']=class2
383 def get_pdb_chains(pdb):
384 pdb_parser = Bio.PDB.PDBParser(QUIET = True)
385 pdb_struct = pdb_parser.get_structure("reference", pdb)[0]
391 #ATOM 3315 CB ALA H 126 -21.802 31.674 73.597 1.00 58.05 C
393 def make_two_chain_pdb(pdb,group1,group2): #renumber from 1
394 pdb_parser = Bio.PDB.PDBParser(QUIET = True)
395 pdb_struct = pdb_parser.get_structure("reference", pdb)[0]
401 (code,outfile)=tempfile.mkstemp()
403 io.set_structure(pdb_struct)
405 exec_path=os.path.dirname(os.path.abspath(sys.argv[0]))
406 cmd=exec_path + '/scripts/renumber_pdb.pl ' + outfile
409 return outfile +'.renum'
411 def change_chain(pdb_string,chain):
413 for line in pdb_string:
416 new_str.append("".join(s));
417 return "\n".join(new_str);
419 def make_two_chain_pdb_perm(pdb,group1,group2): #not yet ready
422 for line in f.readlines():
423 if line[0:4] == "ATOM":
428 resnum=int(line[22:26])
429 # print atom + ':' + str(resnum) +':'
430 if chain not in pdb_chains:
432 pdb_chains[chain].append(line)
441 (code,outfile)=tempfile.mkstemp()
444 # print pdb_chains[c]
445 f.write(change_chain(pdb_chains[c],"A"))
448 f.write(change_chain(pdb_chains[c],"B"))
451 exec_path=os.path.dirname(os.path.abspath(sys.argv[0]))
452 cmd=exec_path + '/scripts/renumber_pdb.pl ' + outfile
455 return outfile +'.renum'
459 parser=ArgumentParser(description="DockQ - Quality measure for protein-protein docking models")
460 parser.add_argument('model',metavar='<model>',type=str,nargs=1,help='path to model file')
461 parser.add_argument('native',metavar='<native>',type=str,nargs=1,help='path to native file')
462 parser.add_argument('-capri_peptide',default=False,action='store_true',help='use version for capri_peptide (DockQ cannot not be trusted for this setting)')
463 parser.add_argument('-short',default=False,action='store_true',help='short output')
464 parser.add_argument('-verbose',default=False,action='store_true',help='talk a lot!')
465 parser.add_argument('-quiet',default=False,action='store_true',help='keep quiet!')
466 parser.add_argument('-useCA',default=False,action='store_true',help='use CA instead of backbone')
467 parser.add_argument('-skip_check',default=False,action='store_true',help='skip initial check fo speed up on two chain examples')
468 parser.add_argument('-no_needle',default=False,action='store_true',help='do not use global alignment to fix residue numbering between native and model during chain permutation (use only in case needle is not installed, and the residues between the chains are identical')
469 parser.add_argument('-perm1',default=False,action='store_true',help='use all chain1 permutations to find maximum DockQ (number of comparisons is n! = 24, if combined with -perm2 there will be n!*m! combinations')
470 parser.add_argument('-perm2',default=False,action='store_true',help='use all chain2 permutations to find maximum DockQ (number of comparisons is n! = 24, if combined with -perm1 there will be n!*m! combinations')
471 # parser.add_argument('-comb',default=False,action='store_true',help='use all cyclicchain permutations to find maximum DockQ (number of comparisons is n!*m! = 24*24 = 576 for two tetramers interacting')
472 parser.add_argument('-model_chain1',metavar='model_chain1', type=str,nargs='+', help='pdb chain order to group together partner 1')
473 parser.add_argument('-model_chain2',metavar='model_chain2', type=str,nargs='+', help='pdb chain order to group together partner 2 (complement to partner 1 if undef)')
474 parser.add_argument('-native_chain1',metavar='native_chain1', type=str,nargs='+', help='pdb chain order to group together from native partner 1')
475 parser.add_argument('-native_chain2',metavar='native_chain2', type=str,nargs='+', help='pdb chain order to group together from native partner 2 (complement to partner 1 if undef)')
478 args = parser.parse_args()
481 if(float(Bio.__version__) < bio_ver):
482 print("Biopython version (%s) is too old need at least >=%.2f" % (Bio.__version__,bio_ver))
485 # if(len(sys.argv)!=3):
486 # print "Usage: ./Dock.py <model> <native>"
490 # print args.model[0]
495 exec_path=os.path.dirname(os.path.abspath(sys.argv[0]))
496 fix_numbering=exec_path + '/scripts/fix_numbering.pl'
499 native=args.native[0]
501 use_CA_only=args.useCA
502 capri_peptide=args.capri_peptide
507 if(not args.skip_check):
508 model_chains=get_pdb_chains(model)
509 native_chains=get_pdb_chains(native)
512 # print native_chains
513 if((len(model_chains) > 2 or len(native_chains) > 2) and
514 (args.model_chain1 == None and args.native_chain1 == None)):
515 print("Multi-chain model need sets of chains to group\nuse -native_chain1 and/or -model_chain1 if you want a different mapping than 1-1")
516 print("Model chains : " + str(model_chains))
517 print("Native chains : " + str(native_chains))
519 if not args.skip_check and (len(model_chains) < 2 or len(native_chains)< 2):
520 print("Need at least two chains in the two inputs\n");
523 if len(model_chains) > 2 or len(native_chains)> 2:
524 group1=model_chains[0]
525 group2=model_chains[1]
526 nat_group1=native_chains[0]
527 nat_group2=native_chains[1]
528 if(args.model_chain1 != None):
529 group1=args.model_chain1
531 if(args.model_chain2 != None):
532 group2=args.model_chain2
534 #will use the complement from group1
536 for c in model_chains:
543 if(args.native_chain1 != None):
544 nat_group1=args.native_chain1
545 if(args.native_chain2 != None):
546 nat_group2=args.native_chain2
548 #will use the complement from group1
550 for c in native_chains:
551 if c not in nat_group1:
554 if(args.model_chain1 == None):
565 print('Merging ' + ''.join(group1) + ' -> ' + ''.join(nat_group1) + ' to chain A')
566 print('Merging ' + ''.join(group2) + ' -> ' + ''.join(nat_group2) + ' to chain B')
567 native=make_two_chain_pdb_perm(native,nat_group1,nat_group2)
568 files_to_clean.append(native)
570 if args.perm1 or args.perm2:
575 iter_perm1=itertools.combinations(group1,len(group1))
576 iter_perm2=itertools.combinations(group2,len(group2))
578 iter_perm1=itertools.permutations(group1)
580 iter_perm2=itertools.permutations(group2)
584 for g1 in iter_perm1:#_temp:
586 for g2 in iter_perm2:
592 # print str(g1)+' '+str(g2)
600 print('Starting chain order permutation search (number of permutations: ' + str(pe_tot) + ')')
604 model_renum=make_two_chain_pdb_perm(model_in,g1,g2)
605 model_fixed=model_renum
606 if not args.no_needle:
607 fix_numbering_cmd=fix_numbering + ' ' + model_renum + ' ' + native + ' > /dev/null'
608 model_fixed=model_renum + ".fixed"
609 # print fix_numbering_cmd
610 os.system(fix_numbering_cmd)
611 os.remove(model_renum)
612 if not os.path.exists(model_fixed):
613 print('If you are sure the residues are identical you can use the options -no_needle')
615 test_dict=calc_DockQ(model_fixed,native,use_CA_only)
616 os.remove(model_fixed)
618 print(str(pe)+'/'+str(pe_tot) + ' ' + ''.join(g1) + ' -> ' + ''.join(g2) + ' ' + str(test_dict['DockQ']))
619 if(test_dict['DockQ'] > best_DockQ):
620 best_DockQ=test_dict['DockQ'];
624 best_info='Best score ( ' + str(best_DockQ) +' ) found for model -> native, chain1:' + ''.join(best_g1) + ' -> ' + ''.join(nat_group1) + ' chain2:' + ''.join(best_g2) + ' -> ' + ''.join(nat_group2)
629 print("Current best " + str(best_DockQ))
633 # print 'Best score ( ' + str(best_DockQ) +' ) found for ' + str(best_g1) + ' ' + str(best_g2)
635 model_renum=make_two_chain_pdb_perm(model,group1,group2)
636 model_fixed=model_renum
637 if not args.no_needle:
638 fix_numbering_cmd=fix_numbering + ' ' + model_renum + ' ' + native + ' > /dev/null'
639 model_fixed=model_renum + ".fixed"
640 # print fix_numbering_cmd
641 os.system(fix_numbering_cmd)
642 os.remove(model_renum)
643 if not os.path.exists(model_fixed):
644 print('If you are sure the residues are identical you can use the options -no_needle')
646 info=calc_DockQ(model_fixed,native,use_CA_only)
648 #os.system('cp ' + native + ' native_multichain.pdb')
649 #os.system('cp ' + model_fixed + ' .')
650 os.remove(model_fixed)
651 # files_to_clean.append(model)
652 # files_to_clean.append(model_fixed)
658 info=calc_DockQ(model,native,use_CA_only=use_CA_only,capri_peptide=capri_peptide) #False):
659 # info=calc_DockQ(model,native,use_CA_only=)
665 fnonnat=info['fnonnat']
669 print(("DockQ-capri_peptide %.3f Fnat %.3f iRMS %.3f LRMS %.3f Fnonnat %.3f %s %s %s" % (DockQ,fnat,irms,Lrms,fnonnat,model_in,native_in,best_info)))
671 print(("DockQ %.3f Fnat %.3f iRMS %.3f LRMS %.3f Fnonnat %.3f %s %s %s" % (DockQ,fnat,irms,Lrms,fnonnat,model_in,native_in,best_info)))
675 print('****************************************************************')
676 print('* DockQ-CAPRI peptide *')
677 print('* Do not trust any thing you read.... *')
678 print('* OBS THE DEFINITION OF Fnat and iRMS are different for *')
679 print('* peptides in CAPRI *')
681 print('* For the record: *')
682 print('* Definition of contact <4A all heavy atoms (Fnat) *')
683 print('* Definition of interface <8A CB (iRMS) *')
684 print('* For comments, please email: bjorn.wallner@.liu.se *')
685 print('****************************************************************')
687 print('****************************************************************')
689 print('* Scoring function for protein-protein docking models *')
690 print('* Statistics on CAPRI data: *')
691 print('* 0.00 <= DockQ < 0.23 - Incorrect *')
692 print('* 0.23 <= DockQ < 0.49 - Acceptable quality *')
693 print('* 0.49 <= DockQ < 0.80 - Medium quality *')
694 print('* DockQ >= 0.80 - High quality *')
695 print('* Reference: Sankar Basu and Bjorn Wallner, DockQ: A quality *')
696 print('* measure for protein-protein docking models, submitted *')
698 print('* For the record: *')
699 print('* Definition of contact <5A (Fnat) *')
700 print('* Definition of interface <10A all heavy atoms (iRMS) *')
701 print('* For comments, please email: bjorn.wallner@.liu.se *')
703 print('****************************************************************')
704 print(("Model : %s" % model_in))
705 print(("Native : %s" % native_in))
708 print('Number of equivalent residues in chain ' + info['chain1'] + ' ' + str(info['len1']) + ' (' + info['class1'] + ')')
709 print('Number of equivalent residues in chain ' + info['chain2'] + ' ' + str(info['len2']) + ' (' + info['class2'] + ')')
710 print(("Fnat %.3f %d correct of %d native contacts" % (info['fnat'],info['nat_correct'],info['nat_total'])))
711 print(("Fnonnat %.3f %d non-native of %d model contacts" % (info['fnonnat'],info['nonnat_count'],info['model_total'])))
712 print(("iRMS %.3f" % irms))
713 print(("LRMS %.3f" % Lrms))
714 # print 'CAPRI ' + capri_class(fnat,irms,Lrms,capri_peptide=capri_peptide)
717 peptide_suffix='_peptide'
718 print(('CAPRI{} {}'.format(peptide_suffix,capri_class(fnat,irms,Lrms,capri_peptide=capri_peptide))))
719 print('DockQ_CAPRI ' + capri_class_DockQ(DockQ,capri_peptide=capri_peptide))
720 peptide_disclaimer=''
722 peptide_disclaimer='DockQ not reoptimized for CAPRI peptide evaluation'
723 print(("DockQ {:.3f} {}".format(DockQ,peptide_disclaimer)))
725 for f in files_to_clean:
728 if __name__ == '__main__':