def from_pdb(file):
    """Extract the one-letter sequence and renumbered disulfide bonds from a PDB file.

    Parameters
    ----------
    file : iterable of str
        Lines of a PDB-format file (an open text file works).

    Returns
    -------
    (seq, ssbond)
        ``seq`` is a string with one letter per C-alpha ATOM record, ``'XX'``
        between chains (one per TER record) and a trailing ``'X'`` unless the
        last residue is ``'G'``.
        ``ssbond`` is a list of ``[pos1, pos2]`` pairs, one per SSBOND record,
        with the PDB residue numbers shifted so that they count positions
        along ``seq`` (first residue of the first chain is 1).

    Raises
    ------
    KeyError
        If an SSBOND record names a chain for which no C-alpha atom was read.
    ValueError
        If a residue-number field cannot be parsed as an integer.

    Notes
    -----
    Residue letters come from ``three_to_one``, which is defined outside this
    function (presumably a dict from three-letter to one-letter codes).
    NOTE(review): an unrecognised residue name makes ``.get`` return None and
    the final join fail -- confirm whether a placeholder letter is wanted.
    """
    sequence = []
    bonds = []          # (chain1, resnum1, chain2, resnum2), PDB numbering
    chain_order = []    # chain IDs in order of first appearance
    chain_start = {}    # chain ID -> residue number of its first CA
    chain_end = {}      # chain ID -> residue number of its last CA

    for line in file:
        # The record name occupies the first six columns, so 'ATOM' must be
        # compared together with its two padding blanks.
        if line[0:6] == 'ATOM  ' and line[13:15] == 'CA':
            sequence.append(three_to_one.get(line[17:20]))
            resnum = int(line[22:26])
            # Take the chain ID from the atom record itself; SSBOND records
            # use the same identifier, so no COMPND header is required.
            chain_id = line[21:22]
            if chain_id not in chain_start:
                chain_start[chain_id] = resnum
                chain_order.append(chain_id)
            # Updated on every CA so a final chain without TER is still closed.
            chain_end[chain_id] = resnum
        elif line[0:3] == 'TER':
            sequence.append('XX')
        elif line[0:6] == 'SSBOND':
            bonds.append((line[15:16], int(line[17:21]),
                          line[29:30], int(line[31:35])))
        elif line[0:3] == 'END':
            break

    # Drop chain-break markers left at the very end by the last TER record,
    # then cap the terminus with 'X' unless it is already 'G'.
    while sequence and sequence[-1] == 'XX':
        sequence.pop()
    if sequence and sequence[-1] != 'G':
        sequence.append('X')
    seq = ''.join(sequence)

    # Per-chain shift from PDB residue numbers to positions in seq.  Each
    # earlier chain consumes (end - start + 1) residue positions plus the two
    # characters of its 'XX' separator, hence the "+ 3".
    # NOTE(review): this uses the residue-number span, so gaps or insertion
    # codes inside a chain would skew the positions -- confirm inputs are
    # contiguously numbered.
    unres_shift = {}
    offset = 0
    for chain_id in chain_order:
        first = chain_start[chain_id]
        unres_shift[chain_id] = first - offset
        offset += chain_end[chain_id] - first + 3

    ssbond = [
        [res1 - unres_shift[ch1] + 1, res2 - unres_shift[ch2] + 1]
        for ch1, res1, ch2, res2 in bonds
    ]
    return seq, ssbond