#!/usr/bin/python -u # -*- coding: iso-8859-1 -*- #********************************************************# # radfrompdb v0.9, Mikael Johansson 5.12.2009 # # # # This script reads a PDB file and tries to figure out # # radii for the atoms, and create a new file which # # contains the radii. # # # # Three output formats can be specified: # # XYZ : XYZ format with radii in the 5th column # # PDBa: original PDB file with radii added to columns # # 82-89 of the ATOM/HETATM lines # # PDBr: original PDB file with radii inserted into # # columns 61-66 (replacing temperature factor) # # # # For usage run program without parameters # # # # Some comments: # # v0.9 : Initial release (5.12.2009) # # # # Check for new versions and contact information at: # # http://www.iki.fi/~mpjohans/ # #********************************************************# import sys, os VERSION='v0.9' DATE='5.12.2009' # List of residues, their atoms, and radii. Format: # Residue name (3 chars), atomtype/radius, Python dictionary # style. Should be clear from the examples # # For the standard atom types, ProtOr united group # radii (that is, no hydrogens) are used, from # J. Tsai, R. Taylor, C. Chothia, and M. Gerstein, # J. Mol. Biol. 290 (1999) 253-266. # # Naming convention for the atom types is from # J. Tsai, N. Voss, M. Gerstein, # Bioinformatics 17 (2001) 949-956. C3H0=1.61; C3H1=1.76; C4H1=1.88; C4H2=1.88; C4H3=1.88 N3H0=1.64; N3H1=1.64; N3H2=1.64; N4H3=1.64 O1H0=1.42; O2H1=1.46 S2H0=1.77; S2H1=1.77 # The following not really necessary for this script... C3H0s=1.61; C3H0b=1.61; C3H1s=1.76; C3H1b=1.76 C4H1s=1.88; C4H1b=1.88; C4H2s=1.88; C4H2b=1.88; C4H3u=1.88 N3H0u=1.64; N3H1b=1.64; N3H1s=1.64; N3H2u=1.64; N4H3u=1.64 O1H0u=1.42; O2H1u=1.46 S2H0u=1.77; S2H1u=1.77 # Non-standard, more or less ad hoc C3H2=1.76 # =CH2, needed for the haems O2H2=1.5 # water O2H0=1.5 # -O- bridge in TGL and DMU RESIDUES={ 'ALA':{'N':N3H1s,'CA':C4H1b,'C':C3H0s,'O':O1H0u,'CB':C4H3u}, 'ARG':{'N':N3H1b,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C4H2s,'CD' :C4H2s,'NE' :N3H1b, 'CZ' :C3H0b,'NH1':N3H2u,'NH2':N3H2u}, 'ASN':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C3H0b,'OD1':O1H0u,'ND2':N3H2u}, 'ASP':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C3H0b,'OD1':O1H0u,'OD2':O1H0u}, 'CSS':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2b,'SG' :S2H0u,'SD' :S2H1u}, 'CYS':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2b,'SG' :S2H1u}, 'GLN':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C4H2s,'CD' :C3H0b,'OE1':O1H0u, 'NE2':N3H2u}, 'GLU':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C4H2s,'CD' :C3H0b,'OE1':O1H0u, 'OE2':O1H0u}, 'GLY':{'N':N3H1s,'CA':C4H2s,'C':C3H0b,'O':O1H0u}, 'HIS':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C3H0b,'ND1':N3H1b,'CD2':C3H1s, 'CE1':C3H1s,'NE2':N3H1b}, 'ILE':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H1b,'CG1':C4H2b,'CG2':C4H3u,'CD1':C4H3u}, 'LEU':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C4H1b,'CD1':C4H3u,'CD2':C4H3u}, 'LYS':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C4H2s,'CD' :C4H2s,'CE' :C4H2b, 'NZ' :N4H3u}, 'MET':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C4H2b,'SD' :S2H0u,'CE' :C4H3u}, 'PHE':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C3H0b,'CD1':C3H1s,'CD2':C3H1b, 'CE1':C3H1b,'CE2':C3H1b,'CZ' :C3H1b}, 'PRO':{'N':N3H0u,'CA':C4H1b,'C':C3H0s,'O':O1H0u,'CB':C4H2b,'CG' :C4H2b,'CD' :C4H2s}, 'SER':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'OG' :O2H1u}, 'THR':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H1b,'OG1':O2H1u,'CG2':C4H3u}, 'TRP':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C3H0b,'CD1':C3H1s,'CD2':C3H0b, 'NE1':N3H1b,'CE2':C3H0b,'CE3':C3H1b,'CZ2':C3H1b,'CZ3':C3H1b,'CH2':C3H1b}, 'TYR':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H2s,'CG' :C3H0b,'CD1':C3H1s,'CD2':C3H1s, 'CE1':C3H1s,'CE2':C3H1s,'CZ' :C3H0b,'OH':O2H1u}, 'VAL':{'N':N3H1s,'CA':C4H1s,'C':C3H0s,'O':O1H0u,'CB':C4H1b,'CG1':C4H3u,'CG2':C4H3u}, # Non-amino acid additions: 'HEA':{'FE':1.9,'CHA':C3H1,'CHB':C3H1,'CHC':C3H1,'CHD':C3H1, 'NA':N3H0,'N_A':N3H0,'C1A':C3H0,'C2A':C3H0,'C3A':C3H0,'C4A':C3H0,'CMA':C3H1,'OMA':O1H0, 'CAA':C4H2,'CBA':C4H2,'CGA':C3H0,'O1A':O1H0,'O2A':O1H0, 'NB':N3H0,'N_B':N3H0,'C1B':C3H0,'C2B':C3H0,'C3B':C3H0,'C4B':C3H0,'CMB':C4H3, 'NC':N3H0,'N_C':N3H0,'C1C':C3H0,'C2C':C3H0,'C3C':C3H0,'C4C':C3H0,'CMC':C4H3,'CAC':C3H1,'CBC':C3H2, 'ND':N3H0,'N_D':N3H0,'C1D':C3H0,'C2D':C3H0,'C3D':C3H0,'C4D':C3H0,'CMD':C4H1, 'CAD':C4H2,'CBD':C4H2,'CGD':C3H0,'O1D':O1H0,'O2D':O1H0, 'C11':C4H1,'O11':O2H1,'C12':C4H2,'C13':C4H2,'C14':C3H1,'C15':C3H0,'C16':C4H2,'C17':C4H2, 'C18':C3H1,'C19':C3H0,'C20':C4H2,'C21':C4H2,'C22':C3H1,'C23':C3H0,'C24':C4H3,'C25':C4H3, 'C26':C4H3,'C27':C4H3}, 'HEO':{'FE':1.9,'CHA':C3H1,'CHB':C3H1,'CHC':C3H1,'CHD':C3H1, 'NA':N3H0,'N_A':N3H0,'C1A':C3H0,'C2A':C3H0,'C3A':C3H0,'C4A':C3H0,'CMA':C4H3, 'CAA':C4H2,'CBA':C4H2,'CGA':C3H0,'O1A':O1H0,'O2A':O1H0, 'NB':N3H0,'N_B':N3H0,'C1B':C3H0,'C2B':C3H0,'C3B':C3H0,'C4B':C3H0,'CMB':C4H3, 'NC':N3H0,'N_C':N3H0,'C1C':C3H0,'C2C':C3H0,'C3C':C3H0,'C4C':C3H0,'CMC':C4H3,'CAC':C3H1,'CBC':C3H2, 'ND':N3H0,'N_D':N3H0,'C1D':C3H0,'C2D':C3H0,'C3D':C3H0,'C4D':C3H0,'CMD':C4H1, 'CAD':C4H2,'CBD':C4H2,'CGD':C3H0,'O1D':O1H0,'O2D':O1H0, 'C11':C4H1,'O11':O2H1,'C12':C4H2,'C13':C4H2,'C14':C3H1,'C15':C3H0,'C16':C4H2,'C17':C4H2, 'C18':C3H1,'C19':C3H0,'C20':C4H2,'C21':C4H2,'C22':C3H1,'C23':C3H0,'C24':C4H3,'C25':C4H3, 'C26':C4H3,'C27':C4H3}, 'HEM':{'FE':1.9,'CHA':C3H1,'CHB':C3H1,'CHC':C3H1,'CHD':C3H1, 'NA':N3H0,'N_A':N3H0,'C1A':C3H0,'C2A':C3H0,'C3A':C3H0,'C4A':C3H0,'CMA':C4H3, 'CAA':C4H2,'CBA':C4H2,'CGA':C3H0,'O1A':O1H0,'O2A':O1H0, 'NB':N3H0,'N_B':N3H0,'C1B':C3H0,'C2B':C3H0,'C3B':C3H0,'C4B':C3H0,'CMB':C4H3, 'NC':N3H0,'N_C':N3H0,'C1C':C3H0,'C2C':C3H0,'C3C':C3H0,'C4C':C3H0,'CMC':C4H3,'CAC':C3H1,'CBC':C3H2, 'ND':N3H0,'N_D':N3H0,'C1D':C3H0,'C2D':C3H0,'C3D':C3H0,'C4D':C3H0,'CMD':C4H1, 'CAD':C4H2,'CBD':C4H2,'CGD':C3H0,'O1D':O1H0,'O2D':O1H0, 'CAB':C3H1,'CBB':C3H2}, 'HOH':{'O':O2H2}, 'TGL':{'CA1':C3H0,'CA2':C4H2,'CA3':C4H2,'CA4':C4H2,'CA5':C4H2,'CA6':C4H2,'CA7':C4H2,'CA8':C4H2, 'CA9':C4H2,'OA1':O1H0,'CB1':C3H0,'CB2':C4H2,'CB3':C4H2,'CB4':C4H2,'CB5':C4H2,'CB6':C4H2, 'CB7':C4H2,'CB8':C4H2,'CB9':C4H2,'C10':C4H2,'C11':C4H2,'C12':C4H2,'C13':C4H2,'C14':C4H2, 'OB1':O1H0,'OG1':O2H0,'CG1':C4H2,'CG2':C4H1,'OG2':O2H0,'CG3':C4H2,'OG3':O2H0,'CC1':C3H0, 'CC2':C4H2,'CC3':C4H2,'CC4':C4H2,'CC5':C4H2,'CC6':C4H2,'CC7':C4H2,'CC8':C4H2,'CC9':C4H2, 'C15':C4H2,'C16':C4H2,'C17':C4H2,'C18':C4H2,'C19':C4H2,'OC1':O1H0,'C20':C4H2,'C21':C4H2, 'C22':C4H2,'C23':C4H2,'C24':C4H2,'C25':C4H2,'C26':C4H2,'C27':C4H2,'C28':C4H3,'C29':C4H2, 'C30':C4H2,'C31':C4H2,'C32':C4H3,'C33':C4H2,'C34':C4H2,'C35':C4H2,'C36':C4H3}, 'DMU':{'C1':C4H1,'C2':C4H1,'C3':C4H1,'C4':C4H1,'O5':O2H0,'C6':C4H1,'O7':O2H0,'O16':O2H0, 'C18':C4H2,'C19':C4H2,'C19':C4H2,'C22':C4H2,'C25':C4H2,'C28':C4H2,'C31':C4H2,'C34':C4H2, 'C37':C4H2,'C40':C4H2,'C43':C4H3,'O49':O2H1,'O55':O2H1,'C57':C4H2,'O61':O2H0,'C5':C4H1, 'C7':C4H1,'C8':C4H1,'C9':C4H1,'O1':O2H0,'C10':C4H1,'O2':O2H0,'O3':O2H1,'O4':O2H1, 'C11':C4H2,'O6':O2H1} } # Fallback radii if not found from the RESIDUE definition above ATOMS={ 'H' : 0.0, 'Mg': 1.4, 'Fe': 1.9, 'Cu': 1.9, 'Xx': 1.8 } # Fallback radius if nothing else matches #DEFAULTRAD=0.0 DEFAULTRAD=1.8 class AtomClass: def __init__(self): self.name=None # atom name, ex 'CA' self.elem=None # atom element, ex. 'C' self.x=None # x-coordinate self.y=None self.z=None self.radius=None # radius of the atom #***************** # def usage() #**************** def usage(): dummy, prog=os.path.split(sys.argv[0]) print "Usage: "+prog+" [-noh] [-f XYZ|PDBa|PDBq] " print " -noh: Ignore any hydrogens in . Only used for XYZ format." print " -f: Format of the output" print " XYZ: XYZ format with additional column for radii (default)" print " PDBa: radii added to columns 82-89 with 5 decimals" print " PDBr: columns 61-66 replaced by radii with 3 decimals" print " : PDB file to be parsed" #**************** # def init() #*************** def init(): args=sys.argv noh=False format='XYZ' while len(args)>2: if args[1]=='-noh': noh=1 del args[1] continue elif args[1]=='-f': format=args[2].upper() if (format<>'XYZ') and (format<>'PDBA') and (format.upper<>'PDBR'): sys.stderr.write("Error: Unknown format: "+args[2]+"\n") usage() sys.exit(1) del args[1:3] continue else: sys.stderr.write("Error: Unknown option: "+args[1]+"\n") usage() sys.exit(1) if len(args)<>2: sys.stderr.write("Error: No input file specified\n") usage() sys.exit(1) infile=open(args[1],'r') if format<>'XYZ': noh=False return format, noh, infile #****************** # def outit() #************** def outit(infile): infile.close() #****************** # def GetPDBAtoms() #******************* def GetPDBAtoms(noh, infile, outfile): AN=0 # Current atom number atoms=[] line=infile.readline() while line: # Check if we have an ATOM or HETATM line: if (line[0:4]<>"ATOM") and (line[0:6]<>"HETATM"): if (format=='PDBR') or (format=='PDBA'): outfile.write(line) line=infile.readline() continue # Get element symbol. Assumes element in cols 13-14, and that # one character symbols are found in col 14, with a possible # number prefix. element=line[12:14].lstrip(' 1234567890').capitalize() if (element=='H') and (noh): # No need to write "PDB" style, as noh is always False for these formats line=infile.readline() continue # Get atom name from cols 13-16. Remove possible leading space # or number, and trailing space. atomname=line[12:16].lstrip(' 1234567890').rstrip() # Get residue name from cols 18-20 resname=line[17:20] # Get coordinates from cols 31-54 xcoord=float(line[30:38]) ycoord=float(line[38:46]) zcoord=float(line[46:54]) # Get radius of atom radius=DEFAULTRAD try: # first, check if the specified residue and atomname combination is defined radius=RESIDUES[resname][atomname] except KeyError: # if not, then check if a default radius for the element is defined if atomname=='UNK': # If atomname is UNK, set element to Xx instead of Un element='Xx' try: radius=ATOMS[element] except KeyError: # if not, then just use default radius and warn the user sys.stderr.write("WARNING: Failed to set radius for atom #%i, using %6.4f\n" % (AN, DEFAULTRAD)) sys.stderr.write(" Got element:"+element+", atomname:"+atomname+", resname:"+resname+" from line:\n") sys.stderr.write(" "+line) atoms.append(AtomClass()) atoms[AN].name=atomname atoms[AN].elem=element atoms[AN].x=xcoord atoms[AN].y=ycoord atoms[AN].z=zcoord atoms[AN].radius=radius AN=AN+1 if format=='PDBR': line=line.ljust(80) radstring='%6.3f'%(radius) if len(radstring)>6: # Somewhat unneccessary, only true if radius>99.999... sys.stderr.write("ERROR: Radius of element too large:"+`radius`+" from line:\n") sys.stderr.write(line) sys.exit(2) line=line[:60]+radstring+line[66:] outfile.write(line) elif format=='PDBA': line=line.ljust(81) radstring='%8.5f'%(radius) if len(radstring)>8: # Somewhat unneccessary, only true if radius>99.999... sys.stderr.write("ERROR: Radius of element too large:"+`radius`+" from line:\n") sys.stderr.write(line) sys.exit(2) line=line.rstrip('\r\n')+radstring+'\n' outfile.write(line) line=infile.readline() return atoms #*************** # def PutXYZ() #************ def PutXYZ(atoms, outfile): outfile.write(" "+`len(atoms)`+"\n") outfile.write("#XYZ file with atomic radii in 5th column, created by radfrompdb\n") for i in atoms: outfile.write(" %2s %10.5f %10.5f %10.5f %10.5f\n" % (i.elem, i.x, i.y, i.z, i.radius)) return #***********# #***********# # Main # #***********# #***********# #print "\nradfrompdb "+VERSION+" by Mikael Johansson, "+DATE+"\n" format, noh, infile=init() atoms=GetPDBAtoms(noh, infile, sys.stdout) if format=='XYZ': PutXYZ(atoms, sys.stdout) outit(infile)