This python script is capable of generating a set of GAMESS input files for potential energy surface scanning. It needs a reference GAMESS input and XML file specifying degrees of freedom to scan. PES scan can be done using linear translations, and rotations of atomic groups. Translation vectors can be specified using pair of atoms (bonds) or raw vectors. Axis of rotation can be specified using raw vectors, triplets of atoms (plane angle scan) or pair of atoms (rotation around bond scan).
To use the script save two files «vector.py» and «pes-scan.py» in the same directory. At least two input files are consumed by the script. The first one is a reference GAMESS input which will be replicated by substituting respective coordinates to it. Here is one, defining propane molecule:
$CONTRL COORD=CART UNITS=ANGS $END $DATA test.pdb C1 C 6.0 -0.0000000000 -0.0000000000 0.0000000000 C 6.0 1.1710000000 0.9750000000 -0.2200000000 C 6.0 2.2660000000 0.2830000000 -1.0520000000 H 1.0 -0.7850000000 0.2440000000 -0.6530000000 H 1.0 0.3220000000 -0.9810000000 -0.1900000000 H 1.0 1.5670000000 1.2620000000 0.7100000000 H 1.0 2.6940000000 0.9760000000 -1.7140000000 H 1.0 -0.3350000000 0.0730000000 0.9930000000 H 1.0 0.8280000000 1.8250000000 -0.7310000000 H 1.0 1.8430000000 -0.5080000000 -1.5980000000 H 1.0 3.0080000000 -0.0920000000 -0.4110000000 $END
The second one is an XML, specifying degrees of freedom to scan.
Here is sample XML, specifying 2D PES scan. One coord is torsion angle of terminal methyl, the second is C-C bond length of terminal methyl:
<scan> <coord type="torsion" start="-180.0" stop="180.0" nsteps="12"> <atoms>2 1</atoms> <move>1 4 5 8</move> </coord> <coord type="shift" start="0" stop="1.0" nsteps="2"> <atoms>2 1</atoms> <move>1 4 5 8</move> </coord> </scan>
Coordinate type can be one of the following: shift, angle or torsion. Respective vector is defined as pair of atoms IDs <atoms>I J</atoms> in case of either bond or torsion, or as three atoms IDs <atoms>I J K</atoms> in case of valence angle. Alternatively translational/rotational vector can be defined as a raw vector using container <vector>X Y Z</vector>. In case of rotation specify also origin of rotation using container <origin>X Y Z<origin>. All shifts are relative reference geometry given in input GAMESS file. Container <move<ID1 ID2 ID3 …<move> specifies atoms to move.
Given these two files run the following command:
$ ./pes-scan.py -p -i test.inp -c test.xml
which will produce a set of «conf_ID.inp» files, which represent single points on a grid to scan. You can convert resulting files to pdb format with OpenBabel and load them with your favorite molecular visualization program to check the resulting geometries:
$ for f in conf*.inp; do babel -igamin $f -opdb `echo $f | sed 's/inp$/pdb/'`; done
Here is superimposed set of structures, generated using the sample files:
Supporting module «vector.py», needed by main script. Save it and put in the same directory where main script resides.
import math class vector3: def __init__(self, x=0, y=0, z=0): self.x=x self.y=y self.z=z def __add__(self, v): a = vector3(self.x+v.x, self.y+v.y, self.z+v.z) return a def __iadd__(self, v): self.x+=v.x self.y+=v.y self.z+=v.z return self def __sub__(self, v): a = vector3(self.x-v.x, self.y-v.y, self.z-v.z) return a def __rmul__(self, c): a = vector3(self.x*c, self.y*c, self.z*c) return a def __imul__(self, c): self.x*=c self.y*=c self.z*=c return self def __mul__(self, v): a = self.x*v.x+self.y*v.y+self.z*v.z return a def __pow__(self, v): a=vector3(self.y*v.z-self.z*v.y, -self.x*v.z+self.z*v.x, self.x*v.y-self.y*v.x) return a def norm(self): l = math.sqrt(self.x**2 + self.y**2 + self.z**2) return l def normalize(self): l = self.norm() self.x /= l self.y /= l self.z /= l # return self class matrix3x3: def __init__(self): self.m11=1 self.m12=0 self.m13=0 self.m21=0 self.m22=1 self.m23=0 self.m31=0 self.m32=0 self.m33=1 def __mul__(self, v): a = vector3(self.m11*v.x + self.m12*v.y + self.m13*v.z, self.m21*v.x + self.m22*v.y + self.m23*v.z, self.m31*v.x + self.m32*v.y + self.m33*v.z); return a def rot(self, v, t_deg): t=math.pi*t_deg/180.0 self.m11 = v.x*v.x*(1-math.cos(t))+math.cos(t) self.m12 = v.x*v.y*(1-math.cos(t))-v.z*math.sin(t) self.m13 = v.x*v.z*(1-math.cos(t))+v.y*math.sin(t) self.m21 = v.x*v.y*(1-math.cos(t))+v.z*math.sin(t) self.m22 = v.y*v.y*(1-math.cos(t))+math.cos(t) self.m23 = v.y*v.z*(1-math.cos(t))-v.x*math.sin(t) self.m31 = v.x*v.z*(1-math.cos(t))-v.y*math.sin(t) self.m32 = v.y*v.z*(1-math.cos(t))+v.x*math.sin(t) self.m33 = v.z*v.z*(1-math.cos(t))+math.cos(t)
Main script «pes-scan.py»
#!/usr/bin/python #################################### # Potential energy surface scanner # # by piton at erg.biophys.msu.ru # #################################### import os, sys, string, re, copy, threading, xml.dom.minidom from stat import * from optparse import OptionParser from vector import * VALID_COORD_TYPES=["shift", "angle", "torsion"] global sem, points_done, pd_lock, DEBUG, parse_only def getText(nodelist): rc = "" for node in nodelist: if node.nodeType == node.TEXT_NODE: rc = rc + node.data return rc class cScanDim: def __init__(self, type, start, stop, nsteps, move, vector = None, origin = None, atoms = None): if not type in VALID_COORD_TYPES: raise NameError, "Unknown coord type '"+type+"'" self.type=type if not ( (vector and origin) or atoms): raise NameError, "Either atoms or vector and origin should be defined" self.atoms=[] if vector and origin: self.origin=copy.deepcopy(origin) self.vector=copy.deepcopy(vector) else: for iat in atoms.split(): self.atoms.append(int(iat)-1) if ( len(self.atoms) != 2 and ( type == "shift" or type == "torsion" ) ) or (len(self.atoms) != 3 and type == "angle"): raise NameError, "Inconsistent coord type number of atoms" self.start=start self.stop=stop self.step=(self.stop-self.start)/nsteps self.vals=[] i=0 while i < nsteps: self.vals.append(self.start + i*self.step) i+=1 self.vals.append(self.stop) self.move=[] for at in move.split(): self.move.append(int(at)-1) def shift(self, mol, value): if len(self.atoms): if self.type == "angle": self.origin = mol.atoms[self.atoms[1]].vector self.vector = ( mol.atoms[self.atoms[0]].vector - self.origin ) ** ( mol.atoms[self.atoms[2]].vector - self.origin ) else: self.origin = mol.atoms[self.atoms[0]].vector self.vector = mol.atoms[self.atoms[1]].vector - self.origin self.vector.normalize() if self.type == "shift": for iat in self.move: mol.atoms[iat].vector += value * self.vector else: M = matrix3x3() M.rot(self.vector, value) for iat in self.move: mol.atoms[iat].vector = self.origin + M * ( mol.atoms[iat].vector - self.origin) return mol class cAtom: def __init__(self, name, znuc, x, y, z): self.name=name self.znuc=znuc self.vector=vector3(x,y,z) self.basis = "" class cMolecule: def __init__(self): self.atoms=[] def AddAtom(self, name, znuc, x, y, z): self.atoms.append( cAtom( name, znuc, x, y, z ) ) def thread_func(inpname, outname, point): global sem, points_done, pd_lock, parse_only if not parse_only: os.system('gamess -i '+inpname+' -o '+outname) if S_ISREG(os.stat(outname)[ST_MODE]): re_E = re.compile('^\s*FINAL\s+ENERGY\s+IS\s+(-?\d+\.\d+)\s+AFTER\s+\d+\s+ITERATIONS\s*$', re.IGNORECASE) out = open(outname, 'r') for line in out: res = re_E.search(line) if res: point.append(float(res.group(1))) out.close() pd_lock.acquire() points_done+=1 pd_lock.release() sem.release() ###################### # Parse command line # ###################### parser = OptionParser() parser.add_option('-i', dest='inp_file', type='string', help='GAMESS input file', metavar='FILE') parser.add_option('-c',dest='coord_file', help='file with coodinates to scan', metavar='FILE') parser.add_option('-o',dest='pes_file', help='output file', metavar='FILE') parser.add_option('-n',dest='np', help='number of processes to spawn', metavar='NP') parser.add_option('-p', dest='pretend', help='pretend, i.e. just generate input files', action='store_true',default=False) parser.add_option('-d','--debug', dest='debug', help='Enable extra output for debugging', action='store_true',default=False) parser.add_option('-l', dest='parse_only', help='only parse log files', action='store_true',default=False) parser.add_option('-s', dest='sync', help='vary coordinates synchronously', action='store_true',default=False) (opts, args) = parser.parse_args() parse_only = opts.parse_only if not opts.inp_file and not parse_only: print 'Please specify GAMESS input file' sys.exit(1) else: INP_FILE=opts.inp_file if not opts.coord_file: print 'Please specify file with coordinates definitions' sys.exit(1) else: COORD_FILE=opts.coord_file if not opts.pes_file and not opts.pretend: print 'Please specify output file' sys.exit(1) else: PES_FILE=opts.pes_file if not opts.np: # print 'Number of processes to run is not defined. Will do one point at a time.' NP=1 else: NP=int(opts.np) if NP<1: NP=1 DEBUG=opts.debug #sys.exit(0) ################# # Read molecule # ################# print "\nParsing input file..." sys.stdout.flush() f=open(INP_FILE, 'r') indata=0; gamess_stuff="" re_DATA = re.compile('^\s*\$DATA\s*$', re.IGNORECASE) re_END = re.compile('^\s*\$END\s*$', re.IGNORECASE) re_CART = re.compile('^\s*([^\s]+)\s+(\d+)\.?0\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s+(-?\d+\.\d+)\s*$') mol = cMolecule() iat=0 for line in f: if re_DATA.match(line): indata=1 if indata: res=re_CART.search(line) if res: mol.AddAtom(res.group(1), int(res.group(2)), float(res.group(3)), float(res.group(4)), float(res.group(5))) iat += 1 elif iat and not re_END.match(line): mol.atoms[iat-1].basis += line else: gamess_stuff = gamess_stuff + line if re_END.match(line): indata=0 f.close() if DEBUG: print "\nAtoms:" i=1 for atom in mol.atoms: print str(i) + ': ' + atom.name + ' ' + str(atom.znuc) + ' ' + str(atom.vector.x) + ' ' + str(atom.vector.y) + ' ' + str(atom.vector.z) i+=1 print '\n' #print gamess_stuff #for at in mol.atoms: # print at.name, at.znuc, at.vector.x, at.vector.y, at.vector.z ########################### # Generate list of coords # ########################### print "\nParsing coodinates 2 scan..." sys.stdout.flush() C=[] dom_top = xml.dom.minidom.parse(COORD_FILE) dom_coords = dom_top.getElementsByTagName("coord"); for dom_coord in dom_coords: coord_type = dom_coord.getAttribute("type") start_value = float(dom_coord.getAttribute("start")) stop_value = float(dom_coord.getAttribute("stop")) nsteps_value = int(dom_coord.getAttribute("nsteps")) dom_move = dom_coord.getElementsByTagName("move") if ( len(dom_move) != 1 ): raise NameError, "Multiple or none 'move' containers" move_list = getText(dom_move[0].childNodes) dom_atoms = dom_coord.getElementsByTagName("atoms") if len(dom_atoms): if len(dom_atoms) != 1: raise NameError, "Multiple 'atoms' containers" C.append( cScanDim(type=coord_type, start=start_value , stop=stop_value, nsteps=nsteps_value, move=move_list, atoms=getText(dom_atoms[0].childNodes)) ) else: if coord_type == "angle": raise NameError, "Coord type 'angle' needs to be defined using 'atoms' directive" dom_vector = dom_coord.getElementsByTagName("vector") dom_origin = dom_coord.getElementsByTagName("origin") if ( len(dom_vector) != 1 or len(dom_origin) != 1 ): raise NameError, "Multiple or none 'vector' or 'origin' containers" text_vector = getText(dom_vector[0].childNodes) text_origin = getText(dom_origin[0].childNodes) if ( len(text_vector.split()) != 3 or len(text_origin.split()) != 3 ): raise NameError, "Bad 'vector' or 'origin' definition" vector = vector3(float(text_vector.split()[0]), float(text_vector.split()[1]), float(text_vector.split()[2])) origin = vector3(float(text_origin.split()[0]), float(text_origin.split()[1]), float(text_origin.split()[2])) vector.normalize() C.append( cScanDim(type=coord_type, start=start_value , stop=stop_value , nsteps=nsteps_value, move=move_list, vector=vector, origin=origin)) # if DEBUG: # print # print coord_type, start_value, stop_value, nsteps_value # print "move_list: ", move_list # print "vector: ", vector.x, " ",vector.y, " ", vector.z # print "origin: ", origin.x, " ", origin.y, " ", origin.z if not len(C): raise NameError, "No coordinates for scan were defined" ################# # Generate mesh # ################# print '\n\nGenerating mesh...', sys.stdout.flush() if opts.sync: numpoints = len(C[0].vals) else: numpoints=1 for coord in C: numpoints *= len(coord.vals) print "Number of points: ", numpoints mesh=[] for i in range(0,numpoints): point=[] j=0 l=i while j < len(C): if opts.sync: if len(C[j].vals) != numpoints: raise NameError, "Inconsistent number of steps" point.append(C[j].vals[i]) else: k = l - l / len(C[j].vals) * len(C[j].vals) l = l / len(C[j].vals) point.append(C[j].vals[k]) j+=1 mesh.append(point) i+=1 print " Done" ######################## # Generate input files # ######################## if not parse_only: print "\n\nGenerating input files...", sys.stdout.flush() iconf=0 for point in mesh: conf=copy.deepcopy(mol) i=0 while i < len(point): conf = C[i].shift(conf, point[i]) i+=1 # write conformation inpname = 'conf_'+str(iconf).zfill(5)+'.inp' f=open(inpname, 'w') f.write('\n $DATA\nconformation #'+str(iconf)+'\nC1\n') for atom in conf.atoms: f.write(atom.name+' '+str(atom.znuc)+'.0 '+str(atom.vector.x)+' '+str(atom.vector.y)+' '+str(atom.vector.z)+'\n'+atom.basis) f.write(' $END\n') f.write(gamess_stuff) f.close() iconf += 1 print " Done" #################### # Calculate points # #################### if not opts.pretend: print "Starting calculations" sem = threading.Semaphore(NP) pd_lock = threading.Lock() points_done=0 thrd_list=[] for i in range(0, numpoints+NP): sem.acquire() print "\rProgress: "+str(points_done)+' / '+str(len(mesh)), sys.stdout.flush() if i < numpoints: inpname = 'conf_'+str(i).zfill(5)+'.inp' outname = 'conf_'+str(i).zfill(5)+'.log' thrd_list.append(threading.Thread(target=thread_func, args=(inpname, outname, mesh[i]))) thrd_list[len(thrd_list)-1].start() for thrd in thrd_list: thrd.join() ##################### # Print PES to file # ##################### log = open(PES_FILE, 'w') for point in mesh: for x in point: log.write(str(x)+' ') log.write('\n') log.close() print "Done"