Sampling and writing to pym/rmf
![](https://secure.gravatar.com/avatar/7e51df37f2de03575035da8fd0dbe37c.jpg?s=120&d=mm&r=g)
Hello,
I'm relatively new to all this so please let me know if i'm making any obvious errors ...
Essentially all i'm trying to do is generate an ensemble of models made from four subunits - constrained by MS connectivity restraints. The models get scored but nothing seems to write to the pymol file. Ideally i'd like to write to an .rmf but i haven't worked that one out either ...
Is this a reasonable way to go about my problem ?
Many thanks,
Josh
-------------------------------------------
import IMP import IMP.atom import IMP.rmf import inspect import IMP.container import IMP.display import IMP.statistics #import IMP.example import sys, math, os, optparse import RMF
from optparse import OptionParser
# Convert the arguments into strings and number Firstpdb = str(sys.argv[1]) Secondpdb = str(sys.argv[2]) Thirdpdb = str(sys.argv[3]) Fourthpdb = str(sys.argv[4]) models = float(sys.argv[5])
#*****************************************
# the spring constant to use, it doesnt really matter k=100 # the target resolution for the representation, this is used to specify how detailed # the representation used should be resolution=300 # the box to perform everything bb=IMP.algebra.BoundingBox3D(IMP.algebra.Vector3D(0,0,0), IMP.algebra.Vector3D(300, 300, 300))
# this function creates the molecular hierarchies for the various involved proteins def create_representation(): m= IMP.Model() all=IMP.atom.Hierarchy.setup_particle(IMP.Particle(m)) all.set_name("the universe") # create a protein, represented as a set of connected balls of appropriate # radii and number, chose by the resolution parameter and the number of # amino acids.
def create_protein_from_pdbs(name, files):
def create_from_pdb(file): sls=IMP.SetLogState(IMP.NONE) datadir = os.getcwd() print datadir t=IMP.atom.read_pdb( datadir+'/' + file, m, IMP.atom.ATOMPDBSelector()) del sls #IMP.atom.show_molecular_hierarchy(t) c=IMP.atom.Chain(IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)[0]) if c.get_number_of_children()==0: IMP.atom.show_molecular_hierarchy(t) # there is no reason to use all atoms, just approximate the pdb shape instead s=IMP.atom.create_simplified_along_backbone(c, resolution/300.0) IMP.atom.destroy(t) # make the simplified structure rigid rb=IMP.atom.create_rigid_body(s) # rb=IMP.atom.create_rigid_body(c) rb.set_coordinates_are_optimized(True) return s # return c
h= create_from_pdb(files[0]) h.set_name(name) all.add_child(h)
create_protein_from_pdbs("A", [Firstpdb]) create_protein_from_pdbs("B", [Secondpdb]) create_protein_from_pdbs("C", [Thirdpdb]) create_protein_from_pdbs("D", [Fourthpdb]) #create_protein_from_pdbs("C", ["rpt3_imp.pdb"]) return (m, all)
# create the needed restraints and add them to the model
def create_restraints(m, all): def add_connectivity_restraint(s):
tr= IMP.core.TableRefiner() rps=[] for sc in s: ps= sc.get_selected_particles() rps.append(ps[0]) tr.add_particle(ps[0], ps)
# duplicate the IMP.atom.create_connectivity_restraint functionality
score= IMP.core.KClosePairsPairScore(IMP.core.HarmonicSphereDistancePairScore(0,1),tr)
r= IMP.core.MSConnectivityRestraint(m,score)
iA = r.add_type([rps[0]]) iB = r.add_type([rps[1]]) iC = r.add_type([rps[2]]) iD = r.add_type([rps[3]]) n1 = r.add_composite([iA, iB, iC, iD]) n2 = r.add_composite([iA, iB], n1) n3 = r.add_composite([iC, iD], n1) n4 = r.add_composite([iB, iC, iD], n1)
m.add_restraint(r)
evr=IMP.atom.create_excluded_volume_restraint([all]) m.add_restraint(evr) # a Selection allows for natural specification of what the restraints act on S= IMP.atom.Selection sA=S(hierarchy=all, molecule="A") sB=S(hierarchy=all, molecule="B") sC=S(hierarchy=all, molecule="C") sD=S(hierarchy=all, molecule="D") add_connectivity_restraint([sA, sB, sC, sD])
# find acceptable conformations of the model def get_conformations(m): sampler= IMP.core.MCCGSampler(m) sampler.set_bounding_box(bb) # magic numbers, experiment with them and make them large enough for things to work sampler.set_number_of_conjugate_gradient_steps(100) sampler.set_number_of_monte_carlo_steps(20) sampler.set_number_of_attempts(models) # We don't care to see the output from the sampler sampler.set_log_level(IMP.SILENT) # return the IMP.ConfigurationSet storing all the found configurations that # meet the various restraint maximum scores. cs= sampler.create_sample() return cs
# cluster the conformations and write them to a file def analyze_conformations(cs, all, gs): # we want to cluster the configurations to make them easier to understand # in the case, the clustering is pretty meaningless embed= IMP.statistics.ConfigurationSetXYZEmbedding(cs,
IMP.container.ListSingletonContainer(IMP.atom.get_leaves(all)), True) cluster= IMP.statistics.create_lloyds_kmeans(embed, 10, 10000) # dump each cluster center to a file so it can be viewed. for i in range(cluster.get_number_of_clusters()): center= cluster.get_cluster_center(i) cs.load_configuration(i) w= IMP.display.PymolWriter("cluster.%d.pym"%i) for g in gs: w.add_geometry(g)
#****************************************************************************************** # now do the actual work
(m,all)= create_representation() IMP.atom.show_molecular_hierarchy(all) create_restraints(m, all)
# in order to display the results, we need something that maps the particles onto # geometric objets. The IMP.display.Geometry objects do this mapping. # IMP.display.XYZRGeometry map an IMP.core.XYZR particle onto a sphere gs=[] for i in range(all.get_number_of_children()): color= IMP.display.get_display_color(i) n= all.get_child(i) name= n.get_name() g= IMP.atom.HierarchyGeometry(n) g.set_color(color) gs.append(g)
cs= get_conformations(m)
print "found", cs.get_number_of_configurations(), "solutions"
ListScores = [] for i in range(0, cs.get_number_of_configurations()): cs.load_configuration(i) # print the configuration print "solution number: ",i,"scored :", m.evaluate(False) ListScores.append(m.evaluate(False))
f1 = open("out_scores.csv", "w") f1.write("\n".join(map(lambda x: str(x), ListScores))) f1.close()
# for each of the configuration, dump it to a file to view in pymol for i in range(0, cs.get_number_of_configurations()): JOSH = cs.load_configuration(i) S= IMP.atom.Selection h= IMP.atom.Hierarchy.get_children(cs) tfn = IMP.create_temporary_file_name("josh%d"%i, ".rmf") rh = RMF.create_rmf_file(tfn)
# add the hierarchy to the file IMP.rmf.add_hierarchies(rh, h)
# add the current configuration to the file as frame 0 IMP.rmf.save_frame(rh)
for g in gs: w.add_geometry(g)
analyze_conformations(cs, all, gs)
![](https://secure.gravatar.com/avatar/e40a56ed126a0d24557673b0ac7b03e2.jpg?s=120&d=mm&r=g)
Hi Josh, from a very superficial look, your code to write the RMF files seems fine - do you get an output RMF file? Could you load it in Chimera?
On Tue, Jul 1, 2014 at 2:40 AM, Josh Bullock jma.bullock@gmail.com wrote:
> Hello, > > I'm relatively new to all this so please let me know if i'm making any > obvious errors ... > > Essentially all i'm trying to do is generate an ensemble of models made > from four subunits - constrained by MS connectivity restraints. The models > get scored but nothing seems to write to the pymol file. Ideally i'd like > to write to an .rmf but i haven't worked that one out either ... > > Is this a reasonable way to go about my problem ? > > Many thanks, > > Josh > > ------------------------------------------- > > import IMP > import IMP.atom > import IMP.rmf > import inspect > import IMP.container > import IMP.display > import IMP.statistics > #import IMP.example > import sys, math, os, optparse > import RMF > > from optparse import OptionParser > > > # Convert the arguments into strings and number > Firstpdb = str(sys.argv[1]) > Secondpdb = str(sys.argv[2]) > Thirdpdb = str(sys.argv[3]) > Fourthpdb = str(sys.argv[4]) > models = float(sys.argv[5]) > > #***************************************** > > # the spring constant to use, it doesnt really matter > k=100 > # the target resolution for the representation, this is used to specify > how detailed > # the representation used should be > resolution=300 > # the box to perform everything > bb=IMP.algebra.BoundingBox3D(IMP.algebra.Vector3D(0,0,0), > IMP.algebra.Vector3D(300, 300, 300)) > > > # this function creates the molecular hierarchies for the various involved > proteins > def create_representation(): > m= IMP.Model() > all=IMP.atom.Hierarchy.setup_particle(IMP.Particle(m)) > all.set_name("the universe") > # create a protein, represented as a set of connected balls of > appropriate > # radii and number, chose by the resolution parameter and the number of > # amino acids. > > def create_protein_from_pdbs(name, files): > > def create_from_pdb(file): > sls=IMP.SetLogState(IMP.NONE) > datadir = os.getcwd() > print datadir > t=IMP.atom.read_pdb( datadir+'/' + file, m, > IMP.atom.ATOMPDBSelector()) > del sls > #IMP.atom.show_molecular_hierarchy(t) > c=IMP.atom.Chain(IMP.atom.get_by_type(t, > IMP.atom.CHAIN_TYPE)[0]) > if c.get_number_of_children()==0: > IMP.atom.show_molecular_hierarchy(t) > # there is no reason to use all atoms, just approximate the > pdb shape instead > s=IMP.atom.create_simplified_along_backbone(c, > resolution/300.0) > IMP.atom.destroy(t) > # make the simplified structure rigid > rb=IMP.atom.create_rigid_body(s) > # rb=IMP.atom.create_rigid_body(c) > rb.set_coordinates_are_optimized(True) > return s > # return c > > h= create_from_pdb(files[0]) > h.set_name(name) > all.add_child(h) > > create_protein_from_pdbs("A", [Firstpdb]) > create_protein_from_pdbs("B", [Secondpdb]) > create_protein_from_pdbs("C", [Thirdpdb]) > create_protein_from_pdbs("D", [Fourthpdb]) > #create_protein_from_pdbs("C", ["rpt3_imp.pdb"]) > return (m, all) > > # create the needed restraints and add them to the model > > def create_restraints(m, all): > def add_connectivity_restraint(s): > > tr= IMP.core.TableRefiner() > rps=[] > for sc in s: > ps= sc.get_selected_particles() > rps.append(ps[0]) > tr.add_particle(ps[0], ps) > > # duplicate the IMP.atom.create_connectivity_restraint > functionality > > score= > IMP.core.KClosePairsPairScore(IMP.core.HarmonicSphereDistancePairScore(0,1),tr) > > r= IMP.core.MSConnectivityRestraint(m,score) > > iA = r.add_type([rps[0]]) > iB = r.add_type([rps[1]]) > iC = r.add_type([rps[2]]) > iD = r.add_type([rps[3]]) > n1 = r.add_composite([iA, iB, iC, iD]) > n2 = r.add_composite([iA, iB], n1) > n3 = r.add_composite([iC, iD], n1) > n4 = r.add_composite([iB, iC, iD], n1) > > m.add_restraint(r) > > evr=IMP.atom.create_excluded_volume_restraint([all]) > m.add_restraint(evr) > # a Selection allows for natural specification of what the restraints > act on > S= IMP.atom.Selection > sA=S(hierarchy=all, molecule="A") > sB=S(hierarchy=all, molecule="B") > sC=S(hierarchy=all, molecule="C") > sD=S(hierarchy=all, molecule="D") > add_connectivity_restraint([sA, sB, sC, sD]) > > > # find acceptable conformations of the model > def get_conformations(m): > sampler= IMP.core.MCCGSampler(m) > sampler.set_bounding_box(bb) > # magic numbers, experiment with them and make them large enough for > things to work > sampler.set_number_of_conjugate_gradient_steps(100) > sampler.set_number_of_monte_carlo_steps(20) > sampler.set_number_of_attempts(models) > # We don't care to see the output from the sampler > sampler.set_log_level(IMP.SILENT) > # return the IMP.ConfigurationSet storing all the found configurations > that > # meet the various restraint maximum scores. > cs= sampler.create_sample() > return cs > > > # cluster the conformations and write them to a file > def analyze_conformations(cs, all, gs): > # we want to cluster the configurations to make them easier to > understand > # in the case, the clustering is pretty meaningless > embed= IMP.statistics.ConfigurationSetXYZEmbedding(cs, > > IMP.container.ListSingletonContainer(IMP.atom.get_leaves(all)), True) > cluster= IMP.statistics.create_lloyds_kmeans(embed, 10, 10000) > # dump each cluster center to a file so it can be viewed. > for i in range(cluster.get_number_of_clusters()): > center= cluster.get_cluster_center(i) > cs.load_configuration(i) > w= IMP.display.PymolWriter("cluster.%d.pym"%i) > for g in gs: > w.add_geometry(g) > > > > #****************************************************************************************** > # now do the actual work > > (m,all)= create_representation() > IMP.atom.show_molecular_hierarchy(all) > create_restraints(m, all) > > # in order to display the results, we need something that maps the > particles onto > # geometric objets. The IMP.display.Geometry objects do this mapping. > # IMP.display.XYZRGeometry map an IMP.core.XYZR particle onto a sphere > gs=[] > for i in range(all.get_number_of_children()): > color= IMP.display.get_display_color(i) > n= all.get_child(i) > name= n.get_name() > g= IMP.atom.HierarchyGeometry(n) > g.set_color(color) > gs.append(g) > > cs= get_conformations(m) > > print "found", cs.get_number_of_configurations(), "solutions" > > ListScores = [] > for i in range(0, cs.get_number_of_configurations()): > cs.load_configuration(i) > # print the configuration > print "solution number: ",i,"scored :", m.evaluate(False) > ListScores.append(m.evaluate(False)) > > f1 = open("out_scores.csv", "w") > f1.write("\n".join(map(lambda x: str(x), ListScores))) > f1.close() > > # for each of the configuration, dump it to a file to view in pymol > for i in range(0, cs.get_number_of_configurations()): > JOSH = cs.load_configuration(i) > S= IMP.atom.Selection > h= IMP.atom.Hierarchy.get_children(cs) > tfn = IMP.create_temporary_file_name("josh%d"%i, ".rmf") > rh = RMF.create_rmf_file(tfn) > > # add the hierarchy to the file > IMP.rmf.add_hierarchies(rh, h) > > # add the current configuration to the file as frame 0 > IMP.rmf.save_frame(rh) > > for g in gs: > w.add_geometry(g) > > analyze_conformations(cs, all, gs) > > > _______________________________________________ > IMP-users mailing list > IMP-users@salilab.org > https://salilab.org/mailman/listinfo/imp-users > >
![](https://secure.gravatar.com/avatar/cfe8857a24d561e8e700ab18e3ba7ec8.jpg?s=120&d=mm&r=g)
On 7/1/14, 2:40 AM, Josh Bullock wrote: > Essentially all i'm trying to do is generate an ensemble of models made > from four subunits - constrained by MS connectivity restraints. The > models get scored but nothing seems to write to the pymol file. Ideally > i'd like to write to an .rmf but i haven't worked that one out either ...
Most likely none of the attempts generated an acceptable model (i.e. with all restraints below the maximum values). Either your set of restraints is nonsensical (so they can't be satisfied) or you need to tweak the "magic numbers" in the script to give the optimizer more time to satisfy them.
Ben
participants (3)
-
Barak Raveh
-
Ben Webb
-
Josh Bullock