Sampling and writing to pym/rmf

1 Jul 2014


      Hello,
I'm relatively new to all this so please let me know if i'm making any
obvious errors ...
Essentially all i'm trying to do is generate an ensemble of models made
from four subunits - constrained by MS connectivity restraints. The models
get scored but nothing seems to write to the pymol file. Ideally i'd like
to write to an .rmf but i haven't worked that one out either ...
Is this a reasonable way to go about my problem ?
Many thanks,
Josh
-------------------------------------------
import IMP
import IMP.atom
import IMP.rmf
import inspect
import IMP.container
import IMP.display
import IMP.statistics
#import IMP.example
import sys, math, os, optparse
import RMF
from optparse import OptionParser
# Convert the arguments into strings and number
Firstpdb = str(sys.argv[1])
Secondpdb = str(sys.argv[2])
Thirdpdb = str(sys.argv[3])
Fourthpdb = str(sys.argv[4])
models = float(sys.argv[5])
#*****************************************
# the spring constant to use, it doesnt really matter
k=100
# the target resolution for the representation, this is used to specify how
detailed
# the representation used should be
resolution=300
# the box to perform everything
bb=IMP.algebra.BoundingBox3D(IMP.algebra.Vector3D(0,0,0),
                             IMP.algebra.Vector3D(300, 300, 300))
# this function creates the molecular hierarchies for the various involved
proteins
def create_representation():
    m= IMP.Model()
    all=IMP.atom.Hierarchy.setup_particle(IMP.Particle(m))
    all.set_name("the universe")
    # create a protein, represented as a set of connected balls of
appropriate
    # radii and number, chose by the resolution parameter and the number of
    # amino acids.
def create_protein_from_pdbs(name, files):
def create_from_pdb(file):
            sls=IMP.SetLogState(IMP.NONE)
            datadir = os.getcwd()
            print datadir
    t=IMP.atom.read_pdb( datadir+'/' + file, m,
                                 IMP.atom.ATOMPDBSelector())
            del sls
            #IMP.atom.show_molecular_hierarchy(t)
            c=IMP.atom.Chain(IMP.atom.get_by_type(t,
IMP.atom.CHAIN_TYPE)[0])
            if c.get_number_of_children()==0:
                IMP.atom.show_molecular_hierarchy(t)
            # there is no reason to use all atoms, just approximate the pdb
shape instead
            s=IMP.atom.create_simplified_along_backbone(c,
                                                        resolution/300.0)
            IMP.atom.destroy(t)
            # make the simplified structure rigid
            rb=IMP.atom.create_rigid_body(s)
#            rb=IMP.atom.create_rigid_body(c)
            rb.set_coordinates_are_optimized(True)
            return s
#            return c
h= create_from_pdb(files[0])
        h.set_name(name)
        all.add_child(h)
create_protein_from_pdbs("A", [Firstpdb])
    create_protein_from_pdbs("B", [Secondpdb])
    create_protein_from_pdbs("C", [Thirdpdb])
    create_protein_from_pdbs("D", [Fourthpdb])
    #create_protein_from_pdbs("C", ["rpt3_imp.pdb"])
    return (m, all)
# create the needed restraints and add them to the model
def create_restraints(m, all):
    def add_connectivity_restraint(s):
tr= IMP.core.TableRefiner()
        rps=[]
        for sc in s:
            ps= sc.get_selected_particles()
            rps.append(ps[0])
            tr.add_particle(ps[0], ps)
# duplicate the IMP.atom.create_connectivity_restraint functionality
score=
IMP.core.KClosePairsPairScore(IMP.core.HarmonicSphereDistancePairScore(0,1),tr)
r= IMP.core.MSConnectivityRestraint(m,score)
iA = r.add_type([rps[0]])
        iB = r.add_type([rps[1]])
        iC = r.add_type([rps[2]])
        iD = r.add_type([rps[3]])
        n1 = r.add_composite([iA, iB, iC, iD])
        n2 = r.add_composite([iA, iB], n1)
        n3 = r.add_composite([iC, iD], n1)
        n4 = r.add_composite([iB, iC, iD], n1)
m.add_restraint(r)
evr=IMP.atom.create_excluded_volume_restraint([all])
    m.add_restraint(evr)
    # a Selection allows for natural specification of what the restraints
act on
    S= IMP.atom.Selection
    sA=S(hierarchy=all, molecule="A")
    sB=S(hierarchy=all, molecule="B")
    sC=S(hierarchy=all, molecule="C")
    sD=S(hierarchy=all, molecule="D")
    add_connectivity_restraint([sA, sB, sC, sD])
# find acceptable conformations of the model
def get_conformations(m):
    sampler= IMP.core.MCCGSampler(m)
    sampler.set_bounding_box(bb)
    # magic numbers, experiment with them and make them large enough for
things to work
    sampler.set_number_of_conjugate_gradient_steps(100)
    sampler.set_number_of_monte_carlo_steps(20)
    sampler.set_number_of_attempts(models)
    # We don't care to see the output from the sampler
    sampler.set_log_level(IMP.SILENT)
    # return the IMP.ConfigurationSet storing all the found configurations
that
    # meet the various restraint maximum scores.
    cs= sampler.create_sample()
    return cs
# cluster the conformations and write them to a file
def analyze_conformations(cs, all, gs):
    # we want to cluster the configurations to make them easier to
understand
    # in the case, the clustering is pretty meaningless
    embed= IMP.statistics.ConfigurationSetXYZEmbedding(cs,
IMP.container.ListSingletonContainer(IMP.atom.get_leaves(all)), True)
    cluster= IMP.statistics.create_lloyds_kmeans(embed, 10, 10000)
    # dump each cluster center to a file so it can be viewed.
    for i in range(cluster.get_number_of_clusters()):
        center= cluster.get_cluster_center(i)
        cs.load_configuration(i)
        w= IMP.display.PymolWriter("cluster.%d.pym"%i)
        for g in gs:
            w.add_geometry(g)
#******************************************************************************************
# now do the actual work
(m,all)= create_representation()
IMP.atom.show_molecular_hierarchy(all)
create_restraints(m, all)
# in order to display the results, we need something that maps the
particles onto
# geometric objets. The IMP.display.Geometry objects do this mapping.
# IMP.display.XYZRGeometry map an IMP.core.XYZR particle onto a sphere
gs=[]
for i in range(all.get_number_of_children()):
    color= IMP.display.get_display_color(i)
    n= all.get_child(i)
    name= n.get_name()
    g= IMP.atom.HierarchyGeometry(n)
    g.set_color(color)
    gs.append(g)
cs= get_conformations(m)
print "found", cs.get_number_of_configurations(), "solutions"
ListScores = []
for i in range(0, cs.get_number_of_configurations()):
        cs.load_configuration(i)
        # print the configuration
        print "solution number: ",i,"scored :", m.evaluate(False)
        ListScores.append(m.evaluate(False))
f1 = open("out_scores.csv", "w")
f1.write("\n".join(map(lambda x: str(x), ListScores)))
f1.close()
# for each of the configuration, dump it to a file to view in pymol
for i in range(0, cs.get_number_of_configurations()):
    JOSH = cs.load_configuration(i)
    S= IMP.atom.Selection
    h= IMP.atom.Hierarchy.get_children(cs)
    tfn = IMP.create_temporary_file_name("josh%d"%i, ".rmf")
    rh = RMF.create_rmf_file(tfn)
# add the hierarchy to the file
    IMP.rmf.add_hierarchies(rh, h)
# add the current configuration to the file as frame 0
    IMP.rmf.save_frame(rh)
for g in gs:
        w.add_geometry(g)
analyze_conformations(cs, all, gs)

Josh Bullock

Barak Raveh

Ben Webb

tags (0)

participants (3)