Dear users,
I'd tried to generate 5 models from Modeller v9.15 trough my model-multi.py script [1] and the program gave me this error:
guest@labimm-118:~/Documents/charilma/CpLAN$ mod9.15 model-multi.py
Could not find platform independent libraries <prefix>
Could not find platform dependent libraries <exec_prefix>
Consider setting $PYTHONHOME to <prefix>[:<exec_prefix>]
'import site' failed; use -v for traceback
Traceback (most recent call last):
File "model-multi.py", line 48, in ?
a.make()
File "/usr/lib/modeller9.15/modlib/modeller/automodel/automodel.py", line 110, in make
self.homcsr(exit_stage)
File "/usr/lib/modeller9.15/modlib/modeller/automodel/automodel.py", line 475, in homcsr
aln = self.read_alignment()
File "/usr/lib/modeller9.15/modlib/modeller/automodel/automodel.py", line 465, in read_alignment
aln.append(file=self.alnfile, align_codes=codes)
File "/usr/lib/modeller9.15/modlib/modeller/alignment.py", line 79, in append
allow_alternates)
_modeller.ModellerError:
read_al_373E> Protein specified in ALIGN_CODES(i) was not found in
the alignment file; ALIGN_CODES( 4) = G8EW14.fasta
As
I'm sending my alignment file [2] and my template file [3], I wonder if
there is anyone that could help me to circumvent this error.
Regards.
[1] model-multi.py
# -*- coding: utf-8 -*-
# File: model-multi.py
# Reading the ali file and generating 5 model
from modeller import *
from modeller.automodel import *
from modeller.scripts import complete_pdb
log.verbose()
env = environ()
# Give less weight to all soft-sphere restraints:
env.schedule_scale = physical.values(default=1.0, soft_sphere=0.7)
#Considering heteroatoms and waters molecules
env.io.hetatm = env.io.water = True
# Directories with input atom files:
env.io.atom_files_directory = './:../atom_files'
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')
# Modelling 'sequence' with file.ali
a = automodel(env, alnfile='CpLANcab.ali',
knowns=('4LXJ','4K0F','4WMZ'),
sequence=('G8EW14.fasta'),
# assess_methods=(assess.DOPE,
# assess.normalized_dope,
# assess.GA341))
assess_methods= (assess.DOPE, assess.normalized_dope, assess.GA341)
)
# Generating 5 models
a.starting_model = 1
a.ending_model = 5
# Very thorough Variable Target Function Method (VTFM) optimization:
a.library_schedule = autosched.slow
a.max_var_iterations = 300
# Thorough MD optimization:
a.md_level = refine.slow
# Repeat the whole cycle 2 times and do not stop unless obj.func. > 1E6
a.repeat_optimization = 2
a.max_molpdf = 1e6
a.make()
# Get clusters
a.cluster(cluster_cut=1.00)
# END OF MODEL CONSTRUCTION
# PRINT RESULTS
# Open a file
fo = open("model-multi.out", "w")
# Get a list of all successfully built models from a.outputs
ok_models = filter(lambda x: x['failure'] is None, a.outputs)
# Printing out a summary of all successfully generated models
print >> fo, '\n>> Summary of successfully produced model'
fields = [x for x in ok_models[0].keys() if x.endswith(' score')]
fields.sort()
fields = ['molpdf'] + fields
header = '%-25s ' % 'Filename' + " ".join(['%14s' % x for x in fields])
print >> fo, header
print >> fo, '-' * len(header)
for mdl in ok_models:
text = '%-25s' % mdl['name']
for field in fields:
if isinstance(mdl[field], (tuple, list)):
text = text + ' %14.5f' % mdl[field][0]
else:
text = text + ' %14.5f' % mdl[field]
print >> fo, text
print >> fo, ''
# Printing top model results
print >> fo, '>> Top model results:'
# Rank models by molpdf score
key = 'molpdf'
ok_models.sort(lambda a,b: cmp(a[key], b[key]))
# Get top model - molpdf
m = ok_models[0]
print "Top model_molpdf: %s (molpdf %.3f)" % (m['name'], m[key])
print >> fo, 'molpdf: ', m[key], '(file: ', m['name'], ')'
# Rank models by DOPE score
key = 'DOPE score'
ok_models.sort(lambda a,b: cmp(a[key], b[key]))
# Get top model - DOPE
m = ok_models[0]
print "Top model_DOPE: %s (DOPE score %.3f)" % (m['name'], m[key])
print >> fo, 'DOPE score: ', m[key], '(file: ', m['name'], ')'
# Rank models by normalized DOPE score
key = 'GA341 score'
ok_models.sort(lambda a,b: cmp(a[key], b[key]))
# Get top model - normalized DOPE
m = ok_models[0]
print "Top model_GA341: %s (GA341 score %.3f)" % (m['name'], m[key][0])
print >> fo, 'GA341 score: ', m[key][0], '(file: ', m['name'], ')'
# Rank models by normalized DOPE score
key = 'Normalized DOPE score'
ok_models.sort(lambda a,b: cmp(a[key], b[key]))
# Get top model - normalized DOPE
m = ok_models[0]
print "Top model_nDOPE (z): %s (Normalized DOPE score %.3f)" % (m['name'], m[key])
print >> fo, 'Normalized DOPE score: ', m[key], '(file: ', m['name'], ')'
# Read a model previously generated by Modeller's automodel class
mdl = complete_pdb(env, './cluster.opt')
# Select all atoms in the first chain
atmsel = selection(mdl)
score = atmsel.assess_dope()
zscore = mdl.assess_normalized_dope()
score2 = mdl.assess_ga341()
# Printing assess results
print >> fo, '\n>> Cluster results:'
fo2 = open("cluster.opt", "r")
lines = [ i.rstrip() for i in fo2.readlines()]
# 3rd line
print >> fo, lines[1], '(molpdf)'
print >> fo, 'DOPE score: ', score
print >> fo, 'GA341 score: ', score2[0]
print >> fo, 'Normalized DOPE score: ', zscore
# Close opened file
fo.close()
#END OF PRINT RESULTS
[2] CpLANcab.ali
>P1;4LXJ
structureX:4LXJ:
6 :A:+715 :A:MOL_ID 1; MOLECULE LANOSTEROL 14-ALPHA DEMETHYLASE;
CHAIN A; SYNONYM CYPLI, CYTOCHROME P450 51, CYTOCHROME P450-14DM, C
P450-LIA1, STEROL 14-ALPHA DEMETHYLASE; EC 1.14.13.70; ENGINEERED
YES:MOL_ID 1; ORGANISM_SCIENTIFIC SACCHAROMYCES CEREVISIAE;
ORGANISM_COMMON BAKER'S YEAST; ORGANISM_TAXID 4932; GENE ERG11,
CYP51, YHR007C; EXPRESSION_SYSTEM SACCHAROMYCES CEREVISIAE;
EXPRESSION_SYSTEM_TAXID 4932: 1.90: 0.20
MSATKSIVGEALEYVNIGLSH-FLALPLAQRISLIII----IPFIYNIVWQLLYSLRKDRPPLVFYWIPWVGSAV
VYGMKPYEFFEECQKKYGDIFSFVLLGRVMTVYLGPKGHEFVFNAKLADVSAEAAYAHLTTPVFGKGVIYDCPNS
RLMEQKKFVKGALTKEAFKSYVPLIAEEVYKYFRDSKNFRLNERTTGTIDVMVTQPEMTIFTASRSLLGKEMRAK
LDTDFAYLYSDLDKGFTPINFVFPNLPLEHYRKRDHAQKAISGTYMSLIKERRKNNDIQDRDLIDSLMKNSTYKD
GVKMTDQEIANLLIGVLMGGQHTSAATSAWILLHLAERPDVQQELYEEQMRVL---DGGKKELTYDLLQEMPLLN
QTIKETLRMHHPLHSLFRKVMKDMHVP--------NTSYVIPAGYHVLVSPGYTHLRDEYFPNAHQFNIHRWNND
SASS------YSVGEEVDYGFGAISKGVSSPYLPFGGGRHRCIGEHFAYCQLGVLMSIFIRTLKWHYPEGKTVPP
PDFTSMVTLPTGPAKIIWEKRNPEQKIGGRH---HH*
>P1;4K0F
structureX:4K0F:
6 :A:+655 :A:MOL_ID 1; MOLECULE LANOSTEROL 14-ALPHA DEMETHYLASE;
CHAIN A; ENGINEERED YES:MOL_ID 1; ORGANISM_SCIENTIFIC SACCHAROMYCES
CEREVISIAE; ORGANISM_COMMON BAKER'S YEAST; ORGANISM_TAXID 307796;
STRAIN YJM789; GENE ERG11, SCY_2394; EXPRESSION_SYSTEM SACCHAROMYCES
CEREVISIAE; EXPRESSION_SYSTEM_TAXID 4932: 2.19: 0.20
MSATKSIVGEALEYVNIGLSH-FLALPLAQRISLIII----IPFIYNIVWQLLYSLRKDRPPLVFYWIPWVGSAV
VYGMKPYEFFEECQKKYGDIFSFVLLGRVMTVYLGPKGHEFVFNAKLADVSAEAAYAHLTTPVFGKGVIYDCPNS
RLMEQKKFVKGALTKEAFKSYVPLIAEEVYKYFRDSKNFRLNERTTGTIDVMVTQPEMTIFTASRSLLGKEMRAK
LDTDFAYLYSDLDKGFTPINFVFPNLPLEHYRKRDHAQKAISGTYMSLIKERRKNNDIQDRDLIDSLMKNSTYKD
GVKMTDQEIANLLIGVLMGGQHTSAATSAWILLHLAERPDVQQELYEEQMRVL---DGGKKELTYDLLQEMPLLN
QTIKETLRMHHPLHSLFRKVMKDMHVP--------NTSYVIPAGYHVLVSPGYTHLRDEYFPNAHQFNIHRWNND
SASS------YSVGEEVDYGFGAISKGVSSPYLPFGGGRHRCIGEHFAYCQLGVLMSIFIRTLKWHYPEGKTVPP
PDFTSMVTLPTGPAKIIWEKRNPEQKIGGRHHHHHH*
>P1;4WMZ
structureX:4WMZ:
7 :A:+684 :A:MOL_ID 1; MOLECULE LANOSTEROL 14-ALPHA DEMETHYLASE;
CHAIN A; ENGINEERED YES:MOL_ID 1; ORGANISM_SCIENTIFIC SACCHAROMYCES
CEREVISIAE; ORGANISM_COMMON BAKER'S YEAST; ORGANISM_TAXID 307796;
STRAIN YJM789; GENE ERG11, SCY_2394; EXPRESSION_SYSTEM SACCHAROMYCES
CEREVISIAE; EXPRESSION_SYSTEM_TAXID 4932; EXPRESSION_SYSTEM_STRAIN
AD2DELTA: 2.05: 0.20
MSATKSIVGEALEYVNIGLSH-FLALPLAQRISLIII----IPFIYNIVWQLLYSLRKDRPPLVFYWIPWVGSAV
VYGMKPYEFFEECQKKYGDIFSFVLLGRVMTVYLGPKGHEFVFNAKLADVSAEAAYAHLTTPVFGKGVIYDCPNS
RLMEQKKFVKGALTKEAFKSYVPLIAEEVYKYFRDSKNFRLNERTTGTIDVMVTQPEMTIFTASRSLLGKEMRAK
LDTDFAYLYSDLDKGFTPINFVFPNLPLEHYRKRDHAQKAISGTYMSLIKERRKNNDIQDRDLIDSLMKNSTYKD
GVKMTDQEIANLLIGVLMGGQHTSAATSAWILLHLAERPDVQQELYEEQMRVL---DGGKKELTYDLLQEMPLLN
QTIKETLRMHHPLHSLFRKVMKDMHVP--------NTSYVIPAGYHVLVSPGYTHLRDEYFPNAHQFNIHRWNND
SASS------YSVGEEVDYGFGAISKGVSSPYLPFGGGRHRCIGEHFAYCQLGVLMSIFIRTLKWHYPEGKTVPP
PDFTSMVTLPTGPAKIIWEKRNPEQKIGGRHHHHHH*
>P1;G8EW14
sequence:G8EW14.fasta:::::::0.00: 0.00
MSAIIPQVQQLLGQVAQFFPPWFAALPTSLKVAIAVVGIPALIIGLNVFQQLCLPRKKDLPPVVFHYIPWFGSAA
YYGENPYKFLFECRDKYGDLFTFILMGRRITVALGPKGNNLSLGGKISQVSAEEAYTHLTTPVFGKGVVYDCPNE
MLMQQKKFIKSGLTTESLQSYPPMITSECEDFFTKEVGIS-PQKPSATLDLLKAMSELIILTASRTLQGKEVRES
LNGQFAKYYEDLDGGFTPLNFMFPNLPLPSYKRRDEAQKAMSDFYLKIMENRRKGESDHEHDMIENL-QSCKYRN
GVPLSDRDIAHIMIALLMAGQHTSSATSSWTLLHLADRPDVVEALYQEQKQKLGNPDGTFRDYRYEDLKELPIMD
SIIRETLRMHAPIHSIYRKVLSDIPVPPSLSAPSENGQYIIPKGHYIMAAPGVSQMDPRIWQDAKVWNPARWHDE
KGFAAAAMVQYTKAEQVDYGFGSVSKGTESPYQPFGAGRHRCVGEQFAYTQLSTIFTYVVRNFTLKLAVPK-FPE
TNYRTMIVQPNNPL-VTFTLRNAEVKQEV-------*
[3] G8EW14.fasta
>G8EW14:A|PDBID|CHAIN|SEQUENCE
MSAIIPQVQQLLGQVAQFFPPWFAALPTSLKVAIAVVGIPALIIGLNVFQQLCLPRKKDLPPVVFHYIPWFGSAAYYGEN
PYKFLFECRDKYGDLFTFILMGRRITVALGPKGNNLSLGGKISQVSAEEAYTHLTTPVFGKGVVYDCPNEMLMQQKKFIK
SGLTTESLQSYPPMITSECEDFFTKEVGISPQKPSATLDLLKAMSELIILTASRTLQGKEVRESLNGQFAKYYEDLDGGF
TPLNFMFPNLPLPSYKRRDEAQKAMSDFYLKIMENRRKGESDHEHDMIENLQSCKYRNGVPLSDRDIAHIMIALLMAGQH
TSSATSSWTLLHLADRPDVVEALYQEQKQKLGNPDGTFRDYRYEDLKELPIMDSIIRETLRMHAPIHSIYRKVLSDIPVP
PSLSAPSENGQYIIPKGHYIMAAPGVSQMDPRIWQDAKVWNPARWHDEKGFAAAAMVQYTKAEQVDYGFGSVSKGTESPY
QPFGAGRHRCVGEQFAYTQLSTIFTYVVRNFTLKLAVPKFPETNYRTMIVQPNNPLVTFTLRNAEVKQEV