+ minor changes
author fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 4 Feb 2008 15:51:54 +0000 (15:51 +0000)
committer fabio <fabio@e1793c9e-67f9-0310-80fc-b846ff1f7b36>
Mon, 4 Feb 2008 15:51:54 +0000 (15:51 +0000)
+ restored some dataset creation functionality
+ stored random but fixed init values in a pickle file

git-svn-id: http://svn.tuebingen.mpg.de/ag-raetsch/projects/QPalma@7681 e1793c9e-67f9-0310-80fc-b846ff1f7b36

qpalma/Configuration.py
qpalma/parsers.py
qpalma/tools/parseGff.py
scripts/compile_dataset.py
scripts/evaluation.py [new file with mode: 0644]
scripts/qpalma_main.py
tools/data_tools/filterReads.c

index a4c2876..c6f1ab2 100644 (file)
 
 import numpy.matlib
 import os.path
+import cPickle
 
-fixedParamQ = numpy.matlib.mat(
-[[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
-       [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
-       [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
-       [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
-       [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
-       [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
-       [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
-       [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
-       [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
-       [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
-       [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
-       [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
-       [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-       [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-       [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-       [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-       [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-       [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-       [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-       [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-       [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-       [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-       [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-       [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-       [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-       [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-       [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-       [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-       [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-       [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-       [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
-       [ 0.4177651 ], [ 0.01360547], [ 0.29069319]
-       ])
-
-fixedParam = numpy.matlib.mat([[ 0.62870709], [ 0.7012026 ], [ 0.60236784],
-  [ 0.51316259], [ 0.20220814], [ 0.70324863], [ 0.37218684], [ 0.82178927],
-  [ 0.60394866], [ 0.70371272], [ 0.07548074], [ 0.63412803], [ 0.97442266],
-  [ 0.13216791], [ 0.71041168], [ 0.2093887 ], [ 0.35227344], [ 0.3405142 ],
-  [ 0.69677236], [ 0.41673747], [ 0.564245  ], [ 0.37613432], [ 0.88805642],
-  [ 0.88691608], [ 0.69476752], [ 0.81659504], [ 0.17801859], [ 0.71048235],
-  [ 0.08188783], [ 0.54884803], [ 0.84039558], [ 0.6982093 ], [ 0.41686176],
-  [ 0.38568873], [ 0.29401347], [ 0.12704074], [ 0.30640858], [ 0.89578031],
-  [ 0.84621571], [ 0.11783439], [ 0.0944695 ], [ 0.34081575], [ 0.44157643],
-  [ 0.77847185], [ 0.04283567], [ 0.45107823], [ 0.89789891], [ 0.41045519],
-  [ 0.49073531], [ 0.29727627], [ 0.94711483], [ 0.24898204], [ 0.26181212],
-  [ 0.71760957], [ 0.60326883], [ 0.80887576], [ 0.09448718], [ 0.88064525],
-  [ 0.84317654], [ 0.48893703], [ 0.24847021], [ 0.84203596], [ 0.34104156],
-  [ 0.75604701], [ 0.91703057], [ 0.69325475], [ 0.61276969], [ 0.16335226],
-  [ 0.4684374 ], [ 0.16553371], [ 0.79594434], [ 0.6440283 ], [ 0.80922237],
-  [ 0.5349296 ], [ 0.31924316], [ 0.10960695], [ 0.40151062], [ 0.50473641],
-  [ 0.14812671], [ 0.73523169], [ 0.35141625], [ 0.80364238], [ 0.02128181],
-  [ 0.0061226 ], [ 0.34541924], [ 0.07694485], [ 0.05551339], [ 0.23087636],
-  [ 0.87016395], [ 0.31682377], [ 0.27375113], [ 0.72226332], [ 0.62914149],
-  [ 0.59236012], [ 0.2070238 ], [ 0.52390942], [ 0.11894098], [ 0.55725917],
-  [ 0.72706009], [ 0.087196  ], [ 0.04745082], [ 0.95636492], [ 0.31524576],
-  [ 0.79685218], [ 0.80386771], [ 0.70942604], [ 0.82869417], [ 0.26906569],
-  [ 0.51848039], [ 0.64169354], [ 0.07114973], [ 0.39249454], [ 0.07002803],
-  [ 0.94667567], [ 0.02252752], [ 0.01039039], [ 0.5721312 ], [ 0.06065969],
-  [ 0.69422476], [ 0.4310939 ], [ 0.03069099], [ 0.35969779], [ 0.18047331],
-  [ 0.4177651 ], [ 0.01360547], [ 0.29069319]])
+###############################################################################
+#
+# Load a random but fixed initial parameter vector; this makes debugging easier.
+#
+###############################################################################
 
-#fixedParamQ = numpy.matlib.mat(range(1300))
-#fixedParamQ = fixedParamQ.reshape((1300,1))
+fixedParamQ = cPickle.load(open('/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/randInitParam.pickle'))
 
 ###########################################################
 #
 # The parameters for the QPalma algorithm
 #
 #
-
 C = 1
 
+
+
 ###############################################################################
 # 
 # CHOOSING THE MODE 
@@ -323,8 +34,6 @@ C = 1
 #mode = 'normal'
 mode = 'using_quality_scores'
 
-
-
 ###############################################################################
 # 
 # When using quality scores our scoring function is defined as
@@ -424,21 +133,20 @@ prediction_end    = 2200
 
 joinp = os.path.join
 
-tmp_dir = '/fml/ag-raetsch/home/fabio/tmp/solexa_tmp'
-
-data_path      = '/fml/ag-raetsch/share/projects/qpalma/solexa'  
+tmp_dir              = '/fml/ag-raetsch/home/fabio/tmp/solexa_tmp'
+data_path            = '/fml/ag-raetsch/share/projects/qpalma/solexa'  
 
-dna_filename   = joinp(data_path,'allGenes.pickle')
-est_filename   = joinp(data_path,'remapped_solexa_data/map_best_hit.18.unambig')
-
-remapped_path  = joinp(data_path,'remapped_solexa_data')
-annot_path     = joinp(data_path,'annotation_data')
 original_path  = joinp(data_path,'original_solexa_data')
+annot_path     = joinp(data_path,'annotation_data')
+remapped_path  = joinp(data_path,'remapped_solexa_data')
 
-joinp(annot_path,'TAIR7_GFF3_genes_Chr1.gff_v1')
+dna_flat_fn    = joinp(data_path,'allGenes.pickle')
+gff_fn         = joinp(annot_path,'TAIR7_GFF3_genes_Chr1.gff_v1')
+filtered_fn    = joinp(data_path,'filteredReads_1_recent')
+remapped_fn    = joinp(remapped_path,'map_best_hit.18.unambig')
 
+dataset_fn = '/fml/ag-raetsch/home/fabio/svn/projects/QPalma/scripts/chr1_dataset.pickle'
 
-data_filename = ''
 ###############################################################################
 #
 # SANITY CHECKS
@@ -450,7 +158,7 @@ assert numAccSuppPoints    > 1
 assert numLengthSuppPoints > 1 
 assert numQualSuppPoints   > 1
 
-assert os.path.exists(dna_filename), 'DNA data does not exist!'
-assert os.path.exists(est_filename), 'EST/Reads data does not exist!'
-#assert os.path.exists(tair7_seq_filename), 'Sequence data does not exist!'
-
+assert os.path.exists(dna_flat_fn), 'DNA data does not exist!'
+assert os.path.exists(gff_fn), 'GFF annotation data does not exist!'
+assert os.path.exists(filtered_fn), 'Filtered reads data does not exist!'
+assert os.path.exists(remapped_fn), 'Remapped reads data does not exist!'
index 81ec5b7..a0b968d 100644 (file)
@@ -30,7 +30,7 @@ class FilteredReadParser(ReadParser):
    """
 
    def __init__(self,filename):
-      ReadParser.__init__(filename)
+      ReadParser.__init__(self,filename)
 
    def parseLine(self,line):
       """
@@ -44,11 +44,11 @@ class FilteredReadParser(ReadParser):
       read_size = int(read_size)
 
       prb = [ord(elem)-50 for elem in prb]
-      cal = [ord(elem)-64 for elem in cal]
+      cal_prb = [ord(elem)-64 for elem in cal_prb]
       chastity = [ord(elem)+10 for elem in chastity]
 
       p_start = int(p_start)
-      exons_stop = int(exon_stop)
+      exon_stop = int(exon_stop)
       exon_start = int(exon_start)
       p_stop = int(p_stop)
 
@@ -60,10 +60,20 @@ class FilteredReadParser(ReadParser):
    def next(self):
       for line in self.fh:
          line = line.strip()
-          yield self.parseLine(line)
+         yield self.parseLine(line)
 
       raise StopIteration
 
+   def parse(self):
+      """Return a dict mapping the 'id' of every parsed line to that line."""
+      parsed_by_id = {}
+      for raw_line in self.fh:
+         parsed = self.parseLine(raw_line)
+         key = parsed['id']
+         assert key not in parsed_by_id
+         parsed_by_id[key] = parsed
+
+      return parsed_by_id
 
 class RemappedReadParser(ReadParser):
    """
@@ -77,7 +87,7 @@ class RemappedReadParser(ReadParser):
    """
 
    def __init__(self,filename):
-      ReadParser.__init__(filename)
+      ReadParser.__init__(self,filename)
 
    def parseLine(self,line):
       """
@@ -96,7 +106,19 @@ class RemappedReadParser(ReadParser):
    def next(self):
       for line in self.fh:
          line = line.strip()
-          yield self.parseLine(line)
+         yield self.parseLine(line)
 
       raise StopIteration
 
+   def parse(self):
+      """
+      Parse all lines and return a dict mapping each 'id' to the list of
+      parsed lines carrying that id (an id may occur on several lines).
+      """
+      entries = {}
+      for elem in self.fh:
+         line_d = self.parseLine(elem)
+         # setdefault starts the list on the first occurrence of an id
+         entries.setdefault(line_d['id'], []).append(line_d)
+
+      return entries
index daeffe9..8e63918 100644 (file)
@@ -9,7 +9,8 @@ from PyGff import *
 import cPickle
 import copy
 
-def parse_gff(gff_fid):
+def parse_gff(gff_filename):
+   gff_fid = open(gff_filename)
    reader = csv.reader(gff_fid, delimiter='\t', quoting=csv.QUOTE_NONE)
 
    allGenes = {}
@@ -91,9 +92,9 @@ def parse_gff(gff_fid):
 
 
 def createGffPickle(annotFile,pickleFile):
-   gff_fid = open(annotFile)
+   #gff_fid = open(annotFile)
    pickle_fid = open(pickleFile,'w+')
-   allGenes = parse_gff(gff_fid)
+   allGenes = parse_gff(annotFile)
    #for key,val in allGenes.iteritems():
       #print key
    cPickle.dump(allGenes,pickle_fid)
index 51c5d9d..30b651c 100644 (file)
@@ -2,14 +2,23 @@
 # -*- coding: utf-8 -*-
 
 import sys
+import os
 import pdb
 import io_pickle
 
+import numpy
+from numpy.matlib import mat,zeros,ones,inf
+
+import qpalma
 import qpalma.tools
 from qpalma.tools.parseGff import parse_gff
 
 from qpalma.parsers import FilteredReadParser, RemappedReadParser
 
+from Genefinding import *
+
+from genome_utils import load_genomic
+
 help = """
 
    Usage of this script is:
@@ -48,10 +57,10 @@ info = """
 def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,dataset_file):
 
    assert os.path.exists(gff_file)
-   for file in dna_flat_files:
-      assert os.path.exists(file)
+   #for file in dna_flat_files:
+   #   assert os.path.exists(file)
 
-   assert os.path.exists(solexa_reads)
+   assert os.path.exists(filtered_reads)
    assert os.path.exists(remapped_reads)
 
    assert not os.path.exists(dataset_file), 'The data_file already exists!'
@@ -59,12 +68,12 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
    joinp = os.path.join
 
    # first read the gff file(s) and create pickled gene information
-   allGenes = parse_gff(gff_file,joinp(tmp_dir,'gff_info.pickle'))
+   allGenes = parse_gff(gff_file) #,joinp(tmp_dir,'gff_info.pickle'))
    
-   dna_filename = Conf.dna_filename
-   est_filename = Conf.est_filename
-
+   print 'parsing filtered reads'
    frp = FilteredReadParser(filtered_reads)
+
+   print 'parsing remapped reads'
    all_filtered_reads = frp.parse()
 
    rrp = RemappedReadParser(remapped_reads)
@@ -78,21 +87,27 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
    Ests = []
    Qualities = []
    SplitPositions = []
+   Ids = []
    
    # Iterate over all remapped reads in order to generate for each read an
    # training / prediction example
-   for reRead in all_remapped_reads:
-      currentId = reRead['id']
-      fRead = all_filtered_reads[currentId]
+   for id,currentFRead in all_filtered_reads.items():
+
+      try:
+         currentRReads = all_remapped_reads[id]
+      except:
+         currentRReads = None
+
+      gene_id = currentFRead['gene_id']
       currentGene = allGenes[gene_id]
 
       chrom             = 'chr1'
       genome_file       = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome/'
       sscore_filename   = '/fml/ag-raetsch/share/projects/genefinding/A_thaliana/jonas_new_genes/exp/sensors/don/output_SVMWD/pred/contig_1+' 
 
-      seq, acc, don, exons, est, qual, spos = process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename)
+      seq, acc, don, exons, est, qual, spos = process_read(currentRReads,currentFRead,currentGene,chrom,genome_file,sscore_filename)
       
-      if newExample == None:
+      if seq == '':
          continue
 
       Sequences.append(seq)
@@ -102,41 +117,45 @@ def compile_d(gff_file,dna_flat_files,filtered_reads,remapped_reads,tmp_dir,data
       Ests.append(est)
       Qualities.append(qual)
       SplitPositions.append(spos)
+      Ids.append(id)
 
+   dataset = {'Sequences':Sequences, 'Acceptors':Acceptors, 'Donors':Donors,\
+   'Exons':Exons, 'Ests':Ests, 'Qualities':Qualities,\
+   'SplitPositions':SplitPositions,'Ids':Ids}
 
-   dataset = {'Sequences':Sequences, 'Acceptors':Acceptors, 'Donors':Donors\
-   'Exons':Exons, 'Ests':Ests, 'Qualities':Qualities, 'SplitPositions':SplitPositions}
    # saving new dataset
    io_pickle.save(dataset_file,dataset)
 
 
-def process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename):
+def process_read(reReads,fRead,currentGene,chrom,genome_file,sscore_filename):
    """
    The purpose of this function is to create an example for QPalma 
    by using a 
 
    """
-
    # use matlab-style functions to access dna sequence
-   dna = load_genomic(chrom,'+',currentGene.start,currentGene.stop,genome_file,one_based=True)
-   dna = dna.lower()
-   currentSeq = dna
+   currentSeq = load_genomic(chrom,'+',currentGene.start,currentGene.stop,genome_file,one_based=True)
+   currentSeq = currentSeq.lower()
+
+   nil = ('','','','','','','')
 
    if len(currentSeq) < (currentGene.stop - currentGene.start):
-      return None
+      return nil
 
-   p_start = fRead['p_start']
-   exon_stop = fRead['exon_stop']
-   exon_start = fRead['exon_start']
-   p_stop = fRead['p_stop']
+   p_start     = fRead['p_start']
+   exon_stop   = fRead['exon_stop']
+   exon_start  = fRead['exon_start']
+   p_stop      = fRead['p_stop']
 
-   currentSeq = fRead['seq']
+   currentReadSeq = fRead['seq']
+   quality = fRead['prb']
+   spos = fRead['splitpos']
 
    assert p_start < exon_stop < exon_start < p_stop, 'Invalid Indices'
    assert exon_stop - p_start + p_stop - exon_start == 36, 'Invalid read size'
    assert p_stop - p_start >= 36
 
-   currentExons = zeros((2,2))
+   currentExons = zeros((2,2),dtype=numpy.int)
 
    currentExons[0,0] = p_start
    currentExons[0,1] = exon_stop
@@ -153,6 +172,8 @@ def process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename):
 
    # if we perform testing we cut a much wider region as we want to detect
    # how good we perform on a region
+   test = False
+
    if test:
       up_cut = up_cut-500
       if up_cut < 0:
@@ -177,17 +198,16 @@ def process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename):
    try:
       if not (currentSeq[int(currentExons[0,1])] == 'g' and\
       currentSeq[int(currentExons[0,1])+1] in ['c','t' ]):
-         continue
+         return nil
 
       if not (currentSeq[int(currentExons[1,0])-1] == 'g' and currentSeq[int(currentExons[1,0])-2] == 'a'):
-         continue
+         return nil
 
    except:
       pdb.set_trace()
 
    # now we want to use interval_query to get the predicted splice scores
    # trained on the TAIR sequence and annotation
-   from Genefinding import *
    
    interval_matrix = createIntArrayFromList([currentGene.start+up_cut,currentGene.start+down_cut])
    num_fields = 1
@@ -199,7 +219,6 @@ def process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename):
    assert num_hits <= len(currentSeq)
    
    #print 'Acceptor hits: %d' % num_hits
-
    pos = createIntArrayFromList([0]*num_hits)
    indices = createIntArrayFromList([0]*num_hits)
    scores = createDoubleArrayFromList([0]*num_hits*num_fields)
@@ -214,7 +233,6 @@ def process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename):
       acc[position] = scores[i]
 
    acc = acc[1:] + [-inf]
-   Acceptors.append(acc)
 
    del gf
 
@@ -239,13 +257,12 @@ def process_read(reRead,fRead,currentGene,chrom,genome_file,sscore_filename):
    except:
       pdb.set_trace()
 
-   don = [-inf] + currentDonors[:-1]
-
-   return seq, acc, don, currentExons, est, qual, spos
+   don = [-inf] + don[:-1]
 
+   return currentSeq, acc, don, currentExons, currentReadSeq, quality, spos
 
 def reverse_complement(seq):
-   map {'a':'t','c':'g','g':'c','t':'a'}
+   map {'a':'t','c':'g','g':'c','t':'a'}
 
    new_seq = [map[elem] for elem in seq]
    new_seq.reverse()
@@ -258,5 +275,4 @@ if __name__ == '__main__':
       print info
 
    assert len(sys.argv) == 6, help
-
-   compile_d(gff_file,dna_flat_files,solexa_reads,remapped_reads,dataset_file):
+   compile_d(gff_file,dna_flat_files,solexa_reads,remapped_reads,dataset_file)
diff --git a/scripts/evaluation.py b/scripts/evaluation.py
new file mode 100644 (file)
index 0000000..117682f
--- /dev/null
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import pdb
+import io_pickle
+
+def evaluate(dataset_fn,prediction_fn):
+   dataset = io_pickle.load(dataset_fn)
+   prediction = io_pickle.load(prediction_fn)
+
+   data = io_pickle.load(filename)
+
+   Sequences   = data['Sequences']
+   Acceptors   = data['Acceptors']
+   Donors      = data['Donors']
+   Exons       = data['Exons']
+   Ests        = data['Ests']
+   Qualities   = data['Qualities']
+   SplitPositions = data['SplitPositions']
+
+
+
+if __name__ == '__main__':
+   dataset_fn = sys.argv[1]
+   prediction_fn = sys.argv[2]
+
+   evaluate(dataset_fn,prediction_fn)
index f5438df..34ebda3 100644 (file)
@@ -51,6 +51,9 @@ class QPalma:
    def __init__(self):
       self.ARGS = Param()
 
+      from compile_dataset import compile_d
+
+      compile_d(Conf.gff_fn,Conf.dna_flat_fn,Conf.filtered_fn,Conf.remapped_fn,Conf.tmp_dir,Conf.dataset_fn)
       #gen_file= '%s/genome.config' % self.ARGS.basedir
       #ginfo_filename = 'genome_info.pickle'
       #self.genome_info = fetch_genome_info(ginfo_filename)
@@ -66,7 +69,7 @@ class QPalma:
       elif Conf.mode == 'using_quality_scores':
 
          #Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data_solexa('training',None,self.ARGS)
-         data_filename = Conf.data_filename
+         data_filename = Conf.dataset_fn
 
          Sequences, Acceptors, Donors, Exons, Ests, Qualities, SplitPos = paths_load_data(data_filename,'training',None,self.ARGS)
 
@@ -129,14 +132,6 @@ class QPalma:
       # Set the parameters such as limits penalties for the Plifs
       [h,d,a,mmatrix,qualityPlifs] = set_param_palma(param,self.ARGS.train_with_intronlengthinformation)
 
-      # delete splicesite-score-information
-      #if not self.ARGS.train_with_splicesitescoreinformation:
-      #   for i in range(len(Acceptors)):
-      #      if Acceptors[i] > -20:
-      #         Acceptors[i] = 1
-      #      if Donors[i] >-20:
-      #         Donors[i] = 1
-
       # Initialize solver 
       if Conf.USE_OPT:
          self.plog('Initializing problem...\n')
@@ -474,9 +469,11 @@ class QPalma:
       pdb.set_trace()
 
    def predict(self,param_filename,beg,end):
-
       self.logfh = open('_qpalma_predict.log','w+')
 
+      beg = Conf.prediction_begin
+      end = Conf.prediction_end
+
       Sequences   = self.Sequences[beg:end]
       Exons       = self.Exons[beg:end]
       Ests        = self.Ests[beg:end]
@@ -734,6 +731,7 @@ def fetch_genome_info(ginfo_filename):
    else:
       return cPickle.load(open(ginfo_filename))
 
+
 def plifs2param(h,d,a,mmatrix,qualityPlifs):
    donSP       = Conf.numDonSuppPoints
    accSP       = Conf.numAccSuppPoints
@@ -756,6 +754,7 @@ def plifs2param(h,d,a,mmatrix,qualityPlifs):
 
    return param
 
+
 def load_param(filename):
    param = None 
    #try:
@@ -769,6 +768,7 @@ def load_param(filename):
    param = cPickle.load(open(filename))
    return param
 
+
 def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign,spos):
    newExons = []
    oldElem = -1
@@ -806,6 +806,7 @@ def evaluateExample(dna,est,exons,SpliceAlign,newEstAlign,spos):
 
    return e1_b_off,e1_e_off,e2_b_off,e2_e_off,newExons
 
+
 def calcStat(Acceptor,Donor,Exons):
    maxAcc = -100
    minAcc = 100
@@ -852,8 +853,8 @@ def calcStat(Acceptor,Donor,Exons):
       maxDon = max(max(don),maxDon)
       minDon = min(min(don),minDon)
 
-if __name__ == '__main__':
 
+if __name__ == '__main__':
    qpalma = QPalma()
 
    if len(sys.argv) == 2:
index ece462a..5022c45 100644 (file)
@@ -51,7 +51,7 @@ int read_nr = 1;
 int combined_reads = 0;
 int main(int argc, char* argv[]) {
 
-   if(argc != 4) {
+   if(argc != 5) {
       printf("%s\n",info);
       exit(EXIT_FAILURE);
    }
@@ -70,6 +70,9 @@ int main(int argc, char* argv[]) {
    FILE *reads_fs = fopen(reads_filename,"r");
    FILE *out_fs = fopen(output_filename,"w");
 
+   read_nr = atoi(argv[4]);
+   read_nr++;
+
    if(gff_fs == NULL) {
       printf("Error: Could not open file: %s",gff_filename);
       exit(EXIT_FAILURE);
@@ -461,7 +464,7 @@ void combine_info(int exon_stop, int exon_start, void** upstream, int up_size, v
          // exons_stop  : the position in the dna where the first exons ends
          // exons_start : the position in the dna where the second exons starts
          // p_stop  : the position in the dna where the (truncated) second read ends
-         fprintf(out_fs,"%08d\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n",
+         fprintf(out_fs,"%d\t%d\t%c\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n",
          read_nr,new_chr,new_strand,new_seq,splitpos,read_size,new_prb,new_cal_prb,new_chastity,gene_id,p_start,exon_stop,exon_start,p_stop);
          read_nr++;