recursive-include ParaParser *.cpp *.h *.i Makefile
recursive-include DynProg *.cpp *.h *.i Makefile
recursive-include doc *.tex Makefile
+recursive-include scripts *.py
+recursive-include qpalma *.py
+recursive-include tests *.py
include test.conf
\begin{enumerate}
\item Install the Pythongrid and the Genefinding tool packages
-\item Update your PYTHONPATH variable to poin to the above packages
+\item Update your PYTHONPATH variable to point to the above packages
\item Unpack the QPalma tarball via
\item[$\rightarrow$] tar -xzvf QPalma-1.0.tar.gz
\item Enter the QPalma-1.0 directory and type:
import sys
import os
+import pdb
+from numpy import inf
-from qpalma.sequence_utils import SeqSpliceInfo,get_flat_file_size,DataAccessWrapper
+from qpalma.sequence_utils import SeqSpliceInfo,get_flat_file_size,DataAccessWrapper,reverse_complement
class LookupTable:
accessWrapper = DataAccessWrapper(settings)
self.seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
- self.strands = ['+','-']
-
# take care that it also works say if we get a chromo_list [1,3,5]
# those are mapped then to 0,1,2
self.chromo_list = chromo_list
self.acceptorScores = {}
self.donorScores = {}
- for strandId in self.strands:
- self.genomicSequences[strandId] = [0]*(len(self.chromo_list))
- self.acceptorScores[strandId] = [0]*(len(self.chromo_list))
- self.donorScores[strandId] = [0]*(len(self.chromo_list))
+ self.genomicSequences = [0]*(len(self.chromo_list))
+ self.acceptorScoresPos = [0]*(len(self.chromo_list))
+ self.donorScoresPos = [0]*(len(self.chromo_list))
+ self.acceptorScoresNeg = [0]*(len(self.chromo_list))
+ self.donorScoresNeg = [0]*(len(self.chromo_list))
for chrId in self.chromo_list:
- for strandId in self.strands:
- print (chrId,strandId)
- self.prefetch_seq_and_scores(chrId,strandId)
+ self.prefetch_seq_and_scores(chrId)
- def get_seq_and_scores(self,chromo,strand,start,stop):
+ def get_seq_and_scores(self,chromo,strand,_start,_stop):
assert chromo in self.chromo_list
- assert strand in self.strands
+
+ if strand == '-':
+ total_size = self.seqInfo.getFragmentSize(chromo)
+ startP = total_size - _stop
+ stopP = total_size - _start
+ stop = stopP
+ start = startP
assert start <= stop
chromo_idx = self.chromo_map[chromo]
- start -= 1
- stop -= 1
-
- currentSequence = self.genomicSequences[strand][chromo_idx][start:stop]
-
- if strand == '-':
- start += 2
- stop += 2
-
- currentAcceptorScores = self.acceptorScores[strand][chromo_idx][start:stop]
- currentDonorScores = self.donorScores[strand][chromo_idx][start:stop]
+ currentSequence = self.genomicSequences[chromo_idx][start:stop]
+ if strand == '+':
+ currentAcceptorScores = self.acceptorScoresPos[chromo_idx][start:stop]
+ currentDonorScores = self.donorScoresPos[chromo_idx][start:stop]
+ elif strand == '-':
+ currentAcceptorScores = self.acceptorScoresNeg[chromo_idx][_start:_stop]
+ currentDonorScores = self.donorScoresNeg[chromo_idx][_start:_stop]
if strand == '-':
- currentSequence = currentSequence[::-1]
- currentAcceptorScores.reverse()
- currentDonorScores.reverse()
+ currentSequence = reverse_complement(currentSequence)
return currentSequence, currentAcceptorScores, currentDonorScores
- def prefetch_seq_and_scores(self,chromo,strand):
+ def prefetch_seq_and_scores(self,chromo):
"""
This function expects an interval, chromosome and strand information and
returns then the genomic sequence of this interval and the associated scores.
"""
assert chromo in self.chromo_list
- assert strand in self.strands
- chromo_idx = self.chromo_map[chromo]
+ genomicSeq_start = 0
+ genomicSeq_stop = self.seqInfo.getFragmentSize(chromo)
- genomicSeq_start = 1
- genomicSeq_stop = self.seqInfo.chromo_sizes[chromo]-1
+ print 'lt total_size %d' % self.seqInfo.getFragmentSize(chromo)
- print 'lt total_size %d' % self.seqInfo.chromo_sizes[chromo]
+ strand = '+'
+ genomicSeq,currentAcc,currentDon = self.seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop,False,False)
+ print len(currentAcc),len(currentDon)
+ genomicSeq = genomicSeq.lower()
- if strand == '-':
- genomicSeq_stop = self.seqInfo.chromo_sizes[chromo]+1
+ chromo_idx = self.chromo_map[chromo]
+
+ self.genomicSequences[chromo_idx] = genomicSeq
+ self.acceptorScoresPos[chromo_idx] = currentAcc
+ self.donorScoresPos[chromo_idx] = currentDon
+
+ strand = '-'
genomicSeq,currentAcc,currentDon = self.seqInfo.get_seq_and_scores(chromo,strand,genomicSeq_start,genomicSeq_stop,False,False)
+ print len(currentAcc),len(currentDon)
genomicSeq = genomicSeq.lower()
- if strand == '-':
- genomicSeq = genomicSeq[::-1]
- currentAcc.reverse()
- currentDon.reverse()
+ newCurrentAcc = [-inf]
+ newCurrentAcc.extend(currentAcc[:-1])
+ currentAcc = newCurrentAcc
+ newCurrentDon = [-inf]
+ newCurrentDon.extend(currentDon[:-1])
+ currentDon = newCurrentDon
+ print len(currentAcc),len(currentDon)
+
+ self.acceptorScoresNeg[chromo_idx] = currentAcc
+ self.donorScoresNeg[chromo_idx] = currentDon
- self.genomicSequences[strand][chromo_idx] = genomicSeq
- self.acceptorScores[strand][chromo_idx] = currentAcc
- self.donorScores[strand][chromo_idx] = currentDon
settings['training_dir'] = jp(result_dir, 'training')
settings['alignment_dir'] = jp(result_dir, 'alignment')
+ # the global logfile will be created in the result dir
+ settings['global_log_fn'] = jp(result_dir, 'qpalma.log')
+
for dir_name in ['approximation_dir','dataset_dir', 'preproc_dir', 'postproc_dir',\
'prediction_dir', 'training_dir', 'alignment_dir']:
try:
self.result_files.append(result_fn)
current_job = KybJob(gridtools.PostProcessingTaskStarter,[self.settings,chunk_fn,result_fn])
- current_job.h_vmem = '15.0G'
+ current_job.h_vmem = '2.0G'
current_job.express = 'True'
print "job #1: ", current_job.nativeSpecification
# we do not need the full dataset anymore
del dataset
- # Load parameter vector to predict with
- param = cPickle.load(open(settings['prediction_param_fn']))
-
- self.predict(prediction_set,param,settings)
+ self.predict(prediction_set,settings)
- def predict(self,prediction_set,param,settings):
+ def predict(self,prediction_set,settings):
"""
This method...
"""
+ # Load parameter vector to predict with
+ param = cPickle.load(open(settings['prediction_param_fn']))
+
# Set the parameters such as limits/penalties for the Plifs
[h,d,a,mmatrix,qualityPlifs] =\
set_param_palma(param,self.ARGS.train_with_intronlengthinformation,settings)
print 'Error occurred while trying to obtain file size'
raise DataAccessionException
- return int(out)
+ # subtract one because of wc -c output
+ return int(out)-1
def reverse_complement(seq):
self.sscore_fmt = settings['splice_score_file_fmt']
- def get_genomic_fragment_fn(self,id,strand):
- if strand == '+':
- return os.path.join(self.genomic_dir,self.genomic_fmt%id)
- elif strand == '-':
- return os.path.join(self.genomic_dir,(self.genomic_fmt%id)+'.neg')
- else:
- assert False
+ def get_genomic_fragment_fn(self,id):
+ return os.path.join(self.genomic_dir,self.genomic_fmt%id)
def get_splice_site_scores_fn(self,id,strand):
"""
- def __init__(self,accessWrapper,id_list):
+ def __init__(self,accessWrapper,fragment_list):
self.accessWrapper = accessWrapper
self.chromo_sizes = {}
+
+ # take care that it also works say if we get a chromo_list [1,3,5]
+ # those are mapped then to 0,1,2
+ self.fragment_list = fragment_list
+ self.chromo_map = {}
+ for idx,elem in enumerate(fragment_list):
+ self.chromo_map[elem] = idx
print "Fetching fragment sizes..."
- for chromo in id_list:
- filename = self.accessWrapper.get_genomic_fragment_fn(chromo,'+')
+ for id in fragment_list:
+ filename = self.accessWrapper.get_genomic_fragment_fn(id)
total_size = get_flat_file_size(filename)
- self.chromo_sizes[chromo] = total_size
+ self.chromo_sizes[self.chromo_map[id]] = total_size
print "done"
- def getSpliceScores(self,chromo,strand,intervalBegin,intervalEnd,genomicSeq=''):
+ def getFragmentSize(self,id):
+ return self.chromo_sizes[self.chromo_map[id]]
+
+
+ def getSpliceScores(self,id,strand,intervalBegin,intervalEnd,genomicSeq=''):
"""
Now we want to use interval_query to get the predicted splice scores trained
on the TAIR sequence and annotation.
pos_size = new_intp()
intp_assign(pos_size,1)
- total_size = self.chromo_sizes[chromo]
+ total_size = self.getFragmentSize(id)
- acc_fn, don_fn = self.accessWrapper.get_splice_site_scores_fn(chromo,strand)
+ acc_fn, don_fn = self.accessWrapper.get_splice_site_scores_fn(id,strand)
# fetch acceptor scores
- acc = doIntervalQuery(chromo,strand,intervalBegin,intervalEnd,acc_fn,total_size)
+ acc = doIntervalQuery(id,strand,intervalBegin,intervalEnd,acc_fn,total_size)
# fetch donor scores
- don = doIntervalQuery(chromo,strand,intervalBegin,intervalEnd,don_fn,total_size)
+ don = doIntervalQuery(id,strand,intervalBegin,intervalEnd,don_fn,total_size)
return acc, don
def fetch_sequence(self,chromo,strand,genomicSeq_start,genomicSeq_stop):
- #chromo_string = 'chr%d' % chromo
- #genomicSeq = load_genomic(chromo_string,strand,genomicSeq_start,genomicSeq_stop,self.flat_files,one_based=False)
- filename = self.accessWrapper.get_genomic_fragment_fn(chromo,strand)
- #print filename,genomicSeq_start,genomicSeq_stop
+ filename = self.accessWrapper.get_genomic_fragment_fn(chromo)
genomicSeq = my_load_genomic(filename,'+',genomicSeq_start,genomicSeq_stop,one_based=False)
genomicSeq = genomicSeq.strip().lower()
return genomicSeq
- def get_seq_and_scores(self,chromo,strand,genomicSeq_start,genomicSeq_stop,only_seq=False,perform_checks=True):
+ def get_seq_and_scores(self,id,strand,genomicSeq_start,genomicSeq_stop,only_seq=False,perform_checks=True):
"""
This function expects an interval, chromosome and strand information and
returns then the genomic sequence of this interval and the associated scores.
# do not have any score predictions
NO_SCORE_WINDOW_SIZE = 205
+ chromo = self.chromo_map[id]
+
total_size = self.chromo_sizes[chromo]
- #print genomicSeq_start,genomicSeq_stop
+ if strand == '-':
+ s_start = total_size - genomicSeq_stop
+ s_stop = total_size - genomicSeq_start
+ genomicSeq_start = s_start
+ genomicSeq_stop = s_stop
assert genomicSeq_start < genomicSeq_stop, pdb.set_trace()
- if strand == '+':
- s_start = genomicSeq_start - 1
- s_stop = genomicSeq_stop - 1
-
- genomicSeq_start -= 1
- genomicSeq_stop -= 1
+ genomicSeq = self.fetch_sequence(id,strand,genomicSeq_start,genomicSeq_stop)
if strand == '-':
- s_start = total_size - genomicSeq_stop + 1
- s_stop = total_size - genomicSeq_start + 1
-
- assert genomicSeq_start < genomicSeq_stop
-
- genomicSeq = self.fetch_sequence(chromo,strand,s_start,s_stop)
+ genomicSeq = reverse_complement(genomicSeq)
if only_seq:
return genomicSeq
if intervalEnd == total_size-1:
lookup_offset_end = 0
- currentAcc, currentDon = self.getSpliceScores(chromo,strand,intervalBegin,intervalEnd,genomicSeq)
+ currentAcc, currentDon = self.getSpliceScores(id,strand,intervalBegin,intervalEnd,genomicSeq)
if strand == '-':
currentAcc = currentAcc[1:]
don_pos = [p for p,e in enumerate(currentDon) if e != -inf and p > 0]
if perform_checks and not ag_tuple_pos == acc_pos:
- print 'ACC: Chromo/strand: %d/%s' % (chromo,strand)
+ print 'ACC: Chromo/strand: %d/%s' % (id,strand)
print ag_tuple_pos[:10],ag_tuple_pos[-10:]
print acc_pos[:10],acc_pos[-10:]
pdb.set_trace()
if perform_checks and not gt_tuple_pos == don_pos:
- print 'DON: Chromo/strand: %d/%s' % (chromo,strand)
+ print 'DON: Chromo/strand: %d/%s' % (id,strand)
print gt_tuple_pos[:10],gt_tuple_pos[-10:]
print don_pos[:10],don_pos[-10:]
pdb.set_trace()
num_splits = 5
-#
-# Specifiy here the filename of the global logfile QPalma uses to report everything
-#
-
-global_log_fn = /fml/ag-raetsch/home/fabio/tmp/sandbox/first_test/qpalma.log
-
#
# The parameter you want to do prediction with:
#
from qpalma.Run import Run
+from qpalma.SettingsParser import parseSettings
from qpalma.OutputFormat import alignment_reconstruct
-
from qpalma.sequence_utils import DataAccessWrapper,SeqSpliceInfo,reverse_complement
+from qpalma.Lookup import LookupTable
+
+
jp = os.path.join
-def createScoresForSequence(full_chromo_seq):
+def createScoresForSequence(full_chromo_seq,reverse_strand=False):
"""
Given a genomic sequence this function calculates random scores for all
ocurring splice sites.
acc_pos = []
don_pos = []
+ total_size = len(full_chromo_seq)
+
# the first/last 205 pos do not have any predictions
for pos,elem in enumerate(full_chromo_seq[205:-205]):
if full_chromo_seq[pos-2:pos] == 'ag':
for idx in range(len(don_pos)):
don_scores[idx] = random.uniform(0.1,1.0)
+ # recalculate indices and reverse them in order to have positions relative
+ # to positive strand
+ if reverse_strand:
+ acc_pos = [total_size-1-e for e in acc_pos]
+ acc_pos.reverse()
+ acc_scores.reverse()
+
+ don_pos = [total_size-1-e for e in don_pos]
+ don_pos.reverse()
+ don_scores.reverse()
+
acc_pos = array.array('I',acc_pos)
acc_scores = array.array('f',acc_scores)
don_pos.tofile(open(don_pos_fn, 'wb'))
-
def create_mini_chromosome():
chromo_fn = 'test_data/chromo1.flat'
full_chromo_seq = chromo_fh.read()
full_chromo_seq = full_chromo_seq.strip()
+ print full_chromo_seq[:200]
+
# create data for forward strand
- acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq)
+ acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq,reverse_strand=False)
+ print acc_pos[:5]
+ print don_pos[:5]
saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,'+')
# create data for reverse strand
full_chromo_seq_rev = reverse_complement(full_chromo_seq)
- acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq_rev)
- # positions are always stored relative to positive strand
- #acc_scores.reverse()
- #acc_scores.
+ total_size = len(full_chromo_seq_rev)
+
+ print full_chromo_seq_rev[:200]
+ acc_pos,acc_scores,don_pos,don_scores = createScoresForSequence(full_chromo_seq_rev,reverse_strand=True)
+ acc_pos = [total_size-1-e for e in acc_pos]
+ acc_pos.reverse()
+ print acc_pos[:5]
+ don_pos = [total_size-1-e for e in don_pos]
+ don_pos.reverse()
+ print don_pos[:5]
- don_scores.reverse()
+ #
+ # Remember: The positions are always saved one-based.
+ #
- saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,'-')
+ #saveSpliceInfo(acc_pos,acc_scores,don_pos,don_scores,'-')
+ a = 103
+ b = 255
+ print 'pos: %s'%full_chromo_seq[a:b]
+ print 'neg: %s'%full_chromo_seq_rev[a:b]
+ total_size = len(full_chromo_seq)
+ ap = total_size-b
+ bp = total_size-a
+ print 'png: %s'%reverse_complement(full_chromo_seq[ap:bp])
+
+
+def test_rev_comp():
+ get_seq_and_scores(self,chromo,strand,genomicSeq_start,genomicSeq_stop,only_seq=False,perform_checks=True)
class TestQPalmaPrediction(unittest.TestCase):
"""
example = (currentSeqInfo,read,currentQualities)
self.prediction_set[id] = [example]
-
+
def testAlignments(self):
+
+ settings = parseSettings('testcase.conf')
+
+ print self.prediction_set
+ for example_key in self.prediction_set.keys():
+ print 'Current example %d' % example_key
+
+ for example in self.prediction_set[example_key]:
+ print example
+ print 'size'
+ print len(example)
+
+ accessWrapper = DataAccessWrapper(settings)
+ seqInfo = SeqSpliceInfo(accessWrapper,settings['allowed_fragments'])
+
+ qp = QPalma(seqInfo,True)
+ allPredictions = qp.predict(self.prediction_set,settings)
+
+ for current_prediction in allPredictions:
+ align_str = print_prediction(current_prediction)
+ print align_str
+
+ id = current_prediction['id']
+ seq = current_prediction['read']
+ dna = current_prediction['dna']
+ chromo = current_prediction['chr']
+ strand = current_prediction['strand']
+ start_pos = current_prediction['start_pos']
+ predExons = current_prediction['predExons']
+
+ numExons = int(math.ceil(len(predExons) / 2))
+
+ print alignment_reconstruct(current_prediction,numExons)
+ print id,start_pos,predExons
+
+ print 'Problem counter is %d' % qp.problem_ctr
+
+
+ def _testAlignments(self):
run_dir = '/fml/ag-raetsch/home/fabio/tmp/newest_run/alignment/saved_run'
run = cPickle.load(open(jp(run_dir,'run_obj.pickle')))
print 'Problem counter is %d' % qp.problem_ctr
+def simple_check():
+ # fetch the data needed
+ settings = {}
+
+ #settings['genome_dir'] = '/fml/ag-raetsch/share/projects/genomes/A_thaliana_best/genome'
+ #settings['acceptor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/acc'
+ #settings['donor_scores_loc'] = '/fml/ag-raetsch/home/fabio/tmp/interval_query_files/don'
+
+ #settings['genome_file_fmt'] = 'chr%d.dna.flat'
+ #settings['splice_score_file_fmt']= 'contig_%d%s'
+
+ #allowed_fragments = [1]
+ #settings['allowed_fragments'] = allowed_fragments
+
+ settings['genome_dir'] = 'test_data/'
+ settings['acceptor_scores_loc'] = 'test_data/acc'
+ settings['donor_scores_loc'] = 'test_data/don'
+
+ settings['genome_file_fmt'] = 'chromo%d.flat'
+ settings['splice_score_file_fmt']= 'chromo_%d%s'
+
+ allowed_fragments = [1]
+ settings['allowed_fragments'] = allowed_fragments
+
+ accessWrapper = DataAccessWrapper(settings)
+ seqInfo = SeqSpliceInfo(accessWrapper,allowed_fragments)
+
+ b = 0
+ e = 10000
+ seq = seqInfo.get_seq_and_scores(1,'-',b,e,only_seq=True,perform_checks=False)
+
+ total_size = seqInfo.getFragmentSize(1)
+ bp = total_size - e
+ ep = total_size - b
+ seqp = seqInfo.get_seq_and_scores(1,'+',b,e,only_seq=True,perform_checks=False)
+ seqp = reverse_complement(seqp)
+
+ print seq[0:100] == seqp[0:100]
+ print seq[534:1652] == seqp[534:1652]
+ print seq[-100:] == seqp[-100:]
+
+ lt1 = LookupTable(settings)
+ dna,acc,don= lt1.get_seq_and_scores(1,'-',b,e)
+
+ print seq[:100] == seqp[:100] == dna[:100]
+ print seq[-100:] == seqp[-100:] == dna[-100:]
+
+ b = 206
+ e = 300
+ dna,acc,don = seqInfo.get_seq_and_scores(1,'-',b,e,only_seq=False,perform_checks=False)
+
+ lt1 = LookupTable(settings)
+ _dna,_acc,_don= lt1.get_seq_and_scores(1,'-',b,e)
+
+
+ print dna == _dna
+ print acc == _acc
+ print don == _don
+
+ from numpy import inf
+ print [p for p,e in enumerate(acc) if e != -inf]
+ print [p for p,e in enumerate(_acc) if e != -inf]
+
+
+
if __name__ == '__main__':
- create_mini_chromosome()
+ #create_mini_chromosome()
+ simple_check()
#suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
#unittest.TextTestRunner(verbosity=2).run(suite)
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Written (W) 2008 Fabio De Bona
+# Copyright (C) 2008 Max-Planck-Society
+
+#
+# This file performs a lot of checks in order to verify the installation of
+# QPalma
+#
+
+import os
+import sys
+import unittest
+
+from test_approximation import TestApproximation
+from test_qpalma import TestQPalmaPrediction
+from test_utils import TestSequenceUtils,TestLookupTable
+
+SUCCESS = True
+
+def check_for_module(module_name):
+
+ try:
+ exec("import %s"%module_name)
+ except:
+ return False
+ sys.exc_info()
+
+ return True
+
+
+if __name__ == '__main__':
+ log_fn = 'error.log'
+
+ if os.path.exists(log_fn):
+ os.remove(log_fn)
+
+ out_fh = open(log_fn,'w+')
+
+ print '\nChecking if your installation was successful...'
+
+ print '\nChecking for needed modules...\n'
+
+
+ module_list = ['numpy','DRMAA','Genefinding','pythongrid','QPalmaDP','qpalma']
+
+ for mod in module_list:
+ status = check_for_module(mod)
+ if status:
+ mes = "Successful!"
+ else:
+ mes = "Failed!"
+ SUCCESS = False
+
+ line = 'Status of module %s:\t%s'%(mod,mes)
+ print line
+ out_fh.write(line+'\n')
+
+ print '\nChecking for optional modules...\n'
+
+ module_list = ['pycplex', 'cvxopt', 'pymosek']
+
+ for mod in module_list:
+ line = 'Status of module %s:\t%s'%(mod,str(check_for_module(mod)))
+ print line
+ out_fh.write(line+'\n')
+
+ # after checking the modules we run some simple testcases on QPalma.
+ data_suite = unittest.TestLoader().loadTestsFromTestCase(TestLookupTable)
+ approximation_suite = unittest.TestLoader().loadTestsFromTestCase(TestApproximation)
+ prediction_suite = unittest.TestLoader().loadTestsFromTestCase(TestQPalmaPrediction)
+
+ all_suites = unittest.TestSuite([data_suite, approximation_suite, prediction_suite])
+ unittest.TextTestRunner(verbosity=2).run(all_suites)
+
+ if SUCCESS:
+ print '\n\n--- All checks where successful!! ---\n\n'
+ else:
+ print '\n\n--- Send the file error.log to qpalma@tuebingen.mpg.de ---\n\n'
+ env = os.environ
+ lines = ''
+ for var in ['PATH','LD_LIBRARY_PATH','PYTHONPATH']:
+ lines += '%s=%s\n'%(var,env[var])
+ out_fh.write(lines)
+
+ out_fh.close()