Best Python code snippet using localstack_python
rmvRedundancy.py
Source:rmvRedundancy.py
#!/usr/bin/env python
"""
This program removes redundancy among queries compared to subjects.

Pipeline (run inside a private temporary directory):
  1. shorten the sequence headers (and, when the subjects are the queries,
     first sort the sequences by increasing length),
  2. run Blaster then Matcher (locally, or per query when a queue is given),
  3. filter the Matcher 'tab' file with the identity and length thresholds,
  4. restore the initial headers and write the output fasta file.
"""
import os
import sys
import getopt
import logging
import time
import shutil
from pyRepet.launcher.programLauncher import programLauncher
from pyRepetUnit.commons.launcher.Launcher import Launcher
from pyRepetUnit.commons.sql.RepetJob import RepetJob
from pyRepetUnit.commons.seq.FastaUtils import FastaUtils
import ConfigParser


def help():
    """
    Give the list of the command-line options.
    """
    # single-argument print() calls behave the same under Python 2 and 3
    print("")
    print("usage: %s  [ options ]" % sys.argv[0])
    print("options:")
    print(" -h: this help")
    print(" -q: fasta filename of the queries")
    print(" -s: fasta filename of the subjects (same as queries if not specified)")
    print(" -o: output queries fasta filename (default=qryFileName+'.uniq')")
    print(" -i: identity threshold (default=0.95)")
    print(" -l: length threshold (default=0.98)")
    print(" -e: E-value threshold (default=1e-10)")
    print(" -f: length filter for Blaster (default=100)")
    print(" -Q: queue name to run in parallel")
    print(" -C: name of the configuration file")
    print(" -n: max. number of jobs (default=100,given a min. of 1 query per job)")
    print(" -c: clean")
    print(" -v: verbose (default=0/1/2)")
    print("")


def _runClusterJob( configFileName, config, cDir, tmpDir, queue, groupid, acronym, lLinks, lCmds, lOutputs ):
    """
    Run one job through the cluster Launcher.

    Builds the two Python source fragments given to Launcher.runSingleJob():
      - cmd_start: symlink each file of lLinks from cDir, then run each shell
        command of lCmds via os.system,
      - cmd_finish: move each file of lOutputs back to cDir unless a file with
        that name already exists there.

    @param configFileName: configuration file given to RepetJob
    @param config: parsed configuration (section 'classif_consensus' is read)
    @param cDir: directory holding the inputs and receiving the outputs
    @param tmpDir: temporary directory given to the Launcher
    @param queue: queue / resources string given to the Launcher
    @param groupid: group identifier of the job
    @param acronym: job name, also used when cleaning
    @param lLinks: file names (relative to cDir) to symlink before running
    @param lCmds: shell commands to run, in order
    @param lOutputs: file names to move back to cDir afterwards
    """
    jobdb = RepetJob( cfgFileName = configFileName )
    cLauncher = Launcher( jobdb, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid, acronym )
    cLauncher.beginRun()
    cLauncher.job.jobname = acronym

    cmd_start = ""
    for name in lLinks:
        cmd_start += "os.system( \"ln -s %s/%s .\" )\n" % ( cDir, name )
    for c in lCmds:
        cmd_start += "log = os.system( \""
        cmd_start += c
        cmd_start += "\" )\n"

    cmd_finish = ""
    for name in lOutputs:
        cmd_finish += "if not os.path.exists( \"%s/%s\" ):\n" % ( cDir, name )
        cmd_finish += "\tos.system( \"mv %s %s/.\" )\n" % ( name, cDir )

    cLauncher.runSingleJob( cmd_start, cmd_finish )
    cLauncher.endRun()
    if config.get( "classif_consensus", "clean" ) == "yes":
        cLauncher.clean( acronym )
    jobdb.close()


def main():
    """
    This program removes redundancy among queries compared to subjects.

    Reads its parameters from sys.argv (see help()). Exits with status 1 on a
    usage error or a missing input file, with status 0 when an input fasta
    file is empty, and returns 0 on success.
    """
    qryFileName = ""
    sbjFileName = ""
    outFileName = ""
    thresIdentity = 0.95   # remove the seq if it is identical to 95% of another seq
    thresLength = 0.98     # and if its length is 98% of that seq
    threshEvalue = "1e-10"
    lengthFilter = 100
    useCluster = False
    queue = ""
    configFileName = ""
    maxNbJobs = 100
    clean = False
    verbose = 0

    try:
        # NOTE: "-L" and "-w" are accepted by the parser but not used below;
        # they are kept so that existing command lines still parse.
        opts, args = getopt.getopt( sys.argv[1:], "hq:s:o:L:i:l:e:w:f:Q:C:n:cv:" )
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qryFileName = a
        elif o == "-s":
            sbjFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-i":
            thresIdentity = float(a)
        elif o == "-l":
            thresLength = float(a)
        elif o == "-e":
            threshEvalue = a
        elif o == "-f":
            lengthFilter = int(a)
        elif o == "-Q":
            useCluster = True
            queue = a
        elif o == "-C":
            configFileName = a
        elif o == "-n":
            maxNbJobs = int(a)
        elif o == "-c":
            clean = True
        elif o == "-v":
            verbose = int(a)

    if thresIdentity > 1.0 or thresLength > 1.0:
        print("ERROR: thresholds must be <= 1.0")
        sys.exit(1)
    if qryFileName == "":
        print("ERROR: missing input file (-q)")
        help()
        sys.exit(1)
    # the configuration file is required even when no queue is given
    if configFileName == "":
        print("ERROR: missing configuration file (-C)")
        sys.exit(1)
    if not os.path.exists( configFileName ):
        print("ERROR: configuration file doesn't exist")
        sys.exit(1)

    #--------------------------------------------------------------------------

    if verbose > 0:
        print("START %s (%s)" % ( sys.argv[0].split("/")[-1],
                                  time.strftime("%m/%d/%Y %H:%M:%S") ))
        sys.stdout.flush()

    if outFileName == "":
        outFileName = "%s.uniq" % ( qryFileName )

    # timestamp + pid: used for both the log file and the temporary directory
    uniqId = "%s-%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )

    # create the 'log' file
    logFileName = "%s_rmvRedundancy_%s.log" % ( outFileName, uniqId )
    handler = logging.FileHandler( logFileName )
    formatter = logging.Formatter( "%(asctime)s %(levelname)s: %(message)s" )
    handler.setFormatter( formatter )
    logging.getLogger('').addHandler( handler )
    logging.getLogger('').setLevel( logging.DEBUG )
    logging.info( "started" )

    if not os.path.exists( qryFileName ):
        msg = "query file '%s' doesn't exist" % ( qryFileName )
        logging.error( msg )
        print("ERROR: %s" % ( msg ))
        sys.exit(1)
    if sbjFileName != "":
        if not os.path.exists( sbjFileName ):
            msg = "subject file '%s' doesn't exist" % ( sbjFileName )
            logging.error( msg )
            print("ERROR: %s" % ( msg ))
            sys.exit(1)
    else:
        # no subjects given: compare the queries against themselves
        sbjFileName = qryFileName

    logging.info( "remove redundancy among '%s' (queries) compare to '%s' (subjects)" % ( qryFileName, sbjFileName ) )

    #--------------------------------------------------------------------------

    # check the input files are not empty, otherwise exit
    nbSeqQry = FastaUtils.dbSize( qryFileName )
    if nbSeqQry == 0:
        msg = "query file is empty"
        logging.info( msg )
        print("WARNING: %s" % ( msg ))
        logging.info( "finished" )
        sys.exit(0)

    # counted once (the original counted the subject file twice when it
    # differs from the query file)
    nbSeqSbj = FastaUtils.dbSize( sbjFileName )
    if nbSeqSbj == 0:
        msg = "subject file is empty"
        logging.info( msg )
        print("WARNING: %s" % ( msg ))
        logging.info( "finished" )
        sys.exit(0)

    #--------------------------------------------------------------------------

    pL = programLauncher()

    # work in a fresh temporary directory containing symlinks to the inputs
    uniqTmpDir = "tmp%s" % ( uniqId )
    if os.path.exists( uniqTmpDir ):
        shutil.rmtree( uniqTmpDir )
    os.mkdir( uniqTmpDir )
    os.chdir( uniqTmpDir )

    os.system( "ln -s ../%s ." % ( qryFileName ) )
    if sbjFileName != qryFileName:
        os.system( "ln -s ../%s ." % ( sbjFileName ) )
    os.system( "ln -s ../%s ." % ( logFileName ) )

    # NOTE(review): the scraped source lost its indentation; the whole
    # configuration block is assumed to belong to 'if useCluster:' since
    # 'config', 'cDir' and 'tmpDir' are only used in cluster mode.
    if useCluster:
        os.system( "ln -s ../%s ." % ( configFileName ) )
        config = ConfigParser.ConfigParser()
        # 'with' closes the configuration file once it has been parsed
        with open( configFileName ) as configFile:
            config.readfp( configFile )
        # the value given to -Q is replaced by the configured resources
        queue = config.get( "classif_consensus", "resources" )
        cDir = os.getcwd()
        if config.get( "classif_consensus", "tmpDir" ) != "":
            tmpDir = config.get( "classif_consensus", "tmpDir" )
        else:
            tmpDir = cDir

    lCmds = []

    # shorten sequence headers
    if sbjFileName == qryFileName:

        # sort sequences by increasing length
        os.system( "mv %s %s.initOrder" % ( qryFileName, qryFileName ) )

        prg = os.environ["REPET_PATH"] + "/bin/srptSortSequencesByIncreasingLength.py"
        cmd = prg
        cmd += " -i %s.initOrder" % ( qryFileName )
        cmd += " -o %s" % ( qryFileName )
        cmd += " -v %d" % ( verbose-1 )
        lCmds.append( cmd )

        prg = os.environ["REPET_PATH"] + "/bin/shortenSeqHeader.py"
        cmd = prg
        cmd += " -i %s" % ( qryFileName )
        lCmds.append( cmd )

        if not useCluster:
            # as in the original, 'prg' is the shortenSeqHeader path for both commands
            for c in lCmds:
                pL.launch( prg, c )
        else:
            _runClusterJob( configFileName, config, cDir, tmpDir, queue,
                            "%s_SubjectEqualQuery_sortSequencesAndShortenSeqHeader" % ( "rmvRedundancy" ),
                            "sortSequencesAndShortenSeqHeader",
                            [ "%s.initOrder" % ( qryFileName ) ],
                            lCmds,
                            [ "%s.shortH" % ( qryFileName ),
                              "%s.shortHlink" % ( qryFileName ),
                              "%s.initOrder" % ( qryFileName ) ] )

    else:
        prg = os.environ["REPET_PATH"] + "/bin/shortenSeqHeader.py"
        cmd = prg
        cmd += " -i %s" % ( qryFileName )
        cmd += " -p %s" % ( "qry" )
        lCmds.append( cmd )

        cmd = prg
        cmd += " -i %s" % ( sbjFileName )
        cmd += " -p %s" % ( "sbj" )
        lCmds.append( cmd )

        if not useCluster:
            for c in lCmds:
                pL.launch( prg, c )
        else:
            _runClusterJob( configFileName, config, cDir, tmpDir, queue,
                            "rmvRedundancy_SubjectNotEqualQuery_shortenSeqHeader",
                            "shortenSeqHeader",
                            [ qryFileName, sbjFileName ],
                            lCmds,
                            [ "%s.shortH" % ( qryFileName ),
                              "%s.shortHlink" % ( qryFileName ),
                              "%s.shortH" % ( sbjFileName ),
                              "%s.shortHlink" % ( sbjFileName ) ] )

    #--------------------------------------------------------------------------

    # case not in parallel
    if not useCluster:

        # run Blaster
        prg = os.environ["REPET_PATH"] + "/bin/blaster"
        cmd = prg
        cmd += " -q %s.shortH" % ( qryFileName )
        cmd += " -s %s.shortH" % ( sbjFileName )
        cmd += " -a"
        cmd += " -I %i" % ( int(100*thresIdentity) )
        cmd += " -L %i" % ( lengthFilter )
        cmd += " -E %s" % ( threshEvalue )
        cmd += " -B %s.shortH_vs_%s.shortH" % ( qryFileName, sbjFileName )
        pL.launch( prg, cmd )

        # run Matcher
        prg = os.environ["REPET_PATH"] + "/bin/matcher"
        cmd = prg
        cmd += " -m %s.shortH_vs_%s.shortH.align" % ( qryFileName, sbjFileName )
        cmd += " -q %s.shortH" % ( qryFileName )
        cmd += " -s %s.shortH" % ( sbjFileName )
        cmd += " -j"
        cmd += " -a"
        pL.launch( prg, cmd )

    #--------------------------------------------------------------------------

    # case in parallel
    else:

        # run Blaster + Matcher
        prg = os.environ["REPET_PATH"] + "/bin/launchBlasterMatcherPerQuery.py"
        cmd = prg
        cmd += " -q %s.shortH" % ( qryFileName )
        cmd += " -s %s.shortH" % ( sbjFileName )
        cmd += " -Q '%s'" % ( queue )
        cmd += " -d %s" % ( os.getcwd() )
        cmd += " -C %s" % ( configFileName )
        cmd += " -n %i" % ( maxNbJobs )
        cmd += " -B \"-a -I %s -L %s -E %s\"" % ( int(100*thresIdentity), lengthFilter, threshEvalue )
        cmd += " -M \"%s\"" % ( "-j -a" )
        cmd += " -Z tab"
        if clean:
            cmd += " -c"
        cmd += " -v %i" % ( verbose - 1 )
        pL.launch( prg, cmd )

    #--------------------------------------------------------------------------

    # filter the resulting 'tab' file
    tabFileName = "%s.shortH_vs_%s.shortH.align.match.tab" % ( qryFileName, sbjFileName )
    prg = os.environ["REPET_PATH"] + "/bin/filterOutMatcher.py"
    cmd = prg
    cmd += " -q %s.shortH" % ( qryFileName )
    if sbjFileName != qryFileName:
        cmd += " -s %s.shortH" % ( sbjFileName )
    cmd += " -m %s" % ( tabFileName )
    cmd += " -o %s.shortH.filtered" % ( qryFileName )
    cmd += " -i %f" % ( thresIdentity )
    cmd += " -l %f" % ( thresLength )
    cmd += " -L %s" % ( logFileName )
    cmd += " -v %i" % ( verbose )

    if not useCluster:
        pL.launch( prg, cmd )
    else:
        lLinks = [ "%s.shortH" % ( qryFileName ) ]
        if sbjFileName != qryFileName:
            lLinks.append( "%s.shortH" % ( sbjFileName ) )
        lLinks.append( tabFileName )
        _runClusterJob( configFileName, config, cDir, tmpDir, queue,
                        "rmvRedundancy_filterOutMatcher",
                        "filterOutMatcher",
                        lLinks,
                        [ cmd ],
                        [ "%s.shortH.filtered" % ( qryFileName ),
                          tabFileName,
                          logFileName ] )

    # retrieve initial headers
    prg = os.environ["REPET_PATH"] + "/bin/ChangeSequenceHeaders.py"
    cmd = prg
    cmd += " -i %s.shortH.filtered" % ( qryFileName )
    cmd += " -f fasta"
    cmd += " -s 2"
    cmd += " -l %s.shortHlink" % ( qryFileName )
    cmd += " -o %s" % ( outFileName )

    if not useCluster:
        pL.launch( prg, cmd )
    else:
        _runClusterJob( configFileName, config, cDir, tmpDir, queue,
                        "rmvRedundancy_ChangeSequenceHeaders",
                        "ChangeSequenceHeaders",
                        [ "%s.shortH.filtered" % ( qryFileName ),
                          "%s.shortHlink" % ( qryFileName ) ],
                        [ cmd ],
                        [ outFileName ] )

    # future improvement: give file '.shortHlink' to 'filterOutMatcher.py' so that it saves the information about which match removed which query, with the true headers

    # bring the result back next to the inputs and leave the temporary directory
    os.system( "mv %s .." % ( outFileName ) )
    os.chdir( ".." )
    if clean:
        shutil.rmtree( uniqTmpDir )

    logging.info( "finished" )

    if verbose > 0:
        print("END %s (%s)" % ( sys.argv[0].split("/")[-1],
                                time.strftime("%m/%d/%Y %H:%M:%S") ))
        sys.stdout.flush()

    return 0


if __name__ == "__main__":
    # NOTE(review): the scraped snippet is truncated right after this guard
    # ("..."); calling main() is the presumed original body - confirm.
    main()
optional_rhseqv2_wrapper.py
Source:optional_rhseqv2_wrapper.py
import os

## USAGE ##
# python optional_rhseqv2_wrapper
# optional wrapper for running the rhseq pipeline from poolCount to mwu analysis with one command
# generates, prints (optional), and runs (optional) a command for all the rhseq scripts with their current parameters
# (call this wrapper from the folder where output is intended)

##PARAMETERS##
printCommand = True
runCommand = False

scriptDir = '/usr2/people/mabrams/scripts/'  # the location where the rhseqv2 scripts are saved
py3 = 'python'  # the call for python at the start of the script

poolCountFile = 'poolCount.txt'  # name/path to the first infile (the poolCount from the Coradetti et al. RBseq pipeline RBseq_Count_Barcodes script v1.1.4 (no changes))

# (script name, input argument) for every step after the poolCount conversion,
# in the order the steps are chained.
_PIPELINE_STEPS = (
    ('2total_reads_and_normalize_rhseqv2.py', './*fastq_pooled_reads_clean'),
    ('3remove_NC_and_plasmid_inserts_rhseqv2.py', './*.normalized_pooled_reads'),
    ('4reformat_single_techrep_rhseqv2.py', './*.normalized_pooled_reads_coding'),
    ('5combine_bioreps_rhseqv2.py', '*.normalized_averaged_techreps'),
    ('6filter_inserts_rhseqv2.py', '*.normalized_averaged_bioreps'),
    ('7fitness_ratios_rhseqv2.py', '*.filtered_inserts'),
    ('8organize_and_filter_genes_rhseqv2.py', '*.insert_ratios'),
    ('9mann_whitney_u_rhseqv2.py', '*.filtered_gene_inserts'),
    # NOTE(review): the two globs are joined without a space, exactly as in
    # the original script; confirm whether the effect-size script expects one
    # combined pattern or two separate arguments.
    ('10effect_size_rhseqv2.py', '*.filtered_gene_inserts' + '*.mwu_test_results'),
)


def build_command(script_dir=None, python=None, out_dir='./', pool_count_file=None):
    """Return the shell command line that chains every rhseq script.

    Each step has the form ``<python> <script_dir><script> <out_dir> <input>;``
    and the steps are concatenated in pipeline order.

    Args:
        script_dir: Folder holding the rhseqv2 scripts, used as a plain string
            prefix (so it should end with a path separator). Defaults to the
            module-level ``scriptDir``.
        python: Interpreter call placed in front of every script. Defaults to
            the module-level ``py3``.
        out_dir: Output folder passed to every script.
        pool_count_file: Input of the first step. Defaults to the module-level
            ``poolCountFile``.

    Returns:
        The full command as a single string; every step ends with ``;``.
    """
    # Resolve defaults at call time so edits to the PARAMETERS section apply.
    if script_dir is None:
        script_dir = scriptDir
    if python is None:
        python = py3
    if pool_count_file is None:
        pool_count_file = poolCountFile

    cmd_start = python + ' ' + script_dir
    cmd_mid = ' ' + out_dir + ' '

    steps = (('convert_poolcount_to_fastqpooledreadsclean_rhseqv2.py', pool_count_file),) + _PIPELINE_STEPS
    return ''.join(cmd_start + script + cmd_mid + infile + ';' for script, infile in steps)


##RUN##
if __name__ == '__main__':
    command = build_command()

    if printCommand:
        print(command)
    if runCommand:
        # the command relies on shell globbing and ';' chaining, hence os.system
        os.system(command)
...
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!