Best Python code snippet using tox_python
gametes_full_archive_gen.py
Source:gametes_full_archive_gen.py
1"""2Author: Ryan Urbanowicz3Created: 11/30/204Description: Script to apply GAMETES to generate and organize a variety of SNP simulated models and a corresponding datasets5"""6import sys7import os8import argparse9import time10def main(argv):11 #Parse arguments12 parser = argparse.ArgumentParser(description="")13 #No defaults14 parser.add_argument('--output-path',dest='output_path',type=str,help='path to output directory')15 parser.add_argument('--archive-name', dest='archive_name',type=str, help='name of archive output folder (no spaces)')16 parser.add_argument('--run-parallel',dest='run_parallel',type=str,help='path to directory containing datasets',default="True")17 parser.add_argument('--use', dest='use', help='', type=str, default ='model') #defaults to model generation18 options = parser.parse_args(argv[1:])19 output_path = options.output_path20 archive_name = options.archive_name21 run_parallel = options.run_parallel22 use = options.use23 if not os.path.exists(output_path):24 os.mkdir(output_path)25 this_file_path = os.path.dirname(os.path.realpath(__file__))26 model_dest = output_path+'/'+archive_name+'/models'27 job_dest = output_path+'/temporary'+'/jobs'28 log_dest = output_path+'/temporary'+'/logs'29 #Create folders30 if not os.path.exists(output_path+'/'+archive_name):31 os.mkdir(output_path+'/'+archive_name)32 if not os.path.exists(model_dest):33 os.mkdir(model_dest)34 if not os.path.exists(output_path+'/temporary'):35 os.mkdir(output_path+'/temporary')36 if not os.path.exists(job_dest):37 os.mkdir(job_dest)38 if not os.path.exists(log_dest):39 os.mkdir(log_dest)40 if use == 'model':41 #Generate core main effect models42 univariate_core_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)43 #Generate core 2-way epistasis models44 epistasis_2_locus_core_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)45 #Generate 3-way epistasis models46 
epistasis_3_locus_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)47 elif use == 'data':48 #Generate core main effect data49 univariate_core_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)50 #Generate core epistasis data51 epistasis_2_locus_core_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)52 #Generate 3-way epistasis data53 epistasis_3_locus_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)54 #Generate heterogeneous data (2 subgroups of 2-way epistasis)55 epistasis_2_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)56 #Generate additive data (2 additively combined 2-way epistasis models, yielding 'impure' epistasis)57 epistasis_2_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)58 #Generate heterogeneous data (2 subgroups of univariate efects)59 univariate_2_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)60 #Generate additive data (2 subgroups of univariate efects)61 univariate_2_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)62 #Generate heterogeneous data (4 subgroups of univariate efects)63 univariate_4_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)64 #Generate additive data (4 subgroups of univariate efects)65 univariate_4_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)66 #Generate imbalanced dataset (with 2-way epistasis)67 epistasis_2_locus_imbalanced_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)68 #Generate continuous endpoint data (with 2-way epistasis)69 
epistasis_2_locus_quantitative_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)70 #Generate increasing feature count datasets (with 2-way epistasis)71 epistasis_2_locus_numfeatures_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path)72 else:73 print("GAMETES use not recognized.")74def univariate_core_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):75 #Define model parameters76 locus = 177 heritability = [0.05, 0.1, 0.2, 0.4]78 minorAF = [0.2]79 setK = True #if True, then K value will be specified as a constraint - if False, then K will be allowed to vary.80 K = 0.3 #population prevelance81 pop_count = 100000 #82 try_count = 1000000083 quantiles = 284 #Generate models85 for h in heritability:86 for m in minorAF:87 model_path_name = model_dest+"/L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)88 #Create gametes run command89 if setK:90 model_path_name = model_path_name+'_K_'+str(K)91 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar '+'-M " -h '+str(h)+' -p '+str(K)92 else:93 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar '+'-M " -h '+str(h)94 for i in range(locus):95 filewrite = filewrite +' -a '+str(m)96 filewrite = filewrite +' -o '+model_path_name+'.txt'+'" -q '+str(quantiles)+' -p '+str(pop_count)+' -t '+str(try_count)97 if run_parallel:98 job_ref = str(time.time())99 job_path_name = job_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'_run.sh'100 sh_file = open(job_path_name,'w')101 sh_file.write('#!/bin/bash\n')102 sh_file.write('#BSUB -q i2c2_normal'+'\n')103 sh_file.write('#BSUB -J '+job_ref+'\n')104 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')105 sh_file.write('#BSUB -M 15GB'+'\n')106 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'.o\n')107 sh_file.write('#BSUB -e ' + 
log_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'.e\n')108 sh_file.write(filewrite)109 sh_file.close()110 os.system('bsub < '+job_path_name)111 pass112 else:113 os.system(filewrite)114def epistasis_2_locus_core_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):115 #Define model parameters116 locus = 2117 heritability = [0.05, 0.1, 0.2, 0.4]118 minorAF = [0.2]119 setK = True #if True, then K value will be specified as a constraint - if False, then K will be allowed to vary.120 K = 0.3 #population prevelance121 pop_count = 100000 #122 try_count = 10000000123 quantiles = 2124 #Generate models125 for h in heritability:126 for m in minorAF:127 model_path_name = model_dest+"/L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)128 #Create gametes run command129 if setK:130 model_path_name = model_path_name+'_K_'+str(K)131 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar '+'-M " -h '+str(h)+' -p '+str(K)132 else:133 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar '+'-M " -h '+str(h)134 for i in range(locus):135 filewrite = filewrite +' -a '+str(m)136 filewrite = filewrite +' -o '+model_path_name+'.txt'+'" -q '+str(quantiles)+' -p '+str(pop_count)+' -t '+str(try_count)137 if run_parallel:138 job_ref = str(time.time())139 job_path_name = job_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'_run.sh'140 sh_file = open(job_path_name,'w')141 sh_file.write('#!/bin/bash\n')142 sh_file.write('#BSUB -q i2c2_normal'+'\n')143 sh_file.write('#BSUB -J '+job_ref+'\n')144 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')145 sh_file.write('#BSUB -M 15GB'+'\n')146 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'.o\n')147 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'.e\n')148 sh_file.write(filewrite)149 sh_file.close()150 os.system('bsub < '+job_path_name)151 pass152 else:153 
os.system(filewrite)154def epistasis_3_locus_model(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):155 #Define model parameters156 locus = 3157 heritability = [0.2]158 minorAF = [0.2]159 setK = True #if True, then K value will be specified as a constraint - if False, then K will be allowed to vary.160 K = 0.3 #population prevelance161 pop_count = 100000 #162 try_count = 100000000163 quantiles = 2164 #Generate models165 for h in heritability:166 for m in minorAF:167 model_path_name = model_dest+"/L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)168 #Create gametes run command169 if setK:170 model_path_name = model_path_name+'_K_'+str(K)171 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar '+'-M " -h '+str(h)+' -p '+str(K)172 else:173 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar '+'-M " -h '+str(h)174 for i in range(locus):175 filewrite = filewrite +' -a '+str(m)176 filewrite = filewrite +' -o '+model_path_name+'.txt'+'" -q '+str(quantiles)+' -p '+str(pop_count)+' -t '+str(try_count)177 if run_parallel:178 job_ref = str(time.time())179 job_path_name = job_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'_run.sh'180 sh_file = open(job_path_name,'w')181 sh_file.write('#!/bin/bash\n')182 sh_file.write('#BSUB -q i2c2_normal'+'\n')183 sh_file.write('#BSUB -J '+job_ref+'\n')184 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')185 sh_file.write('#BSUB -M 15GB'+'\n')186 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'.o\n')187 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+"L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_'+job_ref+'.e\n')188 sh_file.write(filewrite)189 sh_file.close()190 os.system('bsub < '+job_path_name)191 pass192 else:193 os.system(filewrite)194def univariate_core_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):195 #Model parameters needed196 locus = 1197 heritability = [0.05, 0.1, 0.2, 
0.4]198 minorAF = [0.2]199 K = 0.3 #population prevelance200 #Define dataset parameters201 data_name = 'gametes_univariate'202 samplesize = [200, 400, 800, 1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)203 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]204 AF_Min = 0.01205 AF_Max = 0.5206 replicates = 30 #100207 #Make dataset folder208 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):209 os.mkdir(output_path+'/'+archive_name+'/'+data_name)210 #Generate datasets and folders211 for n in numberofattributes:212 for s in samplesize:213 for h in heritability:214 for m in minorAF:215 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)216 modelFile = model_dest+'/'+modelName+"_Models.txt"217 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)218 #Create gametes run command219 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'220 if run_parallel:221 job_ref = str(time.time())222 job_path_name = job_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'223 sh_file = open(job_path_name,'w')224 sh_file.write('#!/bin/bash\n')225 sh_file.write('#BSUB -q i2c2_normal'+'\n')226 sh_file.write('#BSUB -J '+job_ref+'\n')227 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')228 sh_file.write('#BSUB -M 15GB'+'\n')229 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')230 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')231 sh_file.write(filewrite)232 sh_file.close()233 os.system('bsub < '+job_path_name)234 pass235 else:236 os.system(filewrite)237def 
epistasis_2_locus_core_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):238 #Model parameters needed239 locus = 2240 heritability = [0.05, 0.1, 0.2, 0.4]241 minorAF = [0.2]242 K = 0.3 #population prevelance243 #Define dataset parameters244 data_name = 'gametes_2way_epistasis'245 samplesize = [200, 400, 800, 1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)246 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]247 AF_Min = 0.01248 AF_Max = 0.5249 replicates = 30 #100250 #Make dataset folder251 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):252 os.mkdir(output_path+'/'+archive_name+'/'+data_name)253 #Generate datasets and folders254 for n in numberofattributes:255 for s in samplesize:256 for h in heritability:257 for m in minorAF:258 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)259 modelFile = model_dest+'/'+modelName+"_Models.txt"260 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)261 #Create gametes run command262 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'263 if run_parallel:264 job_ref = str(time.time())265 job_path_name = job_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'266 sh_file = open(job_path_name,'w')267 sh_file.write('#!/bin/bash\n')268 sh_file.write('#BSUB -q i2c2_normal'+'\n')269 sh_file.write('#BSUB -J '+job_ref+'\n')270 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')271 sh_file.write('#BSUB -M 15GB'+'\n')272 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')273 sh_file.write('#BSUB -e ' + 
log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')274 sh_file.write(filewrite)275 sh_file.close()276 os.system('bsub < '+job_path_name)277 pass278 else:279 os.system(filewrite)280def epistasis_3_locus_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):281 #Model parameters needed282 locus = 3283 heritability = [0.2]284 minorAF = [0.2]285 K = 0.3 #population prevelance286 #Define dataset parameters287 data_name = 'gametes_3way_epistasis'288 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)289 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]290 AF_Min = 0.01291 AF_Max = 0.5292 replicates = 30 #100293 #Make dataset folder294 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):295 os.mkdir(output_path+'/'+archive_name+'/'+data_name)296 #Generate datasets and folders297 for n in numberofattributes:298 for s in samplesize:299 for h in heritability:300 for m in minorAF:301 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)302 modelFile = model_dest+'/'+modelName+"_Models.txt"303 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)304 #Create gametes run command305 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'306 if run_parallel:307 job_ref = str(time.time())308 job_path_name = job_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'309 sh_file = open(job_path_name,'w')310 sh_file.write('#!/bin/bash\n')311 sh_file.write('#BSUB -q i2c2_normal'+'\n')312 sh_file.write('#BSUB -J '+job_ref+'\n')313 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')314 sh_file.write('#BSUB -M 15GB'+'\n')315 
sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')316 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')317 sh_file.write(filewrite)318 sh_file.close()319 os.system('bsub < '+job_path_name)320 pass321 else:322 os.system(filewrite)323def epistasis_2_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):324 #Model parameters needed325 locus = 2326 heritability = [0.4]327 minorAF = [0.2]328 K = 0.3 #population prevelance329 #Define dataset parameters330 data_name = 'gametes_2way_epi_2het'331 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)332 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]333 weight = [50,75] # have to do the math for both ratio, X and 100-X334 AF_Min = 0.01335 AF_Max = 0.5336 replicates = 30 #100337 #Make dataset folder338 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):339 os.mkdir(output_path+'/'+archive_name+'/'+data_name)340 #Generate datasets and folders341 for n in numberofattributes:342 for s in samplesize:343 for h in heritability:344 for m in minorAF:345 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)346 modelFile = model_dest+'/'+modelName+"_Models.txt"347 for w in weight:348 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)349 #Create gametes run command350 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(100-w)+' -D "-h heterogeneous -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'351 if run_parallel:352 job_ref = str(time.time())353 job_path_name = 
job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'354 sh_file = open(job_path_name,'w')355 sh_file.write('#!/bin/bash\n')356 sh_file.write('#BSUB -q i2c2_normal'+'\n')357 sh_file.write('#BSUB -J '+job_ref+'\n')358 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')359 sh_file.write('#BSUB -M 15GB'+'\n')360 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')361 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')362 sh_file.write(filewrite)363 sh_file.close()364 os.system('bsub < '+job_path_name)365 pass366 else:367 os.system(filewrite)368def epistasis_2_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):369 #Model parameters needed370 locus = 2371 heritability = [0.4]372 minorAF = [0.2]373 K = 0.3 #population prevelance374 #Define dataset parameters375 data_name = 'gametes_2way_epi_2add'376 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)377 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]378 weight = [50,75] # have to do the math for both ratio, X and 100-X379 AF_Min = 0.01380 AF_Max = 0.5381 replicates = 30 #100382 #Make dataset folder383 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):384 os.mkdir(output_path+'/'+archive_name+'/'+data_name)385 #Generate datasets and folders386 for n in numberofattributes:387 for s in samplesize:388 for h in heritability:389 for m in minorAF:390 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)391 modelFile = model_dest+'/'+modelName+"_Models.txt"392 for w in weight:393 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)394 #Create gametes run 
command395 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(100-w)+' -D "-h hierarchical -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'396 if run_parallel:397 job_ref = str(time.time())398 job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'399 sh_file = open(job_path_name,'w')400 sh_file.write('#!/bin/bash\n')401 sh_file.write('#BSUB -q i2c2_normal'+'\n')402 sh_file.write('#BSUB -J '+job_ref+'\n')403 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')404 sh_file.write('#BSUB -M 15GB'+'\n')405 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')406 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')407 sh_file.write(filewrite)408 sh_file.close()409 os.system('bsub < '+job_path_name)410 pass411 else:412 os.system(filewrite)413def univariate_2_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):414 #Model parameters needed415 locus = 1416 heritability = [0.4]417 minorAF = [0.2]418 K = 0.3 #population prevelance419 #Define dataset parameters420 data_name = 'gametes_uni_2het'421 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)422 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]423 weight = [50] # have to do the math for both ratio, X and 100-X424 AF_Min = 0.01425 AF_Max = 0.5426 replicates = 30 #100427 #Make dataset folder428 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):429 os.mkdir(output_path+'/'+archive_name+'/'+data_name)430 #Generate datasets and folders431 for n in numberofattributes:432 for s in 
samplesize:433 for h in heritability:434 for m in minorAF:435 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)436 modelFile = model_dest+'/'+modelName+"_Models.txt"437 for w in weight:438 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)439 #Create gametes run command440 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h heterogeneous -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'441 if run_parallel:442 job_ref = str(time.time())443 job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'444 sh_file = open(job_path_name,'w')445 sh_file.write('#!/bin/bash\n')446 sh_file.write('#BSUB -q i2c2_normal'+'\n')447 sh_file.write('#BSUB -J '+job_ref+'\n')448 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')449 sh_file.write('#BSUB -M 15GB'+'\n')450 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')451 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')452 sh_file.write(filewrite)453 sh_file.close()454 os.system('bsub < '+job_path_name)455 pass456 else:457 os.system(filewrite)458def univariate_2_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):459 #Model parameters needed460 locus = 1461 heritability = [0.4]462 minorAF = [0.2]463 K = 0.3 #population prevelance464 #Define dataset parameters465 data_name = 'gametes_uni_2add'466 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)467 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 
100]468 weight = [50] # have to do the math for both ratio, X and 100-X469 AF_Min = 0.01470 AF_Max = 0.5471 replicates = 30 #100472 #Make dataset folder473 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):474 os.mkdir(output_path+'/'+archive_name+'/'+data_name)475 #Generate datasets and folders476 for n in numberofattributes:477 for s in samplesize:478 for h in heritability:479 for m in minorAF:480 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)481 modelFile = model_dest+'/'+modelName+"_Models.txt"482 for w in weight:483 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)484 #Create gametes run command485 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h hierarchical -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'486 if run_parallel:487 job_ref = str(time.time())488 job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'489 sh_file = open(job_path_name,'w')490 sh_file.write('#!/bin/bash\n')491 sh_file.write('#BSUB -q i2c2_normal'+'\n')492 sh_file.write('#BSUB -J '+job_ref+'\n')493 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')494 sh_file.write('#BSUB -M 15GB'+'\n')495 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')496 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')497 sh_file.write(filewrite)498 sh_file.close()499 os.system('bsub < '+job_path_name)500 pass501 else:502 os.system(filewrite)503def univariate_4_locus_hetero_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):504 
#Model parameters needed505 locus = 1506 heritability = [0.4]507 minorAF = [0.2]508 K = 0.3 #population prevelance509 #Define dataset parameters510 data_name = 'gametes_uni_4het'511 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)512 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]513 weight = [50] # have to do the math for both ratio, X and 100-X514 AF_Min = 0.01515 AF_Max = 0.5516 replicates = 30 #100517 #Make dataset folder518 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):519 os.mkdir(output_path+'/'+archive_name+'/'+data_name)520 #Generate datasets and folders521 for n in numberofattributes:522 for s in samplesize:523 for h in heritability:524 for m in minorAF:525 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)526 modelFile = model_dest+'/'+modelName+"_Models.txt"527 for w in weight:528 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)529 #Create gametes run command530 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h heterogeneous -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'531 if run_parallel:532 job_ref = str(time.time())533 job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'534 sh_file = open(job_path_name,'w')535 sh_file.write('#!/bin/bash\n')536 sh_file.write('#BSUB -q i2c2_normal'+'\n')537 sh_file.write('#BSUB -J '+job_ref+'\n')538 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')539 sh_file.write('#BSUB -M 15GB'+'\n')540 sh_file.write('#BSUB -o ' + 
log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')541 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')542 sh_file.write(filewrite)543 sh_file.close()544 os.system('bsub < '+job_path_name)545 pass546 else:547 os.system(filewrite)548def univariate_4_locus_additive_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):549 #Model parameters needed550 locus = 1551 heritability = [0.4]552 minorAF = [0.2]553 K = 0.3 #population prevelance554 #Define dataset parameters555 data_name = 'gametes_uni_4add'556 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)557 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]558 weight = [50] # have to do the math for both ratio, X and 100-X559 AF_Min = 0.01560 AF_Max = 0.5561 replicates = 30 #100562 #Make dataset folder563 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):564 os.mkdir(output_path+'/'+archive_name+'/'+data_name)565 #Generate datasets and folders566 for n in numberofattributes:567 for s in samplesize:568 for h in heritability:569 for m in minorAF:570 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)571 modelFile = model_dest+'/'+modelName+"_Models.txt"572 for w in weight:573 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)574 #Create gametes run command575 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -i '+modelFile+' -w '+str(w)+' -D "-h hierarchical -b -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'576 if run_parallel:577 job_ref = 
str(time.time())578 job_path_name = job_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'579 sh_file = open(job_path_name,'w')580 sh_file.write('#!/bin/bash\n')581 sh_file.write('#BSUB -q i2c2_normal'+'\n')582 sh_file.write('#BSUB -J '+job_ref+'\n')583 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')584 sh_file.write('#BSUB -M 15GB'+'\n')585 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')586 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_W_'+str(w)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')587 sh_file.write(filewrite)588 sh_file.close()589 os.system('bsub < '+job_path_name)590 pass591 else:592 os.system(filewrite)593def epistasis_2_locus_imbalanced_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):594 #Model parameters needed595 locus = 2596 heritability = [0.4]597 minorAF = [0.2]598 K = 0.3 #population prevelance599 #Define dataset parameters600 data_name = 'gametes_2way_epistasis_inbal'601 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)602 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]603 balance = [.6,.9]604 AF_Min = 0.01605 AF_Max = 0.5606 replicates = 30 #100607 #Make dataset folder608 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):609 os.mkdir(output_path+'/'+archive_name+'/'+data_name)610 #Generate datasets and folders611 for n in numberofattributes:612 for s in samplesize:613 for h in heritability:614 for m in minorAF:615 for b in balance:616 #Calculate case and control counts617 controlCount = int(float(s)*b)618 caseCount = int(s-controlCount)619 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)620 modelFile = model_dest+'/'+modelName+"_Models.txt"621 genDataName = 
output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)622 #Create gametes run command623 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(caseCount)+' -w '+str(controlCount)+' -r '+str(replicates)+' -o '+str(genDataName)+'"'624 if run_parallel:625 job_ref = str(time.time())626 job_path_name = job_dest+'/gametes_'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'627 sh_file = open(job_path_name,'w')628 sh_file.write('#!/bin/bash\n')629 sh_file.write('#BSUB -q i2c2_normal'+'\n')630 sh_file.write('#BSUB -J '+job_ref+'\n')631 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')632 sh_file.write('#BSUB -M 15GB'+'\n')633 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')634 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_'+str(b)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')635 sh_file.write(filewrite)636 sh_file.close()637 os.system('bsub < '+job_path_name)638 pass639 else:640 os.system(filewrite)641def epistasis_2_locus_quantitative_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):642 #Model parameters needed643 locus = 2644 heritability = [0.4]645 minorAF = [0.2]646 K = 0.3 #population prevelance647 #Define dataset parameters648 data_name = 'gametes_2way_epistasis_quant'649 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)650 numberofattributes = [100] # [20, 100, 1000, 10000, 100000] #[200, 100]651 standardDev = [0.2,0.5,0.8]652 AF_Min = 0.01653 AF_Max = 0.5654 replicates = 30 #100655 #Make dataset folder656 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):657 os.mkdir(output_path+'/'+archive_name+'/'+data_name)658 #Generate datasets and 
folders659 for n in numberofattributes:660 for s in samplesize:661 for h in heritability:662 for m in minorAF:663 for d in standardDev:664 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)665 modelFile = model_dest+'/'+modelName+"_Models.txt"666 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)667 #Create gametes run command668 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-c -d '+ str(d) + ' -t '+ str(s) + ' -n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -r '+str(replicates)+' -o '+str(genDataName)+'"'669 if run_parallel:670 job_ref = str(time.time())671 job_path_name = job_dest+'/gametes_'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'672 sh_file = open(job_path_name,'w')673 sh_file.write('#!/bin/bash\n')674 sh_file.write('#BSUB -q i2c2_normal'+'\n')675 sh_file.write('#BSUB -J '+job_ref+'\n')676 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')677 sh_file.write('#BSUB -M 15GB'+'\n')678 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')679 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_'+str(d)+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')680 sh_file.write(filewrite)681 sh_file.close()682 os.system('bsub < '+job_path_name)683 pass684 else:685 os.system(filewrite)686def epistasis_2_locus_numfeatures_data(output_path,archive_name,model_dest,job_dest,log_dest,run_parallel,this_file_path):687 #Model parameters needed688 locus = 2689 heritability = [0.4]690 minorAF = [0.2]691 K = 0.3 #population prevelance692 #Define dataset parameters693 data_name = 'gametes_2way_epistasis_numfeat'694 samplesize = [1600] #[200, 400, 800, 1600, 3200, 6400] #assumes balanced datasets (#cases = #controls)695 numberofattributes = [1000,10000,100000] # [20, 100, 1000, 10000, 100000] 
#[200, 100]696 AF_Min = 0.01697 AF_Max = 0.5698 replicates = 30 #100699 #Make dataset folder700 if not os.path.exists(output_path+'/'+archive_name+'/'+data_name):701 os.mkdir(output_path+'/'+archive_name+'/'+data_name)702 #Generate datasets and folders703 for n in numberofattributes:704 for s in samplesize:705 for h in heritability:706 for m in minorAF:707 modelName = "L_"+str(locus)+"_H_"+str(h)+"_F_"+str(m)+'_K_'+str(K)708 modelFile = model_dest+'/'+modelName+"_Models.txt"709 genDataName = output_path+'/'+archive_name+'/'+data_name+'/'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_'+str(modelName)710 #Create gametes run command711 filewrite = 'java -jar '+this_file_path+'/gametes_2.2_dev.jar -i '+modelFile+' -D "-n '+str(AF_Min)+' -x '+str(AF_Max)+' -a '+str(n)+' -s '+str(int(s/2))+' -w '+str(int(s/2))+' -r '+str(replicates)+' -o '+str(genDataName)+'"'712 if run_parallel:713 job_ref = str(time.time())714 job_path_name = job_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'_run.sh'715 sh_file = open(job_path_name,'w')716 sh_file.write('#!/bin/bash\n')717 sh_file.write('#BSUB -q i2c2_normal'+'\n')718 sh_file.write('#BSUB -J '+job_ref+'\n')719 sh_file.write('#BSUB -R "rusage[mem=4G]"'+'\n')720 sh_file.write('#BSUB -M 15GB'+'\n')721 sh_file.write('#BSUB -o ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.o\n')722 sh_file.write('#BSUB -e ' + log_dest+'/gametes_'+data_name+'_A_'+str(n)+'_S_'+str(s)+'_H_'+str(h)+'_F_'+str(m)+'_'+job_ref+'.e\n')723 sh_file.write(filewrite)724 sh_file.close()725 os.system('bsub < '+job_path_name)726 pass727 else:728 os.system(filewrite)729######################################730if __name__ == '__main__':...
main.py
Source:main.py
...285 (["reference", "fasta"], ["reference", "aligner"], ["files"])),286 samples, config, dirs, "multicore",287 multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:288 with profile.report("alignment preparation", dirs):289 samples = run_parallel("prep_align_inputs", samples)290 samples = disambiguate.split(samples)291 with profile.report("alignment", dirs):292 samples = run_parallel("process_alignment", samples)293 samples = alignprep.merge_split_alignments(samples, run_parallel)294 samples = disambiguate.resolve(samples, run_parallel)295 with profile.report("callable regions", dirs):296 samples = run_parallel("postprocess_alignment", samples)297 samples = run_parallel("combine_sample_regions", [samples])298 samples = region.clean_sample_data(samples)299 with profile.report("coverage", dirs):300 samples = coverage.summarize_samples(samples, run_parallel)301 ## Variant calling on sub-regions of the input file (full cluster)302 with prun.start(_wres(parallel, ["gatk", "picard", "variantcaller"]),303 samples, config, dirs, "full",304 multiplier=region.get_max_counts(samples), max_multicore=1) as run_parallel:305 with profile.report("alignment post-processing", dirs):306 samples = region.parallel_prep_region(samples, run_parallel)307 with profile.report("variant calling", dirs):308 samples = genotype.parallel_variantcall_region(samples, run_parallel)309 ## Finalize variants (per-sample cluster)310 with prun.start(_wres(parallel, ["gatk", "gatk-vqsr", "snpeff", "bcbio_variation"]),311 samples, config, dirs, "persample") as run_parallel:312 with profile.report("variant post-processing", dirs):313 samples = run_parallel("postprocess_variants", samples)314 samples = run_parallel("split_variants_by_sample", samples)315 with profile.report("validation", dirs):316 samples = run_parallel("compare_to_rm", samples)317 samples = genotype.combine_multiple_callers(samples)318 ## Finalizing BAMs and population databases, handle multicore computation319 with 
prun.start(_wres(parallel, ["gemini", "samtools", "fastqc", "bamtools", "bcbio_variation",320 "bcbio-variation-recall"]),321 samples, config, dirs, "multicore2") as run_parallel:322 with profile.report("prepped BAM merging", dirs):323 samples = region.delayed_bamprep_merge(samples, run_parallel)324 with profile.report("ensemble calling", dirs):325 samples = ensemble.combine_calls_parallel(samples, run_parallel)326 with profile.report("validation summary", dirs):327 samples = validate.summarize_grading(samples)328 with profile.report("structural variation", dirs):329 samples = structural.run(samples, run_parallel)330 with profile.report("population database", dirs):331 samples = population.prep_db_parallel(samples, run_parallel)332 with profile.report("quality control", dirs):333 samples = qcsummary.generate_parallel(samples, run_parallel)334 with profile.report("archive", dirs):335 samples = archive.compress(samples, run_parallel)336 logger.info("Timing: finished")337 return samples338def _debug_samples(i, samples):339 print "---", i, len(samples)340 for sample in (x[0] for x in samples):341 print " ", sample["description"], sample.get("region"), \342 utils.get_in(sample, ("config", "algorithm", "variantcaller")), \343 [x.get("variantcaller") for x in sample.get("variants", [])], \344 sample.get("work_bam")345class SNPCallingPipeline(Variant2Pipeline):346 """Back compatible: old name for variant analysis.347 """348 name = "SNP calling"349class VariantPipeline(Variant2Pipeline):350 """Back compatibility; old name351 """352 name = "variant"353class StandardPipeline(AbstractPipeline):354 """Minimal pipeline with alignment and QC.355 """356 name = "Standard"357 @classmethod358 def run(self, config, config_file, parallel, dirs, lane_items):359 ## Alignment and preparation requiring the entire input file (multicore cluster)360 with prun.start(_wres(parallel, ["aligner"]),361 lane_items, config, dirs, "multicore") as run_parallel:362 with profile.report("alignment", 
dirs):363 samples = run_parallel("process_alignment", lane_items)364 with profile.report("callable regions", dirs):365 samples = run_parallel("postprocess_alignment", samples)366 samples = run_parallel("combine_sample_regions", [samples])367 samples = region.clean_sample_data(samples)368 ## Quality control369 with prun.start(_wres(parallel, ["fastqc", "bamtools", "samtools"]),370 samples, config, dirs, "multicore2") as run_parallel:371 with profile.report("quality control", dirs):372 samples = qcsummary.generate_parallel(samples, run_parallel)373 logger.info("Timing: finished")374 return samples375class MinimalPipeline(StandardPipeline):376 name = "Minimal"377class RnaseqPipeline(AbstractPipeline):378 name = "RNA-seq"379 @classmethod380 def run(self, config, config_file, parallel, dirs, samples):381 with prun.start(_wres(parallel, ["picard", "AlienTrimmer"]),382 samples, config, dirs, "trimming") as run_parallel:383 with profile.report("adapter trimming", dirs):384 samples = run_parallel("process_lane", samples)385 samples = run_parallel("trim_lane", samples)386 with prun.start(_wres(parallel, ["aligner"],387 ensure_mem={"tophat": 8, "tophat2": 8, "star": 30}),388 samples, config, dirs, "multicore",389 multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:390 with profile.report("alignment", dirs):391 samples = disambiguate.split(samples)392 samples = run_parallel("process_alignment", samples)393 with prun.start(_wres(parallel, ["samtools", "cufflinks"]),394 samples, config, dirs, "rnaseqcount") as run_parallel:395 with profile.report("disambiguation", dirs):396 samples = disambiguate.resolve(samples, run_parallel)397 with profile.report("estimate expression", dirs):398 samples = rnaseq.estimate_expression(samples, run_parallel)399 combined = combine_count_files([x[0].get("count_file") for x in samples])400 gtf_file = utils.get_in(samples[0][0], ('genome_resources', 'rnaseq',401 'transcripts'), None)402 annotated = 
annotate_combined_count_file(combined, gtf_file)403 for x in samples:404 x[0]["combined_counts"] = combined405 if annotated:406 x[0]["annotated_combined_counts"] = annotated407 with prun.start(_wres(parallel, ["picard", "fastqc", "rnaseqc"]),408 samples, config, dirs, "persample") as run_parallel:409 with profile.report("quality control", dirs):410 samples = qcsummary.generate_parallel(samples, run_parallel)411 logger.info("Timing: finished")412 return samples413class ChipseqPipeline(AbstractPipeline):414 name = "chip-seq"415 @classmethod416 def run(self, config, config_file, parallel, dirs, samples):417 with prun.start(_wres(parallel, ["aligner", "picard"]),418 samples, config, dirs, "multicore",419 multiplier=alignprep.parallel_multiplier(samples)) as run_parallel:420 samples = run_parallel("process_lane", samples)421 samples = run_parallel("trim_lane", samples)422 samples = disambiguate.split(samples)423 samples = run_parallel("process_alignment", samples)424 with prun.start(_wres(parallel, ["picard", "fastqc"]),425 samples, config, dirs, "persample") as run_parallel:426 samples = run_parallel("clean_chipseq_alignment", samples)427 samples = qcsummary.generate_parallel(samples, run_parallel)428 return samples429def _get_pipeline(item):430 from bcbio.log import logger431 SUPPORTED_PIPELINES = {x.name.lower(): x for x in432 utils.itersubclasses(AbstractPipeline)}433 analysis_type = item.get("analysis", "").lower()434 if analysis_type not in SUPPORTED_PIPELINES:435 logger.error("Cannot determine which type of analysis to run, "436 "set in the run_info under details.")437 sys.exit(1)438 else:439 return SUPPORTED_PIPELINES[analysis_type]440def _pair_lanes_with_pipelines(lane_items):...
test_run_parallel.py
Source:test_run_parallel.py
...12 def funtion_error():13 raise ValueError('Error in values!')14 for use_multiprocess in (True, False):15 try:16 val1, val2 = run_parallel([17 function_no_error,18 funtion_error19 ], multiprocess=use_multiprocess)20 except Exception, e:21 self.assertIsInstance(e, ErrorInProcessException)22 self.assertEqual(1, len(e.errors))23 def test_return_value_order(self):24 """Tests that return values are returned in the order the functions are passed to run_parallel"""25 def return_first():26 time.sleep(0.0)27 return 128 def return_second():29 time.sleep(0.1)30 return 231 def return_third():32 time.sleep(0.2)33 return 334 def return_fourth():35 time.sleep(0.3)36 return 437 for use_multiprocess in (True, False):38 val1, val2, val3, val4 = run_parallel([39 return_second,40 return_first,41 return_third,42 return_fourth], multiprocess=use_multiprocess)43 self.assertEqual(val1, 2)44 self.assertEqual(val2, 1)45 self.assertEqual(val3, 3)...
run_parallel_test.py
Source:run_parallel_test.py
...15 """Test that the result is returned in the correct order."""16 script = MyTestResultOrder(tmpdir)17 data = 1018 params = [(data, p) for p in range(10)]19 results = run_parallel(script.experiment, params)20 assert results == [script.experiment(*p) for p in params]21class MyTestMultipleParams(util.TestScript):22 def experiment(self, data, exponent, bias):23 return data**exponent + bias24def test_multiple_params(tmpdir):25 """Test run_parallel when the experiment has multiple parameters."""26 script = MyTestMultipleParams(tmpdir)27 data = 1028 params = [(data, p, b) for p, b in zip(range(10), range(10, 20))]29 results = run_parallel(script.experiment, params)...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, which takes you from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!