Best Python code snippet using Airtest
repository.py
Source:repository.py
1"""Interface to the CKAN data repository, for uploading bundle records and data extracts. 2Copyright (c) 2013 Clarinova. This file is licensed under the terms of the3Revised BSD License, included in this distribution as LICENSE.txt4"""5from databundles.dbexceptions import ConfigurationError6import petl.fluent as petlf7class Repository(object):8 '''Interface to the CKAN data repository, for uploading bundle records and 9 data extracts. classdocs10 '''11 def __init__(self, bundle, repo_name='default'):12 '''Create a new repository interface13 ''' 14 import databundles.client.ckan15 import time, datetime16 self.bundle = bundle 17 self.extracts = self.bundle.config.group('extracts')18 self.partitions = self.bundle.partitions 19 self.repo_name = repo_name20 self._api = None21 22 @property23 def api(self):24 if not self._api:25 self.set_api()26 27 return self._api28 def set_api(self): 29 import databundles.client.ckan30 repo_group = self.bundle.config.group('repository')31 32 if not repo_group.get(self.repo_name): 33 raise ConfigurationError("'repository' group in configure either nonexistent"+34 " or missing {} sub-group ".format(self.repo_name))35 36 repo_config = repo_group.get(self.repo_name)37 38 self._api = databundles.client.ckan.Ckan( repo_config.url, repo_config.key) 39 40 return self.api41 42 43 def _validate_for_expr(self, astr,debug=False):44 """Check that an expression is save to evaluate"""45 import os46 import ast47 try: tree=ast.parse(astr)48 except SyntaxError: raise ValueError(49 "Could not parse code expression : \"{}\" ".format(astr)+50 " ")51 for node in ast.walk(tree):52 if isinstance(node,(ast.Module,53 ast.Expr,54 ast.Dict,55 ast.Str,56 ast.Attribute,57 ast.Num,58 ast.Name,59 ast.Load,60 ast.BinOp,61 ast.Compare,62 ast.Eq,63 ast.Import,64 ast.alias,65 ast.Call66 )): 67 continue68 if (isinstance(node,ast.Call)69 and isinstance(node.func, ast.Attribute)70 and node.func.attr == 'datetime'): 71 continue72 if debug:73 attrs=[attr for attr in dir(node) if not attr.startswith('__')]74 print(node)75 for attrname in attrs:76 print(' {k} ==> {v}'.format(k=attrname,v=getattr(node,attrname)))77 raise ValueError("Bad node {} in {}. 
This code is not allowed to execute".format(node,astr))78 return True79 def _do_extract(self, extract_data, force=False):80 import os # For the eval @UnusedImport81 82 done_if = extract_data.get('done_if',False)83 84 if not force and done_if and self._validate_for_expr(done_if, True):85 if eval(done_if): 86 self.bundle.log("For extract {}, done_if ( {} ) evaluated true"87 .format(extract_data['_name'], done_if)) 88 return extract_data['path']89 if extract_data.get('function',False):90 file_ = self._do_function_extract(extract_data)91 elif extract_data.get('query',False):92 file_ = self._do_query_extract(extract_data)93 else:94 from databundles.dbexceptions import ConfigurationError95 raise ConfigurationError("Bad Extract config: {}".format(extract_data))96 return file_97 98 def _do_function_extract(self, extract_data):99 '''Run a function on the build that produces a file to upload'''100 import os.path101 102 f_name = extract_data['function']103 104 f = getattr(self.bundle, f_name)105 106 file_ = f(extract_data) 107 return file_108 109 def _do_query_extract(self, extract_data):110 """Extract a CSV file and upload it to CKAN"""111 import tempfile112 import uuid113 import os114 p = extract_data['_partition'] # Set in _make_partition_dict115 116 file_name = extract_data.get('name', None)117 118 if file_name:119 file_ = self.bundle.filesystem.path('extracts', file_name)120 else:121 file_ = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()) )122 123 self.bundle.log("Extracting {} to {}".format(extract_data['title'],file_))124 petlf.fromsqlite3(p.database.path, extract_data['query'] ).tocsv(file_) #@UndefinedVariable125 126 return file_ 127 128 def _send(self, package, extract_data, file_):129 import os130 import mimetypes131 132 _, ext = os.path.splitext(file_)133 mimetypes.init()134 content_type = mimetypes.types_map.get(ext,None) #@UndefinedVariable135 136 try:137 _,format = content_type.split('/')138 except:139 format = None140 141 name = extract_data.get('name', os.path.basename(file_))142 143 144 r = self.api.add_file_resource(package, file_, 145 name=name,146 description=extract_data['description'],147 content_type = content_type, 148 format=format149 )150 151 152 return r153 154 def _make_partition_dict(self, p):155 '''Return a dict that includes the fields from the extract expanded for156 the values of each and the partition'''157 158 qd = {159 'p_id' : p.identity.id_,160 'p_name' : p.identity.name,161 }162 163 try:164 # Bundles don't have these 165 qd_part = {166 'p_table' : p.identity.table,167 'p_space' : p.identity.space,168 'p_time' : p.identity.time,169 'p_grain' : p.identity.grain, 170 }171 except:172 qd_part = {'p_table' : '','p_space' : '', 'p_time' :'','p_grain' : ''}173 174 qd = dict(qd.items()+ qd_part.items())175 qd['_partition'] = p176 return qd177 178 def _expand_each(self, each):179 '''Generate a set of dicts from the cross product of each of the180 arrays of 'each' group'''181 182 # Normalize the each group, particular for the case where there is only183 # one dimension184 185 if not isinstance(each, list):186 raise ConfigurationError("The 'each' key must have a list. Got a {} ".format(type(each)))187 188 elif len(each) == 0:189 each = [[{}]]190 if not isinstance(each[0], list):191 each = [each]192 193 # Now the top level arrays of each are dimensions, and we can do a 194 # multi dimensional iteration over them. 
195 # This is essentially a cross-product, where out <- out X dim(i)196 out = []197 for i,dim in enumerate(each):198 if i == 0:199 out = dim200 else:201 o2 = []202 for i in dim:203 for j in out:204 o2.append(dict(i.items()+j.items()))205 out = o2206 return out207 208 209 def _expand_partitions(self, partition_name='any', for_=None):210 '''Generate a list of partitions to apply the extract process to. '''211 if partition_name == 'any':212 partitions = [p for p in self.partitions]213 partitions = [self.bundle] + partitions214 else:215 partition = self.partitions.get(partition_name)216 partitions = [partition]217 out = []218 219 if not for_:220 for_ = 'True'221 222 for partition in partitions:223 224 try:225 self.bundle.log("Testing: {} ".format(partition.identity.name))226 if self._validate_for_expr(for_, True):227 if eval(for_): 228 out.append(partition)229 except Exception as e:230 self.bundle.error("Error in evaluting for '{}' : {} ".format(for_, e))231 232 return out233 234 def _sub(self, data):235 236 if data.get('aa', False):237 from databundles.geo.analysisarea import get_analysis_area238 aa = get_analysis_area(self.bundle.library, **data['aa']) 239 240 aa_d = dict(aa.__dict__)241 aa_d['aa_name'] = aa_d['name']242 del aa_d['name']243 244 data = dict(data.items() + aa_d.items())245 data['query'] = data.get('query','').format(**data)246 data['title'] = data.get('title','').format(**data)247 data['description'] = data.get('description','').format(**data)248 data['name'] = data.get('name','').format(**data)249 data['path'] = self.bundle.filesystem.path('extracts',format(data['name']))250 data['done_if'] = data.get('done_if','').format(**data)251 252 return data253 254 def dep_tree(self, root):255 """Return the tree of dependencies rooted in the given nod name, 256 excluding all other nodes"""257 258 graph = {}259 for key,extract in self.extracts.items():260 graph[key] = set(extract.get('depends',[]))261 262 def _recurse(node):263 l = set([node])264 for n in graph[node]:265 l = l | _recurse(n)266 267 return l268 269 return _recurse(root)270 271 272 def generate_extracts(self, root=None):273 """Generate dicts that have the data for an extract, along with the 274 partition, query, title and description275 276 :param root: The name of an extract group to use as the root of277 the dependency tree278 :type root: string279 280 If `root` is specified, it is a name of an extract group from the configuration,281 and the only extracts performed will be the named extracts and any of its282 dependencies. 283 284 """285 import collections286 from databundles.util import toposort287 288 ext_config = self.extracts289 # Order the extracts to satisfy dependencies. 290 graph = {}291 for key,extract in ext_config.items():292 graph[key] = set(extract.get('depends',[]))293 294 if graph:295 exec_list = []296 for group in toposort(graph):297 exec_list.extend(group)298 else:299 exec_list = ext_config.keys()300 301 if root:302 deps = self.dep_tree(root)303 exec_list = [ n for n in exec_list if n in deps]304 305 306 # now can iterate over the list. 
307 for key in exec_list:308 extract = ext_config[key]309 extract['_name'] = key310 for_ = extract.get('for', "'True'")311 function = extract.get('function', False)312 each = extract.get('each', [])313 p_id = extract.get('partition', False)314 eaches = self._expand_each(each)315 316 # This part is a awful hack and should be refactored317 if function:318 for data in eaches: 319 yield self._sub(dict(extract.items() + data.items()))320 elif p_id: 321 partitions = self._expand_partitions(p_id, for_)322 323 for partition in partitions:324 p_dict = self._make_partition_dict(partition)325 for data in eaches: 326 yield self._sub(dict(p_dict.items()+extract.items() + 327 data.items() ))328 329 def store_document(self, package, config):330 import re, string331 id = re.sub('[\W_]+', '-',config['title'])332 333 r = self.api.add_url_resource(package, 334 config['url'], 335 config['title'],336 description=config['description'])337 338 return r339 340 def extract(self, root=None, force=False):341 import os342 for extract_data in self.generate_extracts(root=root):343 file_ = self._do_extract(extract_data, force=force)344 if file_ is True:345 #self.bundle.log("Extract {} marked as done".format(extract_data['_name']))346 pass347 elif file_ and os.path.exists(file_):348 self.bundle.log("Extracted: {}".format(file_))349 else:350 self.bundle.error("Extracted file {} does not exist".format(file_))351 352 return True353 354 def submit(self, root=None, force=False, repo=None): 355 """Create a dataset for the bundle, then add a resource for each of the356 extracts listed in the bundle.yaml file"""357 358 if repo:359 self.repo_name = repo360 self.set_api()361 362 self.bundle.update_configuration()363 from os.path import basename364 365 ckb = self.api.update_or_new_bundle_extract(self.bundle)366 367 sent = set()368 369 # Clear out existing resources. 370 ckb['resources'] = [] 371 self.api.put_package(ckb)372 373 for doc in self.bundle.config.group('about').get('documents',[]):374 self.store_document(ckb, doc)375 for extract_data in self.generate_extracts(root=root):376 file_ = self._do_extract(extract_data, force=force)377 if file_ not in sent:378 r = self._send(ckb, extract_data,file_)379 sent.add(file_)380 url = r['ckan_url']381 self.bundle.log("Submitted {} to {}".format(basename(file_), url))382 else:383 self.bundle.log("Already processed {}, not sending.".format(basename(file_)))384 ...
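For orientation, a minimal usage sketch: it assumes a databundles `bundle` object whose bundle.yaml defines the `repository`, `extracts`, and `about` configuration groups this class reads, and the import path for `Repository` is a guess based on the file name; only `extract()` and `submit()` come from the listing above.

# Hypothetical driver; the import path and the bundle construction are
# assumptions, not part of the listing above.
from databundles.repository import Repository

def publish_bundle(bundle):
    repo = Repository(bundle, repo_name='default')
    repo.extract(force=False)      # run every configured extract locally
    repo.submit(repo='default')    # upload the extracts and documents to CKAN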
ocr_badao.py
Source:ocr_badao.py
1"""2Usage:3python get_reference_embedded.py \4 --img_lst ./img_lst.txt \5 --output_dir ./output6 --verbose True7"""8import re9import os10import cv211import glob212import argparse13import pytesseract14import numpy as np15from tqdm import tqdm16import re17from constrains import check_embedded18def get_reference_embbeded(path, output_dir, verbose=False):19 img = cv2.imread(path)20 # img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)21 _, bw_img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)22 # Extract information from the image23 extract_data = pytesseract.image_to_data(bw_img, output_type=pytesseract.Output.DICT)24 # Get the object recognized is page_num, block_num, par_num, line_num, word_num25 # page_num_indices = np.where(np.array(extract_data['level'])==1)[0]26 # block_num_indices = np.where(np.array(extract_data['level'])==2)[0]27 # par_num_indices = np.where(np.array(extract_data['level'])==3)[0]28 # line_num_indices = np.where(np.array(extract_data['level'])==4)[0]29 word_num_indices = np.where(np.array(extract_data['level'])==5)[0]30 # Get the object have specific patterns31 # Case 132 filter_word_num_indices_case1 = [i for i in word_num_indices33 if re.match("[A-Z0-9]\-[A-Za-z]{4,10}", extract_data['text'][i])]34 # # Case 235 filter_word_num_indices_case2 = [i for i in word_num_indices36 if re.match("\([A-Z]\.[0-9]", extract_data['text'][i])]37 bw_img = bw_img[:, :, 0]38 filter_word_num_indices_case2 = [39 i for i in filter_word_num_indices_case240 if check_embedded(bin_img=bw_img,41 bbox=(extract_data['left'][i], 42 extract_data['top'][i], 43 extract_data['width'][i], 44 extract_data['height'][i]45 )46 )47 ]48 # # Case 349 filter_word_num_indices_case3 = [i for i in word_num_indices50 if re.match("[\-+]{0,1}[0-9]{1,2}[+\-\*/><=][0-9]{1,2}", extract_data['text'][i])]51 52 # # Case 453 filter_word_num_indices_case4 = [i for i in word_num_indices54 if re.match("exp\(|dim\(|rank\(|cos\(|sin\(|tan\(|cotan\(|Ker\(|Im\(|LSM\(", extract_data['text'][i])]55 # Case 556 filter_word_num_indices_case5 = [i for i in word_num_indices57 if re.match("[A-Z]+\(.", extract_data['text'][i])]58 if len(filter_word_num_indices_case1) + len(filter_word_num_indices_case2) + len(filter_word_num_indices_case3) + len(filter_word_num_indices_case4) + len(filter_word_num_indices_case5) == 0:59 return60 61 # Write detection result to file62 with open(os.path.join(output_dir, 'case1.txt'), 'a+') as f:63 for i in filter_word_num_indices_case1:64 line = "{}, {}, {}, {}, {}, {}, {}".format(65 os.path.basename(path),66 extract_data['left'][i], 67 extract_data['top'][i], 68 extract_data['left'][i]+extract_data['width'][i], 69 extract_data['top'][i]+extract_data['height'][i],70 extract_data['conf'][i]/100, 071 )72 f.write(line + '\n')73 with open(os.path.join(output_dir, 'case2.txt'), 'a+') as f:74 for i in filter_word_num_indices_case2:75 line = "{}, {}, {}, {}, {}, {}, {}".format(76 os.path.basename(path),77 extract_data['left'][i], 78 extract_data['top'][i], 79 extract_data['left'][i]+extract_data['width'][i], 80 extract_data['top'][i]+extract_data['height'][i],81 extract_data['conf'][i]/100, 082 )83 f.write(line + '\n')84 with open(os.path.join(output_dir, 'case3.txt'), 'a+') as f:85 for i in filter_word_num_indices_case3:86 line = "{}, {}, {}, {}, {}, {}, {}".format(87 os.path.basename(path),88 extract_data['left'][i], 89 extract_data['top'][i], 90 extract_data['left'][i]+extract_data['width'][i], 91 extract_data['top'][i]+extract_data['height'][i],92 extract_data['conf'][i]/100, 093 )94 f.write(line + '\n')95 
with open(os.path.join(output_dir, 'case4.txt'), 'a+') as f:96 for i in filter_word_num_indices_case4:97 line = "{}, {}, {}, {}, {}, {}, {}".format(98 os.path.basename(path),99 extract_data['left'][i], 100 extract_data['top'][i], 101 extract_data['left'][i]+extract_data['width'][i], 102 extract_data['top'][i]+extract_data['height'][i],103 extract_data['conf'][i]/100, 0104 )105 f.write(line + '\n')106 with open(os.path.join(output_dir, 'case5.txt'), 'a+') as f:107 for i in filter_word_num_indices_case5:108 line = "{}, {}, {}, {}, {}, {}, {}".format(109 os.path.basename(path),110 extract_data['left'][i], 111 extract_data['top'][i], 112 extract_data['left'][i]+extract_data['width'][i], 113 extract_data['top'][i]+extract_data['height'][i],114 extract_data['conf'][i]/100, 0115 )116 f.write(line + '\n')117 if verbose:118 # Draw case 1119 for i in filter_word_num_indices_case1:120 (x, y, w, h) = (extract_data['left'][i], 121 extract_data['top'][i], 122 extract_data['width'][i], 123 extract_data['height'][i])124 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)125 cv2.putText(img, '1', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)126 # Draw case 2127 for i in filter_word_num_indices_case2:128 (x, y, w, h) = (extract_data['left'][i], 129 extract_data['top'][i], 130 extract_data['width'][i], 131 extract_data['height'][i])132 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 127, 255), 2)133 cv2.putText(img, '2', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 127, 255), 2)134 # Draw case 3135 for i in filter_word_num_indices_case3:136 (x, y, w, h) = (extract_data['left'][i], 137 extract_data['top'][i], 138 extract_data['width'][i], 139 extract_data['height'][i])140 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 255), 2)141 cv2.putText(img, '3', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)142 # Draw case 4143 for i in filter_word_num_indices_case4:144 (x, y, w, h) = (extract_data['left'][i], 145 extract_data['top'][i], 146 extract_data['width'][i], 147 extract_data['height'][i])148 img = cv2.rectangle(img, (x, y), (x + w, y + h), (127, 0, 127), 2)149 cv2.putText(img, '4', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (127, 0, 127), 2)150 # Draw case 5151 for i in filter_word_num_indices_case5:152 (x, y, w, h) = (extract_data['left'][i], 153 extract_data['top'][i], 154 extract_data['width'][i], 155 extract_data['height'][i])156 img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)157 cv2.putText(img, '5', (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)158 save_bin_img_file = os.path.join(output_dir, 'bin_images', os.path.basename(path))159 save_img_file = os.path.join(output_dir, 'images', os.path.basename(path))160 #save_text_file = os.path.join(output_dir, 'text', os.path.basename(path).split(".")[0]+'.txt')161 162 cv2.imwrite(save_bin_img_file, bw_img)163 cv2.imwrite(save_img_file, img)164 # with open(save_text_file, 'w') as f:165 # for i in filter_word_num_indices_case1:166 # f.write(extract_data['text'][i]+'\n')167 # for i in filter_word_num_indices_case2:168 # f.write(extract_data['text'][i]+'\n')169 # for i in filter_word_num_indices_case3:170 # f.write(extract_data['text'][i]+'\n')171 # for i in filter_word_num_indices_case4:172 # f.write(extract_data['text'][i]+'\n')173 # for i in filter_word_num_indices_case5:174 # f.write(extract_data['text'][i]+'\n')175 # pass176def main(args):177 178 # Check invalid input or not179 if not os.path.exists(args['img_lst']):180 print("INVALID IMAGE LIST FILE !")181 exit(0)182 if not 
os.path.exists(args['output_dir']):183 print("Creating output directory: {}".format(args['output_dir']))184 try:185 os.mkdir(args['output_dir'])186 except:187 print("INVALID OUTPUT DIRECTORY !")188 exit(0)189 os.mkdir(os.path.join(args['output_dir'], 'images'))190 os.mkdir(os.path.join(args['output_dir'], 'bin_images'))191 os.mkdir(os.path.join(args['output_dir'], 'text'))192 if not os.path.exists(os.path.join(args['output_dir'], 'bin_images')):193 os.mkdir(os.path.join(args['output_dir'], 'bin_images'))194 if not os.path.exists(os.path.join(args['output_dir'], 'images')):195 os.mkdir(os.path.join(args['output_dir'], 'images'))196 if not os.path.exists(os.path.join(args['output_dir'], 'text')):197 os.mkdir(os.path.join(args['output_dir'], 'text'))198 # Get the existing image path from file199 with open(args['img_lst'], 'r') as f:200 img_paths = [line.rstrip() for line in f.readlines() if os.path.exists(line.rstrip())]201 # Extract information from each image202 for i, img_path in enumerate(img_paths):203 print("{:6}/{:6} Extracting {}".format(204 str(i).zfill(6), str(len(img_paths)).zfill(6), os.path.basename(img_path)))205 get_reference_embbeded(path=img_path, output_dir=args['output_dir'], verbose=args['verbose'])206if __name__ == '__main__':207 parser = argparse.ArgumentParser(description='Extract information from image using Tesseract OCR engine.')208 parser.add_argument('--img_lst', required=True,209 help='an integer for the accumulator')210 parser.add_argument('--output_dir', required=True,211 help='The output directory to store extracted information')212 parser.add_argument('--verbose', action='store_true',213 help='Store image to disk wheter or not.')214 args = vars(parser.parse_args())215 print(args)...
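A small sketch of how the script is meant to be fed, assuming a folder of page images; the folder name and glob pattern are placeholders, and the CLI flags are the ones defined in the argparse block above.

# Hypothetical preparation step: build the --img_lst file, one image path per line.
import glob
import os

paths = sorted(glob.glob('./pages/*.png'))          # placeholder input folder
with open('img_lst.txt', 'w') as f:
    f.write('\n'.join(os.path.abspath(p) for p in paths))

# Then run the script; detections land in case1.txt ... case5.txt under ./output:
#   python ocr_badao.py --img_lst ./img_lst.txt --output_dir ./output --verbose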
__init__.py
Source:__init__.py
from .polygon import get_subset_area
from ..utils.plink import run_plink
import pandas as pd
import tempfile
import numpy as np
from numpy.random import randn


def filter_data(meta_data, bedfile, missing=0.001, plink="plink",
                exclude_loci=[],
                has_dataset=False, outfile='TMP_PLINK', per_ind_missing=1.,
                max_per_pop=5000):
    """filter_data
    filters bed file to only keep the individuals in meta_data, uses plink

    the data is read from bedfile, and written to the file in the TMP_PLINK file
    global variable

    Parameters
    ----------
    meta_data : pd.DataFrame
        pandas data frame with individuals to keep
    bedfile : path
        the bedfile to be filtered
    plink : path
        the plink executable to be used
    """
    include_name = '%s.incl' % outfile

    try:
        fam = pd.read_table("%s.fam" % bedfile, header=None,
                            skipinitialspace=True)
        fam.columns = ['FAM', 'sampleId', 'a', 'b', 'c', 'd']
    except ValueError:
        fam = pd.read_table("%s.fam" % bedfile, header=None,
                            skipinitialspace=True, sep=" ")
        fam.columns = ['FAM', 'sampleId', 'a', 'b', 'c', 'd']

    extract_data = meta_data.merge(fam, on='sampleId', how='inner')
    #5
    extract_data['random'] = randn(extract_data.shape[0])
    extract_data.index = extract_data.sampleId
    print("subsetting: ", extract_data.shape)

    largest = extract_data.groupby('popId')['random'].nlargest(max_per_pop)
    print(largest)
    #largest = extract_data.groupby('popId')['random'].nlargest(5)
    largest = pd.DataFrame(largest.index)
    print(largest)
    print("subsetting: ", largest.shape, largest.columns)

    extract_data = extract_data.merge(largest, on="sampleId", how="inner")
    print("subsetting: ", extract_data.shape, largest.shape)

    extract_data.to_csv(include_name, sep=' ',
                        columns=('FAM', 'sampleId'),
                        header=None, index=None)

    extract_data.drop('a', axis=1, inplace=True)
    extract_data.drop('b', axis=1, inplace=True)
    extract_data.drop('c', axis=1, inplace=True)
    extract_data.drop('d', axis=1, inplace=True)
    extract_data.drop('FAM', axis=1, inplace=True)
    extract_data.drop('POINTS', axis=1, inplace=True)
    print(extract_data.columns)

    #if has_dataset:
    #    meta_data = extract_data[['sampleId', 'POP', 'latitude', 'longitude', 'FAM',
    #                              'DATASET']]
    #else:
    #    meta_data = extract_data[['sampleId', 'POP', 'latitude', 'longitude', 'FAM']]

    extract_data = extract_data[pd.notnull(extract_data['latitude'])]
    extract_data = extract_data[pd.notnull(extract_data['longitude'])]

    flags = dict()
    flags['make-bed'] = ''
    #flags['allow-extra-chr'] = ''
    flags['bfile'] = bedfile
    flags['out'] = outfile
    flags['keep'] = include_name
    flags['indiv-sort'] = 'f %s' % include_name

    if exclude_loci != []:
        exclude_loci_file = tempfile.NamedTemporaryFile(delete=False, mode="w")
        for locus in exclude_loci:
            exclude_loci_file.write("%s\n" % locus)
        exclude_loci_file.close()
        flags['exclude'] = exclude_loci_file.name
        print("excluding loci %s " % exclude_loci_file.name)

    run_plink(plink, flags)

    flags['bfile'] = outfile
    # flags['geno'] = '%s' % (float(missing) * 2)
    flags['geno'] = '%s' % missing
    run_plink(plink, flags)

    flags['mind'] = '%s' % per_ind_missing
    del flags['geno']
    #flags['geno'] = '%s' % missing
    run_plink(plink, flags)

    #now re-read fam file to find retained guys
    print(extract_data.shape)
    fam = pd.read_table("%s.fam" % outfile, header=None,
                        skipinitialspace=True, sep=" ")
    fam.columns = ['FAM', 'sampleId', 'a', 'b', 'c', 'd']
    print(fam.shape)

    extract_data = extract_data.merge(fam, on='sampleId', how='inner')
    extract_data.drop('a', axis=1, inplace=True)
    extract_data.drop('b', axis=1, inplace=True)
    extract_data.drop('c', axis=1, inplace=True)
    extract_data.drop('d', axis=1, inplace=True)
    extract_data.drop('FAM', axis=1, inplace=True)
    print(extract_data.shape)
...
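A usage sketch under stated assumptions: `samples.csv`, the `mydata` PLINK prefix, and the value of `max_per_pop` are placeholders, and the metadata frame must provide the `sampleId`, `popId`, `latitude`, `longitude`, and `POINTS` columns that `filter_data` merges on, drops, or filters by.

# Hypothetical call; assumes plink is on PATH and mydata.bed/.bim/.fam exist.
import pandas as pd

meta = pd.read_csv('samples.csv')   # needs sampleId, popId, latitude, longitude, POINTS
filter_data(meta, bedfile='mydata', missing=0.001, plink='plink',
            outfile='TMP_PLINK', max_per_pop=500)
# TMP_PLINK.bed/.bim/.fam now hold only the kept individuals, ordered as in TMP_PLINK.incl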