Best Python code snippet using ATX
summarize_dmp_files.py
Source:summarize_dmp_files.py
#!/usr/bin/env python
"""Summarize NCBI dump files found in one directory into two pickles.

Usage: summarize_dmp_files.py <NCBI_dmp_dir_path>

Reads ``assembly_summary_genbank.txt``, ``assembly_summary_refseq.txt``,
``names.dmp``, ``nodes.dmp``, ``taxidlineage.dmp`` and ``merged.dmp`` from the
given directory and writes, into that same directory:

* ``assembly_to_taxID.pkl`` -- columns ``Accession`` and ``taxID``;
* ``taxid_to_lineage.pkl`` -- columns ``taxID``, ``Scientific_Name``,
  ``Rank``, ``Lineage`` and ``CurrentID``.
"""
import pandas as pd
import sys
import os
import gc

# Regex for the field separator of the ``*.dmp`` files: TAB, pipe, TAB.
# Raw string so that ``\|`` reaches the regex engine as an escaped pipe.
DMP_FIELD_SEP = r"\t\|\t"
# Characters left dangling on the last field of every ``*.dmp`` row once the
# row has been split on DMP_FIELD_SEP.
DMP_ROW_END_CHARS = "\t|"


def build_lineage(taxid_list, mapping):
    """Given a list of taxIDs, generate a NCBI LineageEx-like list.

    Args:
        taxid_list: iterable of taxID strings; empty strings are skipped.
        mapping: dict mapping a taxID string to its rank.

    Returns:
        A list with one ``{"TaxId": ..., "Rank": ...}`` dict per non-empty
        taxID, in input order. ``"Rank"`` is ``"NA"`` for taxIDs that are
        missing from ``mapping``.
    """
    return [
        {"TaxId": taxid, "Rank": mapping.get(taxid, "NA")}
        for taxid in taxid_list
        if taxid != ""
    ]


def resolve_current_ids(tax_ids, merged):
    """Map every taxID to the ID it was merged into, if any.

    Args:
        tax_ids: pandas Series of taxID strings.
        merged: DataFrame with string columns ``previous`` and ``current``.

    Returns:
        A Series with the same index as ``tax_ids``; each value is the
        ``current`` ID paired with that taxID in ``merged``, or the taxID
        itself when it does not occur in ``merged["previous"]``.
    """
    # Look values up by taxID rather than assigning one frame's slice into the
    # other: a Series assigned through ``.loc`` is aligned on index labels,
    # and the row labels of the two tables are unrelated.
    previous_to_current = dict(zip(merged["previous"], merged["current"]))
    return tax_ids.map(lambda tax_id: previous_to_current.get(tax_id, tax_id))


def _read_dmp(path):
    """Read one ``*.dmp`` file into a DataFrame with integer column labels."""
    # A regex separator requires the python parsing engine.
    return pd.read_csv(path, sep=DMP_FIELD_SEP, header=None, engine="python")


def main(NCBI_dmp_dir_path):
    """Build both summary pickles from the dump files in the given directory.

    Args:
        NCBI_dmp_dir_path: directory holding the input files; the two output
            pickles are written into it as well.
    """
    assembly_gb_path = os.path.join(NCBI_dmp_dir_path, "assembly_summary_genbank.txt")
    assembly_rs_path = os.path.join(NCBI_dmp_dir_path, "assembly_summary_refseq.txt")
    dump_names_path = os.path.join(NCBI_dmp_dir_path, "names.dmp")
    dump_nodes_path = os.path.join(NCBI_dmp_dir_path, "nodes.dmp")
    dump_taxidlineage_path = os.path.join(NCBI_dmp_dir_path, "taxidlineage.dmp")
    dump_merged_path = os.path.join(NCBI_dmp_dir_path, "merged.dmp")
    assembly_to_taxID_out_path = os.path.join(NCBI_dmp_dir_path, "assembly_to_taxID.pkl")
    taxid_to_lineage_out_path = os.path.join(NCBI_dmp_dir_path, "taxid_to_lineage.pkl")

    print("Combining genome assembly records from both GenBank and RefSeq sources...")
    # The first line of each summary file is skipped; the second is the header.
    assembly_frames = [
        pd.read_csv(path, sep="\t", skiprows=[0], low_memory=False)[
            ["# assembly_accession", "taxid"]
        ]
        for path in (assembly_gb_path, assembly_rs_path)
    ]
    combined_df = pd.concat(assembly_frames, ignore_index=True)
    combined_df = combined_df.rename(
        columns={"# assembly_accession": "Accession", "taxid": "taxID"}
    )
    combined_df = combined_df.astype({"Accession": "str", "taxID": "str"})
    # Drop the intermediate frames before the next large tables are loaded.
    del assembly_frames
    gc.collect()
    combined_df.to_pickle(assembly_to_taxID_out_path, protocol=4)
    print("File output completed!")

    print("Retrieving scientific names for different taxIDs...")
    dump_names = _read_dmp(dump_names_path)
    # Replace whole columns instead of writing through ``.iloc``: putting
    # strings into a numeric column in place is an incompatible-dtype write.
    dump_names[0] = dump_names[0].astype("str")
    dump_names[3] = dump_names[3].str.rstrip(DMP_ROW_END_CHARS)
    dump_names_sci = dump_names.loc[dump_names[3] == "scientific name", [0, 1]]
    dump_names_final = dump_names_sci.rename(
        columns={0: "taxID", 1: "Scientific_Name"}
    ).reset_index(drop=True)
    del dump_names, dump_names_sci
    gc.collect()

    print("Retrieving taxonomic ranks for different taxIDs...")
    dump_nodes = _read_dmp(dump_nodes_path)
    dump_nodes[0] = dump_nodes[0].astype("str")
    dump_nodes_final = dump_nodes[[0, 2]].rename(columns={0: "taxID", 2: "Rank"})
    taxid_to_rank_dict = dict(zip(dump_nodes_final["taxID"], dump_nodes_final["Rank"]))
    del dump_nodes
    gc.collect()

    print("Retrieving complete taxonomic lineage information for different taxIDs...")
    dump_taxidlineage = _read_dmp(dump_taxidlineage_path)
    dump_taxidlineage[0] = dump_taxidlineage[0].astype("str")
    # The lineage field is a space-separated list of taxIDs; trailing spaces
    # are stripped together with the row terminator.
    dump_taxidlineage[1] = dump_taxidlineage[1].str.rstrip(" " + DMP_ROW_END_CHARS)
    dump_taxidlineage["lineage_list"] = dump_taxidlineage[1].str.split(" ")
    dump_taxidlineage = dump_taxidlineage.rename(columns={0: "taxID"})
    dump_taxidlineage["Lineage"] = dump_taxidlineage["lineage_list"].apply(
        build_lineage, args=(taxid_to_rank_dict,)
    )
    dump_taxidlineage_final = dump_taxidlineage.loc[:, ["taxID", "Lineage"]]
    del dump_taxidlineage
    gc.collect()

    print("Retrieving information for merged taxIDs...")
    dump_merged = _read_dmp(dump_merged_path)
    dump_merged[0] = dump_merged[0].astype("str")
    dump_merged[1] = dump_merged[1].str.rstrip(DMP_ROW_END_CHARS)
    dump_merged = dump_merged.rename(columns={0: "previous", 1: "current"})

    print("Combining all gathered infomation...")
    comp_summary = pd.DataFrame({
        "taxID": dump_names_final["taxID"],
        "Scientific_Name": dump_names_final["Scientific_Name"],
    })
    comp_summary = comp_summary.merge(dump_nodes_final, how="inner", on="taxID")
    comp_summary = comp_summary.merge(dump_taxidlineage_final, how="inner", on="taxID")
    comp_summary = comp_summary.astype(
        {"taxID": "str", "Scientific_Name": "str", "Rank": "str"}
    )
    comp_summary["CurrentID"] = resolve_current_ids(comp_summary["taxID"], dump_merged)
    comp_summary.to_pickle(taxid_to_lineage_out_path, protocol=4)
    # NOTE(review): the source excerpt ends with "..." at this point; any
    # further steps of the original script are not visible here.


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("Usage: summarize_dmp_files.py <NCBI_dmp_dir_path>")
    main(sys.argv[1])
94_tree_middle_travel.py
Source:94_tree_middle_travel.py
# Given a binary tree, return its inorder traversal.
#
# Example:
#
# Input: [1,null,2,3]
#    1
#     \
#      2
#     /
#    3
#
# Output: [1,3,2]
from collections import deque


class BinaryTreeNode(object):
    """A binary-tree node holding a value and optional left/right children."""

    def __init__(self, val='', left=None, right=None):
        self.val = val
        self.left = left
        self.right = right


class BTree(object):
    """Binary tree with list-based builders and recursive traversals.

    Every traversal appends the visited values to ``self.dump_nodes``; the
    list is never cleared by the class, so results accumulate across calls.
    """

    def __init__(self):
        self.root = None
        self.dump_nodes = []

    def create_binary_tree(self, root, lst, i):
        """Recursively build the subtree stored at index ``i`` of ``lst``.

        ``lst`` uses the array layout in which the children of index ``i``
        live at ``2*i + 1`` and ``2*i + 2``; ``None`` marks a missing node and
        anything listed below a missing node is ignored.

        Args:
            root: unused; kept so existing callers keep working.
            lst: values in array layout.
            i: index of the subtree root inside ``lst``.

        Returns:
            The new subtree root, or ``None`` when ``i`` is out of range or
            ``lst[i]`` is ``None``.
        """
        if i >= len(lst) or lst[i] is None:
            return None
        node = BinaryTreeNode(lst[i])
        node.left = self.create_binary_tree(node, lst, i * 2 + 1)
        node.right = self.create_binary_tree(node, lst, i * 2 + 2)
        return node

    def build_binary_tree(self, lst):
        """Iteratively build a tree from ``lst`` (same layout as above).

        Returns:
            The root node, or ``None`` for an empty list or a ``None`` root.
        """
        nodes = [None if v is None else BinaryTreeNode(v) for v in lst]
        for index in range(1, len(nodes)):
            node = nodes[index]
            parent_node = nodes[(index - 1) // 2]
            # A value whose parent slot is empty cannot be attached; skip it,
            # as create_binary_tree does, rather than fail on None.left.
            if node is None or parent_node is None:
                continue
            if index % 2:
                parent_node.left = node
            else:
                parent_node.right = node
        return nodes[0] if nodes else None

    def add_node(self, data):
        """Insert ``data`` at the first free child slot in level order.

        ``'#'`` is stored as a bare ``'#'`` placeholder: it occupies a slot
        but is never descended into.
        """
        node = '#' if data == '#' else BinaryTreeNode(data)
        if self.root is None:
            self.root = node
            return
        pending = deque([self.root])
        while pending:
            current_node = pending.popleft()
            # Placeholders have no child slots of their own.
            if not isinstance(current_node, BinaryTreeNode):
                continue
            if current_node.left is None:
                current_node.left = node
                return
            if current_node.right is None:
                current_node.right = node
                return
            pending.append(current_node.left)
            pending.append(current_node.right)

    def inorder(self, node):
        """Append the subtree's values in left, node, right order."""
        # Covers both None and '#' placeholders.
        if not isinstance(node, BinaryTreeNode):
            return
        self.inorder(node.left)
        self.dump_nodes.append(node.val)
        self.inorder(node.right)

    def middle_travel(self, node):
        """In-order traversal; same result as ``inorder``."""
        self.inorder(node)

    def front_travel(self, node):
        """Append the subtree's values in node, left, right order."""
        if not isinstance(node, BinaryTreeNode):
            return
        self.dump_nodes.append(node.val)
        self.front_travel(node.left)
        self.front_travel(node.right)

    def back_travel(self, node):
        """Append the subtree's values in left, right, node order."""
        if not isinstance(node, BinaryTreeNode):
            return
        self.back_travel(node.left)
        self.back_travel(node.right)
        self.dump_nodes.append(node.val)


if __name__ == "__main__":
    t = BTree()
    lst = [1, 2, 3, None, 4]
    print('lst=', lst)
    t.root = t.build_binary_tree(lst)
    t.inorder(t.root)
    # NOTE(review): the source excerpt ends with "..." at this point; the
    # rest of the demo is not visible here.
parser.py
Source:parser.py
import ipaddress

from pyrser import grammar, meta
from pyrser.directives import ignore
from pyrser.hooks import dump_nodes
from pyrser.hooks.echo import echo_nodes
from pyrser.passes import to_yml

# Sample input: one dotted-quad address on its own line.
text = """
10.100.0.1
"""


class Bind(grammar.Grammar):
    """Grammar whose entry rule ``zone`` accepts an IPv4 address or an octet.

    The ``ipv4`` rule hands its capture to the ``is_ipv4`` hook and the
    ``octet`` rule hands its capture to the ``is_int`` hook.
    """

    entry = "zone"
    # The octet rule lists its alternatives longest first. The 200-255 range
    # is split into 25[0-5] and 2[0-4][0-9]: the single alternative
    # '2' [0-5] [0-5] rejected valid values such as 206-209 or 246-249.
    grammar = """
        zone = [ [ipv4 | octet]:>_ eof? #dump_nodes]
        ipv4 = [ [octet '.' octet '.' octet '.' octet]:ipv4 #is_ipv4(_, ipv4) ]
        octet = [
            [ '2' '5' [ '0'..'5' ] |
              '2' [ '0'..'4' ] [ '0'..'9' ] |
              '1' [ '0'..'9' ] [ '0'..'9' ] |
              [ '1'..'9' ] [ '0'..'9' ] |
              [ '0'..'9' ]
            ]: octet #is_int(_, octet)
            #dump_nodes
        ]
    """


@meta.hook(Bind)
def is_int(self, ast, octet):
    """Store the captured octet text on ``ast.node`` as an ``int``.

    Always returns True.
    """
    ast.node = int(self.value(octet))
    return True


@meta.hook(Bind)
def is_ipv4(self, ast, ipv4):
    """Store the captured text on ``ast.node`` as an ``IPv4Address``.

    Prints ``ipv4.node`` first (debug output), then returns True.
    ``ipaddress.IPv4Address`` raises ``AddressValueError`` if the captured
    text is not a valid address.
    """
    print(f"{ipv4.node=}")
    ast.node = ipaddress.IPv4Address(self.value(ipv4))
    return True


if __name__ == "__main__":
    bind = Bind()
    # NOTE(review): the source excerpt ends with "..." at this point; the
    # rest of the entry point is not visible here.
Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!