Best Python code snippet using avocado_python
scoring_loader.py
Source:scoring_loader.py
"""
* Software Name : DAGOBAH Radar Station
* Version: <1.0.0>
* SPDX-FileCopyrightText: Copyright (c) 2022 Orange
* SPDX-License-Identifier: BSD-3-Clause
* This software is confidential and proprietary information of Orange.
* You shall not disclose such Confidential Information and shall not copy, use or distribute it
* in whole or in part without the prior written consent of Orange
* Author: Jixiong Liu, Viet-Phi Huynh, Yoan Chabot, and Raphael Troncy
* Software description: DAGOBAH Radar Station is a hybrid system that aims to add a semantic disambiguation step after a previously
identified cell-entity annotation. Radar Station takes into account the entire column as context and uses graph embeddings to capture
latent relationships between entities to improve their disambiguation.
"""
import json
import os
import csv
import ast
import copy


class Data_loader():
    """Load CEA (cell-entity annotation) candidate scores produced by several
    baseline scoring systems and convert them to one common representation.

    Every loader returns a dict mapping a column index to a list of cell
    entries of the form::

        {"row": int, "column": int,
         "presenceOfAmbiguity": bool,
         "TopCandidatesNumbers": int,
         "TopCandidates": [ {...candidate...}, ... ]}

    A cell is flagged ambiguous when more than one candidate scores at least
    ``annotationSe * best_score`` (candidates below that threshold are cut).
    """

    def __init__(self, scoringSystem, annotationSe, goldStandard, tableFolder):
        self.scoringSystem = scoringSystem  # 'MTab' | 'DAGOBAHSL' | 'bbw' | 'semtab2022'
        self.annotationSe = annotationSe    # relative score threshold in [0, 1]
        self.goldStandard = goldStandard    # dataset name, e.g. '2T', 'T2D', 'Limaye'
        self.tableFolder = tableFolder      # root folder containing BaselineScoring/...

    def loader_manager(self, fileName):
        """Dispatch to the loader matching ``self.scoringSystem``.

        :param fileName: table name (extension handling is loader-specific)
        :return: table representation dict, or None for an unknown system
                 (the original fell through and returned None implicitly).
        """
        dispatch = {
            'MTab': self.Mtab_scoring_loader,
            'DAGOBAHSL': self.dagobah_scoring_loader,
            'bbw': self.bbw_scoring_loader,
            'semtab2022': self.semtab2022_scoring_loader,
        }
        loader = dispatch.get(self.scoringSystem)
        return loader(fileName) if loader is not None else None

    def _add_cell(self, tableRepresentation, row, column, candidates):
        """Append one cell entry to ``tableRepresentation[column]``.

        Shared by all loaders; previously this dict-append pattern was
        duplicated four times.
        """
        tableRepresentation.setdefault(column, []).append({
            "row": row,
            "column": column,
            "presenceOfAmbiguity": len(candidates) > 1,
            "TopCandidatesNumbers": len(candidates),
            "TopCandidates": candidates,
        })

    def semtab2022_scoring_loader(self, tableName):
        """Load SemTab2022 valid-set scoring for ``tableName`` (no extension).

        Candidates are assumed pre-sorted by decreasing score (the first entry
        is taken as the best score). Wikidata URIs are shortened to bare QIDs.
        """
        fileName = self.tableFolder + "/BaselineScoring/Semtab2022/Valid/" + tableName + ".json"
        with open(fileName, 'r', encoding='latin1') as load_f:
            annotations = json.load(load_f)
        tableRepresentation = {}
        for aCell in annotations["CEA"]:
            highestScoring = aCell["annotation"][0]['score']
            topCandidates = []
            for candidate in aCell["annotation"]:
                # stop at the first candidate below the relative threshold
                if candidate['score'] < (self.annotationSe * highestScoring):
                    break
                topCandidates.append({
                    "id": candidate['uri'].replace("http://www.wikidata.org/entity/", ""),
                    "score": candidate['score'],
                })
            self._add_cell(tableRepresentation, aCell["row"], aCell["column"], topCandidates)
        return tableRepresentation

    def dagobah_scoring_loader(self, tableName):
        '''
        Loading the table and scoring.
        Detection of ambiguities.

        For 2T/ShortTable/Limaye/T2D the scores live in a per-table folder
        (``scores.json``, or ``score.json`` for T2D) holding a flat cell list;
        for any other gold standard a single ``<table>.json`` file holds the
        cells under ``['annotated']['CEA']``. Returns None when the per-table
        score file does not exist.
        '''
        base = (self.tableFolder + "/BaselineScoring/DAGOBAHSL_Scoring/"
                + self.goldStandard + "_Result" + "/")
        if self.goldStandard in ('2T', 'ShortTable', 'Limaye', 'T2D'):
            # T2D uses 'score.json'; the other datasets use 'scores.json'
            scoreFile = 'score.json' if self.goldStandard == 'T2D' else 'scores.json'
            fileName = base + tableName + '/' + scoreFile
            if not os.path.exists(fileName):
                return None
            with open(fileName, 'r', encoding='latin1') as load_f:
                cells = json.load(load_f)
            candidateKey = 'scores'
        else:
            tablePath = base + tableName + '.json'
            with open(tablePath, 'r', encoding='latin1') as load_f:
                cells = json.load(load_f)['annotated']['CEA']
            candidateKey = 'annotation'
        tableRepresentation = {}
        for aCell in cells:
            # candidates are not guaranteed to be ordered here: sort by score
            sortedCandidates = sorted(aCell[candidateKey],
                                      key=lambda c: c['score'], reverse=True)
            highestScoring = sortedCandidates[0]['score']
            topCandidates = []
            for candidate in sortedCandidates:
                if candidate['score'] < self.annotationSe * highestScoring:
                    break
                topCandidates.append(candidate)
            self._add_cell(tableRepresentation, aCell["row"], aCell["column"], topCandidates)
        return tableRepresentation

    def bbw_scoring_loader(self, fileName):
        """Load bbw scoring from CSV rows of the form
        ``row, column, "[(qid, score), ...]"``.

        Candidate scores are normalised by the best score of the cell.
        """
        fileName = fileName.replace(".json", "").replace(".csv", "")
        tablePath = (self.tableFolder + "/BaselineScoring/BBW_Scoring/"
                     + self.goldStandard + '/' + fileName + ".csv")
        if self.goldStandard == "T2D":
            tablePath = tablePath.replace(".csv", ".json")
        tableRepresentation = {}
        # original passed open() straight to csv.reader and leaked the handle
        with open(tablePath) as csv_file:
            for line in csv.reader(csv_file):
                if len(line) == 0:
                    continue
                row = int(line[0])
                column = int(line[1])
                candidates = ast.literal_eval(line[2])
                if len(candidates) == 0:
                    continue
                highestScoring = candidates[0][1]
                topCandidates = []
                for candidate in candidates:
                    if candidate[1] < (self.annotationSe * highestScoring):
                        break
                    topCandidates.append({'id': candidate[0],
                                          'score': candidate[1] / highestScoring})
                self._add_cell(tableRepresentation, row, column, topCandidates)
        return tableRepresentation

    def Mtab_scoring_loader(self, fileName):
        """Load MTab scoring for ``fileName`` (``.json`` suffix optional).

        The stored JSON value is itself a JSON document encoded as a string
        (double-encoded), hence the second ``json.loads`` pass.
        """
        fileName = fileName.replace(".json", "")
        tablePath = (self.tableFolder + "/BaselineScoring/MTab_Scoring/"
                     + self.goldStandard + '/' + fileName + '.json')
        with open(tablePath, 'r', encoding='latin1') as load_f:
            load_dict = json.load(load_f)
        mtabScores = json.loads(load_dict.replace('\n', '').replace('\t', ''))
        tableRepresentation = {}
        for aCell in mtabScores['semantic']['cea']:
            row, column = aCell[0], aCell[1]
            highestScoring = aCell[2][0][1]
            topCandidates = []
            for candidate in aCell[2]:
                if candidate[1] < (self.annotationSe * highestScoring):
                    break
                topCandidates.append({'id': candidate[0], 'score': candidate[1]})
            self._add_cell(tableRepresentation, row, column, topCandidates)
        return tableRepresentation


if __name__ == '__main__':
    # entry-point body elided in this excerpt
    pass
Ambiguity.py
Source:Ambiguity.py
"""Monte-Carlo study of hit-reconstruction ambiguities for a multi-plane
strip detector (PyROOT-based).

For each (mean proton count, strip pitch) configuration the script generates
random proton positions, converts them to per-plane strip coordinates,
overlaps the strips back into reconstructed hit candidates, and records the
excess of reconstructed hits over true protons as an "ambiguity rate", both
raw and after merging adjacent hits. Results go to ROOT histograms/graphs
saved under OutputData/.

NOTE(review): this is a Python 2 script (bare `print` statements) and depends
on the project-local HelperFunctions module plus PyROOT.
"""
import HelperFunctions as hf
import random
import numpy as np
import math
from copy import deepcopy
from ROOT import gROOT, TCanvas, TF1, TFile, gStyle,TH2F, TH1F, TGraph, TGraphErrors
import time
from array import array

# seed both RNG streams so runs are reproducible
hf.SetSeed(2022)
np.random.seed(53234)
saveStripMaps=False  # when True, per-loop hit maps are drawn via hf.PlotHitMap
useHalfStrips=False

NLoops=1000  # Monte-Carlo iterations per (nMeanProton, pitch) point
size=20 #cm

#beam properties
width=25 #mm  NOTE(review): histogram titles below label this value "cm" -- confirm units
posX=0.0 #cm
posY=0.0 #cm

#X-Y
tolerance=math.sqrt(2.0)/2.0 #no need for tolerance as X-Y guaranteed to overlap
angles=[0,90]

#X-U-V
#tolerance=0.67 #~pitch*2/3 (triangle innit), area=5774.0 for 100um
#angles=[0,60,120]

#X-U-V-Y
#tolerance=1.5 #needed for 4 planes, why?
#angles=[0,45,90,135]

# tolerance used for efficiency matching and for merging adjacent hits
effTolerance=1.0


xmax=size*5000  # half-width of the detector; size is in cm, so this is in um

#gStyle.SetOptStat(0000)
gROOT.SetBatch(True)  # headless mode: canvases are only saved to files

# parallel arrays feeding the TGraphErrors built at the end of the script
_aMeanProton,_aAmbiguity=array('d'),array('d')
_aMeanProtonError,_aAmbiguityError=array('d'),array('d')

#setup histograms
ambiguityRate=TH2F("ambiguityRate","Ambiguitie Rate (%) for uniform beam of width "+(str)(width)+"cm", 5,10.0,110, 13,0.5,13.5)
ambiguityRate.GetXaxis().SetTitle("Strip Pitch (#mum)")
ambiguityRate.GetYaxis().SetTitle("nProtons")
ambiguityRateRefined=TH2F("ambiguityRateRefined","Ambiguitie Rate (%) for uniform beam of width "+(str)(width)+"cm", 5,10.0,110, 13,0.5,13.5)
ambiguityRateRefined.GetXaxis().SetTitle("Strip Pitch (#mum)")
ambiguityRateRefined.GetYaxis().SetTitle("nProtons")
hNProtons=TH1F("hNProtons","",30,-0.5,29.5)

# scan configurations; both ranges currently select a single point
# (nMeanProton=10, pitch=100um)
for nMeanProton in range(10,11):
    for pitch in range (100,101,20):

        # per-configuration accumulators
        nRawHits=0.0
        nRefinedHits=0.0
        totalProtons=0.0

        nProton_RawEffCorrected=0.0
        nProton_RefinedEffCorrected=0.0

        #reset output file and setup histograms for loop
        #MyFile =TFile("/disk/moose/bilpa/Optima/aw/Ambiguities/nP"+(str)(nProton)+"_pitch"+(str)(pitch)+"um_Array"+(str)(size)+"cm.root","RECREATE");
        MyFile =TFile("test.root","RECREATE");
        hRawHits=TH1F("hRawHits","n Reconstructed Hits (Raw)",100,-0.5,100.5)
        hRawEfficiency=TH1F("hRawEfficiency","Raw Efficiency",20,-2.5,102.5)

        hRefinedHits=TH1F("hRefinedHits","n Reconstructed Hits (Refined)",100,-0.5,100.5)
        hRefinedEfficiency=TH1F("hRefinedEfficiency","Refined Efficiency",20,-2.5,102.5)

        # one bin per strip pitch across the detector area
        myHitMap=TH2F("myHitMap","Hit Locations",(int)(2*xmax/pitch),-xmax,xmax,(int)(2*xmax/pitch),-xmax,xmax)

        for i in range(NLoops):
            # i%1 is always 0, so progress is printed every loop;
            # raise the modulus to thin the output
            if i%1 == 0:
                print("Processing loop "+(str)(i))

            # draw the actual proton count for this event from a Poisson
            #nProton=nMeanProton
            nProton=np.random.poisson(nMeanProton)
            hNProtons.Fill(nProton)
            totalProtons+=nProton
            if nProton==0:
                continue

            #generate N=10 random points for x-y
            XY=hf.GetRandomXY(nProton,size,width,posX,posY)
            #print XY

            #convert points into a strip number for each layer
            Strips=hf.GetStripCoOrds(XY,pitch,angles)
            #print Strips

            #cycle through all strip combinations, find intersections of pairs of planes, see if they are within tolerance of each other
            #returns hit objects of form (XCoord,YCoord,[stripX,stripU,stripV],[radial distance to intersections])
            allHits=hf.FindOverlaps(Strips,pitch,angles,tolerance,useHalfStrips)
            nRawHits+=len(allHits)
            hRawHits.Fill(len(allHits))
            for hit in allHits:
                myHitMap.Fill(hit[0],hit[1])

            # fraction of true protons matched by a reconstructed hit
            rawEff=hf.GetEfficiency(allHits,XY,pitch,tolerance)
            hRawEfficiency.Fill(100.0*rawEff)
            nProton_RawEffCorrected+=nProton*rawEff

            if saveStripMaps:
                hf.PlotHitMap("RawHits",allHits,XY,Strips,pitch,size,i,angles)

            #for hit in allHits:
            #    area=hf.GetPixelArea(hit,angles,pitch)
            #    if area>1.0:
            #        print "Area=",area,hit

            #remove duplicate hits (separated by < tolerance)
            #refinedHits=hf.RemoveAdjacentHits(allHits,tolerance,pitch)
            refinedHits=hf.MergeAdjacentHits(allHits,effTolerance,pitch)

            nRefinedHits+=len(refinedHits)
            hRefinedHits.Fill(len(refinedHits))
            refinedEff=hf.GetEfficiency(refinedHits,XY,pitch,effTolerance)
            hRefinedEfficiency.Fill(100*refinedEff)
            nProton_RefinedEffCorrected+=nProton*refinedEff

            if saveStripMaps:
                hf.PlotHitMap("RefinedHits",refinedHits,XY,Strips,pitch,size,i,angles)
            #if len(refinedHits)<len(allHits):
            #    print("Found a duplicate")
            #    print allHits

        # ambiguity = percentage excess of reconstructed hits over true protons
        rawAmbiguity=100*(nRawHits-totalProtons)/totalProtons
        refinedAmbiguity=100*(nRefinedHits-totalProtons)/totalProtons

        # alternative estimate from the histogram mean, with its uncertainty
        refinedAmbiguityAlt=100.0*((hRefinedHits.GetMean())-nMeanProton)/nMeanProton
        fractionalUncertaintyAlt=hRefinedHits.GetMeanError()/hRefinedHits.GetMean()

        print "nMeanProton="+(str)(totalProtons)+",nReco= "+(str)(nRawHits)+",nReco2= "+(str)(nRefinedHits)+", pitch="+(str)(pitch)+", rawAmbiguity="+(str)(rawAmbiguity)+"%"+", refinedAmbiguity="+(str)(refinedAmbiguity)
        print "Efficiency: raw="+(str)(hRawEfficiency.GetMean())+", refined="+(str)(hRefinedEfficiency.GetMean())
        print "alt ambiguity=",refinedAmbiguityAlt,"+/-",fractionalUncertaintyAlt*refinedAmbiguityAlt
        hRawHits.Write()
        hRawEfficiency.Write()
        hRefinedHits.Write()
        hRefinedEfficiency.Write()
        myHitMap.Write()
        hNProtons.Write()
        MyFile.Close()
        ambiguityRate.Fill(pitch,nMeanProton,rawAmbiguity)
        ambiguityRateRefined.Fill(pitch,nMeanProton,refinedAmbiguity)
        _aMeanProton.append(nMeanProton)
        _aMeanProtonError.append(0)
        _aAmbiguity.append(refinedAmbiguity)
        _aAmbiguityError.append(refinedAmbiguity*fractionalUncertaintyAlt)

# summary 2D maps of ambiguity rate vs pitch and proton multiplicity
mycanvas = TCanvas( "c1", "Ambiguities", 550,195,800,700 )
ambiguityRate.Draw("COLZText")
mycanvas.SaveAs("OutputData/outputAmbiguities_"+(str)(width)+"cm.C")
mycanvas.SaveAs("OutputData/outputAmbiguities_"+(str)(width)+"cm.png")
ambiguityRateRefined.Draw("COLZTEXT")
mycanvas.SaveAs("OutputData/outputAmbiguitiesRefined_"+(str)(width)+"cm.C")
mycanvas.SaveAs("OutputData/outputAmbiguitiesRefined_"+(str)(width)+"cm.png")

# ambiguity vs mean proton count, fitted with a quadratic (needs >1 point)
if len(_aAmbiguity) >1:
    graph=TGraphErrors(len(_aAmbiguity),_aMeanProton,_aAmbiguity,_aMeanProtonError,_aAmbiguityError)
    graph.SetMarkerStyle(5)
    graph.GetXaxis().SetTitle("Mean N Proton")
    graph.GetYaxis().SetTitle("Ambiguity (%)")
    graph.SetTitle("")
    graph.Draw("AP")
    graph.Fit("pol2")
    mycanvas.SaveAs("OutputData/AmbiguitiesGraph.C")
...
removing_ambiguity.py
Source:removing_ambiguity.py
import math


def _count_ambiguous(record):
    """Number of non-A/C/G/T symbols in one SeqRecord's sequence.

    Shared helper: this expression was previously duplicated verbatim in
    ambiguity() and discard_ambiguity().
    NOTE(review): counts uppercase A/C/G/T only — lowercase bases would be
    counted as ambiguous; confirm the input FASTA is uppercase.
    """
    seq = record.seq
    return len(seq) - seq.count('A') - seq.count('C') - seq.count('G') - seq.count('T')


# Loading data
# NOTE(review): this module-level parse duplicates the load done inside
# ambiguity() below; kept as-is in case unseen code uses `data`.
data = list(SeqIO.parse("derep_DNA_whole_16S_strep.fasta", "fasta"))


# Counting ambiguity
def ambiguity(data, file_name):
    """Counts number of ambiguous nucleotides per sequence in FASTA format
    and returns the count numbers in a list (excludes sequences with 0
    ambiguity count). Sequences with at least 1 ambiguous residue are also
    saved to a separate FASTA file.

    :param data: path to a FASTA file
    :param file_name: file name to which sequences with at least 1
        ambiguous residue will be saved
    :return: list of per-sequence ambiguity counts (> 0 only)
    """
    records = list(SeqIO.parse(data, "fasta"))
    amgu_count = []
    ambiguous_records = []
    for record in records:
        # local was previously named `ambiguity`, shadowing this function
        n_ambiguous = _count_ambiguous(record)
        if n_ambiguous > 0:
            amgu_count.append(n_ambiguous)
            ambiguous_records.append(record)
    SeqIO.write(ambiguous_records, file_name, "fasta")
    return amgu_count


ambiguity_count = ambiguity("derep_DNA_whole_16S_strep.fasta", "all_ambiguity_seq.fasta")
print(f'Average count of ambiguity per sequence {np.mean(ambiguity_count)}')
print(f'Variance {np.var(ambiguity_count)}')


def total_bases_count(data):
    """Counts total number of bases in a given FASTA file.

    :param data: path to a FASTA file
    :return: sum of all sequence lengths
    """
    return sum(len(record.seq) for record in SeqIO.parse(data, "fasta"))


total_bases = total_bases_count("all_ambiguity_seq.fasta")


# Visualisation
def ambiguity_distplot(data, s_bin, save_as):
    """Density plot of per-sequence ambiguity counts (log-scaled y-axis).

    :param data: list of integers
    :param s_bin: bin size
    :param save_as: string of the file name the figure will be saved as
    """
    sns.displot(data, binwidth=s_bin, log=True)
    plt.tight_layout()
    plt.xlabel("Ambiguity Count Per Sequence")
    plt.ylabel("log10(count)")
    plt.xlim((min(data) - 1), (max(data) + 1))
    plt.savefig(save_as)
    return plt.show()


ambiguity_distplot(ambiguity_count, 5, "ambiguity_dist.pdf")

# Finding parameters for statistical model
# mean count of ambiguity symbols per sequence
mean_ambiguity = np.mean(ambiguity_count)
# probability that any single base is ambiguous
probablity = sum(ambiguity_count)/total_bases
print(f'Probablity (total count of ambiguity/total count of all bases) {probablity}')
# run of positive outcomes (size)
size = (mean_ambiguity*probablity)/(1-probablity)
print(f'Size {size}')


def discard_ambiguity(data, num, file_name):
    """Discards sequences with more than `num` ambiguous residues.

    :param data: path to a FASTA file
    :param num: number of maximum ambiguity count allowed
    :param file_name: file name to which the kept sequences will be saved
    :return: number of records written (SeqIO.write's return value)
    """
    kept = [record for record in SeqIO.parse(data, "fasta")
            if _count_ambiguous(record) <= num]
    return SeqIO.write(kept, file_name, "fasta")
Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 automation testing minutes FREE!