Best Python code snippet using avocado_python
wsd.py
Source:wsd.py
1"""Processing of data."""2from __future__ import absolute_import3from __future__ import division4from __future__ import print_function5import torch6print(torch.cuda.is_available())7from transformers import BertTokenizer, BertModel, GPT2Model, BertForMultipleChoice8import tqdm, sklearn9import numpy as np10import os, time, sys11import pickle12from os import listdir13from os.path import isfile, join14import scipy15import xml.etree.ElementTree as ET16def getsubidx(x, y, begin=0):17 l1, l2 = len(x), len(y)18 for i in range(begin,l1):19 if x[i:i+l2] == y:20 return i21 return None22class WSD_BERT_NN(object):23 def __init__(self):24 # main events25 #self.main_sents = []26 #self.main_events = []27 self.tokenizer = None28 self.model = None29 self.word2synembset = {}30 self.word2pos_syn = {}31 self.word2mfs = None32 self.token_merge_mode = None33 self.pos_map = {'NN':'NOUN', 'JJ':'ADJ', 'VB':'VERB', 'RB':'ADV'}34 35 36 def initialize(self, pretrained='bert-base-uncased', tokenizer='bert-base-uncased', sep_token='[SEP]', annotation_key='lexsn', wn_firstsen_file='../data/ALL.mfs.txt'):37 self.tokenizer = BertTokenizer.from_pretrained(tokenizer, sep_token=sep_token)38 self.model = BertModel.from_pretrained(pretrained, output_hidden_states=True)39 self.model.cuda()40 assert (annotation_key in ['lexsn', 'wnsn'])41 self.annotation_key = annotation_key42 if wn_firstsen_file is not None:43 self.word2mfs = {}44 for line in open(wn_firstsen_file):45 line = line.strip().split('\t')[-1]46 line = line.split(' ')47 line = line[0].split('%')48 if len(line) == 2:49 if self.word2mfs.get(line[0]) is None:50 self.word2mfs[line[0]] = line[1]51 52 def load_and_encode_semcor(self, folder_path='../data/semcor-corpus/semcor/semcor/brownv/tagfiles', token_merge_mode='avg', avg_vec=True):53 assert (token_merge_mode in ['avg', 'first'])54 self.token_merge_mode = token_merge_mode55 onlyfiles = [join(folder_path, f) for f in listdir(folder_path) if isfile(join('../data/semcor-corpus/semcor/semcor/brownv/tagfiles', f))]56 for f in tqdm.tqdm(onlyfiles):57 tree = ET.parse(f)58 root = tree.getroot()59 for P in root[0]:60 try:61 sent0 = P[0]62 except:63 continue64 for S in P:65 sent, senses, poses = [], [], []66 tokens = S67 68 for T in tokens:69 this_t = T.attrib.get('lemma')70 if this_t is None:71 this_t = T.text.lower()72 sent.append(this_t)73 senses.append( T.attrib.get(self.annotation_key) )74 this_pos = T.attrib.get('pos')75 if this_pos is not None:76 this_pos = self.pos_map.get(this_pos)77 poses.append(this_pos)78 if len(sent) < 2:79 continue80 tokenized_sent = self.tokenizer.encode(' '.join(sent), add_special_tokens=False)81 vecs = self.model(torch.tensor(tokenized_sent).cuda().unsqueeze(0))[0][0].data.cpu().numpy()82 #print (vecs.shape)83 begin = 084 for i in range(len(sent)):85 t_sense = senses[i]86 this_pos = poses[i]87 if t_sense is not None:88 #try:89 # wnsn is a int90 """91 if self.annotation_key == 'wnsn':92 semi_col = t_sense.find(';')93 if semi_col > 1:94 t_sense = t_sense[:semi_col]95 t_sense = int(t_sense)96 """97 t_sense = t_sense.split(';')98 if self.annotation_key == 'wnsn':99 t_sense = [int(x) for x in t_sense]100 #except:101 #print (f)102 #exit()103 t_token = sent[i]104 tokenized_token = self.tokenizer.encode(t_token, add_special_tokens=False)105 tid = getsubidx(tokenized_sent, tokenized_token, begin)106 assert (tid is not None)107 begin = tid + len(tokenized_token)108 if token_merge_mode == 'avg':109 if len(tokenized_token) > 1:110 t_vec = np.average(vecs[tid:tid+len(tokenized_token)], axis=0)111 else:112 
t_vec = vecs[tid]113 else:114 t_vec = vecs[tid]115 if self.word2synembset.get(t_token) is None:116 self.word2synembset[t_token] = {}117 if self.word2pos_syn.get(t_token) is None:118 self.word2pos_syn[t_token] = {}119 for this_sense in t_sense:120 if self.word2synembset[t_token].get(this_sense) is None:121 self.word2synembset[t_token][this_sense] = [t_vec]122 else:123 self.word2synembset[t_token][this_sense].append(t_vec)124 if self.word2pos_syn[t_token].get(this_pos) is None:125 self.word2pos_syn[t_token][this_pos] = set([this_sense])126 else:127 self.word2pos_syn[t_token][this_pos].add(this_sense)128 129 if avg_vec:130 for X, senses in self.word2synembset.items():131 for Y, vecs in senses.items():132 self.word2synembset[X][Y] = [np.average(vecs, axis=0)]133 134 print ("Loaded semcor BERT vectors for", len([x for x,y in self.word2synembset.items()]))135 136 # return number or none137 def get_wn_sense_id(self, token, context, n_occur = 1, token_merge_mode=None):138 assert (context.count(token) >= n_occur)139 if token_merge_mode is None:140 token_merge_mode = self.token_merge_mode141 assert (token_merge_mode in ['first','avg'])142 if self.word2synembset.get(token) is None:143 return None144 tokenized_token = self.tokenizer.encode(token, add_special_tokens=False)145 tokenized_sent = self.tokenizer.encode(context, add_special_tokens=False)146 vecs = self.model(torch.tensor(tokenized_sent).cuda().unsqueeze(0))[0][0].data.cpu().numpy()147 n_seen = 0148 tid = 0149 while tid < len(tokenized_sent):150 if tokenized_sent[tid:tid+len(tokenized_token)] == tokenized_token:151 n_seen += 1152 if n_seen >= n_occur:153 break154 else:155 tid += len(tokenized_token)156 else:157 tid += 1158 if token_merge_mode == 'avg':159 if len(tokenized_token) > 1:160 t_vec = np.average(vecs[tid:tid+len(tokenized_token)], axis=0)161 else:162 t_vec = vecs[tid]163 else:164 t_vec = vecs[tid]165 #print (tid, tid+len(tokenized_token))166 m_dist = 2167 sense_id = None168 for sid, Y in self.word2synembset[token].items():169 for v in Y:170 #print (v)171 #print (t_vec)172 #exit()173 n_dist = scipy.spatial.distance.cosine(v, t_vec)174 if n_dist < m_dist:175 m_dist = n_dist176 sense_id = sid177 return sid178 179 180 def get_wn_sense_id_wpos(self, token, context, n_occur = 1, pos='VERB', token_merge_mode=None):181 assert (context.count(token) >= n_occur)182 assert (pos in ['VERB', 'NOUN', 'ADJ', 'ADV'])183 if token_merge_mode is None:184 token_merge_mode = self.token_merge_mode185 assert (token_merge_mode in ['first','avg'])186 if self.word2synembset.get(token) is None:187 return None188 tokenized_token = self.tokenizer.encode(token, add_special_tokens=False)189 tokenized_sent = self.tokenizer.encode(context, add_special_tokens=False)190 vecs = self.model(torch.tensor(tokenized_sent).cuda().unsqueeze(0))[0][0].data.cpu().numpy()191 n_seen = 0192 tid = 0193 while tid < len(tokenized_sent):194 if tokenized_sent[tid:tid+len(tokenized_token)] == tokenized_token:195 n_seen += 1196 if n_seen >= n_occur:197 break198 else:199 tid += len(tokenized_token)200 else:201 tid += 1202 if token_merge_mode == 'avg':203 if len(tokenized_token) > 1:204 t_vec = np.average(vecs[tid:tid+len(tokenized_token)], axis=0)205 else:206 t_vec = vecs[tid]207 else:208 t_vec = vecs[tid]209 #print (tid, tid+len(tokenized_token))210 m_dist = 2211 sense_id = None212 pos_sense_set = self.word2pos_syn.get(token)213 if pos_sense_set is not None:214 pos_sense_set = pos_sense_set.get(pos)215 if pos_sense_set is None:216 return None217 for sid, Y in 
self.word2synembset[token].items():218 if sid in pos_sense_set:219 for v in Y:220 #print (v)221 #print (t_vec)222 #exit()223 n_dist = scipy.spatial.distance.cosine(v, t_vec)224 if n_dist < m_dist:225 m_dist = n_dist226 sense_id = sid227 return sense_id228 229 230 231 def get_wn_first_sen(self, token):232 return self.word2mfs.get(token)233 234 def save(self, filename):235 f = open(filename,'wb')236 pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)237 f.close()238 print("Save WSD-BERT-NN object as", filename)239 def load(self, filename):240 f = open(filename,'rb')241 tmp_dict = pickle.load(f)242 self.__dict__.update(tmp_dict)243 print("Loaded WSD-BERT-NN object from", filename)244def main():245 wsd_file = '../utils/wsd_bert_nn.bin'246 wsd = WSD_BERT_NN()247 if os.path.exists(wsd_file):248 wsd.load(wsd_file)249 print ("==ATTN== ", len([x for x,y in wsd.word2synembset.items()]))250 else:251 wsd.initialize(annotation_key='lexsn')252 wsd.load_and_encode_semcor(folder_path='../data/semcor-corpus/semcor/semcor/brownv/tagfiles', token_merge_mode='avg', avg_vec=True)253 wsd.save(wsd_file)254 255 print ("..Running test for WSD-BERT-NN..")256 print ("the rain *bank* the soil up behind the gate ", wsd.get_wn_sense_id('bank',"the rain bank the soil up behind the gate", 1))257 print ("I pay the money straight into my *bank* ", wsd.get_wn_sense_id('bank', "I pay the money straight into my bank", 1))258 print ("and I run, I *run* so far away ", wsd.get_wn_sense_id('run', "and I run, I run so far away", 2))259 260 261if __name__ == "__main__":...
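Once the SemCor vectors have been pickled, the class can be queried directly. Below is a minimal usage sketch, assuming the ../utils/wsd_bert_nn.bin pickle from main() already exists, the module is importable as wsd, and a CUDA device is available:

from wsd import WSD_BERT_NN

wsd = WSD_BERT_NN()
wsd.load('../utils/wsd_bert_nn.bin')

# Disambiguate the second occurrence of "run", restricted to verb senses.
sense = wsd.get_wn_sense_id_wpos('run', "and I run, I run so far away",
                                 n_occur=2, pos='VERB')

# Fall back to the most frequent WordNet sense for words never seen in SemCor.
if sense is None:
    sense = wsd.get_wn_first_sen('run')
print(sense)

Because get_wn_sense_id and get_wn_sense_id_wpos can only return senses for words that occurred in the encoded SemCor files, the most-frequent-sense fallback is the usual way to handle out-of-vocabulary words.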
db.py
Source:db.py
import sqlite3
from datetime import datetime

conn = sqlite3.connect("bombcrypto.db")
cursor = conn.cursor()


def create():
    """
    Creates the report table if it does not exist.
    :return:
    """
    global cursor
    cursor.execute("""
        create table if not exists relatorio (
            id INTEGER PRIMARY KEY autoincrement,
            data text,
            q_token real,
            t_token text,
            login text
        )
    """)


def get():
    """
    Returns every row of the report table.
    :return:
    """
    global cursor
    create()
    cursor.execute("""
        select * from relatorio
    """)
    return cursor.fetchall()


def sum_month(token):
    """
    Returns the sum of the given token recorded in the current month.
    :param token:
    :return:
    """
    global cursor
    cur_month = datetime.now().strftime('%m')
    create()
    # Parameterised query: interpolating `token` into the SQL string
    # would allow SQL injection.
    cursor.execute("""
        select sum(q_token) from relatorio
        where strftime('%m', data) = ? and t_token = ?
    """, (cur_month, token))
    return cursor.fetchone()[0]


def add(data, q_token, t_token, login):
    """
    Inserts a report row.
    :param data:
    :param q_token:
    :param t_token:
    :param login:
    :return:
    """
    global conn, cursor
    create()
    cursor.execute("""
        insert into relatorio (data, q_token, t_token, login) values (?, ?, ?, ?)
    """, (data, q_token, t_token, login))
    conn.commit()  # without a commit the insert is lost when the connection closes
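A short usage sketch for the module above; the ISO YYYY-MM-DD date strings are an assumption, chosen because sum_month filters with SQLite's strftime('%m', data), which expects ISO-formatted text dates:

import db

db.add('2022-03-15', 12.5, 'BCOIN', 'player1')
db.add('2022-03-16', 3.0, 'BCOIN', 'player1')

print(db.get())               # every row in the relatorio table
print(db.sum_month('BCOIN'))  # 15.5 when run during March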
t_secrets.py
Source:t_secrets.py
from pathlib import Path
import os
import platform

clear = lambda: os.system('clear')
if platform.system() == 'Darwin' or platform.system() == 'Linux':
    clear()

home_path = Path.home()
if os.path.exists(f'{home_path}/t_secrets.txt'):
    # Read the stored key and token, stripping trailing newlines.
    with open(f'{home_path}/t_secrets.txt', 'r') as f:
        secret_values = f.readlines()
    TRELLO_KEY = secret_values[0].strip()
    TRELLO_TOKEN = secret_values[1].strip()
else:
    # Prompt until both values are non-empty, then persist them.
    t_key = ""
    t_token = ""
    while len(t_key) < 1 or len(t_token) < 1:
        print("\n")
        t_key = input("Enter Trello API key: ")
        t_token = input("Enter Trello API token: ")
    with open(f'{home_path}/t_secrets.txt', 'w') as f:
        f.write(f'{t_key}\n'
                f'{t_token}')
    TRELLO_KEY = t_key
    TRELLO_TOKEN = t_token
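TRELLO_KEY and TRELLO_TOKEN are module-level constants meant to be imported by other scripts; note that importing t_secrets runs the prompt loop when the secrets file is missing. A sketch of how the credentials might be used against the Trello REST API, which accepts them as key and token query parameters (the requests dependency is an assumption):

import requests
from t_secrets import TRELLO_KEY, TRELLO_TOKEN

# List the boards of the authenticated member.
resp = requests.get(
    'https://api.trello.com/1/members/me/boards',
    params={'key': TRELLO_KEY, 'token': TRELLO_TOKEN},
)
resp.raise_for_status()
for board in resp.json():
    print(board['name'])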