Best Python code snippet using autotest_python
bert_glove_coarse_fine_using_fine.py
import json
import torch
from torch.utils.data import TensorDataset
from transformers import BertTokenizer
import os, sys, pickle
from bert_class import BERTClass
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from util import plot_confusion_mat
import matplotlib.pyplot as plt
from bert_glove import train, test, bert_tokenize, create_data_loaders


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(review): pickle executes arbitrary code on load; only use on trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def order_label_embeddings(label_embeddings, index_to_label, label_word_map, device):
    """Arrange label embeddings in label-index order and move them to ``device``.

    For every key ``i`` of ``index_to_label`` (in iteration order) the label
    ``index_to_label[i]`` is mapped to its word through ``label_word_map`` and
    that word is looked up in ``label_embeddings``.

    Returns:
        A ``torch`` tensor built from the collected embeddings, on ``device``.

    Raises:
        KeyError: if a label has no entry in ``label_word_map`` or its word has
            no entry in ``label_embeddings``.
    """
    ordered = [
        label_embeddings[label_word_map[index_to_label[i]]]
        for i in index_to_label
    ]
    return torch.tensor(ordered).to(device)


def make_train_test(df, children, label_word_map):
    """Split ``df`` by whether its ``text`` column mentions any child label word.

    The words ``label_word_map[ch]`` for each ``ch`` in ``children`` are joined
    with ``|`` and passed to ``Series.str.contains``; the words are used as a
    pattern as-is (they are not escaped).

    Returns:
        ``(df_train, df_test)``: rows that match the pattern and rows that do
        not, each with a freshly reset index.
    """
    reg_exp = "|".join(label_word_map[ch] for ch in children)
    # Evaluate the (potentially expensive) match once and reuse it for both halves.
    matches = df['text'].str.contains(reg_exp)
    df_train = df[matches].reset_index(drop=True)
    df_test = df[~matches].reset_index(drop=True)
    return df_train, df_test


if __name__ == "__main__":
    # Usage: <script> <use_gpu: 0|1> <gpu_id>
    # basepath = "/Users/dheerajmekala/Work/Coarse2Fine/data/"
    basepath = "/data4/dheeraj/coarse2fine/"
    dataset = "nyt/"
    pkl_dump_dir = basepath + dataset
    # tok_path = pkl_dump_dir + "bert/tokenizer_coarse_fine_using_fine"
    # model_path = pkl_dump_dir + "bert/model/"
    # model_name = "bert_vmf_coarse_fine_using_fine.pt"
    #
    # os.makedirs(tok_path, exist_ok=True)
    # os.makedirs(model_path, exist_ok=True)

    use_gpu = int(sys.argv[1])
    # use_gpu = False
    gpu_id = int(sys.argv[2])

    # Tell pytorch to run this model on the GPU.
    if use_gpu:
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device("cpu")

    df_fine = _load_pickle(pkl_dump_dir + "df_fine.pkl")
    parent_to_child = _load_pickle(pkl_dump_dir + "parent_to_child.pkl")
    # df_train, df_test = train_test_split(df_fine, test_size=0.1, stratify=df_fine["label"], random_state=42)

    # Sorted so that the label -> index assignment is the same on every run
    # (plain set iteration order of strings varies between interpreter runs).
    fine_labels = sorted(set(df_fine.label.values))
    label_set = fine_labels
    label_to_index = {}
    index_to_label = {}
    for i, l in enumerate(label_set):
        label_to_index[l] = i
        index_to_label[i] = l

    fine_inds = [label_to_index[f] for f in fine_labels]

    with open(pkl_dump_dir + "label_word_map_coarse_fine_using_fine.json", "r") as handle:
        label_word_map = json.load(handle)
    label_embeddings = _load_pickle(pkl_dump_dir + "label_embeddings.pkl")
    label_embeddings = order_label_embeddings(label_embeddings, index_to_label, label_word_map, device)
    # label_embeddings = create_label_embeddings(glove_dir, index_to_label, device, label_word_map)

    # Map every fine label index to the indices of all children of its parent
    # (the list includes the label itself).
    sibling_map = {}
    for p in parent_to_child:
        children = parent_to_child[p]
        children_ids = [label_to_index[l] for l in children]
        for ch in children:
            sibling_map[label_to_index[ch]] = list(children_ids)

    print('Loading BERT tokenizer...', flush=True)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    input_ids, attention_masks, labels = bert_tokenize(tokenizer, df_fine, label_to_index)

    # Combine the training inputs into a TensorDataset.
    # (Named separately so it does not overwrite the `dataset` name string above.)
    tensor_dataset = TensorDataset(input_ids, attention_masks, labels)

    # Create a 90-10 train-validation split.
    train_dataloader, validation_dataloader = create_data_loaders(tensor_dataset)

    model = BERTClass()
    model.to(device)

    add_args = {}
    add_args["possible_labels"] = fine_inds
    add_args["contrastive_map"] = sibling_map
    model = train(train_dataloader, validation_dataloader, model, label_embeddings, device, epochs=5,
                  additional_args=add_args)

    print("****************** CLASSIFICATION REPORT ON All Data ********************", flush=True)
    all_true = []
    all_preds = []
    # Evaluate one parent at a time, restricting predictions to that parent's children.
    for p in parent_to_child:
        children = parent_to_child[p]
        temp_df = df_fine[df_fine.label.isin(children)].reset_index(drop=True)
        children_ids = [label_to_index[l] for l in children]
        test_add_args = {}
        test_add_args["possible_labels"] = children_ids
        test_add_args["contrastive_map"] = None
        true, preds = test(temp_df, tokenizer, model, label_embeddings, device, label_to_index, index_to_label,
                           test_add_args)
        all_true += true
        all_preds += preds
    print(classification_report(all_true, all_preds), flush=True)
    print("*" * 80, flush=True)
    # tokenizer.save_pretrained(tok_path)
    # ... (the listing is truncated at this point)
experiment.py
Source: experiment.py
# experiment.py
# Marco Lui Feb 2011
#
# This class represents an experiment. Its intention is to act as the interface between the user
# and the underlying store at the level of managing tasks and results. The dataset abstraction is
# delegated to the DataProxy object.
from hydrat import config
from hydrat.datamodel import TaskSetResult, Result, BasicTask
from hydrat.common.pb import ProgressIter
import cPickle
import multiprocessing as mp


class ExperimentFold(object):
    """One (task, learner) pairing whose result is computed on demand.

    ``add_args`` maps keyword names to values that are indexed with the task's
    train/test indices before being passed to the learner/classifier.
    """

    def __init__(self, task, learner, add_args=None):
        self.task = task
        self.learner = learner
        # A fresh dict per instance: a mutable default would be shared by every fold.
        self.add_args = {} if add_args is None else add_args

    def __getstate__(self):
        # TODO: Potentially use a disk-backed implementation of tasks
        # Pickle a BasicTask copy of the task rather than the task object itself.
        task = BasicTask.from_task(self.task)
        return {'task': task, 'learner': self.learner, 'add_args': self.add_args}

    @property
    def classifier(self):
        """Call the learner on this fold's training data and return the classifier."""
        train_add_args = dict((k, v[self.task.train_indices]) for k, v in self.add_args.items())
        classifier = self.learner(self.task.train_vectors, self.task.train_classes,
                                  sequence=self.task.train_sequence, indices=self.task.train_indices,
                                  **train_add_args)
        return classifier

    @property
    def result(self):
        """Build the classifier, apply it to the test data and wrap the output in a Result."""
        classifier = self.classifier
        test_add_args = dict((k, v[self.task.test_indices]) for k, v in self.add_args.items())
        classifications = classifier(self.task.test_vectors,
                                     sequence=self.task.test_sequence, indices=self.task.test_indices,
                                     **test_add_args)
        return Result.from_task(self.task, classifications, dict(classifier.metadata))


def get_result(fold):
    """
    Needed for parallelized implementation, as we need a top-level function to pickle.
    """
    return fold.result


# TODO: Refactor in a way that allows access to per-fold classifiers
class Experiment(TaskSetResult):
    """Runs a learner over every task of a taskset and caches the per-fold results."""

    def __init__(self, taskset, learner=None, parallel=None):
        # TODO: Why is learner optional?
        self.taskset = taskset
        self.learner = learner
        self._results = None
        # Fall back to the configured default when the caller does not choose.
        self.parallel = parallel if parallel is not None else config.getboolean('parameters', 'parallel_classify')

    @property
    def metadata(self):
        """ Result object metadata """
        result_metadata = dict(self.taskset.metadata)
        result_metadata['learner'] = self.learner.__name__
        result_metadata['learner_params'] = self.learner.params
        return result_metadata

    @property
    def results(self):
        """Per-fold results; the experiment is run on first access."""
        if self._results is None:
            self.run()
        return self._results

    @property
    def folds(self):
        """A new list with one ExperimentFold per task in the taskset."""
        return [ExperimentFold(task, self.learner) for task in self.taskset]

    def run(self, add_args=None):
        """Compute the result of every fold and store the list in ``self._results``.

        Folds are processed in a multiprocessing pool when ``self.parallel`` is
        set and the learner reports itself pickleable; otherwise, or when the
        parallel attempt raises ``cPickle.UnpickleableError`` or ``TypeError``,
        they are processed one after another.

        ``add_args`` is accepted for interface compatibility and is not used here.
        """
        # TODO: parallelize over folds?
        results = []
        # TODO: Nicer in-progress output
        # Single parenthesised argument: prints identically as a statement or a function call.
        print("Experiment: %s %s" % (self.learner.__name__, self.taskset.metadata))
        try:
            if not self.parallel or not self.learner.is_pickleable():
                # TODO: Should we define a custom exception for this?
                raise cPickle.UnpickleableError
            cPickle.dumps(self.learner)
            # TODO: closing a multiprocessing pool produces an unsightly error msg
            # TODO: it seems that explicitly closing it does not cause this error msg
            pool = mp.Pool(config.getint('parameters', 'job_count'))
            try:
                for result in ProgressIter(pool.imap_unordered(get_result, self.folds), 'PARALLEL',
                                           maxval=len(self.taskset)):
                    results.append(result)
                pool.close()
            except BaseException:
                # Do not leave worker processes behind when the parallel run fails.
                pool.terminate()
                raise
            finally:
                pool.join()  # This waits for all the pool members to exit
            results.sort(key=lambda x: x.metadata['index'])
        except (cPickle.UnpickleableError, TypeError):
            # Start from an empty list: a parallel run that failed part-way may
            # already have collected some results, which would be duplicated.
            results = [fold.result for fold in ProgressIter(self.folds, 'SERIES')]
        self._results = results
    # ... (the listing is truncated at this point)
crawl_test.py
Source: crawl_test.py
...21 (["--random", "http://example.com"], True),22 ([], True),23 ],24 )25 def test_add_args(self, args, error):26 """test_add_args27 """28 parser = ArgumentParser()29 CrawlCommand().add_args(parser)30 if error:31 with pytest.raises(SystemExit):32 parser.parse_args(args)33 else:34 parsed_args = parser.parse_args(args)35 assert parsed_args.seed == args[1]36 if "--filename" in args or "-f" in args:37 assert parsed_args.filename == args[3]38 else:39 # Defaults to datetime format...
Check out the latest blogs from LambdaTest on this topic:
Before we discuss Scala testing, let us understand the fundamentals of Scala and how this programming language is a preferred choice for your development requirements. The popularity and usage of Scala are rapidly rising, as is evident from the ever-increasing number of open positions for Scala developers.
So, now that the first installment of this two-part article has been published (hence you might have an idea of what Agile Testing is not, in my opinion), I’ve started feeling the pressure to explain what Agile Testing actually means to me.
Did you know that according to Statista, the number of smartphone users will reach 18.22 billion by 2025? Let’s face it, digital transformation is skyrocketing and will continue to do so. This swamps the mobile app development market with various options and gives rise to the need for the best mobile app testing tools.
As a developer, checking the cross-browser compatibility of your CSS properties is of utmost importance when building your website. I have often found myself excited to use a CSS feature only to discover that it’s still not supported on all browsers. Even if it is supported, the feature might be experimental and not work consistently across all browsers. Ask any front-end developer about using a CSS feature whose support is still in the experimental phase in most prominent web browsers.
The count of mobile users is on a steep rise. According to the research, by 2025, it is expected to reach 7.49 billion users worldwide. 70% of all US digital media time comes from mobile apps, and to your surprise, the average smartphone owner uses ten apps per day and 30 apps each month.
Learn to execute automation testing from scratch with LambdaTest Learning Hub. It covers everything from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You could also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!