Best Python code snippet using yandex-tank
create_stats.py
Source:create_stats.py
import os
from collections import Counter
from pathlib import Path

import numpy as np


def remove_zero_entries(d: dict):
    """Return a copy of ``d`` without the entries whose value equals 0."""
    return {k: v for k, v in d.items() if v != 0}


def _read_lines(path: Path):
    """Read a text file and return its lines, ignoring leading/trailing newlines."""
    return path.read_text().strip('\n').split('\n')


def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


def _count_histogram(items):
    """Map each occurrence count to the number of distinct items having it.

    The result is ordered by ascending occurrence count.
    """
    occurrences = Counter(items)
    return dict(sorted(Counter(occurrences.values()).items()))


class DatasetSplit:
    """One split (e.g. train/dev/test) of the dataset, loaded from text files.

    The examples, definitions, hypernyms, labels and (optionally) domains
    files are read line by line; methods that combine them (e.g. the
    ``domain`` filters) index them by the same line position.
    """

    def __init__(self, data_folder: Path,
                 examples_file_name: str,
                 definitions_file_name: str,
                 hypernyms_file_name: str,
                 labels_file_name: str,
                 domains_file_name: str = None):
        """Load all files of the split from ``data_folder``.

        :param data_folder: directory containing the files of the split
        :param examples_file_name: tab-separated examples file; the first
            column of every line is used as the target
        :param definitions_file_name: file with one definition per line
        :param hypernyms_file_name: tab-separated hypernyms file
        :param labels_file_name: file with one label per line ('T' marks a
            positive example)
        :param domains_file_name: optional file with one domain id per line
        """
        # Maps a domain name to the domain ids used in the domains file.
        self.domain_dict = {
            'wnt': ["0"],
            'msh': ["1"],
            'ctl': ["2"],
            'cps': ["3"],
            'domains': ["1", "2", "3"]
        }
        self.examples = [line.split('\t') for line in
                         _read_lines(data_folder / examples_file_name)]
        self.defs = _read_lines(data_folder / definitions_file_name)
        self.hyps = [line.split('\t') for line in
                     _read_lines(data_folder / hypernyms_file_name)]
        self.labels = _read_lines(data_folder / labels_file_name)
        if domains_file_name is not None:
            self.domains = _read_lines(data_folder / domains_file_name)
        else:
            self.domains = None
        self.targets = [example[0] for example in self.examples]

    def _domain_indices(self, domain):
        """Return the positions of all entries that belong to ``domain``.

        :raises ValueError: if the split was loaded without a domains file
        :raises KeyError: if ``domain`` is not a key of ``self.domain_dict``
        """
        if self.domains is None:
            raise ValueError(
                'domain filtering requires a domains file, but this split has none')
        return np.where(np.isin(self.domains, self.domain_dict[domain]))[0]

    def _select(self, values, domain):
        """Return ``values`` as a list, restricted to ``domain`` when it is given."""
        if domain is None:
            return list(values)
        return list(np.array(values)[self._domain_indices(domain)])

    def _instances(self):
        """Return the set of unique (example columns..., definition) tuples."""
        # Examples are lists (unhashable), so every instance is turned into a tuple.
        return {tuple(example) + (definition,)
                for example, definition in zip(self.examples, self.defs)}

    def num_total_examples(self, domain=None):
        """Return the number of examples, optionally restricted to ``domain``."""
        if domain is not None:
            return len(self._domain_indices(domain))
        return len(self.examples)

    def num_unique_senses(self, domain=None):
        """Return the number of distinct definitions, optionally within ``domain``."""
        return len(set(self._select(self.defs, domain)))

    def num_unique_targets(self, domain=None):
        """Return the number of distinct targets, optionally within ``domain``."""
        return len(set(self._select(self.targets, domain)))

    def examples_per_sense(self, domain=None):
        """Count how many senses have a given number of examples.

        Returns a dict ordered by the number of examples per sense.
        Example: ``{1: 10, 2: 5, 4: 15}`` means that 10 senses have exactly
        one example, 5 senses have two examples and 15 senses have four
        examples. There is no upper limit on the number of examples.
        """
        return _count_histogram(self._select(self.defs, domain))

    def examples_per_target(self, domain=None):
        """Count how many targets have a given number of examples.

        Same output format as :meth:`examples_per_sense`, grouped by target
        instead of by definition.
        """
        return _count_histogram(self._select(self.targets, domain))

    def percent_positive_examples(self, domain=None):
        """Return the fraction of labels equal to 'T' (0.0 if there are none)."""
        labels = self._select(self.labels, domain)
        return _ratio(labels.count('T'), len(labels))

    def target_overlap(self, overlap_dataset, second_dataset=None, domain=None):
        """Return how many targets of ``overlap_dataset`` also occur in this split.

        The unique targets of this split (optionally restricted to ``domain``
        and optionally united with those of ``second_dataset``) are
        intersected with the targets of ``overlap_dataset``. The size of the
        intersection is divided by the total, non-deduplicated number of
        targets in ``overlap_dataset``.
        """
        own_targets = set(self._select(self.targets, domain))
        if second_dataset is not None:
            own_targets |= set(second_dataset.targets)
        shared = own_targets & set(overlap_dataset.targets)
        return _ratio(len(shared), len(overlap_dataset.targets))

    def sense_overlap(self, overlap_dataset, second_dataset=None, domain=None):
        """Return how many definitions of ``overlap_dataset`` also occur in this split.

        Works like :meth:`target_overlap`, but on definitions. The divisor is
        the total, non-deduplicated number of definitions in
        ``overlap_dataset``.
        """
        own_defs = set(self._select(self.defs, domain))
        if second_dataset is not None:
            own_defs |= set(second_dataset.defs)
        shared = own_defs & set(overlap_dataset.defs)
        return _ratio(len(shared), len(overlap_dataset.defs))

    def instance_overlap(self, overlap_dataset, second_dataset=None):
        """Return the fraction of unique instances of ``overlap_dataset`` seen here.

        An instance is the combination of all example columns with the
        definition on the same line. The instances of this split (optionally
        united with those of ``second_dataset``) are intersected with the
        instances of ``overlap_dataset``; the result is divided by the number
        of unique instances in ``overlap_dataset``.
        """
        own_instances = self._instances()
        if second_dataset is not None:
            own_instances |= second_dataset._instances()
        other_instances = overlap_dataset._instances()
        return _ratio(len(own_instances & other_instances), len(other_instances))


def _read_prefixed_file(folder: str, file_name: str):
    """Read ``folder/file_name`` and return its lines without a trailing empty entry."""
    # The context manager closes the handle; stripping newlines keeps a final
    # line break from being counted as an additional (empty) entry.
    with open(os.path.join(folder, file_name)) as handle:
        return handle.read().strip('\n').split('\n')


def get_pos_label_percent(folder: str, prefix: str, label_indices: list = None):
    """Return the fraction of 'T' labels in ``<prefix>labels.txt``.

    :param label_indices: optional positions to restrict the computation to
    """
    labels = _read_prefixed_file(folder, prefix + 'labels.txt')
    if label_indices is not None:
        labels = list(np.array(labels)[label_indices])
    return _ratio(labels.count('T'), len(labels))


def get_unique_senses(folder: str, prefix: str, definition_indices: list = None):
    """Return the number of distinct definitions in ``<prefix>definitions.txt``.

    :param definition_indices: optional positions to restrict the computation to
    """
    definitions = _read_prefixed_file(folder, prefix + 'definitions.txt')
    if definition_indices is not None:
        definitions = list(np.array(definitions)[definition_indices])
    return len(set(definitions))


def get_total_number(folder: str, prefix: str, example_indices: list = None):
    """Return the number of lines in ``<prefix>definitions.txt``.

    :param example_indices: optional positions to restrict the count to
    """
    # NOTE(review): this counts the lines of the definitions file although the
    # parameter names talk about examples - confirm that this is intended.
    examples = _read_prefixed_file(folder, prefix + 'definitions.txt')
    if example_indices is not None:
        examples = list(np.array(examples)[example_indices])
    return len(examples)


def print_stats(dataset: DatasetSplit, domain=None):
    """Print the basic statistics of ``dataset``, optionally for one ``domain``."""
    print('total number', dataset.num_total_examples(domain=domain))
    print('number unique senses', dataset.num_unique_senses(domain=domain))
    print('number unique targets', dataset.num_unique_targets(domain=domain))
    print('percent positive labels', dataset.percent_positive_examples(domain=domain))


if __name__ == '__main__':
    location = Path(__file__).parent.parent
    train_folder = location / 'data' / 'en' / 'Training'
    dev_folder = location / 'data' / 'en' / 'Development'
    test_folder = location / 'data' / 'en' / 'Test'
    train_split = DatasetSplit(data_folder=Path(train_folder),
                               examples_file_name='train_examples.txt',
                               definitions_file_name='train_definitions.txt',
                               hypernyms_file_name='train_hypernyms.txt',
                               labels_file_name='train_labels.txt')
    dev_split = DatasetSplit(data_folder=Path(dev_folder),
                             examples_file_name='dev_examples.txt',
                             definitions_file_name='dev_definitions.txt',
                             hypernyms_file_name='dev_hypernyms.txt',
                             labels_file_name='dev_labels.txt')
    # test_split = DatasetSplit(data_folder=Path(test_folder),
    #                           examples_file_name='test_examples.txt',
    #                           definitions_file_name='test_definitions.txt',
    #                           hypernyms_file_name='test_hypernyms.txt',
    #                           labels_file_name='test_labels.txt',
    #                           domains_file_name='test_domains.txt')
    print(train_split.target_overlap(dev_split))
    # print(train_split.target_overlap(test_split))
    # print(train_split.target_overlap(test_split, dev_split))
    print()
    # print(test_split.target_overlap(train_split))
    # print(test_split.target_overlap(train_split, domain='wnt'))
    # print(test_split.target_overlap(train_split, domain='msh'))
    # print(test_split.target_overlap(train_split, domain='ctl'))
    # print(test_split.target_overlap(train_split, domain='cps'))
    print()
    print(train_split.sense_overlap(dev_split))
    # print(train_split.sense_overlap(test_split))
    # print(train_split.sense_overlap(test_split, dev_split))
    print()
    # print(test_split.sense_overlap(train_split))
    # print(test_split.sense_overlap(train_split, domain='wnt'))
    # print(test_split.sense_overlap(train_split, domain='msh'))
    # print(test_split.sense_overlap(train_split, domain='ctl'))
    # print(test_split.sense_overlap(train_split, domain='cps'))
    print('\n##### train #####')
    print_stats(train_split)
    print('\n##### dev #####')
    print_stats(dev_split)
    # print('\n##### test #####')
    # print_stats(test_split)
    #
    # print('\n##### wnt')
    # print_stats(test_split, 'wnt')
    #
    # print('\n##### msh')
    # print_stats(test_split, 'msh')
    #
    # print('\n##### ctl')
    # print_stats(test_split, 'ctl')
    #
    # print('\n##### cps')
    # print_stats(test_split, 'cps')
    #
    # print('\n##### all domains')
    # ... (the remainder of the script is cut off in this snippet)
boston_housing.py
Source:boston_housing.py
1"""Boston housing price regression dataset.2"""3from __future__ import absolute_import4from __future__ import division5from __future__ import print_function6from ..utils.data_utils import get_file7import numpy as np8def load_data(path='boston_housing.npz', test_split=0.2, seed=113):9 """Loads the Boston Housing dataset.10 # Arguments11 path: path where to cache the dataset locally12 (relative to ~/.keras/datasets).13 test_split: fraction of the data to reserve as test set.14 seed: Random seed for shuffling the data15 before computing the test split.16 # Returns17 Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.18 """19 assert 0 <= test_split < 120 path = get_file(path,21 origin='https://s3.amazonaws.com/keras-datasets/boston_housing.npz',22 file_hash='f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5')23 f = np.load(path)24 x = f['x']25 y = f['y']26 f.close()27 np.random.seed(seed)28 indices = np.arange(len(x))29 np.random.shuffle(indices)30 x = x[indices]31 y = y[indices]32 x_train = np.array(x[:int(len(x) * (1 - test_split))])33 y_train = np.array(y[:int(len(x) * (1 - test_split))])34 x_test = np.array(x[int(len(x) * (1 - test_split)):])35 y_test = np.array(y[int(len(x) * (1 - test_split)):])...
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!