Best Python code snippet using yandex-tank
basic_iterator_test.py
Source:basic_iterator_test.py
# pylint: disable=no-self-use,invalid-name
from typing import List, Iterable, Dict, Union
from collections import Counter
from allennlp.common import Params
from allennlp.common.testing import AllenNlpTestCase
from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.dataset import Batch
from allennlp.data.dataset_readers.dataset_reader import _LazyInstances
from allennlp.data.fields import TextField
from allennlp.data.iterators import BasicIterator
from allennlp.data.token_indexers import SingleIdTokenIndexer


class LazyIterable:
    """Wrap a collection so it can only be consumed through ``__iter__``.

    Each call to ``__iter__`` returns a fresh generator over the wrapped
    instances; the wrapper deliberately exposes no ``__len__``.
    """

    def __init__(self, instances):
        self._instances = instances

    def __iter__(self):
        return (instance for instance in self._instances)


class IteratorTest(AllenNlpTestCase):
    """Shared fixture: a small vocabulary and five text instances."""

    def setUp(self):
        super(IteratorTest, self).setUp()
        self.token_indexers = {"tokens": SingleIdTokenIndexer()}
        self.vocab = Vocabulary()
        self.this_index = self.vocab.add_token_to_namespace('this')
        self.is_index = self.vocab.add_token_to_namespace('is')
        self.a_index = self.vocab.add_token_to_namespace('a')
        self.sentence_index = self.vocab.add_token_to_namespace('sentence')
        self.another_index = self.vocab.add_token_to_namespace('another')
        self.yet_index = self.vocab.add_token_to_namespace('yet')
        self.very_index = self.vocab.add_token_to_namespace('very')
        self.long_index = self.vocab.add_token_to_namespace('long')
        instances = [
                self.create_instance(["this", "is", "a", "sentence"]),
                self.create_instance(["this", "is", "another", "sentence"]),
                self.create_instance(["yet", "another", "sentence"]),
                self.create_instance(["this", "is", "a", "very", "very", "very", "very", "long", "sentence"]),
                self.create_instance(["sentence"]),
                ]
        self.instances = instances
        self.lazy_instances = LazyIterable(instances)

    def create_instance(self, str_tokens: List[str]):
        """Build an ``Instance`` with a single ``text`` field from raw tokens."""
        tokens = [Token(t) for t in str_tokens]
        instance = Instance({'text': TextField(tokens, self.token_indexers)})
        return instance

    def create_instances_from_token_counts(self, token_counts: List[int]) -> List[Instance]:
        """Build one instance per count, each made of ``count`` copies of "word"."""
        return [self.create_instance(["word"] * count) for count in token_counts]

    def get_batches_stats(self, batches: Iterable[Batch]) -> Dict[str, Union[int, List[int]]]:
        """Summarise batches: per-batch lengths, total instances, padded sample sizes.

        ``sample_sizes`` is, for each batch, the longest ``num_tokens`` padding
        length among its instances multiplied by the number of instances.
        """
        # Materialise once: the batches are walked twice below, and a generator
        # argument would otherwise be exhausted after the first pass.
        batches = list(batches)
        group_lengths = [len(batch.instances) for batch in batches]

        sample_sizes = []
        for batch in batches:
            batch_sequence_length = max(
                    [instance.get_padding_lengths()['text']['num_tokens']
                     for instance in batch.instances]
            )
            sample_sizes.append(batch_sequence_length * len(batch.instances))

        return {
                "batch_lengths": group_lengths,
                "total_instances": sum(group_lengths),
                "sample_sizes": sample_sizes
        }

    def assert_instances_are_correct(self, candidate_instances):
        """Assert the candidates (padding removed) match the fixture instances as a set."""
        # First we need to remove padding tokens from the candidates.
        # pylint: disable=protected-access
        candidate_instances = [tuple(w for w in instance if w != 0) for instance in candidate_instances]
        expected_instances = [tuple(instance.fields["text"]._indexed_tokens["tokens"])
                              for instance in self.instances]
        assert set(candidate_instances) == set(expected_instances)


class TestBasicIterator(IteratorTest):
    """Behavioural tests for ``BasicIterator`` over eager and lazy datasets."""

    def test_get_num_batches(self):
        # Lazy and instances per epoch not specified.
        assert BasicIterator(batch_size=2).get_num_batches(self.lazy_instances) == 1
        # Lazy and instances per epoch specified.
        assert BasicIterator(batch_size=2, instances_per_epoch=21).get_num_batches(self.lazy_instances) == 11
        # Not lazy and instances per epoch specified.
        assert BasicIterator(batch_size=2, instances_per_epoch=21).get_num_batches(self.instances) == 11
        # Not lazy and instances per epoch not specified.
        assert BasicIterator(batch_size=2).get_num_batches(self.instances) == 3

    # The BasicIterator should work the same for lazy and non lazy datasets,
    # so each remaining test runs over both.
    def test_yield_one_epoch_iterates_over_the_data_once(self):
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2)
            iterator.index_with(self.vocab)
            batches = list(iterator(test_instances, num_epochs=1))
            # We just want to get the single-token array for the text field in the instance.
            instances = [tuple(instance.detach().cpu().numpy())
                         for batch in batches
                         for instance in batch['text']["tokens"]]
            assert len(instances) == 5
            self.assert_instances_are_correct(instances)

    def test_call_iterates_over_data_forever(self):
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2)
            iterator.index_with(self.vocab)
            generator = iterator(test_instances)
            batches = [next(generator) for _ in range(18)]  # going over the data 6 times
            # We just want to get the single-token array for the text field in the instance.
            instances = [tuple(instance.detach().cpu().numpy())
                         for batch in batches
                         for instance in batch['text']["tokens"]]
            assert len(instances) == 5 * 6
            self.assert_instances_are_correct(instances)

    def test_create_batches_groups_correctly(self):
        # pylint: disable=protected-access
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2)
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0], self.instances[1]],
                                         [self.instances[2], self.instances[3]],
                                         [self.instances[4]]]

    def test_few_instances_per_epoch(self):
        # pylint: disable=protected-access
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2, instances_per_epoch=3)
            # First epoch: 3 instances -> [2, 1]
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0], self.instances[1]],
                                         [self.instances[2]]]
            # Second epoch: 3 instances -> [2, 1]
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[3], self.instances[4]],
                                         [self.instances[0]]]
            # Third epoch: 3 instances -> [2, 1]
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[1], self.instances[2]],
                                         [self.instances[3]]]

    def test_many_instances_per_epoch(self):
        # pylint: disable=protected-access
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2, instances_per_epoch=7)
            # First epoch: 7 instances -> [2, 2, 2, 1]
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0], self.instances[1]],
                                         [self.instances[2], self.instances[3]],
                                         [self.instances[4], self.instances[0]],
                                         [self.instances[1]]]
            # Second epoch: 7 instances -> [2, 2, 2, 1]
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2], self.instances[3]],
                                         [self.instances[4], self.instances[0]],
                                         [self.instances[1], self.instances[2]],
                                         [self.instances[3]]]

    def test_epoch_tracking_when_one_epoch_at_a_time(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)
        for epoch in range(10):
            for batch in iterator(self.instances, num_epochs=1):
                assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])

    def test_epoch_tracking_multiple_epochs(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)
        all_batches = list(iterator(self.instances, num_epochs=10))
        assert len(all_batches) == 10 * 3
        for i, batch in enumerate(all_batches):
            # Should have 3 batches per epoch
            epoch = i // 3
            assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])

    def test_epoch_tracking_forever(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)
        it = iterator(self.instances, num_epochs=None)
        all_batches = [next(it) for _ in range(30)]
        assert len(all_batches) == 30
        for i, batch in enumerate(all_batches):
            # Should have 3 batches per epoch
            epoch = i // 3
            assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])

    def test_shuffle(self):
        # pylint: disable=protected-access
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2, instances_per_epoch=100)
            in_order_batches = list(iterator._create_batches(test_instances, shuffle=False))
            shuffled_batches = list(iterator._create_batches(test_instances, shuffle=True))
            assert len(in_order_batches) == len(shuffled_batches)
            # With 100 instances, shuffling better change the order.
            assert in_order_batches != shuffled_batches
            # But not the counts of the instances.
            in_order_counts = Counter(id(instance) for batch in in_order_batches for instance in batch)
            shuffled_counts = Counter(id(instance) for batch in shuffled_batches for instance in batch)
            assert in_order_counts == shuffled_counts

    def test_max_instances_in_memory(self):
        # pylint: disable=protected-access
        for test_instances in (self.instances, self.lazy_instances):
            iterator = BasicIterator(batch_size=2, max_instances_in_memory=3)
            # One epoch: 5 instances -> [2, 1, 2]
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0], self.instances[1]],
                                         [self.instances[2]],
                                         [self.instances[3], self.instances[4]]]

    def test_multiple_cursors(self):
        # pylint: disable=protected-access
        lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
        lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))
        eager_instances1 = self.instances[:]
        eager_instances2 = self.instances[:]
        for instances1, instances2 in [(eager_instances1, eager_instances2),
                                       (lazy_instances1, lazy_instances2)]:
            iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
            iterator.index_with(self.vocab)
            # First epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]], [self.instances[1]]]
            # First epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]], [self.instances[1]]]
            # Second epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]], [self.instances[3]]]
            # Second epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]], [self.instances[3]]]

    def test_from_params(self):
        # pylint: disable=protected-access
        params = Params({})
        iterator = BasicIterator.from_params(params)
        assert iterator._batch_size == 32  # default value
        params = Params({"batch_size": 10})
        iterator = BasicIterator.from_params(params)
        assert iterator._batch_size == 10

    def test_maximum_samples_per_batch(self):
        for test_instances in (self.instances, self.lazy_instances):
            # pylint: disable=protected-access
            iterator = BasicIterator(
                    batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
            )
            iterator.index_with(self.vocab)
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            stats = self.get_batches_stats(batches)
            # ensure all instances are in a batch
            assert stats['total_instances'] == len(self.instances)
            # ensure correct batch sizes
            assert stats['batch_lengths'] == [2, 1, 1, 1]
            # ensure correct sample sizes (<= 9)
            assert stats['sample_sizes'] == [8, 3, 9, 1]

    def test_maximum_samples_per_batch_packs_tightly(self):
        # pylint: disable=protected-access
        token_counts = [10, 4, 3]
        test_instances = self.create_instances_from_token_counts(token_counts)
        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 11]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        stats = self.get_batches_stats(batches)
        # ensure all instances are in a batch
        assert stats['total_instances'] == len(token_counts)
        # ensure correct batch sizes
        assert stats['batch_lengths'] == [1, 2]
        # ensure correct sample sizes (<= 11)...
        # NOTE(review): the snippet is truncated at this point in the source
        # listing; the remaining assertion(s) are not visible here.
input.py
Source:input.py
1import os2import numpy3import csv4# read data from file5def readFile(filename):6 with open(filename) as f:7 lines=csv.reader(f,delimiter=',')8 matrix=[]9 for line in lines:10 if line[-1] == '':11 array=line[:-1]#remove last ',' if any12 else:13 array=line14 matrix.append(array)15 matrix=numpy.asarray(matrix,dtype=numpy.float32)16 #instances=matrix[:,1:]17 #values=numpy.ones((instances.shape[0],1))-matrix[:,0:1]18 #one_hot_labels=numpy.c_[matrix[:,0:1],values]19 return matrix 20# read group information, for pairwise loss function21def readGroup(filename):22 groups=readFile(filename)23 groups=numpy.asarray(groups,dtype=numpy.int32)24 #groups=numpy.reshape(groups, (-1,1))25 #print(groups.shape)26 results=[]27 for g in range(groups.shape[0]):28 group=numpy.full((1, groups[g][0]), g).ravel()29 results=numpy.concatenate((results,group))30 return results.reshape([-1,1])31 32class DataSet(object):33 def __init__(self,instances,labels, groups=[]):34 self._instances=instances35 self._labels=labels36 self._num_instances=instances.shape[0]37 self._epochs_completed=038 self._index_in_epoch=039 self._groups=groups40 @property41 def instances(self):42 return self._instances43 @property44 def labels(self):45 return self._labels46 @property47 def groups(self):48 return self._groups49 @property50 def num_instances(self):51 return self._num_instances52 @property53 def epochs_completed(self):54 return self._epochs_completed55 def pos_instance_ratio(self):56 pos_instances=0.057 for label in self._labels[:,0]:58 if label==1:59 pos_instances+=160 return pos_instances/self._labels.shape[0]61 def next_batch(self,batch_size):62 start=self._index_in_epoch63 self._index_in_epoch+=batch_size64 if self._index_in_epoch>self.num_instances:65 self._epochs_completed+=166 #shuffle the data67 shuffled=numpy.arange(self._num_instances)68 numpy.random.shuffle(shuffled)69 self._instances=self._instances[shuffled]70 self._labels=self._labels[shuffled]71 if self._groups != []:72 
self._groups=self._groups[shuffled]73 #start next epoch74 start=075 self._index_in_epoch=batch_size76 end=self._index_in_epoch77 return self._instances[start:end],self._labels[start:end],self._groups[start:end]78 79def read_data_sets(train_file,train_label_file,test_file, test_label_file, group_file):80 class DataSets(object):81 pass82 data_sets=DataSets()83 train_instances=readFile(train_file)84 train_labels=readFile(train_label_file)85 test_instances=readFile(test_file)86 test_labels=readFile(test_label_file)87 '''88 print("data read complete, train instance dimension is ",numpy.shape(train_instances),"label dimension is,",numpy.shape(train_labels))89 90 print("first features",train_instances[1,:34])91 print("second features",train_instances[1,34:174])92 print("third features",train_instances[1,174:211])93 print("fourth features",train_instances[1,-15:])94 print("fourth features",train_instances[1,211:])95 '''96 groups=readGroup(group_file)97 #print("group file dimension is,", numpy.shape(groups),groups)98 # normalization,optional99 #train_instances,test_instances=normalize(train_instances,test_instances)100 #Note: added for shuffling features101 if False:102 shuffled=numpy.arange(test_instances.shape[1])103 numpy.random.shuffle(shuffled)104 test_instances=test_instances[:,shuffled]105 train_instances=train_instances[:,shuffled]106 data_sets.train=DataSet(train_instances,train_labels, groups)107 data_sets.test=DataSet(test_instances,test_labels)108 return data_sets109# min-max normalization110def normalize(train_instances,test_instances):111 #train_size=train_instances.shape[0]112 train_min=train_instances.min(0)113 train_max=train_instances.max(0)114 train_instances=(train_instances-train_min)/(train_max-train_min)115 test_instances=(test_instances-train_min)/(train_max-train_min)116 return train_instances,test_instances117 #comb=numpy.r_[train_instances,test_instances]118 #comb=(comb-comb.min(0))/(comb.max(0)-comb.min(0))119 #return 
comb[0:train_size,:],comb[train_size:,:]120# test normalize() method121def testNormalize():122 a=numpy.arange(15).reshape(-1,5)123 b=numpy.arange(15,25).reshape(-1,5)124 a=a.astype(numpy.float32)125 b=b.astype(numpy.float32)126 a,b=normalize(a,b)127 print(a)128 print(b)129#test read_data_sets() method130def testReadDataSets():131 dir='/media/StorageData1/MutationFaultLocalization/LearningRankData/NfolderData/Deep/All25Features/SpectTIRMetricM/Time/1/'132 datasets=read_data_sets(dir+'Train.csv',dir+'TrainLabel.csv',dir+'Test.csv',dir+'TestLabel.csv')133 print(datasets.train.pos_instance_ratio())134 print(datasets.test.pos_instance_ratio())135# test136#testReadDataSets()137#train=readFile("/Users/lingmingzhang/Documents/workspace/FLStudy/data/training.txt")138#print(train.shape)139#print(label.shape)140#testNormalize()141#g=readGroup('/home/lixia/XiaLi/ICSE18/DeepLearningData/SpectrumTestJhawkByte/groupfile/Time/1/traidata.txt.group')...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!