Best Python code snippet using pyshould_python
prepro.py
Source:prepro.py
# Copyright (c) Microsoft. All rights reserved.
# Modified Copyright by Ubiquitous Knowledge Processing (UKP) Lab,
# Technische Universität Darmstadt
"""Preprocess stance-detection datasets into MT-DNN json format.

Each dataset is loaded via a task-specific ``load_*`` helper, tokenized with
the bert-base-uncased tokenizer and written as one json object per line.
Additional low-resource training splits (10%/30%/70%) are produced from
pre-computed index files under data_splits/low_resource_splits/.
"""
import argparse
import random

from data_utils.log_wrapper import create_logger
from data_utils.glue_utils import *

# Fractions of the training data used for the low-resource experiments.
LOW_RESOURCE_DATA_RATES = [0.1, 0.3, 0.7]

# NOTE(review): BertTokenizer, GLOBAL_MAP, the load_* helpers, os, json, np,
# tqdm and truncate_seq_pair appear to come from the star import above --
# confirm against data_utils.glue_utils.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
logger = create_logger(__name__, to_disk=True, log_file='bert_data_proc_512.log')


def get_random_indices(dump_path, data, rate):
    """Return the sample indices that make up a low-resource training split.

    Args:
        dump_path: Path of the json file being written; its basename encodes
            the dataset name (the part before the first underscore).
        data: Full list of training samples (only its length is used).
        rate: Fraction of the data to keep; 1.0 disables subsampling.

    Returns:
        A list of integer indices read from the pre-computed split file, or an
        empty list when rate >= 1.0 (callers then keep every sample).
    """
    rdm_indices = []
    if rate < 1.0:
        dataset_name = dump_path.rsplit("/", 1)[1].split("_")[0]
        split_path = "data_splits/low_resource_splits/" + dataset_name + "_train.csv"
        with open(split_path, "r") as split_file_in:
            # The split file holds one comma-separated line of shuffled
            # indices; only the first len(data)*rate of them are used.
            rdm_indices = [int(idx) for idx in
                           split_file_in.readline().split(",")[:int(len(data) * rate)]]
    return rdm_indices


def build_data(data, dump_path, rate=1.0, MAX_SEQ_LEN=100, is_train=True, tolower=True):
    """Build data of sentence pair tasks.

    Writes one json object per kept sample with BERT input ids for
    "[CLS] hypothesis [SEP] premise [SEP]" and matching segment ids.

    Args:
        data: Iterable of dicts with 'uid', 'premise', 'hypothesis', 'label'.
        dump_path: Output json file; skipped entirely if it already exists.
        rate: Fraction of samples to keep (low-resource splits).
        MAX_SEQ_LEN: Maximum total sequence length incl. special tokens.
        is_train, tolower: Unused; kept for interface compatibility.
    """
    if os.path.isfile(dump_path):
        print(dump_path + " already exists. Skipping!")
        return
    # A set makes the per-sample membership test below O(1) instead of O(n).
    rdm_indices = set(get_random_indices(dump_path, data, rate))
    with open(dump_path, 'w', encoding='utf-8') as writer:
        for idx, sample in tqdm(enumerate(data)):
            if rate < 1.0 and idx not in rdm_indices:
                continue
            ids = sample['uid']
            premise = bert_tokenizer.tokenize(sample['premise'])
            hypothesis = bert_tokenizer.tokenize(sample['hypothesis'])
            label = sample['label']
            if "spelling" in dump_path or "negation" in dump_path:
                # Adversarial sets were already truncated in preprocessing;
                # only enforce BERT's hard 512-token limit here.
                truncate_len = 512 - 3
            else:
                truncate_len = MAX_SEQ_LEN - 3
            truncate_seq_pair(premise, hypothesis, truncate_len)
            input_ids = bert_tokenizer.convert_tokens_to_ids(
                ['[CLS]'] + hypothesis + ['[SEP]'] + premise + ['[SEP]'])
            type_ids = [0] * (len(hypothesis) + 2) + [1] * (len(premise) + 1)
            features = {'uid': ids, 'label': label, 'token_id': input_ids,
                        'type_id': type_ids, 'hypothesis': sample['hypothesis'],
                        'premise': sample['premise']}
            writer.write('{}\n'.format(json.dumps(features)))


def build_data_single(data, dump_path, rate=1.0, MAX_SEQ_LEN=100):
    """Build data of single sentence tasks.

    Same output format as build_data but with only a premise:
    "[CLS] premise [SEP]" with all-zero segment ids.
    """
    if os.path.isfile(dump_path):
        print(dump_path + " already exists. Skipping!")
        return
    rdm_indices = set(get_random_indices(dump_path, data, rate))
    with open(dump_path, 'w', encoding='utf-8') as writer:
        for idx, sample in tqdm(enumerate(data)):
            if rate < 1.0 and idx not in rdm_indices:
                continue
            ids = sample['uid']
            premise = bert_tokenizer.tokenize(sample['premise'])
            label = sample['label']
            if "spelling" in dump_path or "negation" in dump_path:
                # Already truncated in preprocessing; enforce the 512 limit.
                premise = premise[:512 - 3]
            elif len(premise) > MAX_SEQ_LEN - 3:
                premise = premise[:MAX_SEQ_LEN - 3]
            input_ids = bert_tokenizer.convert_tokens_to_ids(['[CLS]'] + premise + ['[SEP]'])
            type_ids = [0] * (len(premise) + 2)
            features = {'uid': ids, 'label': label, 'token_id': input_ids,
                        'type_id': type_ids, 'premise': sample['premise']}
            writer.write('{}\n'.format(json.dumps(features)))


def print_status(train_data, dev_data, test_data, test_negation, test_spelling,
                 test_paraphrase, dataset):
    """Log the number of samples loaded for each split of *dataset*.

    The three adversarial test sets are optional; None means not available.
    """
    logger.info('Loaded {0} {1} train samples'.format(len(train_data), dataset))
    logger.info('Loaded {0} {1} dev samples'.format(len(dev_data), dataset))
    logger.info('Loaded {0} {1} test samples'.format(len(test_data), dataset))
    if test_negation is not None:
        logger.info('Loaded {0} {1} test negation samples'.format(len(test_negation), dataset))
    if test_spelling is not None:
        logger.info('Loaded {0} {1} test spelling samples'.format(len(test_spelling), dataset))
    if test_paraphrase is not None:
        logger.info('Loaded {0} {1} test paraphrase samples'.format(len(test_paraphrase), dataset))
    logger.info('\n')


def build_handler(train_data, dev_data, test_data, test_negation, test_spelling,
                  test_paraphrase, dataset, mt_dnn_root, single=False, MAX_SEQ_LEN=100):
    """Convert all splits of *dataset* and write them below *mt_dnn_root*.

    Also writes the low-resource training splits defined by
    LOW_RESOURCE_DATA_RATES.  Adversarial test sets are optional (None skips
    them).  *single* selects the single-sentence converter.
    """
    build_fn = build_data_single if single else build_data
    logger.info('Start converting and storing train/dev/test files for the model')
    train_fout = os.path.join(mt_dnn_root, '{0}_train.json'.format(dataset))
    dev_fout = os.path.join(mt_dnn_root, '{0}_dev.json'.format(dataset))
    test_fout = os.path.join(mt_dnn_root, '{0}_test.json'.format(dataset))
    test_negation_fout = os.path.join(mt_dnn_root, '{0}_test_negation.json'.format(dataset))
    test_spelling_fout = os.path.join(mt_dnn_root, '{0}_test_spelling.json'.format(dataset))
    test_paraphrase_fout = os.path.join(mt_dnn_root, '{0}_test_paraphrase.json'.format(dataset))
    for rate in LOW_RESOURCE_DATA_RATES:
        # e.g. "<dataset>_train10p.json" for rate 0.1
        low_resource_fout = train_fout.split(".json")[0] + str(int(rate * 100)) + "p.json"
        build_fn(train_data, low_resource_fout, rate, MAX_SEQ_LEN=MAX_SEQ_LEN)
    build_fn(train_data, train_fout)
    build_fn(dev_data, dev_fout)
    build_fn(test_data, test_fout)
    if test_negation is not None:
        build_fn(test_negation, test_negation_fout)
    if test_spelling is not None:
        build_fn(test_spelling, test_spelling_fout)
    if test_paraphrase is not None:
        build_fn(test_paraphrase, test_paraphrase_fout)
    logger.info('done with {0}'.format(dataset))


def parse_args():
    """Parse the preprocessing command-line options."""
    parser = argparse.ArgumentParser(description='Preprocessing datasets.')
    parser.add_argument('--seed', type=int, default=13)
    parser.add_argument('--root_dir', type=str, default='data')
    parser.add_argument('--generate_adversarial', type=int, default=0,
                        help="0 if no adversarial samples should be created, otherwise 1")
    parser.add_argument('--max_seq_len', type=int, default=100)
    parser.add_argument('--opennmt_gpu', type=int, default=-1,
                        help="-1 for CPU, else GPU index.")
    parser.add_argument('--opennmt_batch_size', type=int, default=30)
    args = parser.parse_args()
    return args


def main(args):
    """Load every stance dataset, log split sizes and write MT-DNN json files."""
    root = args.root_dir
    seed = args.seed
    CREATE_ADVERSARIAL = args.generate_adversarial == 1
    MAX_SEQ_LEN = args.max_seq_len
    OPENNMT_GPU = args.opennmt_gpu  # Machine translation on GPU (-1 = CPU)
    OPENNMT_BATCH_SIZE = args.opennmt_batch_size  # Machine translation model batch size
    assert os.path.exists(root)
    random.seed(seed)
    np.random.seed(seed)
    mt_dnn_root = os.path.join(root, 'mt_dnn')
    if not os.path.isdir(mt_dnn_root):
        os.mkdir(mt_dnn_root)

    ######################################
    # Stance Detection Tasks
    ######################################
    snopes_path = os.path.join(root, 'Snopes/snopes_corpus_2.csv')
    argmin_path = os.path.join(root, 'ArgMin/')
    fnc1_path = os.path.join(root, 'FNC-1/')
    arc_path = os.path.join(root, 'ARC/')
    ibm_claim_stance_path = os.path.join(root, 'IBM_CLAIM_STANCE/claim_stance_dataset_v1.csv')
    perspectrum_path = os.path.join(root, 'PERSPECTRUM/')
    semeval2016t6_path = os.path.join(root, 'SemEval2016Task6/')
    semeval2019t7_path = os.path.join(root, 'SemEval2019Task7/')
    scd_path = os.path.join(root, 'SCD/')
    iac1_path = os.path.join(root, 'IAC/')

    ######################################
    # Loading / Building DATA
    ######################################
    # (dataset name, loader, dataset path, single-sentence task?) in the
    # original processing order; all loaders share one keyword interface, so
    # the ten copy-pasted load/print/build sequences collapse to a table.
    dataset_specs = [
        ('scd', load_scd, scd_path, True),
        ('semeval2016t6', load_semeval2016t6, semeval2016t6_path, False),
        ('ibmcs', load_ibmcs, ibm_claim_stance_path, False),
        ('iac1', load_iac1, iac1_path, False),
        ('snopes', load_snopes, snopes_path, False),
        ('arc', load_fnc1_arc, arc_path, False),
        ('semeval2019t7', load_semeval2019t7, semeval2019t7_path, True),
        ('perspectrum', load_perspectrum, perspectrum_path, False),
        ('argmin', load_argmin, argmin_path, False),
        ('fnc1', load_fnc1_arc, fnc1_path, False),
    ]
    for name, loader, path, single in dataset_specs:
        train_data, dev_data, test_data, test_negation, test_spelling, test_paraphrase = \
            loader(path, GLOBAL_MAP[name], create_adversarial=CREATE_ADVERSARIAL,
                   dataset=name, MAX_SEQ_LEN=MAX_SEQ_LEN,
                   OPENNMT_GPU=OPENNMT_GPU, OPENNMT_BATCH_SIZE=OPENNMT_BATCH_SIZE)
        print_status(train_data, dev_data, test_data, test_negation,
                     test_spelling, test_paraphrase, name)
        build_handler(train_data, dev_data, test_data, test_negation,
                      test_spelling, test_paraphrase, name, mt_dnn_root,
                      single=single, MAX_SEQ_LEN=MAX_SEQ_LEN)


if __name__ == '__main__':
    args = parse_args()
    # NOTE(review): the scraped source is truncated right after parse_args();
    # calling main(args) is the evident continuation -- confirm against the
    # original repository.
    main(args)
scanners_improved.py
Source:scanners_improved.py
# NOTE(review): this chunk of scanners_improved.py begins with the tail of an
# earlier, not fully visible beacon-translation routine and ends at the
# truncated header of get_movements; those cut-off fragments cannot be
# reconstructed safely from this view and are therefore not reproduced here.


def apply_test_negation(test_negation, control_known_coordinates,
                        control_unknown_coordinates, test_coordinates):
    """Apply one sign-flip pattern and derive per-axis translations.

    Args:
        test_negation: 3-character string of '0'/'1'; '1' negates that axis.
        control_known_coordinates: reference beacon as seen by the known
            scanner (read only).
        control_unknown_coordinates: the same beacon as seen by the unknown
            scanner (mutated in place; callers pass a deepcopy).
        test_coordinates: a second beacon from the unknown scanner (mutated in
            place; callers pass a deepcopy).

    Returns:
        Tuple of (translated test coordinates, per-axis translations).
    """
    # Deliberate alias, not a copy: the negation applied below must be visible
    # when test_coordinates[index] is read again to compute the translated
    # value on the last line of the loop body.
    altered_test_coordinates = test_coordinates
    translations = []
    for index, flag in enumerate(test_negation):
        if flag == '1':
            altered_test_coordinates[index] *= -1
            control_unknown_coordinates[index] *= -1
        translations.append(control_known_coordinates[index]
                            - control_unknown_coordinates[index])
        # translations[index] is the value appended just above.
        altered_test_coordinates[index] = test_coordinates[index] + translations[index]
    return altered_test_coordinates, translations


def apply_movements(movements, coordinates):
    """Reorder a 3-axis coordinate container according to *movements*.

    movements[i] gives the source axis of *coordinates* for output axis i.
    Returns a new deque(maxlen=3) with the rearranged values.
    """
    new_coordinates = deque(maxlen=3)
    for index in range(3):
        new_coordinates.append(coordinates[movements[index]])

    return new_coordinates


def get_negations_and_translations(scanners, adjustments, known_scanner,
                                   unknown_scanner, control_known_beacon,
                                   control_unknown_beacon, test_known_beacon,
                                   test_unknown_beacon):
    """Search the 8 sign-flip patterns mapping the unknown scanner's axes onto
    the known scanner's frame and record the match in *adjustments*.

    The axis permutation stored in adjustments['movements'] is applied first;
    each candidate negation is then tried on deep copies so the originals stay
    intact.  On success, 'negations' and 'translations' are stored in
    *adjustments*; on failure only a message is printed (best effort, kept
    from the original).

    Returns:
        The (possibly updated) *adjustments* dict.
    """
    control_known_beacon_coordinates = scanners[known_scanner]['beacons'][control_known_beacon]
    control_unknown_beacon_coordinates = scanners[unknown_scanner]['beacons'][control_unknown_beacon]
    test_known_beacon_coordinates = scanners[known_scanner]['beacons'][test_known_beacon]
    test_unknown_beacon_coordinates = scanners[unknown_scanner]['beacons'][test_unknown_beacon]
    movements = adjustments['movements']
    moved_control_unknown_beacon_coordinates = apply_movements(
        movements, control_unknown_beacon_coordinates)
    moved_test_unknown_beacon_coordinates = apply_movements(
        movements, test_unknown_beacon_coordinates)
    for negation in range(8):
        # 3-bit sign pattern, e.g. 5 -> '101'.
        test_negation = bin(negation)[2:].zfill(3)
        test_coordinates, translations = apply_test_negation(
            test_negation, control_known_beacon_coordinates,
            deepcopy(moved_control_unknown_beacon_coordinates),
            deepcopy(moved_test_unknown_beacon_coordinates))
        if test_coordinates == test_known_beacon_coordinates:
            adjustments['negations'] = test_negation
            adjustments['translations'] = translations
            break

    if not adjustments.get('negations', ''):
        print('Did not find negations, please interrupt program')
    return adjustments
test_ldap3_query_classes.py
Source:test_ldap3_query_classes.py
# NOTE(review): this chunk starts inside a method of an earlier TestCase for
# query joins (only a closing docstring and one assertion of it are visible)
# and ends mid-docstring of TestQueryAssertion.test_negation.  Only the fully
# visible definitions are reconstructed below.  The earlier class's complete
# test_negation method belongs to a class whose header is outside this view;
# it is preserved verbatim as a comment:
#
#     def test_negation(self):
#         '''
#         Test query join negation
#         '''
#         self.assertEqual(-self.test_object('|', '(givenName=John)', '(objectClass=*)', '(targetAttribute>=10)', '(cn=*John*Doe*)', '(givenName~=John)'), '(!(|(givenName=John)(objectClass=*)(targetAttribute>=10)(cn=*John*Doe*)(givenName~=John)))')


class TestQueryAssertion(unittest.TestCase):
    '''
    Tests for the QueryAssertion class
    '''

    def setUp(self):
        # Class under test; maxDiff = None makes assertEqual show full diffs.
        self.test_object = ldap3_directories.ldap3_.QueryAssertion
        self.maxDiff = None

    def test_implicit_equality(self):
        '''
        Test query assertion with implicit equality
        '''
        self.assertEqual(self.test_object('givenName', 'John'), '(givenName=John)')

    def test_explicit_equality(self):
        '''
        Test query assertion with explicit equality
        '''
        self.assertEqual(self.test_object('givenName', 'John', '='), '(givenName=John)')

    def test_presence(self):
        '''
        Test query assertion for presence
        '''
        self.assertEqual(self.test_object('objectClass', '*'), '(objectClass=*)')

    def test_greater_or_equal(self):
        '''
        Test query assertion with greater or equal
        '''
        self.assertEqual(self.test_object('targetAttribute', 10, '>='), '(targetAttribute>=10)')

    def test_less_or_equal(self):
        '''
        Test query assertion with less or equal
        '''
        self.assertEqual(self.test_object('targetAttribute', 10, '<='), '(targetAttribute<=10)')

    def test_substring(self):
        '''
        Test query assertion with substring
        '''
        self.assertEqual(self.test_object('cn', '*John*Doe*'), '(cn=*John*Doe*)')

    def test_approximate_match(self):
        '''
        Test query assertion with approximate match
        '''
        self.assertEqual(self.test_object('givenName', 'John', '~='), '(givenName~=John)')

    # TODO(review): the original source also defines a test_negation method
    # here ("Test query assertion negated"); its body is truncated in the
    # scraped text and is not guessed at -- restore it from the repository.
Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation test minutes FREE!!