Best Python code snippet using slash
experimental_data.py
Source:experimental_data.py
1import pickle2import os3from tqdm import tqdm4import random5from utils import data_process6from knowledge_graph import Construct_KG78class Extract_Data:9 """10 Extract_Data is used to construct training, validation and test data by extracting TTD related relation paths from knowledge graph.11 In this work, 7,144 $drug-target-disease$ are extracted from Therapeutic Target Database (TTD) as true cases (The details could be found12 in Supplementary Data 1 of our published paper "SemaTyP: A Knowledge Graph Based LiteratureMining Method for Drug Discovery").13 The $\ell$ is set to 4, $K$ is 133 and $M$ is 52. Based on the aforementioned construction of training data, 19,230 positive data are14 obtained. Each data is a length of 873 (133*5+52*4) vector. On the other side, for each $drug-target-disease$, we random replaced the15 drug, target and disease with other drug, target and disease. If the new triplet doesn't exist in TTD, then it is considered as a false16 example, which is denoted as $drug^{'}-target^{'}-disease^{'}$. Similarly, 19,230 negative training data is obtained from false cases.17 """18 def __init__(self,predication_dir, TTD_dir, processed_dir):19 self.predication_dir=predication_dir20 self.TTD_dir=TTD_dir21 self.output_dir=processed_dir2223 def UMLS_type_vector(self):24 print("Construct the UMLS type vector...")25 with open(self.predication_dir+"/predications.txt", 'r') as f:26 entity_vector = {}27 predication_vector = {}2829 for line in tqdm(f, total=sum(1 for _ in open(self.predication_dir+"/predications.txt", 'r'))):30 sline = line.split("\t")31 if sline[0] == "" or sline[1] == "":32 continue33 predicate = sline[3]34 subject_type = sline[4]35 object_type = sline[5].strip("\n")3637 if predicate not in predication_vector:38 predication_vector[predicate] = len(predication_vector)3940 if subject_type not in entity_vector:41 entity_vector[subject_type] = len(entity_vector)4243 if object_type not in entity_vector:44 entity_vector[object_type] = len(entity_vector)4546 pickle.dump(entity_vector, open(self.output_dir+"/entity_vector", "wb+"))47 pickle.dump(predication_vector, open(self.output_dir+"/predicate_vector", "wb+"))48 return entity_vector, predication_vector4950 def drug_syndroms(self):51 """52 input: ./data/TTD/Synonyms.txt, which is downloaded from Therapeutic Target Database53 output: ./data/processed/drug_synonyms, which contains each drug and its corresponding treated synonys, the format of the drug_synonyms:54 1. drug_synonyms["drug_id"]55 2. drug_synonyms["drug_id"]["synonyms"]56 3. drug_synonyms[drug]["synonyms"][drug_synonym]=drug_id57 """58 print("Extracting the drug-syndroms information ...")59 with open(self.TTD_dir+"/Synonyms.txt", 'r') as f:60 drug_synonyms = {}61 for line in tqdm(f, total=sum(1 for _ in open(self.TTD_dir+"/Synonyms.txt", 'r'))):62 sline = line.split("\t")63 drug_id = sline[0].lower()64 drug_names = {}65 drug_names[data_process.process_en(sline[1].lower())] = drug_id66 synonyms = sline[2].lower().strip("\n").split(";")67 for s in synonyms:68 synonym = data_process.process_en(s)69 drug_names[synonym] = drug_id70 for drug_name in drug_names:71 if drug_name not in drug_synonyms:72 drug_synonyms[drug_name] = {}73 drug_synonyms[drug_name]["drug_id"] = drug_names[drug_name]74 drug_synonyms[drug_name]["synonyms"] = {}75 for synonym in drug_names:76 drug_synonyms[drug_name]["synonyms"][synonym] = drug_names[drug_name]77 if drug_name in drug_synonyms[drug_name]["synonyms"]:78 del drug_synonyms[drug_name]["synonyms"][drug_name]79 else:80 for synonym in drug_names:81 drug_synonyms[drug_name]["synonyms"][synonym] = drug_names[drug_name]82 if drug_name in drug_synonyms[drug_name]["synonyms"]:83 del drug_synonyms[drug_name]["synonyms"][drug_name]8485 pickle.dump(drug_synonyms, open(self.output_dir+"/drug_synonyms", "wb+"))8687 return drug_synonyms8889 def disease_target(self):90 """91 input: ./data/TTD/target-disease_TTD2016.txt file which downloaded from Therapeutic Target Database92 output: ./data/processed/disease_targets. The format of disease_target:93 1. disease_targets[Indication]={}94 2. disease_targets[Indication][target_1]=target_1_ID95 Indication is the disease name.96 """97 print("Extracting the disease-target relations ...")98 with open(self.TTD_dir+"/target-disease_TTD2016.txt", "r") as f:99 disease_targets={}100 next(f)101 for line in tqdm(f, total=sum(1 for _ in open(self.TTD_dir+"/target-disease_TTD2016.txt", "r"))):102 sline = line.split("\t")103 TTDTargetID = sline[0].lower()104 Target_Name = sline[1].lower()105 Indications = sline[2].lower().split(";")106 new_Target_Name = data_process.process_en(Target_Name)107 for Indication in Indications:108 new_Indication = data_process.process_en(Indication)109 if new_Indication not in disease_targets:110 disease_targets[new_Indication] = {}111 disease_targets[new_Indication][new_Target_Name] = TTDTargetID112 else:113 disease_targets[new_Indication][new_Target_Name] = TTDTargetID114 pickle.dump(disease_targets, open(self.output_dir+"/disease_targets","wb+"))115116 return disease_targets117118 def drug_disease(self):119 """120 input: ./data/TTD/target-disease_TTD2016.txt file which downloaded from Therapeutic Target Database121 output: 1) ./data/processed/disease_drug; 2)./data/processed/drug_disease122123 1)disease_drug is a dictionary, the format is:124 disease_drug[disease_name]={} contains all drugs and corresponding ids which could treat the disease125 disease_drug[disease][drug]=drug_id126127 2)drug_disease is a dictionary, the format is:128 drug_disease[drug]={} contain all diseases which could be treated by this drug,129 drug_disease[drug][disease]=drug_id130 The reason why we built the drug_disease dictionary is because there is no disease ID in the initial TTD drug-disease_TTD2016.txt file131132 This Therapeutic Target Database currently contains 2,589 targets (including 397 successful, 723 clinical trial, and 1,469 research targets), and 31,614 drugs (including 2,071 approved, 9,528 clinical trial, 17,803 experimental drugs). 20,278 small molecules and 653 antisense drugs are with available structure or oligonucleotide sequence.133 """134 print("Extracting the drug-disease relations ...")135 disease_drug = {}136 drug_disease = {}137 with open(self.TTD_dir + "/drug-disease_TTD2016.txt", "r") as f:138 next(f)139 for line in tqdm(f, total=sum(1 for _ in open(self.TTD_dir + "/drug-disease_TTD2016.txt", "r"))):140 sline = line.split("\t")141 drug_id = sline[0].lower()142 drug = data_process.process_en(sline[1].lower().strip(" "))143 diseases = sline[2].lower().split(";")144 for disease in diseases:145 disease_processed = data_process.process_en(disease)146 if disease_processed not in disease_drug:147 disease_drug[disease_processed] = {}148 disease_drug[disease_processed][drug] = drug_id149 else:150 disease_drug[disease_processed][drug] = drug_id151152 if drug not in drug_disease:153 drug_disease[drug] = {}154 for disease in diseases:155 disease_processed = data_process.process_en(disease)156 drug_disease[drug][disease_processed] = drug_id157 else:158 for disease in diseases:159 disease_processed = data_process.process_en(disease)160 drug_disease[drug][disease_processed] = drug_id161162 pickle.dump(disease_drug, open(self.output_dir+"/disease_drug", "wb+"))163 pickle.dump(drug_disease, open(self.output_dir+"/drug_disease", "wb+"))164 return drug_disease, disease_drug165166 def positive_dtd_cases(self):167 """168 positive_dtd_cases is used to obtain all TTD provided golden standard disease-target-drug relations which also existed in our constructed knowledge graph.169 """170 entities_and_type = {}171 print("Collecting all semantic types of each entity ...")172 with open(self.predication_dir + "/predications.txt", "r") as f:173 for line in tqdm(f, total=sum(1 for _ in open(self.predication_dir + "/predications.txt", "r"))):174 sline = line.split("\t")175 if sline[0] == "" or sline[1] == "":176 continue177 entity_1 = data_process.process_en(sline[0])178 entity_2 = data_process.process_en(sline[1])179 entity_1_type = sline[4]180 entity_2_type = sline[5].strip("\n")181 if entity_1 not in entities_and_type:182 entities_and_type[entity_1] = {}183 entities_and_type[entity_1][entity_1_type] = 1184 else:185 entities_and_type[entity_1][entity_1_type] = 1186187 if entity_2 not in entities_and_type:188 entities_and_type[entity_2] = {}189 entities_and_type[entity_2][entity_2_type] = 1190 else:191 entities_and_type[entity_2][entity_2_type] = 1192193 output=open(self.output_dir+"/experimental_disease_target_drug","w+")194 with open(self.TTD_dir + "/disease_target_drug_cases.txt", "r", encoding='utf-8', errors='replace') as f:195 for line in tqdm(f, total=sum(1 for _ in open(self.TTD_dir + "/disease_target_drug_cases.txt", "r"))):196 # Disease:sickle-cell_disease Target:humanized_igg2 Drug:selg2197 sline = line.split("\t")198 disease = data_process.process_en(sline[0].split(":")[1])199 target = data_process.process_en(sline[1].split(":")[1])200 drug = data_process.process_en(sline[2].strip("\n").split(":")[1])201 if disease in entities_and_type and target in entities_and_type and drug in entities_and_type:202 output.write("Disease:" + disease + "\tTarget:" + target + "\tDrug:" + drug + "\n")203 output.close()204205 def positive_training_data(self):206 """207 positive_training_data is used to construct positive training data all each drug-target-disease cases from "experimental_disease_target_drug"208 """209 if os.path.exists(self.output_dir+"/KnowledgeGraph"):210 KG = pickle.load(open(self.output_dir+"/KnowledgeGraph", "rb"))211 else:212 constuct_KG= Construct_KG(self.predication_dir+"/predications.txt",self.output_dir+"/KnowledgeGraph")213 KG = constuct_KG.construct_KnowledgeGraph()214215 if os.path.exists(self.output_dir+"/predicate_vector") and os.path.exists(self.output_dir+"/entity_vector"):216 entity_vector = pickle.load(open(self.output_dir+"/entity_vector", "rb"))217 predicate_vector = pickle.load(open(self.output_dir+"/predicate_vector", "rb"))218 else:219 entity_vector, predicate_vector = self.UMLS_type_vector()220221 if not os.path.exists(self.output_dir+"/experimental_disease_target_drug"):222 self.initial_disease_target_drug()223224 output = open(self.output_dir+"/all_positive_data", "w+")225 print("Constructing the positive training data ...")226 with open(self.output_dir + "/experimental_disease_target_drug", "r") as f:227 for line in tqdm(f, total=sum(1 for _ in open(self.output_dir + "/experimental_disease_target_drug", "r"))):228 sline = line.split("\t")229 drug = data_process.process_en(sline[2].split(":")[1].strip("\n"))230 disease = data_process.process_en(sline[0].split(":")[1].strip("\n"))231 target = data_process.process_en(sline[1].split(":")[1].strip("\n"))232 print("Constructing the %s\t%s\t%s\t relations ..." %(drug,target,disease))233 self.construct_training_positive_data_based_one_dtd(KG, entity_vector, predicate_vector, drug, target, disease, output)234235 output.close()236237238 def construct_training_positive_data_based_one_dtd(self, KG, entity_vector, predicate_vector, drug, target,239 disease, output):240241 ##242 # The struction of KG, KG is a dictionary, and the following shows the format of the KGï¼243 # KG={244 # subject:245 # {"TYPES":{sysn:2,horm:1,htrf:3}246 # "OBJECTS":247 # object_1:{248 # "TYPES":{}249 # "PREDICATES":{250 # predicate_1:3,251 # predicate_2:4252 # }}}}}253 ##254 ##255 # For one specific drug-target-disease example, there are 4 potential possible positive cases could be constructed, and each of the 4 is a vector of lenght 792:256 # In the KG,257 # case 1: drug - PREDICATE_1 - target - PREDICATE_2 - disease Case 1 indicates the drug, target and disease are directly connected. For case 1, we construct a vector, the order of each eneity in the vector is drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease258 # case 2: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - disease Case 2 indicates that the target and disease are directly connected, while the drug and target are indirectly connected.259 # case 3: drug - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease Case 3 and Case 4 are the other 2 situations.260 # case 4: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease261 # drug->target->262 if drug in KG:263 # For case 1: drug - PREDICATE_1 - target - PREDICATE_2 - disease264 # We used a vector of lenght 873, the format is:265 # drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease266 if target in KG[drug]["OBJECTS"]:267 if target in KG:268 if disease in KG[target]["OBJECTS"]:269 ## construct vector for drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease270 # drug part of vector271 vector = [0] * 873272 for umls_type in KG[drug]["TYPES"]:273 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]274 # PREDICATE_1 part of vector275 for predicate in KG[drug]["OBJECTS"][target]["PREDICATES"]:276 vector[133 + predicate_vector[predicate]] += KG[drug]["OBJECTS"][target]["PREDICATES"][277 predicate]278 # the REAL target part of vector: in this part, the REAL target is both object (for drug) and subject ï¼for diseaseï¼,so all the umls typs of target(as subject and object) should be collected in vector279 # -- 1 the REAL target part of vector: target as object280 for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:281 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \282 KG[drug]["OBJECTS"][target]["TYPES"][umls_type]283 # -- 2 the REAL target part of vector: target as subject284 for umls_type in KG[target]["TYPES"]:285 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]286 # target part of vector: The target of the first PREDICATE_1 - target - PREDICATE_1, the value if copied from the REAL target287 vector[133 + 52:133 + 52 + 133] = vector[133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]288 # PREDICATE_1 part of vector: the valued of the second PREDICATE_1 is same as the first PREDICATE_1, which is copied from PREDICATE_1289 vector[133 + 52 + 133:133 + 52 + 133 + 52] = vector[133:133 + 52]290 # PREDICATE_2 part of vector291 for predicate in KG[target]["OBJECTS"][disease]["PREDICATES"]:292 vector[133 + 52 + 133 + 52 + 133 + predicate_vector[predicate]] += \293 KG[target]["OBJECTS"][disease]["PREDICATES"][predicate]294 # target part of vector:PREDICATE_2 - target - PREDICATE_2, the value of the second target is same as REAL target, which is copied from REAL target295 vector[133 + 52 + 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133 + 52 + 133] = vector[296 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]297 # PREDICATE_2 part of vector:298 vector[299 133 + 52 + 133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52 + 133 + 52] = vector[300 133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52]301 # disease part of vector302 for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:303 vector[133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \304 KG[target]["OBJECTS"][disease]["TYPES"][umls_type]305 for umls_number in vector:306 output.write(str(umls_number) + "\t")307 output.write("1\n")308 # For case 3: drug - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease309 # The format of the result is drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease310 else:311 for entity in KG[target]["OBJECTS"]:312 if entity in KG:313 if disease in KG[entity]["OBJECTS"]:314 vector = [0] * 873315 # drug part of vector316 for umls_type in KG[drug]["TYPES"]:317 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]318 # PREDICATE_1 part of vector319 for predicate_1 in KG[drug]["OBJECTS"][target]["PREDICATES"]:320 vector[133 + predicate_vector[predicate_1]] += \321 KG[drug]["OBJECTS"][target]["PREDICATES"][predicate_1]322 # the REAL target of vector323 # --1: The target is used as object324 for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:325 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \326 KG[drug]["OBJECTS"][target]["TYPES"][umls_type]327 # --2: The target is the subject328 for umls_type in KG[target]["TYPES"]:329 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \330 KG[target]["TYPES"][umls_type]331 # target of vector: PREDICATE_1 - target - PREDICATE_1, the value of the target if copied from REAL target332 vector[133 + 52:133 + 52 + 133] = vector[333 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]334 # PREDICATE_1 of vector: it's copied from the first PREDICATE_1335 vector[133 + 52 + 133:133 + 52 + 133 + 52] = vector[133:133 + 52]336 # PREDICATE_2 of vector337 for predicate_2 in KG[target]["OBJECTS"][entity]["PREDICATES"]:338 vector[133 + 52 + 133 + 52 + 133 + predicate_vector[predicate_2]] += \339 KG[target]["OBJECTS"][entity]["PREDICATES"][predicate_2]340 # entity of vector: PREDICATE_2 - entity - PREDICATE_3341 # -- 1 : entity is the object342 for umls_type in KG[target]["OBJECTS"][entity]["TYPES"]:343 vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \344 KG[target]["OBJECTS"][entity]["TYPES"][umls_type]345 # --2 : entity is the subject346 for umls_type in KG[entity]["TYPES"]:347 vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \348 KG[entity]["TYPES"][umls_type]349 # PREDICATE_3 of vector350 for predicate_3 in KG[entity]["OBJECTS"][disease]["PREDICATES"]:351 vector[352 133 + 52 + 133 + 52 + 133 + 52 + 133 + predicate_vector[predicate_3]] += \353 KG[entity]["OBJECTS"][disease]["PREDICATES"][predicate_3]354 # disease of vector355 for umls_type in KG[entity]["OBJECTS"][disease]["TYPES"]:356 vector[357 133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \358 KG[entity]["OBJECTS"][disease]["TYPES"][umls_type]359 for umls_number in vector:360 output.write(str(umls_number) + "\t")361 output.write("1\n")362 # For case 2: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - disease363 # The result format: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - target - PREDICATE_3 - disease364 else:365 for entity_1 in KG[drug]["OBJECTS"]:366 if entity_1 in KG:367 if target in KG[entity_1]["OBJECTS"]:368 if target in KG:369 if disease in KG[target]["OBJECTS"]:370 vector = [0] * 873371 # drug part of vector372 for umls_type in KG[drug]["TYPES"]:373 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]374 # PREDICATE_1 part of vector375 for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:376 vector[133 + predicate_vector[predicate_1]] += \377 KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]378 # entity part of vector: This entity could be used as both subject and object, then all the umls_typy should be collected.379 # --1 entity part of vector: entity is the object380 for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:381 vector[133 + 52 + entity_vector[umls_type]] += \382 KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]383 # --2 entity part of vector: entity is the subject384 for umls_type in KG[entity_1]["TYPES"]:385 vector[133 + 52 + entity_vector[umls_type]] += KG[entity_1]["TYPES"][386 umls_type]387 # PREDICATE_2 part of vector388 for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:389 vector[133 + 52 + 133 + predicate_vector[predicate_2]] += \390 KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]391 # the REAL target part of vector: target could be subject or object392 # --1 target part of vector: target is subject393 for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:394 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \395 KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]396 # --2 target part of vector: target is object397 for umls_type in KG[target]["TYPES"]:398 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \399 KG[target]["TYPES"][umls_type]400 # PREDICATE_3 part of vector401 for predicate_3 in KG[target]["OBJECTS"][disease]["PREDICATES"]:402 vector[133 + 52 + 133 + 52 + 133 + predicate_vector[predicate_3]] += \403 KG[target]["OBJECTS"][disease]["PREDICATES"][predicate_3]404 # target part of vector: PREDICATE_3 - target - PREDICATE_3, the target is same as REAL target405 vector[406 133 + 52 + 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133 + 52 + 133] = vector[407 133 + 52 + 133 + 52:133 + 52 + 133 + 52 + 133]408 # PREDICATE_3 part of vector: the second PREDICATE_3 is same as the first PREDICATE_3409 vector[410 133 + 52 + 133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52 + 133 + 52] = vector[411 133 + 52 + 133 + 52 + 133:133 + 52 + 133 + 52 + 133 + 52]412 # disease part of vector413 for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:414 vector[415 133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[umls_type]] += \416 KG[target]["OBJECTS"][disease]["TYPES"][umls_type]417 for umls_number in vector:418 output.write(str(umls_number) + "\t")419 output.write("1\n")420 # For case 4: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease421 # The output format:drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease422 else:423 for entity_2 in KG[target]["OBJECTS"]:424 if entity_2 in KG:425 if disease in KG[entity_2]["OBJECTS"]:426 vector = [0] * 873427 # drug part of vector428 for umls_type in KG[drug]["TYPES"]:429 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]430 # PREDICATE_1 part of vector431 for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:432 vector[133 + predicate_vector[predicate_1]] += \433 KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]434 # entity_1 part of vector435 # --1 : entity_1 is object436 for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:437 vector[133 + 52 + entity_vector[umls_type]] += \438 KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]439 # --2 : entity_1 is subject440 for umls_type in KG[entity_1]["TYPES"]:441 vector[133 + 52 + entity_vector[umls_type]] += \442 KG[entity_1]["TYPES"][umls_type]443 # PREDICATE_2 part of vector444 for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:445 vector[133 + 52 + 133 + predicate_vector[predicate_2]] += \446 KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]447 # target part of vector448 # --1 : target is object449 for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:450 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \451 KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]452 # --2 : target is subject453 for umls_type in KG[target]["TYPES"]:454 vector[133 + 52 + 133 + 52 + entity_vector[umls_type]] += \455 KG[target]["TYPES"][umls_type]456 # PREDICATE_3 part of vector457 for predicate_3 in KG[target]["OBJECTS"][entity_2]["PREDICATES"]:458 vector[133 + 52 + 133 + 52 + 133 + predicate_vector[459 predicate_3]] += \460 KG[target]["OBJECTS"][entity_2]["PREDICATES"][predicate_3]461 # entity_2 part of vector462 # --1 : entity_2 is object463 for umls_type in KG[target]["OBJECTS"][entity_2]["TYPES"]:464 vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[465 umls_type]] += KG[target]["OBJECTS"][entity_2]["TYPES"][466 umls_type]467 # --1 : entity_2 is subject468 for umls_type in KG[entity_2]["TYPES"]:469 vector[133 + 52 + 133 + 52 + 133 + 52 + entity_vector[470 umls_type]] += KG[entity_2]["TYPES"][umls_type]471 # PREDICATE_4 part of vector472 for predicate_4 in KG[entity_2]["OBJECTS"][disease]["PREDICATES"]:473 vector[133 + 52 + 133 + 52 + 133 + 52 + 133 + predicate_vector[474 predicate_4]] += \475 KG[entity_2]["OBJECTS"][disease]["PREDICATES"][predicate_4]476 # disease part of vector477 for umls_type in KG[entity_2]["OBJECTS"][disease]["TYPES"]:478 vector[479 133 + 52 + 133 + 52 + 133 + 52 + 133 + 52 + entity_vector[480 umls_type]] += \481 KG[entity_2]["OBJECTS"][disease]["TYPES"][umls_type]482 for umls_number in vector:483 output.write(str(umls_number) + "\t")484 output.write("1\n")485 else:486 print("NOT FOUND " + drug + "\t" + target + "\t" + disease)487488489 def negative_dtd_cases(self):490 """491 negative_dtd_cases is used to construct negative drug-target-disease associations.492 """493 if os.path.exists(self.output_dir + "/KnowledgeGraph"):494 KG = pickle.load(open(self.output_dir + "/KnowledgeGraph", "rb"))495 else:496 constuct_KG = Construct_KG(self.predication_dir + "/predications.txt", self.output_dir + "/KnowledgeGraph")497 KG = constuct_KG.construct_KnowledgeGraph()498499 entity_set={}500 drug_type_set={}501 target_type_set={}502 disease_type_set={}503 output = open(self.output_dir + "/experimental_disease_target_drug_negative", "w+")504 print("Constructing the negative drug-target-disease cases ...")505 with open(self.output_dir + "/experimental_disease_target_drug", "r") as f:506 for line in tqdm(f, total=sum(1 for _ in open(self.output_dir + "/experimental_disease_target_drug", "r"))):507 sline=line.split("\t")508 drug=data_process.process_en(sline[2].split(":")[1].strip("\n"))509 disease=data_process.process_en(sline[0].split(":")[1].strip("\n"))510 target=data_process.process_en(sline[1].split(":")[1].strip("\n"))511 entity_set[drug]=1512 entity_set[target]=1513 entity_set[disease]=1514 if drug in KG:515 for umls_type in KG[drug]["TYPES"]:516 drug_type_set[umls_type]=1517 if target in KG:518 for umls_type in KG[target]["TYPES"]:519 target_type_set[umls_type]=1520 if disease in KG:521 for umls_type in KG[disease]["TYPES"]:522 disease_type_set[umls_type]=1523524 negative_examples_count=0525 entity_list=list(KG)526 selected_examples={}527 while negative_examples_count < 5000000:528 random_drug=random.choice(entity_list)529 random_target=random.choice(entity_list)530 random_disease=random.choice(entity_list)531 selected_example=random_drug+" "+random_target+" "+random_disease532 while selected_example in selected_examples:533 random_drug=random.choice(entity_list)534 random_target=random.choice(entity_list)535 random_disease=random.choice(entity_list)536 selected_example=random_drug+" "+random_target+" "+random_disease537538 if random_drug not in entity_set and random_target not in entity_set and random_disease not in entity_set:539 drug_umls_type_exist=False540 target_umls_type_exist=False541 disease_umls_type_exist=False542 for umls_type_of_drug in KG[random_drug]["TYPES"]:543 if umls_type_of_drug in drug_type_set:544 drug_umls_type_exist=True545 break546 for umls_type_of_target in KG[random_target]["TYPES"]:547 if umls_type_of_target in target_type_set:548 target_umls_type_exist=True549 break550 for umls_type_of_disease in KG[random_disease]["TYPES"]:551 if umls_type_of_disease in disease_type_set:552 disease_umls_type_exist=True553 if drug_umls_type_exist == target_umls_type_exist == disease_umls_type_exist == True:554 output.write("Disease:"+random_disease+"\tTarget:"+random_target+"\tDrug:"+random_drug+"\n")555 negative_examples_count += 1556557558 def negative_training_data(self):559 if os.path.exists(self.output_dir+"/KnowledgeGraph"):560 KG = pickle.load(open(self.output_dir+"/KnowledgeGraph", "rb"))561 else:562 constuct_KG= Construct_KG(self.predication_dir+"/predications.txt",self.output_dir+"/KnowledgeGraph")563 KG = constuct_KG.construct_KnowledgeGraph()564565 if os.path.exists(self.output_dir+"/predicate_vector") and os.path.exists(self.output_dir+"/entity_vector"):566 entity_vector = pickle.load(open(self.output_dir+"/entity_vector", "rb"))567 predicate_vector = pickle.load(open(self.output_dir+"/predicate_vector", "rb"))568 else:569 entity_vector, predicate_vector = self.UMLS_type_vector()570571 if not os.path.exists(self.output_dir + "/experimental_disease_target_drug_negative"):572 self.negative_dtd_cases()573574 output = open(self.output_dir + "/all_negative_data", "w+")575 print("Constructing the negative training data ...")576 with open(self.output_dir + "/experimental_disease_target_drug_negative", "r") as f:577 for line in tqdm(f, total=sum(1 for _ in open(self.output_dir + "/experimental_disease_target_drug_negative", "r"))):578 sline=line.split("\t")579 drug=sline[2].split(":")[1].strip("\n")580 disease=sline[0].split(":")[1].strip("\n")581 target=sline[1].split(":")[1].strip("\n")582 self.construct_training_negative_data_based_one_dtd(KG,entity_vector,predicate_vector,drug,target,disease,output)583 output.close()584585 def construct_training_negative_data_based_one_dtd(self,KG,entity_vector,predicate_vector,drug,target,disease,output):586 ##587 # The function is similar to construct_training_positive_data_based_one_dtd function.588 # The struction of KGï¼589 # KG={590 # subject:591 # {"TYPES":{sysn:2,horm:1,htrf:3}592 # object_1:{593 # "TYPES":{}594 # predicate_1:3,595 # predicate_2:4596 # }}}}}597 ##598 ##599 if drug in KG:600 # For case 1: drug - PREDICATE_1 - target - PREDICATE_2 - disease601 # drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease602 if target in KG[drug]["OBJECTS"]:603 if target in KG:604 if disease in KG[target]["OBJECTS"]:605 ## construct vector for drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - target - PREDICATE_2 - disease606 # drug part of vector607 vector=[0]*873608 for umls_type in KG[drug]["TYPES"]:609 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]610 # PREDICATE_1 part of vector611 for predicate in KG[drug]["OBJECTS"][target]["PREDICATES"]:612 vector[133+predicate_vector[predicate]] += KG[drug]["OBJECTS"][target]["PREDICATES"][predicate]613 # the REAL target part of vector: in this part, the REAL target is both object (for drug) and subject ï¼for diseaseï¼,so all the umls typs of target(as subject and object) should be collected in vector614 # -- 1 the REAL target part of vector: target as object615 for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:616 vector[133+52+133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][target]["TYPES"][umls_type]617 # -- 2 the REAL target part of vector: target as subject618 for umls_type in KG[target]["TYPES"]:619 vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]620 # target part of vector621 vector[133+52:133+52+133] = vector[133+52+133+52:133+52+133+52+133]622 # PREDICATE_1 part of vector623 vector[133+52+133:133+52+133+52] = vector[133:133+52]624 # PREDICATE_2 part of vector625 for predicate in KG[target]["OBJECTS"][disease]["PREDICATES"]:626 vector[133+52+133+52+133+predicate_vector[predicate]] += KG[target]["OBJECTS"][disease]["PREDICATES"][predicate]627 # target part of vector:PREDICATE_2 - target - PREDICATE_2628 vector[133+52+133+52+133+52:133+52+133+52+133+52+133] = vector[133+52+133+52:133+52+133+52+133]629 # PREDICATE_2 part of vector630 vector[133+52+133+52+133+52+133:133+52+133+52+133+52+133+52] = vector[133+52+133+52+133:133+52+133+52+133+52]631 # disease part of vector632 for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:633 vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][disease]["TYPES"][umls_type]634 for umls_number in vector:635 output.write(str(umls_number)+"\t")636 output.write("0\n")637638 # For case 3: drug - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease639 # drug - PREDICATE_1 - target - PREDICATE_1 - target - PREDICATE_2 - entity - PREDICATE_3 - disease640 else:641 for entity in KG[target]["OBJECTS"]:642 if entity in KG:643 if disease in KG[entity]["OBJECTS"]:644 vector=[0]*873645 # drug part of vector646 for umls_type in KG[drug]["TYPES"]:647 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]648 # PREDICATE_1 part of vector649 for predicate_1 in KG[drug]["OBJECTS"][target]["PREDICATES"]:650 vector[133+predicate_vector[predicate_1]] += KG[drug]["OBJECTS"][target]["PREDICATES"][predicate_1]651 # the REAL target of vector652 # --1: target is object653 for umls_type in KG[drug]["OBJECTS"][target]["TYPES"]:654 vector[133+52+133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][target]["TYPES"][umls_type]655 # --2: target is subject656 for umls_type in KG[target]["TYPES"]:657 vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]658 # target of vector: PREDICATE_1 - target - PREDICATE_1659 vector[133+52:133+52+133] = vector[133+52+133+52:133+52+133+52+133]660 # PREDICATE_1 of vector661 vector[133+52+133:133+52+133+52] = vector[133:133+52]662 # PREDICATE_2 of vector663 for predicate_2 in KG[target]["OBJECTS"][entity]["PREDICATES"]:664 vector[133+52+133+52+133+predicate_vector[predicate_2]] += KG[target]["OBJECTS"][entity]["PREDICATES"][predicate_2]665 # entity of vector: PREDICATE_2 - entity - PREDICATE_3666 # -- 1 : entity is object667 for umls_type in KG[target]["OBJECTS"][entity]["TYPES"]:668 vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][entity]["TYPES"][umls_type]669 # --2 : entity is subject670 for umls_type in KG[entity]["TYPES"]:671 vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity]["TYPES"][umls_type]672 # PREDICATE_3 of vector673 for predicate_3 in KG[entity]["OBJECTS"][disease]["PREDICATES"]:674 vector[133+52+133+52+133+52+133+predicate_vector[predicate_3]] += KG[entity]["OBJECTS"][disease]["PREDICATES"][predicate_3]675 # disease of vector676 for umls_type in KG[entity]["OBJECTS"][disease]["TYPES"]:677 vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity]["OBJECTS"][disease]["TYPES"][umls_type]678 for umls_number in vector:679 output.write(str(umls_number)+"\t")680 output.write("0\n")681 if len(vector) > 874:682 print("case 3\t"+str(len(vector)))683 # case 2: drug - PREDICATE_1 - entity - PREDICATE_2 - target - PREDICATE_3 - disease684 else:685 for entity_1 in KG[drug]["OBJECTS"]:686 if entity_1 in KG:687 if target in KG[entity_1]["OBJECTS"]:688 if target in KG:689 if disease in KG[target]["OBJECTS"]:690 vector=[0]*873691 # drug part of vector692 for umls_type in KG[drug]["TYPES"]:693 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]694 # PREDICATE_1 part of vector695 for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:696 vector[133+predicate_vector[predicate_1]] += KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]697 # entity part of vector698 # --1 entity part of vector: entity is object699 for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:700 vector[133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]701 # --2 entity part of vector: entity is subject702 for umls_type in KG[entity_1]["TYPES"]:703 vector[133+52+entity_vector[umls_type]] += KG[entity_1]["TYPES"][umls_type]704 # PREDICATE_2 part of vector705 for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:706 vector[133+52+133+predicate_vector[predicate_2]] += KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]707 # the REAL target part of vector708 # --1 target part of vector: target is subject709 for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:710 vector[133+52+133+52+entity_vector[umls_type]] += KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]711 # --2 target part of vector: target is object712 for umls_type in KG[target]["TYPES"]:713 vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]714 # PREDICATE_3 part of vector715 for predicate_3 in KG[target]["OBJECTS"][disease]["PREDICATES"]:716 vector[133+52+133+52+133+predicate_vector[predicate_3]] += KG[target]["OBJECTS"][disease]["PREDICATES"][predicate_3]717 # target part of vector: PREDICATE_3 - target - PREDICATE_3718 vector[133+52+133+52+133+52:133+52+133+52+133+52+133]=vector[133+52+133+52:133+52+133+52+133]719 # PREDICATE_3 part of vector:è¿æ¯ç¬¬äºä¸ªPREDICATE_3720 vector[133+52+133+52+133+52+133:133+52+133+52+133+52+133+52] = vector[133+52+133+52+133:133+52+133+52+133+52]721 # disease part of vector722 for umls_type in KG[target]["OBJECTS"][disease]["TYPES"]:723 vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][disease]["TYPES"][umls_type]724 for umls_number in vector:725 output.write(str(umls_number)+"\t")726 output.write("0\n")727 if len(vector) > 874:728 print("case 2\t"+str(len(vector)))729 # For case 4: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease730 # Example: drug - PREDICATE_1 - entity_1 - PREDICATE_2 - target- PREDICATE_3 - entity_2 - PREDICATE_4 - disease731 else:732 for entity_2 in KG[target]["OBJECTS"]:733 if entity_2 in KG:734 if disease in KG[entity_2]["OBJECTS"]:735 vector=[0]*873736 # drug part of vector737 for umls_type in KG[drug]["TYPES"]:738 vector[entity_vector[umls_type]] += KG[drug]["TYPES"][umls_type]739 # PREDICATE_1 part of vector740 for predicate_1 in KG[drug]["OBJECTS"][entity_1]["PREDICATES"]:741 vector[133+predicate_vector[predicate_1]] += KG[drug]["OBJECTS"][entity_1]["PREDICATES"][predicate_1]742 # entity_1 part of vector743 # --1 : entity_1 is object744 for umls_type in KG[drug]["OBJECTS"][entity_1]["TYPES"]:745 vector[133+52+entity_vector[umls_type]] += KG[drug]["OBJECTS"][entity_1]["TYPES"][umls_type]746 # --2 : entity_1 is subject747 for umls_type in KG[entity_1]["TYPES"]:748 vector[133+52+entity_vector[umls_type]] += KG[entity_1]["TYPES"][umls_type]749 # PREDICATE_2 part of vector750 for predicate_2 in KG[entity_1]["OBJECTS"][target]["PREDICATES"]:751 vector[133+52+133+predicate_vector[predicate_2]] += KG[entity_1]["OBJECTS"][target]["PREDICATES"][predicate_2]752 # target part of vector753 # --1 : target is object754 for umls_type in KG[entity_1]["OBJECTS"][target]["TYPES"]:755 vector[133+52+133+52+entity_vector[umls_type]] += KG[entity_1]["OBJECTS"][target]["TYPES"][umls_type]756 # --2 : target is subject757 for umls_type in KG[target]["TYPES"]:758 vector[133+52+133+52+entity_vector[umls_type]] += KG[target]["TYPES"][umls_type]759 # PREDICATE_3 part of vector760 for predicate_3 in KG[target]["OBJECTS"][entity_2]["PREDICATES"]:761 vector[133+52+133+52+133+predicate_vector[predicate_3]] += KG[target]["OBJECTS"][entity_2]["PREDICATES"][predicate_3]762 # entity_2 part of vector763 # --1 : entity_2 is object764 for umls_type in KG[target]["OBJECTS"][entity_2]["TYPES"]:765 vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[target]["OBJECTS"][entity_2]["TYPES"][umls_type]766 # --1 : entity_2 is subject767 for umls_type in KG[entity_2]["TYPES"]:768 vector[133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity_2]["TYPES"][umls_type]769 # PREDICATE_4 part of vector770 for predicate_4 in KG[entity_2]["OBJECTS"][disease]["PREDICATES"]:771 vector[133+52+133+52+133+52+133+predicate_vector[predicate_4]] += KG[entity_2]["OBJECTS"][disease]["PREDICATES"][predicate_4]772 # disease part of vector773 for umls_type in KG[entity_2]["OBJECTS"][disease]["TYPES"]:774 vector[133+52+133+52+133+52+133+52+entity_vector[umls_type]] += KG[entity_2]["OBJECTS"][disease]["TYPES"][umls_type]775 for umls_number in vector:776 output.write(str(umls_number)+"\t")777 output.write("0\n")778 if len(vector) > 874:779 print("case 4\t"+str(len(vector)))780781 def construct_all_data(self):782 if not os.path.exists(self.output_dir + "/predicate_vector"):783 self.UMLS_type_vector()784785 if not os.path.exists(self.output_dir + "/drug_synonyms"):786 self.drug_syndroms()787788 if not os.path.exists(self.output_dir + "/disease_targets"):789 self.disease_target()790791 if not os.path.exists(self.output_dir + "/drug_disease"):792 self.drug_disease()793794 if not os.path.exists(self.output_dir + "/experimental_disease_target_drug"):795 self.positive_dtd_cases()796797 if not os.path.exists(self.output_dir + "/all_positive_data"):798 self.positive_training_data()799800 if not os.path.exists(self.output_dir + "/experimental_disease_target_drug_negative"):801 self.negative_dtd_cases()802803 if not os.path.exists(self.output_dir + "/all_negative_data"):804 self.negative_training_data()805806if __name__ == "__main__":807 predication_dir = "./data/SemmedDB"808 TTD_dir = "./data/TTD"809 processed_dir = "./data/processed"810 s=Extract_Data(predication_dir,TTD_dir,processed_dir)811 s.construct_all_data()
...
Statement.py
Source:Statement.py
1from Predicate import *2import copy34class Statement():5 """6 defines one FOL statement and the operations allowed on them7 member variables include:8 predicate_set : set of 'Predicate' objects which are 9 connected via OR operator in a statement10 statement_string : string representation of statement11 """12 def __init__(self, statement_string=None):13 if statement_string:14 predicate_list = statement_string.split('|')15 predicate_list = map(lambda x:Predicate(x), predicate_list)16 self.predicate_set = set(predicate_list)17 statement_string_list = map(lambda x: x.predicate_string, self.predicate_set)18 self.statement_string = '|'.join(statement_string_list)19 else:20 self.statement_string = None21 self.predicate_set = None2223 def init_from_string(self, statement_string):24 """25 initializes a Statement object from statement string26 """27 predicate_list = statement_string.split('|')28 predicate_list = map(lambda x:Predicate(x), predicate_list)29 self.predicate_set = set(predicate_list)30 statement_string_list = map(lambda x: x.predicate_string, self.predicate_set)31 self.statement_string = '|'.join(statement_string_list)3233 def init_from_predicate_set(self, predicate_set):34 """35 initializes Statement object from a predicate set36 """37 self.predicate_set = predicate_set38 statement_string_list = map(lambda x: x.predicate_string, predicate_set)39 self.statement_string = '|'.join(statement_string_list)4041 def __str__(self):42 return self.statement_string4344 def __eq__(self, statement):45 return self.predicate_set==statement.predicate_set4647 def __hash__(self):48 return hash((''.join(sorted(self.statement_string))))4950 def exists_in_KB(self, KB):51 '''52 returns true if cnf_statement already exists53 in the KNOWLEDGE_BASE else False54 '''55 if self in KB:56 return True57 return False5859 def add_statement_to_KB(self, KB, KB_HASH):60 """61 adds a statement in a knowledge base and updates the Hash62 """63 KB.add(self)64 for predicate in self.predicate_set:65 if predicate.name in KB_HASH:66 KB_HASH[predicate.name].add(self)67 else:68 KB_HASH[predicate.name] = set([self])6970 def resolve(self, statement):71 '''72 Resolves two statements73 returns False if a contradiction is encountered when resolved otherwise,74 returns set of new infered statements(empty if no statements infered)75 '''76 infered_statements = set()77 for predicate_1 in self.predicate_set:78 for predicate_2 in statement.predicate_set:79 unification = False80 if (predicate_1.negative ^ predicate_2.negative) and predicate_1.name==predicate_2.name:81 unification = predicate_1.unify_with_predicate(predicate_2) # returns substitution if statements can unify else false82 if unification == False:83 continue84 else:85 rest_statement_1 = copy.deepcopy(self.predicate_set)86 rest_statement_2 = copy.deepcopy(statement.predicate_set)87 rest_statement_1 = filter(lambda x: False if x == predicate_1 else True, rest_statement_1)88 rest_statement_2 = filter(lambda x: False if x == predicate_2 else True, rest_statement_2)89 if not rest_statement_1 and not rest_statement_2: # contradiction found90 return False91 rest_statement_1 = map(lambda x: x.substitute(unification), rest_statement_1)92 rest_statement_2 = map(lambda x: x.substitute(unification), rest_statement_2)93 new_statement = Statement()94 new_statement.init_from_predicate_set(set(rest_statement_1+rest_statement_2))95 infered_statements.add(new_statement)96 return infered_statements9798 def get_resolving_clauses(self, KB_HASH):99 """100 returns a set of possible statements101 the self statement object can resolve with102 """103 resolving_clauses = set()104 for predicate in self.predicate_set:105 if predicate.name in KB_HASH:106 resolving_clauses = resolving_clauses.union(KB_HASH[predicate.name])
...
test_multisplitby.py
Source:test_multisplitby.py
...6def _consume(iterable: Iterable):7 def yield_all():8 yield from iterable9 return list(yield_all())10def predicate_1(x: int) -> bool:11 """Return true if the integer is 3."""12 return x == 313def predicate_2(x: int) -> bool:14 """Return true if the integer is 6."""15 return x == 616def predicate_3(x: int) -> bool:17 """Return true if the integer is 8."""18 return x == 819class TestIter(unittest.TestCase):20 """Test :mod:`multisplitby`."""21 def test_split_by_iterable_is_empty(self):22 """Test when an empty iterable is given."""23 integers = []24 predicates = [predicate_1, predicate_2]...
Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation test minutes FREE!!