Best Python code snippet using assertpy_python
main.py
Source:main.py
import sys
import os
import re
import logging

from spacy.lang.en import English
from spacy.pipeline import EntityRuler

log = logging.getLogger(__name__)

# Entity labels attached to the matches produced by the entity ruler.
TRIAL_LABEL = "TRIAL_REGISTRATION_ID"
DATA_AVAILABILITY_LABEL_URL = "DATA_AVAILABILITY_OPEN_URL"
DATA_AVAILABILITY_LABEL_SUPPL = "DATA_AVAILABILITY_OPEN_SUPPLEMENT"
DATA_AVAILABILITY_LABEL_CLOSED = "DATA_AVAILABILITY_CLOSED"

# ---------------------------------------------------------------------------
# Reusable token specs. The pattern lists below repeat the same optional
# tokens many times; naming them once keeps the long patterns readable.
# The dicts are shared (not copied) between patterns and are never mutated
# by this module.
# ---------------------------------------------------------------------------
_OPT_WORD = {"IS_ALPHA": True, "OP": "?"}
_ANY_WORDS = {"IS_ALPHA": True, "OP": "*"}
_OPT_COMMA = {"ORTH": ",", "OP": "?"}
_OPT_HYPHEN = {"ORTH": "-", "OP": "?"}
_OPT_COLON = {"ORTH": ":", "OP": "?"}
_OPT_OPEN_PAREN = {"ORTH": "(", "OP": "?"}
_OPT_CLOSE_PAREN = {"ORTH": ")", "OP": "?"}
_OPT_SETS = {"ORTH": "sets", "OP": "?"}
_OPT_AND = {'TEXT': {"REGEX": "^(and|&)$"}, "OP": "?"}
_OPT_POSSESSIVE = {'TEXT': {"REGEX": "^[\u2019']s$"}, "OP": "?"}
_ANY_DASH_OR_DIGIT = {'TEXT': {"REGEX": "^[-0-9\u2010-\u2013]$"}, "OP": "*"}
# Optional separator between a registry prefix and its number:
# ASCII hyphen, a Unicode dash in U+2010..U+2013, or a slash.
_OPT_SEP = {'TEXT': {"REGEX": "[-\u2010-\u2013/]"}, "OP": "?"}
# "#", "No"/"no", "Num"/"num", "Number"/"number"
_NUMBER_WORD = {'TEXT': {"REGEX": "^(#|[Nn]o|[Nn]um(ber)?)$"}}

# Nouns naming the shared artifact ("code", "data", "scripts", ...).
# The original spelled the scripts alternative "[Ss]scripts?", which can only
# match the non-word "sscript(s)"; it is corrected here.
_ARTIFACT_REGEX = "^([Cc]odes?|[Ff]iles|[Dd]ata(sets)?|[Mm]odels?|[Ss]cripts?|[Ss]oftware|[Ii]nformation)$"
_ARTIFACT = {'TEXT': {"REGEX": _ARTIFACT_REGEX}}
_OPT_ARTIFACT = {'TEXT': {"REGEX": _ARTIFACT_REGEX}, "OP": "?"}
# Optional glue after an artifact noun: "sets" (for a split "data sets"), ",", "and"/"&".
_ARTIFACT_JOINER = [_OPT_SETS, _OPT_COMMA, _OPT_AND]

# Optional aside after the availability verb: "," "(" followed by up to four
# (word, ",", ")") groups, every token optional.
_PAREN_ASIDE = (
    [_OPT_COMMA, _OPT_OPEN_PAREN]
    + [_OPT_WORD, _OPT_COMMA, _OPT_CLOSE_PAREN] * 4
)

# Shared start of every data-availability pattern:
# [qualifier] [filler words] ARTIFACT [and ARTIFACT [and ARTIFACT]] [filler] AUX [be|been] [word] [word]
DATA_AVAILABILITY_ROOT = (
    [
        {'TEXT': {
            "REGEX": "^([Ss]upplementary|[Ss]upporting|[Ss]ource|[Cc]omputer|[Pp]rogram|[Ee]xperiment(al)?|[Aa]nonymi[sz]ed)$"},
         "OP": "?"},
    ]
    + [_OPT_COMMA, _OPT_WORD, _OPT_HYPHEN] * 2
    + [_OPT_WORD] * 3
    # One artifact noun is required, up to two more may follow.
    + [_ARTIFACT] + _ARTIFACT_JOINER
    + ([_OPT_ARTIFACT] + _ARTIFACT_JOINER) * 2
    # Up to thirteen optional words, each pair separated by an optional comma.
    + [_OPT_WORD, _OPT_COMMA] * 12
    + [_OPT_WORD]
    + [
        {'TEXT': {"REGEX": "^(is|are|can|may|will|has|have)$"}},
        {'TEXT': {"REGEX": "^(be|been)$"}, "OP": "?"},
        _OPT_WORD,
        _OPT_WORD,
    ]
)

# "... available/found/deposited/provided at|on|from|... <URL>"
DATA_AVAILABILITY_PATTERN_URL = (
    DATA_AVAILABILITY_ROOT
    + [{'TEXT': {"REGEX": "^(available|found|deposited|provided)$"}}]
    + _PAREN_ASIDE
    + [
        {'TEXT': {"REGEX": "^(at|on|from|in|to|without)$"}},
        {"ORTH": "restriction", "OP": "?"},
        _OPT_COLON,
        _ANY_WORDS,
        _OPT_POSSESSIVE,
        _ANY_WORDS,
        _ANY_DASH_OR_DIGIT,
        _ANY_WORDS,
        _OPT_OPEN_PAREN,
        {'TEXT': {"REGEX": "^(https?://.+|goo.gl/.+)$"}},
        _OPT_CLOSE_PAREN,
    ]
)

# "... available/requested/provided by|on|from [word] [word] request|author(s)"
DATA_AVAILABILITY_PATTERN_CLOSED = (
    DATA_AVAILABILITY_ROOT
    + [{'TEXT': {"REGEX": "^(available|requested|provided)$"}}]
    + _PAREN_ASIDE
    + [
        {'TEXT': {"REGEX": "^(by|on|from)$"}},
        _OPT_WORD,
        _OPT_WORD,
        {'TEXT': {"REGEX": "^(request|authors?)$"}},
    ]
)

# "... available/found/deposited/provided at|on|in ... website/online/supplement/appendix/..."
DATA_AVAILABILITY_PATTERN_SUPPLEMENT = (
    DATA_AVAILABILITY_ROOT
    + [{'TEXT': {"REGEX": "^(available|found|deposited|provided)$"}}]
    + _PAREN_ASIDE
    + [
        {'TEXT': {"REGEX": "^(at|on|in)$"}},
        _ANY_WORDS,
        _OPT_POSSESSIVE,
        {'TEXT': {"REGEX": "^([Ww]eb(site)?|[Oo]nline|[Ss]upplements?|[Aa]ppendix|[Aa]ppendices|[Aa]ccession)$"}},
        _OPT_WORD,
        _ANY_DASH_OR_DIGIT,
        _OPT_WORD,
    ]
)

# ---------------------------------------------------------------------------
# Trial registration identifiers.
# Most registries get three variants: (1) the whole ID in one token,
# (2) prefix / optional separator / number as separate tokens, and
# (3) prefix followed by "#"/"No"/"Number", an optional colon and the number.
# TODO: We can probably combine some of these patterns, e.g. NCT|DRKS|ISRCTN
# ---------------------------------------------------------------------------

# ClinicalTrials.gov
NCT_PATTERN_1 = [{'TEXT': {"REGEX": "^NCT[-\u2010-\u2013/]?[0-9]{8}(?=[.,:;)]|$)"}}]
NCT_PATTERN_2 = [{'ORTH': "NCT"},
                 _OPT_SEP,
                 {'TEXT': {"REGEX": "^[0-9]{8}(?=[.,:;)]|$)"}}]
NCT_PATTERN_3 = [{'ORTH': "NCT"},
                 _NUMBER_WORD,
                 _OPT_COLON,
                 {'TEXT': {"REGEX": "^[0-9]{8}(?=[.,:;)]|$)"}, "OP": "?"}]

# ANZCTR:
ANZ_PATTERN_1 = [{'TEXT': {"REGEX": "^ACTRN[-\u2010-\u2013/]?[0-9]{14}(?=[.,:;)]|$)"}}]
ANZ_PATTERN_2 = [{'ORTH': "ACTRN", "OP": "?"}, {'TEXT': {"REGEX": "^126[0-9]{11}(?=[.,:;)]|$)"}}]
ANZ_PATTERN_3 = [{'ORTH': "ACTRN"},
                 _NUMBER_WORD,
                 _OPT_COLON,
                 {'TEXT': {"REGEX": "^[0-9]{14}(?=[.,:;)]|$)"}, "OP": "?"}]

# Universal trial number
# U1111-1254-7316
UTN_PATTERN = [{'TEXT': {"REGEX": "^U[0-9]{4}[-\u2010-\u2013/]?[0-9]{4}[-\u2010-\u2013/]?[0-9]{4}(?=[.,:;)]|$)"}}]

# DRKS
# DRKS00000002
DRKS_PATTERN_1 = [{'TEXT': {"REGEX": "^DRKS[-\u2010-\u2013/]?[0-9]{8}(?=[.,:;)]|$)"}}]
DRKS_PATTERN_2 = [{'ORTH': "DRKS"},
                  _OPT_SEP,
                  {'TEXT': {"REGEX": "^[0-9]{8}(?=[.,:;)]|$)"}}]
DRKS_PATTERN_3 = [{'ORTH': "DRKS"},
                  _NUMBER_WORD,
                  _OPT_COLON,
                  {'TEXT': {"REGEX": "^[0-9]{8}(?=[.,:;)]|$)"}, "OP": "?"}]

# ISRCTN
# ISRCTN14702259
ISRCTN_PATTERN_1 = [{'TEXT': {"REGEX": "^ISRCTN[-\u2010-\u2013/]?[0-9]{8}(?=[.,:;)]|$)"}}]
ISRCTN_PATTERN_2 = [{'ORTH': "ISRCTN"},
                    _OPT_SEP,
                    {'TEXT': {"REGEX": "^[0-9]{8}(?=[.,:;)]|$)"}}]
ISRCTN_PATTERN_3 = [{'ORTH': "ISRCTN"},
                    _NUMBER_WORD,
                    _OPT_COLON,
                    {'TEXT': {"REGEX": "^[0-9]{8}(?=[.,:;)]|$)"}, "OP": "?"}]

# CRD
# CRD42020179519
# https://www.crd.york.ac.uk/prospero/display_record.php?RecordID=179519
CRD_PATTERN_1 = [{'TEXT': {"REGEX": "^CRD[-\u2010-\u2013/]?[0-9]{11}(?=[.,:;)]|$)"}}]
CRD_PATTERN_2 = [{'ORTH': "CRD"},
                 _OPT_SEP,
                 {'TEXT': {"REGEX": "^[0-9]{11}(?=[.,:;)]|$)"}}]
CRD_PATTERN_3 = [{'ORTH': "CRD"},
                 _NUMBER_WORD,
                 _OPT_COLON,
                 {'TEXT': {"REGEX": "^[0-9]{11}(?=[.,:;)]|$)"}, "OP": "?"}]

# JPRN
# https://apps.who.int/trialsearch/Trial2.aspx?TrialID=JPRN-UMIN000040928
JPRN_PATTERN_1 = [{'TEXT': {"REGEX": "^JPRN[-\u2010-\u2013/]?UMIN[-\u2010-\u2013/]?[0-9]{9}(?=[.,:;)]|$)"}}]
JPRN_PATTERN_2 = [{'ORTH': "JPRN"},
                  _OPT_SEP,
                  {'TEXT': {"REGEX": "^UMIN[-\u2010-\u2013/]?[0-9]{9}(?=[.,:;)]|$)"}}]
JPRN_PATTERN_3 = [{'ORTH': "JPRN"},
                  _OPT_SEP,
                  {'ORTH': "UMIN"},
                  _OPT_SEP,
                  {'TEXT': {"REGEX": "^[0-9]{9}(?=[.,:;)]|$)"}}]
JPRN_PATTERN_4 = [{'ORTH': "JPRN"},
                  _NUMBER_WORD,
                  _OPT_COLON,
                  {'TEXT': {"REGEX": "^[0-9]{9}(?=[.,:;)]|$)"}, "OP": "?"}]

# ChiCTR
# http://www.chictr.org.cn/showprojen.aspx?proj=57931
CHICTR_PATTERN_1 = [{'TEXT': {"REGEX": "^ChiCTR[-\u2010-\u2013/]?(IIR[-\u2010-\u2013/]?)?[0-9]{8,10}(?=[.,:;)]|$)"}}]
CHICTR_PATTERN_2 = [{'ORTH': "ChiCTR"},
                    _OPT_SEP,
                    {'TEXT': {"REGEX": "^IIR[-\u2010-\u2013/]?[0-9]{8,10}(?=[.,:;)]|$)"}}]
CHICTR_PATTERN_3 = [{'ORTH': "ChiCTR"},
                    _OPT_SEP,
                    {'ORTH': "IIR", "OP": "?"},
                    _OPT_SEP,
                    {'TEXT': {"REGEX": "^[0-9]{8,10}(?=[.,:;)]|$)"}}]
CHICTR_PATTERN_4 = [{'ORTH': "ChiCTR"},
                    _NUMBER_WORD,
                    _OPT_COLON,
                    {'TEXT': {"REGEX": "^[0-9]{8,10}(?=[.,:;)]|$)"}, "OP": "?"}]

# PACTR
# PACTR202008685699453
# https://apps.who.int/trialsearch/Trial2.aspx?TrialID=PACTR202008685699453
PACTR_PATTERN_1 = [{'TEXT': {"REGEX": "^PACTR[-\u2010-\u2013/]?[0-9]{15}(?=[.,:;)]|$)"}}]
PACTR_PATTERN_2 = [{'ORTH': "PACTR"},
                   _OPT_SEP,
                   {'TEXT': {"REGEX": "^[0-9]{15}(?=[.,:;)]|$)"}}]
PACTR_PATTERN_3 = [{'ORTH': "PACTR"},
                   _NUMBER_WORD,
                   _OPT_COLON,
                   {'TEXT': {"REGEX": "^[0-9]{15}(?=[.,:;)]|$)"}, "OP": "?"}]

# KCT
# KCT0005285
# https://apps.who.int/trialsearch/Trial2.aspx?TrialID=KCT0005285
# The first two KCT regexes originally ended in "(?=[.,:;)]|$)$"; the extra
# trailing "$" made the punctuation lookahead pointless and disagreed with
# every other registry pattern, so it has been dropped.
KCT_PATTERN_1 = [{'TEXT': {"REGEX": "^KCT[-\u2010-\u2013/]?[0-9]{7}(?=[.,:;)]|$)"}}]
KCT_PATTERN_2 = [{'ORTH': "KCT"},
                 _OPT_SEP,
                 {'TEXT': {"REGEX": "^[0-9]{7}(?=[.,:;)]|$)"}}]
KCT_PATTERN_3 = [{'ORTH': "KCT"},
                 _NUMBER_WORD,
                 _OPT_COLON,
                 {'TEXT': {"REGEX": "^[0-9]{7}(?=[.,:;)]|$)"}, "OP": "?"}]

# https://aspredicted.org/
ASP_PATTERN = [{'TEXT': {"REGEX": "^https?://aspredicted.org/.+$"}}]

# EudraCT
# https://www.clinicaltrialsregister.eu/ctr-search/search?query=covid-19
EUDRACT_PATTERN_1 = [{'TEXT': {"REGEX": "^EudraCT$"}},
                     _NUMBER_WORD,
                     _OPT_COLON,
                     {'TEXT': {"REGEX": "^[0-9]{4}$"}},
                     _OPT_SEP,
                     {'TEXT': {"REGEX": "^[0-9]{6}$"}},
                     _OPT_SEP,
                     {'TEXT': {"REGEX": "^[0-9]{2}$"}}]
EUDRACT_PATTERN_2 = [{'TEXT': {"REGEX": "^EudraCT#[0-9]{4}$"}},
                     _OPT_SEP,
                     {'TEXT': {"REGEX": "^[0-9]{6}$"}},
                     _OPT_SEP,
                     {'TEXT': {"REGEX": "^[0-9]{2}$"}}]

# Generic "(pre)registered ... at|from|: <URL>" statement.
GENERAL_PREREG_PATTERN = [{'TEXT': {"REGEX": "^[Pp]re$"}, "OP": "?"},
                          _OPT_SEP,
                          {'TEXT': {"REGEX": "^([Pp]re)?[Rr]egist(ered|ration)$"}},
                          _OPT_WORD,
                          _OPT_WORD,
                          _OPT_WORD,
                          {'TEXT': {"REGEX": "^(at|from|:)$"}},
                          _OPT_COLON,
                          {'TEXT': {"REGEX": "^https?://.+$"}}]


def file_to_text(file_path):
    """Return the contents of *file_path* decoded to ``str``.

    The file is read once as bytes. Decoding is attempted as UTF-8 (a leading
    BOM is stripped by ``utf-8-sig``); if the bytes are not valid UTF-8 they
    are decoded as Latin-1, which accepts every byte sequence.

    The original implementation called ``f.read()`` again for each fallback,
    but the first read had already consumed the file, so every fallback
    decoded an empty byte string and the function returned ``''``.
    """
    with open(file_path, 'rb') as f:
        raw = f.read()
    try:
        return raw.decode('utf-8-sig')
    except UnicodeDecodeError as e:
        log.warning("%s: Text encoding problem with %s, decoding as latin-1", e, file_path)
        return raw.decode('latin-1')


def main(file_path_or_input_text=None):
    """Find trial-registration and data-availability statements in a text.

    Args:
        file_path_or_input_text: Either the path of an existing file, whose
            decoded contents are analysed, or the text to analyse itself.
            ``None`` (the default) is treated as empty text; previously it
            raised ``TypeError`` inside ``os.path.isfile``.

    Returns:
        A list with one dict per matched entity, holding the keys
        ``'sentence'`` (the sentence object yielded by ``doc.sents``),
        ``'entity'`` (the matched text) and ``'label'`` (the entity label).

    Side effects:
        Writes the ruler's patterns to ``./patterns.jsonl`` and prints every
        match to standard output.
    """
    nlp = English()
    ruler = nlp.add_pipe("entity_ruler")

    trial_patterns = [
        NCT_PATTERN_1, NCT_PATTERN_2, NCT_PATTERN_3,
        ANZ_PATTERN_1, ANZ_PATTERN_2, ANZ_PATTERN_3,
        UTN_PATTERN,
        DRKS_PATTERN_1, DRKS_PATTERN_2, DRKS_PATTERN_3,
        ISRCTN_PATTERN_1, ISRCTN_PATTERN_2, ISRCTN_PATTERN_3,
        CRD_PATTERN_1, CRD_PATTERN_2, CRD_PATTERN_3,
        JPRN_PATTERN_1, JPRN_PATTERN_2, JPRN_PATTERN_3, JPRN_PATTERN_4,
        CHICTR_PATTERN_1, CHICTR_PATTERN_2, CHICTR_PATTERN_3, CHICTR_PATTERN_4,
        PACTR_PATTERN_1, PACTR_PATTERN_2, PACTR_PATTERN_3,
        KCT_PATTERN_1, KCT_PATTERN_2, KCT_PATTERN_3,
        ASP_PATTERN,
        EUDRACT_PATTERN_1, EUDRACT_PATTERN_2,
        GENERAL_PREREG_PATTERN,
    ]
    # Same registration order as before: all trial patterns, then the three
    # data-availability patterns.
    patterns = [{"label": TRIAL_LABEL, "pattern": pattern} for pattern in trial_patterns]
    patterns += [
        {"label": DATA_AVAILABILITY_LABEL_URL, "pattern": DATA_AVAILABILITY_PATTERN_URL},
        {"label": DATA_AVAILABILITY_LABEL_CLOSED, "pattern": DATA_AVAILABILITY_PATTERN_CLOSED},
        {"label": DATA_AVAILABILITY_LABEL_SUPPL, "pattern": DATA_AVAILABILITY_PATTERN_SUPPLEMENT},
    ]
    ruler.add_patterns(patterns)
    nlp.add_pipe("sentencizer")

    source = "" if file_path_or_input_text is None else file_path_or_input_text
    if os.path.isfile(source):
        doc_text = file_to_text(source)
    else:
        doc_text = source
    doc = nlp(doc_text)

    # Output patterns to disk
    ruler.to_disk("./patterns.jsonl")

    entity_data = []
    for sent in doc.sents:
        for ent in sent.ents:
            if ent:
                entity_data.append({'sentence': sent, 'entity': ent.text, 'label': ent.label_})
                print(sent, ent.text, ent.label_)
    return entity_data


if __name__ == "__main__":
    args = sys.argv
    # ... (the remainder of the script entry point is cut off in this excerpt)
conditionals.py
Source:conditionals.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


class Conditionals(object):
    """Matcher-based component that records sentences containing conditionals.

    Every sentence in which one of the token patterns below matches is
    appended to ``doc._.opinion`` as a span labelled ``"CONDITIONALS"``.
    """

    # Token patterns registered under the "Conditionals" key. The comment
    # above each pattern is the regular expression it was modelled on.
    _PATTERNS = (
        # if (we|you) want to ([\w]+[ \,]+){1,7}(we|you) (need to|must|have to)
        [
            {'LOWER': 'if'},
            {'POS': 'PRON'},
            {'LEMMA': 'want'},
            {'LOWER': 'to'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'POS': 'PRON'},
            {'LEMMA': {'IN': ['need', 'must', 'have']}},
        ],
        # (we|you) ([\w ,]+) (must|have to|need to) ([\w]+[ \,]+){1,7}if (you|we) want to
        [
            {'LOWER': {'IN': ['we', 'you']}},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': {'IN': ['need', 'must', 'have']}},
            {'LOWER': 'to', 'OP': '?'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': 'if'},
            {'LOWER': {'IN': ['we', 'you']}},
            {'LOWER': 'want'},
            {'LOWER': 'to'},
        ],
        # it would be ([\w]+[ \,]+){0,2}nice if
        [
            {'LOWER': 'it'},
            {'LOWER': 'would'},
            {'LEMMA': 'be'},
            {'IS_ALPHA': True, 'OP': '?'},
            {'IS_ALPHA': True, 'OP': '?'},
            {'LOWER': 'nice'},
            {'LOWER': 'if'},
        ],
        # wouldn\'t it be ([\w]+[ \,]+){0,2}nice if
        # NOTE(review): the tokens are would / it / not / be in that order,
        # i.e. "would it not be ..."; confirm whether the contracted form
        # "wouldn't it be" is also meant to match.
        [
            {'LOWER': 'would'},
            {'LOWER': 'it'},
            {'LOWER': 'not'},
            {'LEMMA': 'be'},
            {'IS_ALPHA': True, 'OP': '?'},
            {'IS_ALPHA': True, 'OP': '?'},
            {'LOWER': 'nice'},
            {'LOWER': 'if'},
        ],
        # if ([\w]+[ \,]+){3,8} that would be ([\w]+[ \,]+){0,2}nice
        [
            {'LOWER': 'if'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': 'that'},
            {'LOWER': 'would'},
            {'LOWER': 'be'},
            {'IS_ALPHA': True, 'OP': '?'},
            {'IS_ALPHA': True, 'OP': '?'},
            {'LOWER': 'nice'},
        ],
        # (cannot|will not|won\'t|can\'t) ([\w]+[ \,]+){1,7}(if|unless)
        [
            {'LOWER': {'IN': ['can', 'will']}},
            {'LOWER': 'not'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': {'IN': ['if', 'unless']}},
        ],
        # (if|unless) ([\w]+[ \,]+){3,10}(cannot|will not|won\'t|can\'t)
        [
            {'LOWER': {'IN': ['if', 'unless']}},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': {'IN': ['can', 'will']}},
            {'LOWER': 'not'},
        ],
        # (need|needs|must|has to|have to) ([\w]+[ \,]+){3,10}(in order )to
        [
            {'LEMMA': {'IN': ['need', 'must', 'have']}},
            {'LOWER': 'to', 'OP': '?'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': 'in'},
            {'LOWER': 'order'},
            {'LOWER': 'to'},
        ],
        # (in order )?to ([\w]+[ \,]+){3,10}(need|needs|must|has to|have to)
        [
            {'LOWER': 'in'},
            {'LOWER': 'order'},
            {'LOWER': 'to'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LEMMA': {'IN': ['need', 'must', 'have']}},
            {'LOWER': 'to'},
        ],
        # as long as (we|you) ([\w]+[ \,]+){3,10}(will|can|able|should|[a-zA-Z]+\'ll)
        [
            {'LOWER': 'as'},
            {'LOWER': 'long'},
            {'LOWER': 'as'},
            {'DEP': 'poss'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LEMMA': {'IN': ['will', 'can', 'able', 'should']}},
        ],
        # ([a-zA-Z]\'ll|will|can|able|should) ([\w]+[ \,]+){3,10}as long as (we|you)
        [
            {'LEMMA': {'IN': ['will', 'can', 'able', 'should']}},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': 'as'},
            {'LOWER': 'long'},
            {'LOWER': 'as'},
            {'DEP': 'poss'},
        ],
        # (you|he|we) better ([\w]+[ \,]+){3,10}or
        [
            {'POS': 'PRON'},
            {'LOWER': 'better'},
            {'IS_ALPHA': True, 'OP': '+'},
            {'LOWER': 'or'},
        ],
        # otherwise
        [
            {'LOWER': 'otherwise'},
        ],
    )

    def __init__(self, nlp, object):
        """Build the matcher.

        Args:
            nlp: Pipeline object; only ``nlp.vocab`` is read here.
            object: Namespace that provides ``matcher.Matcher`` and
                ``tokens.Span`` (presumably the ``spacy`` module itself;
                confirm against the caller).
        """
        self.object = object
        self.matcher = object.matcher.Matcher(nlp.vocab)
        # The second positional argument is passed as None, exactly as in
        # the original call (presumably the on_match callback slot of the
        # older Matcher.add signature; confirm for the installed version).
        self.matcher.add("Conditionals", None, *self._PATTERNS)

    def __call__(self, doc):
        """Append the enclosing sentence of every match to ``doc._.opinion``.

        ``doc._.opinion`` is assumed to be a list-like extension attribute
        registered elsewhere; nothing in this excerpt sets it up.
        """
        span_cls = self.object.tokens.Span
        for _match_id, start, end in self.matcher(doc):
            # Widen the match to the full sentence that contains it.
            sentence = span_cls(doc, start, end).sent
            opinion = span_cls(doc, sentence.start, sentence.end, label="CONDITIONALS")
            doc._.opinion.append(opinion)
        # ... (the excerpt is cut off here; any remaining statements of
        # __call__ are not visible)
test_utils.py
Source:test_utils.py
from unittest import TestCase

from win_unc.internal import utils as U


def never(_):
    """Predicate that rejects every item."""
    return False


def always(_):
    """Predicate that accepts every item."""
    return True


def is_alpha(x):
    """Predicate that accepts alphabetic strings."""
    return x.isalpha()


class ListTransformerTestCase(TestCase):
    """Base class for tests whose results are compared as lists."""

    def assertEqualAsLists(self, first, second):
        """Assert that both iterables hold the same items in the same order."""
        self.assertEqual(list(first), list(second))


class TestDropWhile(ListTransformerTestCase):
    """Tests for ``U.drop_while``."""

    def test_drop_nothing(self):
        for text in ('', 'a', 'abc'):
            self.assertEqualAsLists(U.drop_while(never, text), text)

    def test_drop_everything(self):
        for text in ('', 'a', 'abc'):
            self.assertEqualAsLists(U.drop_while(always, text), '')

    def test_drop_with_predicate(self):
        def not_alpha(x):
            return not x.isalpha()

        cases = (
            ('', ''),
            ('abc', 'abc'),
            ('123abc456', 'abc456'),
            (' abc ', 'abc '),
            (' ', ''),
        )
        for text, expected in cases:
            self.assertEqualAsLists(U.drop_while(not_alpha, text), expected)


class TestTakeWhile(ListTransformerTestCase):
    """Tests for ``U.take_while``."""

    def test_take_nothing(self):
        for text in ('', 'a', 'abc'):
            self.assertEqualAsLists(U.take_while(never, text), '')

    def test_take_everything(self):
        for text in ('', 'a', 'abc'):
            self.assertEqualAsLists(U.take_while(always, text), text)

    def test_take_with_predicate(self):
        cases = (
            ('', ''),
            ('abc', 'abc'),
            ('123abc456', ''),
            (' abc ', ''),
            (' ', ''),
        )
        for text, expected in cases:
            self.assertEqualAsLists(U.take_while(is_alpha, text), expected)


class TestFirst(TestCase):
    """Tests for ``U.first``."""

    def test_no_match(self):
        for text in ('', 'abc'):
            self.assertEqual(U.first(never, text), None)

    def test_all_match(self):
        for text, expected in (('', None), ('abc', 'a')):
            self.assertEqual(U.first(always, text), expected)

    def test_with_predicate(self):
        cases = (
            ('', None),
            ('abc', 'a'),
            ('123abc', 'a'),
            (' abc', 'a'),
        )
        for text, expected in cases:
            self.assertEqual(U.first(is_alpha, text), expected)


class TestReversedFirst(TestCase):
    """Tests for ``U.rfirst``."""

    def test_no_match(self):
        for text in ('', 'abc'):
            self.assertEqual(U.rfirst(never, text), None)

    def test_all_match(self):
        for text, expected in (('', None), ('abc', 'c')):
            self.assertEqual(U.rfirst(always, text), expected)

    def test_with_predicate(self):
        cases = (
            ('', None),
            ('abc', 'c'),
            ('abc123', 'c'),
            ('abc ', 'c'),
        )
        for text, expected in cases:
            self.assertEqual(U.rfirst(is_alpha, text), expected)


class TestHigherOrderNot(TestCase):
    """Tests for ``U.not_``."""

    def test_not_(self):
        def false_():
            return False

        def true_():
            return True

        def id_(x):
            return x

        # (wrapped function, call arguments, expected negated result)
        cases = (
            (false_, (), True),
            (id_, (False,), True),
            (true_, (), False),
            (id_, (True,), False),
        )
        for func, args, expected in cases:
            self.assertEqual(U.not_(func)(*args), expected)


class TestRekeyDict(TestCase):
    def test_rekey_dict(self):
        # The body of this test is cut off in the excerpt.
        ...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!