TestMu 2025 - The Largest Virtual Software Testing Conference is Back | 50,000+ Attendees

How to use the str_matches method in pandera

Best Python code snippet using pandera_python

parser.py

Source:parser.py

...11    file = pd.read_excel(file_path)12    error_schema = DataFrameSchema({13        "nom_amenageur": Column(str, Check.is_uppercase(), nullable=True),14        "siren_amenageur": Column(int, Check.in_range(100000000, 999999999)),15        "contact_amenageur": Column(str, Check.str_matches(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')),16        "nom_operateur": Column(str, nullable=True),17        "contact_operateur": Column(str, Check.str_matches(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')),18        "telephone_operateur": Column(str, nullable=True),19        "nom_enseigne": Column(str),20        "id_station_itinerance": Column(str, Check.str_matches(r'(?:(?:^|,)(^FR[A-Z0-9]{4,33}$|Non concernÃ©))+$')),21        "id_station_local": Column(str, nullable=True),22        "nom_station": Column(str),23        "implantation_station": Column(str, Check.str_matches(24            r'(Voirie|Parking public|Parking privÃ© Ã  usage public|Parking privÃ© rÃ©servÃ© Ã  la clientÃ¨le|Station dÃ©diÃ©e Ã  la recharge rapide){1}')),25        "adresse_station": Column(str),26        # "code_insee_commune": Column(str, Check.str_matches(r'^([013-9]\d|2[AB1-9])\d{3}$')),27        # Regex officiel code INSEE ne marche pas28        # coordonneesXY29        # Difficile Ã  valider. 
consolidated_longitude et latitude sont elles validÃ©es.30        "nbre_pdc": Column(int, Check.greater_than(0)),31        "id_pdc_itinerance": Column(str, Check.str_matches(r'(?:(?:^|,)(^FR[A-Z0-9]{4,33}$|Non concernÃ©))+$')),32        "id_pdc_local": Column(str, nullable=True),33        "puissance_nominale": Column(float, Check.greater_than_or_equal_to(0)),34        "prise_type_ef": Column(int, Check.in_range(0, 1)),35        "prise_type_2": Column(int, Check.in_range(0, 1)),36        "prise_type_combo_ccs": Column(int, Check.in_range(0, 1)),37        "prise_type_chademo": Column(int, Check.in_range(0, 1)),38        "prise_type_autre": Column(int, Check.in_range(0, 1)),39        "gratuit": Column(int, Check.in_range(0, 1)),40        "paiement_acte": Column(int, Check.in_range(0, 1)),41        "paiement_cb": Column(int, Check.in_range(0, 1)),42        "tarification": Column(str, nullable=True),43        "condition_acces": Column(str, Check.str_matches(r'(AccÃ¨s libre|AccÃ¨s rÃ©servÃ©){1}')),44        "reservation": Column(int, Check.in_range(0, 1)),45        # "horaires": Column(str, Check.str_matches(r'(.*?)((\d{1,2}:\d{2})-(\d{1,2}:\d{2})|24/7)')),46        # Regex officielle ne marche pas47        "accessibilite_pmr": Column(str, Check.str_matches(48            r'(RÃ©servÃ© PMR|Accessible mais non rÃ©servÃ© PMR|Non accessible|AccessibilitÃ© inconnue){1}')),49        "restriction_gabarit": Column(str),50        "station_deux_roues": Column(int, Check.in_range(0, 1)),51        "raccordement": Column(str, Check.str_matches(r'(Direct|Indirect){1}')),52        "num_pdl": Column(str, nullable=True),53        # date_mise_en_service54        # Dates, compliquÃ© Ã  valider55        "observations": Column(str, nullable=True),56        # date_maj57        # last_modified58        "datagouv_dataset_id": Column(str),59        "datagouv_resource_id": Column(str),60        "datagouv_organization_or_owner": Column(str),61        "consolidated_longitude": Column(float, 
Check.in_range(-180, 180)),62        "consolidated_latitude": Column(float, Check.in_range(-180, 180)),63        "consolidated_code_postal": Column(int, Check.in_range(1000, 99999)),64        "consolidated_commune": Column(str),65        "consolidated_is_lon_lat_correct": Column(int, Check.in_range(0, 1)),66        "consolidated_is_code_insee_verified": Column(int, Check.in_range(0, 1)),67    })68    warning_schema = DataFrameSchema({69        "contact_amenageur": Column(str, Check.str_matches(r'\b[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}\b')),70        "contact_operateur": Column(str, Check.str_matches(r'\b[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}\b')),71        "nom_amenageur": Column(str, Check.is_uppercase(), nullable=True),72    })73    error_percent = 074    warning_percent = 075    warning_df = pd.DataFrame()76    error_df = pd.DataFrame()77    try:78        error_schema.validate(file, lazy=True)79    except errors.SchemaErrors as err:80        pd.set_option("display.max_columns", None, "display.max_rows", None)81        error_df = err.failure_cases82        error_percent = len(err.failure_cases)/len(file)*10083    try:84        warning_schema.validate(file, lazy=True)...

preprocessing.py

Source:preprocessing.py

1import re2class PreProcessing():3    def __init__(self, raw_dict):4        self.raw_comp = raw_dict["Components"]5        self.raw_rules = raw_dict["Rules"]6        self.components = self.get_dict_components()7        self.rules = self.get_rules()8    def get_dict_components(self) -> dict:9        components = dict()10        matches_for_comp = self.get_matches_components()11        for key in matches_for_comp.keys():12            components[key] = matches_for_comp[key][0]13            if key == "States":14                components["Initial State"] = matches_for_comp[key][1]15                components["Final States"] = matches_for_comp[key][2]16        for key, value in components.items():17            components[key] = self.format_str(value)18        components["Initial State"] = components["Initial State"][0]19        return components20    def get_matches_components(self) -> dict:21        symb_pattern = re.compile(r"{([a-z],\s*)*[a-z]}")22        state_pattern = re.compile(r"(q(\d+|f),\s*)*q(\d+|f)")23        stack_symb_pattern = re.compile(r"{([A-Z],\s*)*[A-Z]}")24        patterns = {"Symbols": symb_pattern, "States": state_pattern, "Stack Symbols": stack_symb_pattern}25        matches_comps = dict()26        for key, value in patterns.items():27            matches_comps[key] = self.find(value)28        return matches_comps29    def find(self, pattern) -> list:30        str_matches = []31        matches = re.finditer(pattern, self.raw_comp)32        for match in matches:33            str_matches.append(match.group())34        return str_matches35    def format_str(self, string) -> list:36        string = string.replace("{", "")37        string = string.replace("}", "")38        lista = string.split(",")39        for i in range(len(lista)):40            lista[i] = lista[i].strip()41        return lista42    def get_rules(self) -> list:43        rules = []44        keys = ["origin_state", "word_read_symbol", "stack_read_symbol",45                
"final_state", "stack_written_symbol"]46        for item in self.raw_rules:47            dict_rules = dict()48            for i in range(5):49                dict_rules[keys[i]] = self.format_str(item)[i]50            rules.append(dict_rules)51        """52        rules = []53        for item in self.raw_rules:54            rules.append(self.format_str(item))55        """...

dna.py

Source:dna.py

import csv
import sys


def main():
    """Print the name of the person whose STR counts match a DNA sequence.

    Command line: ``./dna.py <db_file> <sequence_file>``.

    The first row of ``db_file`` is a CSV header; every column after the
    first names an STR. Each following row holds, per STR column, the
    count that must equal the longest consecutive run of that STR in
    ``sequence_file``. The ``name`` field of the first fully matching row
    is printed and the process exits with status 0; if no row matches,
    ``No match`` is printed. A wrong argument count prints the usage
    string and exits with status 1.
    """
    # Check for incorrect usage
    if len(sys.argv) != 3:
        print("Usage: ./dna.py <db_file> <sequence_file>")
        sys.exit(1)
    db_file, sequence_file = sys.argv[1], sys.argv[2]

    # A single pass over the database yields both the STR names (header
    # columns after the first) and the people rows.
    with open(db_file, newline="") as db_stream:
        db_rows = csv.DictReader(db_stream)
        # fieldnames is None for an empty file; treat that as "no STRs".
        str_names = list(db_rows.fieldnames or [])[1:]
        people = list(db_rows)

    # Read the sequence once and reuse it for every STR instead of
    # re-opening the file per STR.
    with open(sequence_file) as sequence_stream:
        sequence = sequence_stream.read()
    str_occurrence = {
        name: _longest_run(name, sequence) for name in str_names
    }

    # Check for matches of sequence STRs for each person in people
    for person in people:
        if all(int(person[name]) == str_occurrence[name] for name in str_names):
            print(person['name'])
            sys.exit(0)
    print("No match")


def _longest_run(str_sequence, buffer):
    """Return the longest run of back-to-back copies of str_sequence in buffer."""
    step = len(str_sequence)
    if step == 0:
        # An empty pattern has no meaningful repeat count.
        return 0

    longest = 0
    # Try every occurrence as a potential run start, so a run that begins
    # inside an earlier match (self-overlapping STRs) is not missed.
    start = buffer.find(str_sequence)
    while start != -1:
        run = 0
        pos = start
        while buffer.startswith(str_sequence, pos):
            run += 1
            pos += step
        if run > longest:
            longest = run
        start = buffer.find(str_sequence, start + 1)
    return longest


def str_repetitions(str_sequence, sequence_file):
    """Return the longest consecutive repeat count of an STR in a file.

    Args:
        str_sequence: The STR (short substring) to look for.
        sequence_file: Path of the text file holding the sequence; its
            whole content is read and searched as-is.

    Returns:
        The largest number of copies of ``str_sequence`` that appear
        directly one after another anywhere in the file, or 0 when the
        STR does not occur (or is empty).
    """
    # Read sequence_file into a buffer string
    with open(sequence_file) as sequence_stream:
        buffer = sequence_stream.read()
    return _longest_run(str_sequence, buffer)


# ... (the source listing is truncated here)

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.