Best Python code snippet using pandera_python
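Note that neither snippet on this page imports pandera itself; both validate data by hand (per-column validator functions in services.py, and an Airflow operator in data_quality.py). For comparison, here is a minimal pandera sketch; the schema, checks, and sample data are assumptions for illustration and are not taken from either snippet, though the column names mirror the ones services.py validates.

# Minimal pandera sketch (assumed schema, for illustration only).
import pandas as pd
import pandera as pa

schema = pa.DataFrameSchema({
    "username": pa.Column(str, pa.Check.str_length(min_value=1, max_value=150)),
    "email": pa.Column(str, pa.Check.str_matches(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")),
    "date_of_birth": pa.Column(pa.DateTime, coerce=True),
    "phone_number": pa.Column(str, pa.Check.str_matches(r"^\+?\d{7,15}$")),
    "blood_group": pa.Column(str, pa.Check.isin(["A+", "A-", "B+", "B-", "AB+", "AB-", "O+", "O-"])),
})

df = pd.DataFrame({
    "username": ["alice"],
    "email": ["alice@example.com"],
    "date_of_birth": ["1990-01-01"],
    "phone_number": ["+14155550101"],
    "blood_group": ["O+"],
})

# lazy=True collects every failure instead of raising on the first one
validated = schema.validate(df, lazy=True)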
services.py
Source: services.py
...
    def __init__(self, row, index):
        self.row = row
        self.index = index
        self.row_errors = []

    def validate_column(self, validator_method, col):
        """
        Validate an individual column with the validator method passed;
        on failure, append an error message to self.row_errors.
        """
        try:
            validator_method(col)
        except ValidationError as exc:
            self.row_errors.append(f"Row Index {self.index + 1}: " + exc.message)

    def validate_row(self):
        """
        Run every column validator for this row; any failures are
        collected in self.row_errors.
        """
        self.validate_column(validate_username, self.row['username'])
        self.validate_column(validate_email, self.row['email'])
        self.validate_column(validate_date_of_birth, self.row['date_of_birth'])
        self.validate_column(validate_phone_number, self.row['phone_number'])
        self.validate_column(validate_blood_group, self.row['blood_group'])
...
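One possible way to drive this validator over a CSV file is sketched below. The class name RowValidator and the CSV-reading wrapper are hypothetical, since the snippet is truncated before the class definition; the validator helpers (validate_username, validate_email, and so on) are referenced in the snippet but defined elsewhere in the project.

# Hypothetical usage sketch: RowValidator stands in for the truncated class above,
# which would be imported from the project's services module.
import csv

def validate_csv(path):
    errors = []
    with open(path, newline="") as handle:
        for index, row in enumerate(csv.DictReader(handle)):
            validator = RowValidator(row, index)  # assumed class name
            validator.validate_row()
            errors.extend(validator.row_errors)
    return errors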
data_quality.py
Source: data_quality.py
from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


# DataQuality operator to validate the records loaded into dimension & fact tables
class DataQualityOperator(BaseOperator):

    ui_color = '#89DA59'

    @apply_defaults
    def __init__(self,
                 redshift_connection_id,
                 target_table,
                 validate_column,
                 *args, **kwargs):
        '''
        DataQualityOperator init constructor

        arguments:
        redshift_connection_id -- Redshift connection id
        target_table           -- list of target tables where data validation needs to happen
        validate_column        -- list of columns (one per table) on which the null check is applied
        '''
        super(DataQualityOperator, self).__init__(*args, **kwargs)
        self.redshift_connection_id = redshift_connection_id
        self.target_table = target_table
        self.validate_column = validate_column

    def execute(self, context):
        '''
        execute gets called when DataQualityOperator is invoked
        '''
        self.log.info('Inside DataQualityOperator --->>>>')
        redshift = PostgresHook(self.redshift_connection_id)

        # Validation 1 - check that each table contains records
        for table in self.target_table:
            self.log.info(f"Executing record count validation for table: {table}")
            data_valid_sql = f"SELECT COUNT(*) FROM {table}"
            result = redshift.get_records(data_valid_sql)
            if len(result) < 1 or len(result[0]) < 1:
                raise ValueError(f"Data quality check failed: {table} select returned no results")
            num_records = result[0][0]
            if num_records < 1:
                raise ValueError(f"Data quality check failed: {table} contains 0 rows")
            self.log.info(f"Record count check passed and table {table} contains {num_records} records")

        # Validation 2 - check the selected column for null values; raise an error if any are found
        for index, table in enumerate(self.target_table):
            column = self.validate_column[index]
            self.log.info(f"Executing column null validation for table: {table} and column: {column}")
            data_valid_sql_1 = f"SELECT COUNT(1) FROM {table} WHERE {column} IS NULL"
            records = redshift.get_records(data_valid_sql_1)
            self.log.info(f"Null count query returned {records[0][0]}")
            if records and records[0][0] > 0:
                raise ValueError(f"Data quality check failed: {table} contains {records[0][0]} null records in column {column}")
            self.log.info(f"Null validation check passed for table: {table} and column: {column}")
...
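An operator like this is typically wired into a DAG. The sketch below shows one way that might look; the DAG name, task_id, connection id, and the table and column lists are illustrative assumptions, and only the constructor arguments shown in data_quality.py are taken from the snippet. The import path of DataQualityOperator depends on where the plugin lives, so it is left as a placeholder comment.

# Hypothetical DAG wiring for the operator above.
from datetime import datetime

from airflow import DAG
# from plugins.operators import DataQualityOperator  # actual import path depends on the project layout

with DAG("data_quality_example",
         start_date=datetime(2024, 1, 1),
         schedule_interval=None) as dag:

    run_quality_checks = DataQualityOperator(
        task_id="run_data_quality_checks",
        redshift_connection_id="redshift",
        # one column to null-check per table, matched by position
        target_table=["songplays", "users"],
        validate_column=["playid", "userid"],
    )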
Check out the latest blogs from LambdaTest on this topic:
I routinely come across test strategy documents when working with customers. They are lengthy, often 100 pages or more, and packed with monotonous text reused from one project to the next: suspension and resumption criteria, the defect management procedure, entry and exit criteria, generic risks that add little value, and, in many cases, a template that simply replicates textbook testing requirements, from stress testing to systems integration.
When I started writing tests with Cypress, I always used the user interface to interact with and change the application's state while running tests.
Pair testing can help you complete your testing tasks faster and with higher quality. But who can do pair testing, and when should it be done? And what form of pair testing is best for your circumstance? Check out this blog for more information on how to conduct pair testing to optimize its benefits.
People love to watch, read and interact with quality content — especially video content. Whether it is sports, news, TV shows, or videos captured on smartphones, people crave digital content. The emergence of OTT platforms has already shaped the way people consume content. Viewers can now enjoy their favorite shows whenever they want rather than at pre-set times. Thus, the OTT platform’s concept of viewing anything, anytime, anywhere has hit the right chord.
Have you ever visited a website that only has plain text and images? Most probably, no. It’s because such websites do not exist now. But there was a time when websites only had plain text and images with almost no styling. For the longest time, websites did not focus on user experience. For instance, this is how eBay’s homepage looked in 1999.