Best Python code snippet using pandera_python
Source: taskflowdataschema.py
1"""2Schema repository for task flow data and task flow data subsets3"""4from pandera.typing import Series, String5import pandas as pd6import pandera as pa7from cdadagbuilder.genflow.utils.check import (8 check_empty_df,9 invalid_seq_number,10)11class TaskFlowDataSchema(pa.SchemaModel):12 """13 Pandera schema model including the validations for fields14 in the configuration files for task flow data dataframe15 """16 variabletablegroupname: Series[String] = pa.Field(17 nullable=False, coerce=True18 )19 variableworkflowstepname: Series[String] = pa.Field(20 nullable=False, coerce=True, allow_duplicates=False21 )22 variableworkflowstepquerytype: Series[String] = pa.Field(23 nullable=False, coerce=True24 )25 variableworkflowstepexecutionorder: Series[int] = pa.Field(26 nullable=False, coerce=True27 )28 variableworkflowstepschema: Series[String] = pa.Field(29 nullable=False, coerce=True30 )31 variableworkflowstepquery: Series[String] = pa.Field(32 nullable=False, coerce=True33 )34 workflowstepqueryparameters: Series[String] = pa.Field(35 nullable=False, coerce=True36 )37 # pylint: disable=R020138 @pa.dataframe_check39 def validate_taskflow_data_dataframe(40 self, task_flow_data: pd.DataFrame41 ) -> bool:42 """43 Raise a error if TaskDataSchema dataframe is empty44 :param task_flow_data:45 :type task_flow_data: DataFrame46 """47 _indicator = check_empty_df(task_flow_data)48 return _indicator == 049class TaskFlowDataSubsetSchema(pa.SchemaModel):50 """51 Pandera schema model including the validations for fields52 in the configuration files for task flow data subset dataframe53 """54 variableworkflowstepname: Series[String] = pa.Field(55 nullable=False, coerce=True, allow_duplicates=False56 )57 variableworkflowstepquerytype: Series[String] = pa.Field(58 nullable=False, coerce=True59 )60 variableworkflowstepexecutionorder: Series[int] = pa.Field(61 nullable=False, coerce=True, ge=162 )63 variableworkflowstepschema: Series[String] = pa.Field(64 nullable=False, coerce=True65 )66 variableworkflowstepquery: Series[String] = pa.Field(67 nullable=False, coerce=True68 )69 workflowstepqueryparameters: Series[String] = pa.Field(70 nullable=False, coerce=True71 )72 # pylint: disable=R020173 @pa.dataframe_check74 def validate_taskflow_data_subset_dataframe(75 self, task_flow_data_subset: pd.DataFrame76 ) -> bool:77 """78 Raise a error if TaskDataFlowSubsetSchema dataframe is empty79 :param task_flow_data_subset:80 :type task_flow_data_subset: DataFrame81 """82 _indicator = check_empty_df(task_flow_data_subset)83 return _indicator == 084 # pylint: disable=R020185 @pa.dataframe_check86 def validate_task_step_buildorder(87 self, task_flow_data_subset: pd.DataFrame88 ) -> bool:89 """90 Identify the missing task step build sequence numbers91 configured in the tasks data config file92 :param task_flow_data_subset:93 :type task_flow_data_subset: DataFrame94 """95 _taskflow_step_buildorder = task_flow_data_subset[96 "variableworkflowstepexecutionorder"97 ].tolist()98 _invalid_num = invalid_seq_number(_taskflow_step_buildorder)99 return _invalid_num == 0100 # pylint: disable=R0201101 @pa.dataframe_check102 def validate_min_task_step_buildorder(103 self, task_flow_data_subset: pd.DataFrame104 ) -> bool:105 """106 Minimum build order has to be 1107 :param task_flow_data_subset:108 :type task_flow_data_subset: DataFrame109 """110 _taskflow_step_buildorder = task_flow_data_subset[111 "variableworkflowstepexecutionorder"112 ].tolist()113 _min_build_order = min(_taskflow_step_buildorder)...
Source: match_kingdoms.py
import os
import random
import numpy as np
import pandas as pd

tsk1 = []
import pandas as pd
import csv
from datetime import datetime, timedelta

path = 'C:\\Users\\Administrator\\Desktop\\sumup1.csv'  # get the folder path
data = pd.read_csv(path, error_bad_lines=False)  # error_bad_lines is deprecated in newer pandas (use on_bad_lines="skip")

path = 'C:\\Users\\Administrator\\Desktop\\check.csv'  # get the folder path
data_check = pd.read_csv(path, error_bad_lines=False)

dataframe_1 = pd.DataFrame(data)

print(len(data), len(data_check))
new = []
dataframe_check = pd.DataFrame(data_check)
# for each city in sumup1.csv, look up its LAD20CD code in check.csv
for i in range(len(data)):
    ind = 0
    for j in range(len(data_check)):
        if dataframe_1['city'][i] == dataframe_check['LAD20NM'][j]:
            ind = j
            break
    new.append(dataframe_check['LAD20CD'][ind])
dataframe_1['region'] = new
...
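The nested loops above do a full scan of check.csv for every row of sumup1.csv and silently fall back to row 0 when a city has no match. A sketch of the same lookup using a left merge (file paths shortened, column names taken from the snippet above; unmatched cities become NaN rather than row 0's code):

import pandas as pd

data = pd.read_csv("sumup1.csv")        # needs a 'city' column
data_check = pd.read_csv("check.csv")   # needs 'LAD20NM' and 'LAD20CD' columns

# left-join city -> LAD20CD and expose it as the 'region' column
merged = (
    data.merge(
        data_check[["LAD20NM", "LAD20CD"]],
        how="left",
        left_on="city",
        right_on="LAD20NM",
    )
    .drop(columns="LAD20NM")
    .rename(columns={"LAD20CD": "region"})
)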
Source: correlation_matrix.py
#!/usr/bin/env python
import pandas as pd


def dataframe_check(mat):
    # guard: all helpers expect a pandas DataFrame
    if not isinstance(mat, pd.DataFrame):
        raise Exception("argument mat must be a pandas DataFrame")


def normalize(mat):
    dataframe_check(mat)
    # centre each row, then scale it to unit L2 norm
    mat = mat.sub(mat.mean(axis=1), axis=0)
    mat = mat.div((mat * mat).sum(axis=1) ** (1 / 2), axis=0)
    return mat


def pearson_corr_mat(mat):
    dataframe_check(mat)
    mat_norm = normalize(mat)
    return mat_norm.dot(mat_norm.transpose())


def spearman_corr_mat(mat):
    dataframe_check(mat)
    mat_rank = mat.rank(axis=1)
...
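A short usage sketch, assuming the functions above are importable from a correlation_matrix module. Because normalize() centres and scales each row, rows are treated as variables and columns as observations; the data below is synthetic.

import numpy as np
import pandas as pd
from correlation_matrix import pearson_corr_mat  # hypothetical import path

# 3 variables (rows) observed over 5 samples (columns)
rng = np.random.default_rng(0)
mat = pd.DataFrame(rng.normal(size=(3, 5)), index=["x", "y", "z"])

corr = pearson_corr_mat(mat)   # 3x3 correlation matrix, diagonal ~= 1.0
print(corr.round(3))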