How to use the test_select_columns method in pandera

Best Python code snippet using pandera_python

test_open_corporates_querying.py

Source: test_open_corporates_querying.py Github

copy

Full Screen

...35 def test_call_endpoint(self):36 """Assumes rate limit has not been reached."""37 self.assertEqual(self.test_query.call_enpoint(self.search_endpoint)["results"][38 "company"]["company_number"], "09026697")39 def test_select_columns(self):40 """Tests to see whether function returns correct number of columns."""41 test_row = {'name': '! ! ! 1ST CHOICE ANDROID SMART-PHONE TUTORING, INC.', 'company_number': 'C3517133', 'jurisdiction_code': 'us_ca', 'incorporation_date': '2012-11-02', 'dissolution_date': None, 'company_type': 'DOMESTIC STOCK', 'registry_url': 'https:/​/​businessfilings.sos.ca.gov/​frmDetail.asp?CorpID=03517133', 'branch': None, 'branch_status': None, 'inactive': True, 'current_status': 'Dissolved', 'created_at': '2012-11-10T03:15:55+00:00', 'updated_at': '2019-12-03T12:53:16+00:00', 'retrieved_at': '2019-11-28T01:23:56+00:00', 'opencorporates_url':42 'https:/​/​opencorporates.com/​companies/​us_ca/​C3517133', 'previous_names': [], 'source': {'publisher': 'California Secretary of State', 'url': 'https:/​/​businessfilings.sos.ca.gov/​frmDetail.asp?CorpID=03517133', 'retrieved_at': '2019-11-28T01:23:56+00:00'}, 'registered_address': {'street_address': '420 N MCKINLEY ST #111-182\nCORONA CA 92879', 'locality': None, 'region': None, 'postal_code': None, 'country': 'United States'}, 'registered_address_in_full': '420 N MCKINLEY ST #111-182\nCORONA CA 92879', 'industry_codes': [], 'restricted_for_marketing': None, 'native_company_number': None}43 number_of_columns = len(self.test_query.select_columns(test_row))44 self.assertEqual(number_of_columns, 24)45 def test_get_result_pagination(self):46 self.assertEqual(self.test_query.get_result_pagination(), 5)47 def test_pipeline(self):48 """This tests both extract() and load()."""49 self.test_query.extract()50 self.assertTrue(os.path.exists(self.test_query.filename))51 # Create a test version of the target table.52 statement = """CREATE TABLE IF NOT EXISTS public.{0} (53 like public.{1} including all);54 
TRUNCATE public.{0}""".format(55 self.test_query.test_target_table, self.test_query.target_table)56 try:57 self.test_query.cursor.execute(statement)58 except psycopg2.Error as e:59 print(e)60 exit()61 else:62 # A bit messy but need to swap these values in order for load()63 # to load the test table.64 self.test_query.target_table = self.test_query.test_target_table65 self.test_query.load()66 self.test_query.cursor.execute(67 "SELECT COUNT(*) FROM public.{}".format(self.test_query.test_target_table))68 target_row_count = self.test_query.cursor.fetchall()[0]69 # There should be a row count of greater than 0.70 self.assertNotEqual(target_row_count, 0)71 # def run_tests(self):72 # """73 # This follows74 # """75 # # print(dir(self.test_query))76 # self.test_construct_enpoint_url()77 # # self.test_call_endpoint_url()78 # self.test_select_columns()79 # self.test_get_result_pagination()80if __name__ == '__main__':81 unittest.main()82 # test_suite = TestQueryCompanies()...

Full Screen

Full Screen

predict.py

Source: predict.py Github

copy

Full Screen

1from __future__ import division2from pyspark.sql import SparkSession3from pyspark import SparkConf4import os, sys, time, json, pickle, numpy as np5from pyspark.ml.recommendation import ALS, ALSModel6#%%7import pandas as pd8from sys import getsizeof9from datetime import datetime10import xgboost as xgb11from sklearn import model_selection12from sklearn.preprocessing import MinMaxScaler, StandardScaler13#%%14start = time.time()15input_dir = '../​resource/​asnlib/​publicdata/​'16model_path = os.getcwd()+'/​model'17als_model_path = os.getcwd()+'/​als_model'18test_path = sys.argv[1]19output_path = sys.argv[2]20print(test_path)21#%%22def correctPred(x):23 if x > 5:24 return 525 elif x < 1:26 return 127 else:28 return x29#%%30conf = SparkConf().setMaster("local[3]")31spark = SparkSession.builder.config(conf = conf).getOrCreate()32sc = spark.sparkContext33user_avg, biz_avg = json.load(open(input_dir+'user_avg.json')), json.load(open(input_dir+'business_avg.json'))34global_avg = 3.796161152634150335als_model = ALSModel.load(als_model_path)36with open(model_path, "rb") as files:37 scaler, xg_model, userInt, bizInt = pickle.load(files)38test = sc.textFile(test_path).map(json.loads).map(lambda x: (x['user_id'],x['business_id']))39for user,biz in test.collect():40 if user not in userInt:41 userInt[user] = len(userInt)42 if biz not in bizInt:43 bizInt[biz] = len(bizInt)44test = test.map(lambda x: (userInt.get(x[0],'UNKNOWN'), bizInt.get(x[1],'UNKNOWN'))).toDF(['user_id','business_id'])45#%% Predictions46# ALS Prediction47pred = als_model.transform(test)48pred = pred.withColumnRenamed('prediction','pred_als').toPandas()49pred['pred_als'] = pred['pred_als'].fillna(global_avg).apply(lambda x: correctPred(x))50#%%51userInt, bizInt = {v:k for k,v in userInt.items()}, {v:k for k,v in bizInt.items()}52pred['user_id'] = pred['user_id'].apply(lambda x: userInt[x])53pred['business_id'] = pred['business_id'].apply(lambda x: bizInt[x])54#%%55# XGBoostRegressor Prediction56weekdays = 
['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']57def modifyHours(x):58 if x is not None:59 days = x.keys()60 if all(day in weekdays[:5] for day in days):61 return 'd'62 elif all(day in weekdays[5:] for day in days):63 return 'e'64 else:65 return 'b'66 else:67 return None68def userFeatureEngg(x):69 x.update({'friends':len(x['friends']), 'elite':len(x['elite']), 'user_avg':user_avg.get(x['user_id'],None),70 'yelping_since':2020-datetime.strptime(x['yelping_since'],'%Y-%m-%d %H:%M:%S').year})71 return x72def bizFeatureEngg(x):73 x.update({'categories':x['categories'].split(', '), 'hrs':modifyHours(x['hours']),74 'biz_avg':biz_avg.get(x['business_id'],None)})75 return x76#%%77user_avg, biz_avg = json.load(open(input_dir+'user_avg.json')), json.load(open(input_dir+'business_avg.json'))78test = sc.textFile(test_path).map(json.loads).toDF()79user = sc.textFile(input_dir+'user.json').map(json.loads).map(lambda x: userFeatureEngg(x)).toDF()80biz = sc.textFile(input_dir+'business.json').map(json.loads).map(lambda x: bizFeatureEngg(x)).toDF()81test = test.join(biz, on='business_id', how='left')82test = test.join(user, on='user_id', how='left')83test_select_columns = ['user_id','business_id','user_avg','biz_avg','latitude','longitude','yelping_since','useful','funny',84 'cool','elite','friends','fans','compliment_hot','compliment_writer',85 'compliment_photos']86test = test.select(test_select_columns).toPandas()87user_biz_ids = test[['user_id','business_id']]88test = test.drop(['user_id','business_id'],axis=1)89test = scaler.transform(test)90del user, biz, userInt, bizInt91pred_xg = pd.concat([user_biz_ids, pd.Series(xg_model.predict(test))], axis=1)92pred_xg.columns = ['user_id','business_id','pred_xg']93pred_xg['pred_xg'] = pred_xg['pred_xg'].apply(lambda x: correctPred(x))94pred = pred.merge(pred_xg, on=['user_id','business_id'], how='inner')95del test, pred_xg96pred['stars'] = 0.1*pred['pred_als'] + 
0.9*pred['pred_xg']97pred.drop(['pred_als','pred_xg'],axis=1,inplace=True)98pred.to_json(output_path, orient='records', lines=True)99end=time.time()...

Full Screen

Full Screen

test_subset_data.py

Source: test_subset_data.py Github

copy

Full Screen

...14 return True15 else:16 print(lines1)17 print(lines2)18def test_select_columns():19 subset_data.select_columns(FILE_2_TEST, OUTFILE_TEST)...

Full Screen

Full Screen

Blogs

Check out the latest blogs from LambdaTest on this topic:

Test strategy and how to communicate it

I routinely come across test strategy documents when working with customers. They are lengthy—100 pages or more—and packed with monotonous text that is routinely reused from one project to another. Yawn once more— the test halt and resume circumstances, the defect management procedure, entrance and exit criteria, unnecessary generic risks, and in fact, one often-used model replicates the requirements of textbook testing, from stress to systems integration.

How To Write End-To-End Tests Using Cypress App Actions

When I started writing tests with Cypress, I was always going to use the user interface to interact and change the application’s state when running tests.

Pair testing strategy in an Agile environment

Pair testing can help you complete your testing tasks faster and with higher quality. But who can do pair testing, and when should it be done? And what form of pair testing is best for your circumstance? Check out this blog for more information on how to conduct pair testing to optimize its benefits.

LIVE With Automation Testing For OTT Streaming Devices

People love to watch, read and interact with quality content — especially video content. Whether it is sports, news, TV shows, or videos captured on smartphones, people crave digital content. The emergence of OTT platforms has already shaped the way people consume content. Viewers can now enjoy their favorite shows whenever they want rather than at pre-set times. Thus, the OTT platform’s concept of viewing anything, anytime, anywhere has hit the right chord.

Different Ways To Style CSS Box Shadow Effects

Have you ever visited a website that only has plain text and images? Most probably, no. It’s because such websites do not exist now. But there was a time when websites only had plain text and images with almost no styling. For the longest time, websites did not focus on user experience. For instance, this is how eBay’s homepage looked in 1999.

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful