How to use _notin method in pandera

"""Console tool that loads JSON files into MongoDB and runs .ini-driven queries."""

import configparser
import glob
import json

import pymongo


class Connection:
    """Wrap a ``pymongo.MongoClient`` together with a selected database and collection."""

    def __init__(self, host="localhost", port=27017):
        self.client = pymongo.MongoClient(host, port)
        self.db = None  # assigned by set_db()
        self.collection = None  # assigned by set_collection()

    def set_db(self, db):
        """Select the database named ``db`` on the client."""
        self.db = self.client[db]

    def set_collection(self, collection):
        """Select the collection named ``collection`` in the current database."""
        self.collection = self.get_db().get_collection(collection)

    def get_db(self):
        """Return the selected database (``None`` until ``set_db`` is called)."""
        return self.db

    def get_coll(self):
        """Return the selected collection (``None`` until ``set_collection`` is called)."""
        return self.collection


class QueryConfiguration:
    """Container for the query settings extracted from an .ini configuration."""

    def __init__(self):
        self.columns = []  # names of the columns to include
        self.queries = {}  # field name -> MongoDB condition document
        self.group_by = []  # names of the columns to group on
        self.numeric_columns = []  # included columns that hold numeric values

    def set_columns(self, cols):
        self.columns = cols

    def set_queries(self, q):
        self.queries = q

    def set_group_by(self, g):
        self.group_by = g

    def set_numeric_columns(self, cols):
        self.numeric_columns = cols

    def get_columns(self):
        return self.columns

    def get_queries(self):
        return self.queries

    def get_group_by(self):
        return self.group_by

    def get_numeric_columns(self):
        return self.numeric_columns


#Utility functions
def read_json(json_file):
    """Parse ``json_file`` and return the decoded JSON value."""
    print(f"Loading in {json_file}")
    with open(json_file, encoding="utf-8") as file:
        return json.load(file)


def read_ini(ini_file):
    """Read ``ini_file`` and return ``{section: {option: value}}`` with string values."""
    config = configparser.ConfigParser()
    print(f"Reading {ini_file}")
    # The parser starts empty; without this call there are no sections to copy.
    config.read(ini_file)
    data = {}
    for section in config.sections():
        data[section] = {
            option: config.get(section, option)
            for option in config.options(section)
        }
    return data


def insert_into_collection(collection, data):
    """Insert ``data`` into ``collection``.

    A list is inserted with ``insert_many``, anything else with ``insert_one``.
    Falsy data (``None``, empty list, empty dict) is reported and not inserted.
    """
    if not data:
        print("No data to insert, collection not created.")
        return
    if isinstance(data, list):
        collection.insert_many(data)
    else:
        collection.insert_one(data)


def nested_dict_pairs_iterator(dict_obj):
    """Yield one tuple per leaf of a nested dictionary.

    Each tuple holds the chain of keys leading to the leaf followed by the
    leaf value, e.g. ``{"a": {"b": 1}}`` yields ``("a", "b", 1)``.
    """
    for key, value in dict_obj.items():
        if isinstance(value, dict):
            # Prefix every path found below with the current key.
            for pair in nested_dict_pairs_iterator(value):
                yield (key, *pair)
        else:
            yield (key, value)


def print_string_options(strs):
    """Print the items of ``strs`` on one line, numbered from 1."""
    # enumerate() keeps the numbering correct even when items repeat.
    print("".join(f"{number} {item!s} | " for number, item in enumerate(strs, start=1)))


def get_items_by_index(values, indexes):
    """Return the items of ``values`` at the 1-based positions listed in ``indexes``.

    ``indexes`` is a comma-separated string such as ``"1,3"``; repeated
    positions are collapsed because the positions are collected into a set.
    """
    positions = set(map(int, indexes.split(",")))
    return [values[position - 1] for position in positions]


# Markers looked up (by substring) in ApplyFilters keys, with the MongoDB
# operator they select and whether the value is a comma-separated list.
# Order matters: "_notin" has to be tested before "_not", which it contains,
# otherwise the "$nin" entry can never be reached.
_FILTER_OPERATORS = (
    ("_equals", "$eq", False),
    ("_greater_than", "$gt", False),
    ("_greater_equal", "$gte", False),
    ("_less_than", "$lt", False),
    ("_less_equal", "$lte", False),
    ("_in", "$in", True),
    ("_notin", "$nin", True),
    ("_not", "$ne", False),
)


def _int_if_decimal(text):
    """Return ``int(text)`` when ``text`` consists only of decimal digits, else ``text``."""
    # isdecimal() matches exactly the digit strings int() accepts.
    return int(text) if text.isdecimal() else text


def _parse_filter_list(text):
    """Split a comma-separated filter value, converting digit-only items to int."""
    return [_int_if_decimal(item.strip()) for item in text.split(",")]


def _filters_included_column(key, columns):
    """Tell whether the first one or two ``_``-separated tokens of ``key`` name an included column."""
    tokens = key.split("_")
    if tokens[0] in columns:
        return True
    return len(tokens) > 1 and "_".join(tokens[:2]) in columns


def _build_filter(key, value):
    """Translate one ApplyFilters entry into ``(field, condition)``; ``None`` if no marker matches."""
    for marker, mongo_op, takes_list in _FILTER_OPERATORS:
        if marker in key:
            operand = _parse_filter_list(value) if takes_list else _int_if_decimal(value)
            return str(key.split(marker)[0]), {mongo_op: operand}
    return None


def parse_config(data):
    """Build a ``QueryConfiguration`` from the nested dict returned by ``read_ini``.

    Recognised sections:

    * ``IncludeColumns`` - options whose value is ``1`` are included columns.
    * ``ApplyFilters`` - options named ``<column><marker>`` (see
      ``_FILTER_OPERATORS``) with a non-empty value become MongoDB conditions,
      provided the column is included. Digit-only values are converted to int.
    * ``GroupBy`` - options named ``group_<column>`` whose value is ``1``.

    Filters and group-by entries are checked against the columns collected so
    far, so ``IncludeColumns`` has to come first in ``data``.
    """
    config = QueryConfiguration()
    columns = []
    mongo_queries = {}
    group_by = []
    for pair in nested_dict_pairs_iterator(data):
        section, key, value = pair[0], pair[1], pair[2]
        if section == "IncludeColumns":
            if int(value) == 1:
                columns.append(str(key))
        elif section == "ApplyFilters":
            if value != "" and _filters_included_column(key, columns):
                query = _build_filter(key, value)
                if query is not None:
                    field, condition = query
                    mongo_queries[field] = condition
        elif section == "GroupBy":
            if str(key.split("group_")[1]) in columns:
                if int(value) == 1:
                    group_by.append(key.split("group_")[1])
    config.set_columns(columns)
    config.set_queries(mongo_queries)
    config.set_group_by(group_by)
    return config


def clean_input(prompt, type_=None, min_=None, max_=None, range_=None):
    """Prompt until the user enters an acceptable value and return it.

    Args:
        prompt: Text shown to the user.
        type_: Optional callable used to convert the raw input; a
            ``ValueError`` from it triggers a new prompt.
        min_: Optional inclusive lower bound.
        max_: Optional inclusive upper bound.
        range_: Optional container of accepted values.

    Raises:
        ValueError: If both bounds are given and ``max_ < min_``.
    """
    if min_ is not None and max_ is not None and max_ < min_:
        raise ValueError("min_ must be less than or equal to max_")
    while True:
        choice = input(prompt)
        if type_ is not None:
            try:
                choice = type_(choice)
            except ValueError:
                print(f"Input type must be {type_.__name__}.")
                continue
        if max_ is not None and choice > max_:
            print(f"Input must be less than or equal to {max_}.")
        elif min_ is not None and choice < min_:
            print(f"Input must be greater than or equal to {min_}.")
        elif range_ is not None and choice not in range_:
            if isinstance(range_, range):
                print(f"Input must be between {range_.start} and {range_.stop}.")
            else:
                txt = "Input must be {0}."
                if len(range_) == 1:
                    print(txt.format(*range_))
                else:
                    expected = " or ".join((", ".join(str(x) for x in range_[:-1]), str(range_[-1])))
                    print(txt.format(expected))
        else:
            return choice


def select_collection(connection):
    """Let the user pick one of the database's collections and select it on ``connection``."""
    try:
        print("Please select which Collection to use by typing the corresponding number and hitting enter.")
        collections = connection.get_db().list_collection_names()
        print_string_options(collections)
        # With no collections the bounds are 1..0, so clean_input raises
        # ValueError, which is reported below.
        col_index = clean_input("Enter the number for the collection to select: ", int, 1, len(collections)) - 1
        connection.set_collection(collections[col_index])
        print("Selected " + connection.get_coll().name + " collection.")
    except ValueError:
        print("No collections exist in database, please run setup option first.")
        menu(connection)


def is_number(string):
    """Return ``True`` when ``float()`` accepts ``string``, else ``False``."""
    try:
        float(string)
        return True
    except (TypeError, ValueError):
        # TypeError covers None, i.e. a document that lacks the field.
        return False


#Operation functions
def main():
    """Connect to MongoDB and open the option menu.

    Reads the module-level names ``host``, ``port`` and ``db_name``.
    """
    print("|___________Take home interview project___________|")
    print("|Developed by Austin Lemacks for BornTec interview|")
    example_mongo = Connection(host, port)
    example_mongo.set_db(db_name)
    print(f"Using {db_name} database on MongoDB instance at {host}:{port}.")
    menu(example_mongo)


def menu(connection):
    """Show the option menu and dispatch to the chosen operation."""
    print("|___Option Menu___|")
    print("Please select an option from below by entering the corresponding number.")
    print("| 1 | Setup - Loads JSON files in the directory into the local MongoDB instance as collections for example data.")
    print("| 2 | Run Queries - Select a collection to query using an .ini file for filtering.")
    print("| 3 | View Data - View the first document of a selected collection.")
    print("| 4 | Exit - Closes application.")
    print("_________________")
    choice = clean_input("Enter corresponding number for option above: ", int, 1, 4)
    if choice == 1:
        setup(connection)
    elif choice == 2:
        pipeline(connection)
    elif choice == 3:
        view_document(connection)
    elif choice == 4:
        print("Quitting.")
        # quit() is an interactive-session helper; raise SystemExit directly.
        raise SystemExit


def setup(connection):
    """Load every ``*.json`` file in the working directory into a same-named collection."""
    print("Starting JSON to MongoDB load for test data.")
    existing_collections = connection.get_db().list_collection_names()
    for file in glob.glob("*.json"):
        collection_name = str(file)[:-5]  # drop the ".json" suffix
        if collection_name not in existing_collections:
            file_data = read_json(file)
            insert_into_collection(connection.get_db()[collection_name], file_data)
        else:
            print(f"Skipping {file}, collection already exists in the database.")
    print("Collection setup complete.")
    menu(connection)


def pipeline(connection):
    """Run the query workflow: choose a configuration, retrieve, aggregate, store."""
    config_data = query_input(connection)
    initial_data, config = retrieve_stage(connection, config_data)
    final_data = aggregate_stage(connection, config, initial_data)
    store_stage(connection, final_data)


def query_input(connection):
    """Ask for a collection and one of its .ini files; return the parsed .ini data.

    Only files named ``<collection name>*.ini`` in the working directory are
    offered. When there are none, the pipeline is restarted.
    """
    select_collection(connection)
    print("Now, select which configuration to use by selecting an .ini file from the list below for " + connection.get_coll().name + ".")
    ini_files = glob.glob(connection.get_coll().name + "*.ini")
    print_string_options(ini_files)
    if ini_files:
        config_index = clean_input("Enter the number for the ini configuration you wish to use for " + connection.get_coll().name + ":", int, 1, len(ini_files))
        config_data = read_ini(ini_files[config_index - 1])
        return config_data
    else:
        print("No .ini configuration exists for this collection, please select a new one.")
        pipeline(connection)


def retrieve_stage(connection, config_data):
    """Fetch the selected collection and keep only the configured columns.

    Returns:
        ``(documents, config)`` where ``documents`` is a list of dicts limited
        to the included columns and ``config`` is the parsed configuration with
        its numeric columns filled in. A column counts as numeric as soon as
        one document holds a value that ``is_number`` accepts.
    """
    config = parse_config(config_data)
    collection = connection.get_coll()
    print(f"Querying {collection.name}")
    filtered_docs = []
    numeric_cols = []
    cursor = collection.find()
    print("Retrieval complete.")
    print("Mapping data.")
    inc_cols = config.get_columns()
    for doc in cursor:
        doc_dict = {}
        for key in inc_cols:
            field_value = doc.get(key)
            doc_dict[key] = field_value
            # Record each numeric column once; only membership is used later.
            if key not in numeric_cols and is_number(field_value):
                numeric_cols.append(key)
        filtered_docs.append(doc_dict)
    config.set_numeric_columns(numeric_cols)
    print("Mapping complete.")
    return filtered_docs, config


def aggregate_stage(connection, config, initial_data):
    """Filter, group and summarise ``initial_data`` through a temporary collection.

    The data is written to the ``temp_pipline`` collection and aggregated with
    three stages: ``$match`` (the configured filters), ``$group`` (by the first
    group-by column, or everything together when there is none) and
    ``$project``. Numeric columns get ``mean_``, ``sum_`` and ``median_``
    fields; the other columns contribute to ``all_unique_strings``.

    Returns:
        The aggregated documents as a list.
    """
    print("Aggregating data.")
    temp_collection = connection.get_db()["temp_pipline"]
    # NOTE(review): the argument of this call is garbled in the source this
    # was recovered from; the temporary collection's name is assumed - confirm.
    connection.set_collection(temp_collection.name)
    insert_into_collection(temp_collection, initial_data)

    columns = config.get_columns()
    numeric_columns = config.get_numeric_columns()
    group_columns = config.get_group_by()
    matches = config.get_queries()

    match_fields = {col: matches.get(col) for col in columns if matches.get(col) is not None}
    aggregation = [{"$match": match_fields}]

    concat = []
    all_fields = []
    # Only the first group-by column is used as the grouping key.
    group_spec = {"_id": "$" + group_columns[0]} if group_columns else {"_id": 0}
    for col in columns:
        if col in group_columns:
            continue
        if col in numeric_columns:
            group_spec[f"mean_{col}"] = {"$avg": "$" + col}
            group_spec[f"sum_{col}"] = {"$sum": "$" + col}
            group_spec[f"med_calc_{col}"] = {"$push": "$" + col}
            all_fields.append(f"mean_{col}")
            all_fields.append(f"sum_{col}")
        else:
            group_spec[f"unique_strings_{col}"] = {"$addToSet": "$" + col}
            concat.append(f"$unique_strings_{col}")
    aggregation.append({"$group": group_spec})

    projection = {field: "$" + field for field in all_fields}
    for col in columns:
        if col not in group_columns and col in numeric_columns:
            # Picks the middle element of the pushed values; the array is not
            # sorted by any stage of this pipeline.
            projection[f"median_{col}"] = {
                "$arrayElemAt": [
                    f"$med_calc_{col}",
                    {"$toInt": {"$divide": [{"$size": f"$med_calc_{col}"}, 2]}},
                ]
            }
    projection["all_unique_strings"] = {"$concatArrays": concat}
    aggregation.append({"$project": projection})
    #print(aggregation)

    try:
        # Read every result now: the temporary collection is dropped before
        # returning, so a lazily consumed cursor could lose its data.
        final_data = list(temp_collection.aggregate(aggregation))
    except Exception:
        print("Aggregation error occured, likely an issue with the .ini file configuration.")
        temp_collection.drop()
        menu(connection)
        return []
    print("Aggregation complete.")
    temp_collection.drop()
    return final_data


def store_stage(connection, data):
    """Ask for a collection name and insert each document of ``data`` into it."""
    name = clean_input("Please enter a name for the new collection: ", str)
    connection.set_collection(name)
    for doc in data:
        insert_into_collection(connection.get_coll(), doc)
    print(f"Data inserted into {name}. To view a sample of the data select that option from the menu.")
    menu(connection)


def view_document(connection):
    """Print a random sample of up to five documents from a chosen collection."""
    select_collection(connection)
    collection = connection.get_coll()
    cursor = collection.aggregate([{"$sample": {"size": 5}}])
    print("Viewing a sample of up to 5 documents.")
    for doc in cursor:
        print(doc)
    menu(connection)


if __name__ == "__main__":
    host = "localhost"
    port = 27017
    db_name = "takehome"
    # NOTE(review): the source this was recovered from is cut off after the
    # line above; presumably main() is called here - confirm against the
    # complete file.

...93 def _in(expect_value, actual_value):94 allure_value(expect_value, actual_value)95 assert str(expect_value) in actual_value96 @staticmethod97 def _notin(expect_value, actual_value):98 allure_value(expect_value, actual_value)99 assert str(expect_value) not in actual_value100def allure_value(expect_value, actual_value):101 Logger().logs_file().info("actual_value:{actual_value};expect_value:{expect_value}"102 .format(actual_value=actual_value, expect_value=expect_value))103 with allure.step("check"):104 allure.attach("预期结果:{expect_value};实际结果:{actual_value}"...

...60 return functools.partial(_compare, = NotEq62def _in(left, right):63 return left[left.isin(right)]64def _notin(left, right):65 return left[~left.isin(right)]66def Or():67 return operator.or_68def And():69 return operator.and_70def BoolOp(op, values):71 if len(values) != 2:72 raise ValueError("Can only support two comparisons")73 left, right = values74 if op is operator.and_:75 return left.align(right, join='inner')[0]76 elif op is operator.or_:77 return left.align(right, join='outer')[0]78 else:...

