Best Python code snippet using pandera_python
excel_splitter.py
Source:excel_splitter.py
import os

import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Border, Side, Alignment, Font
from openpyxl.styles.colors import Color

from utilities.utility_functions import (
    generate_delimiters,
    check_against_truth_threshold,
    remove_items_from_list
)

# ARGB fill colours that mark a "question" cell in column A of the source workbook.
question_cell_colors = ['FFFBE4E4', 'FFFBE4E4', 'FFEEEDEC']
non_value_colors = ['00000000', 'FFC0C0C0']
# Row labels that hold values rather than response categories.
value_storage = ['Sample Size', 'Mentions', '% Sample Size', 'Count', 'Column %', 'Mean', 'Median']
first_row_of_data = 7
column_header_row = 6


def add_formatting(cell, wrap=True, h_align='center', v_align='center', size=8, bold=False,
                   cell_color=None, float=True):
    """
    Apply alignment, font, border, optional fill and a number format to a cell.

    :param cell: openpyxl cell to format in place.
    :param wrap: whether text wraps inside the cell.
    :param h_align: horizontal alignment name.
    :param v_align: vertical alignment name.
    :param size: font size.
    :param bold: whether the font is bold.
    :param cell_color: colour for a solid fill; ``None`` leaves the fill untouched.
    :param float: for values >= 1, use one decimal place when true, none otherwise.
        (The name shadows the builtin but is kept for keyword-argument callers.)
    """
    cell.alignment = Alignment(horizontal=h_align, vertical=v_align, wrap_text=wrap)
    cell.font = Font(bold=bold, size=size)
    cell.border = Border(top=None, left=None, right=None, bottom=None)
    if cell_color is not None:
        # The fill was previously constructed and discarded; it has to be assigned.
        cell.fill = PatternFill(fill_type='solid', fgColor=cell_color)
    try:
        below_one = cell.value < 1
    except TypeError:
        # Non-numeric content (text, None): no number format applies.
        return
    if below_one:
        cell.number_format = '0%'
    else:
        cell.number_format = '0.0' if float else '0'


def create_mergable_list(lst):
    """
    Forward-fill ``None`` entries with the most recent non-``None`` value.

    Entries before the first non-``None`` value stay ``None``.
    """
    most_recent_value = None
    new_lst = []
    for x in lst:
        if x is not None:
            most_recent_value = x
        new_lst.append(most_recent_value)
    return new_lst


def move_worksheet(wb, worksheet_to_move, new_position):
    """
    Move the worksheet named ``worksheet_to_move`` to index ``new_position``.

    Reorders the workbook's private ``_sheets`` list.
    """
    sheets = list(wb._sheets)
    sheet = sheets.pop(wb.sheetnames.index(worksheet_to_move))
    sheets.insert(new_position, sheet)
    wb._sheets = sheets


def create_data_worksheets(file, data_dict):
    """
    Write each entry's ``'frame'`` to its own sheet (``T1``, ``T2``, ...) of ``file``.

    Data starts at spreadsheet row 4 (``startrow=3``), leaving room for the
    title rows added later.
    """
    # The engine must be set on the writer: ``to_excel`` ignores its own
    # ``engine`` argument when it is handed an existing ExcelWriter.
    with pd.ExcelWriter(file, engine='openpyxl') as writer:
        for sheet_number, info in enumerate(data_dict.values(), start=1):
            info['frame'].to_excel(
                writer,
                sheet_name=f'T{sheet_number}',
                startrow=3,
                index_label=False
            )


def general_format_all_cells(ws, row_start=4):
    """Apply the default formatting to every cell from ``row_start`` downwards."""
    for col in ws.iter_cols(min_row=row_start):
        for cell in col:
            add_formatting(cell)


def format_series_headers(ws, row_start=4, row_end=5):
    """Format the header rows; the first two cells of each column are bold."""
    for col in ws.iter_cols(min_row=row_start, max_row=row_end):
        for cell_idx, cell in enumerate(col):
            add_formatting(cell, h_align='center', v_align='bottom', bold=cell_idx < 2)


def format_bases(ws, row_start=6, row_end=6):
    """Format the base row(s) as centred values without decimals."""
    for col in ws.iter_cols(min_row=row_start, max_row=row_end):
        for cell in col:
            add_formatting(cell, h_align='center', v_align='center', float=False)


def format_category_headers(ws, is_multiindex=False):
    """Format the index column(s) (two when ``is_multiindex``) bold and left-aligned."""
    for col in ws.iter_cols(min_col=1, max_col=2 if is_multiindex else 1):
        for cell in col:
            add_formatting(cell, h_align='left', v_align='center', bold=True)


def add_and_format_defaults(ws, question, is_multiindex):
    """
    Write the fixed cells (question title, 'bar', '*Sort', 'Base') and format them.

    The first entry (the question in A1) is bold at size 12; the others use size 8.
    """
    defaults = {
        'A1': {'val': question},
        'A3': {'val': 'bar'},
        'B3': {'val': '*Sort'},
        'B6' if is_multiindex else 'A6': {'val': 'Base'}
    }
    for idx, (place, contents) in enumerate(defaults.items()):
        is_title = idx == 0
        ws[place] = contents['val']
        add_formatting(
            ws[place],
            h_align='right' if contents['val'] == 'Base' else 'left',
            wrap=False,
            bold=is_title,
            size=12 if is_title else 8
        )


def adjust_widths(ws, columns, width=20):
    """Set the width of each listed column letter."""
    for column in columns:
        ws.column_dimensions[column].width = width


def adjust_heights(ws):
    """Set row 4 and every row from 6 to the last used row to a height of 25."""
    ws.row_dimensions[4].height = 25
    for row in range(6, (ws.max_row + 1)):
        ws.row_dimensions[row].height = 25


def preselect_data(ws, is_multiindex):
    """
    Fill marker cells and the leading columns of the data area with theme colours.
    """
    colors = {
        'A1': Color(theme=4),
        'C6' if is_multiindex else 'B6': Color(theme=5),
        'A8' if is_multiindex else 'B5': Color(theme=7),
    }
    data_selection = Color(theme=7)
    for cell, color in colors.items():
        ws[cell].fill = PatternFill("solid", fgColor=color)
    for col in ws.iter_cols(min_row=8 if is_multiindex else 7, max_col=3 if is_multiindex else 2):
        for cell in col:
            cell.fill = PatternFill("solid", fgColor=data_selection)


def format_data_worksheets(wb, data_dict):
    """
    Run every formatting step on each data sheet, pairing sheets with questions in order.
    """
    for sheet, question in zip(wb.sheetnames, data_dict):
        is_multiindex = isinstance(data_dict[question]['frame'].index, pd.MultiIndex)
        ws = wb[sheet]
        if not is_multiindex:
            ws.delete_rows(7)
        general_format_all_cells(ws)
        format_series_headers(ws)
        format_bases(ws)
        format_category_headers(ws, is_multiindex)
        add_and_format_defaults(ws, question, is_multiindex)
        adjust_widths(ws, ['A', 'B'] if is_multiindex else ['A'])
        adjust_heights(ws)
        preselect_data(ws, is_multiindex)


def create_contents_page(wb, entries):
    """Add a 'Contents' sheet listing ``T<n>`` next to each question and move it first."""
    sheet = wb.create_sheet(title='Contents')
    sheet.append(['Contents'])
    for sheet_number, question in enumerate(entries, start=1):
        sheet.append([f'T{sheet_number}', question])
    move_worksheet(wb, 'Contents', 0)


def create_new_workbook(file_name, data_dict):
    """
    Write ``data_dict`` to ``<file_name stem>_output<ext>``, then format and save it.
    """
    # Split on the real extension so a name without '.xlsx' can never resolve
    # to the input path, and '.xlsx' elsewhere in the path is left alone.
    root, ext = os.path.splitext(file_name)
    output_file_name = f'{root}_output{ext or ".xlsx"}'
    create_data_worksheets(output_file_name, data_dict)
    wb = load_workbook(output_file_name)
    format_data_worksheets(wb, data_dict)
    create_contents_page(wb, data_dict.keys())
    wb.save(output_file_name)


def clean_up_questions(questions, question_indices):
    """
    Drop questions that repeat the wording of the question before them.

    A question is rejected when ``check_against_truth_threshold`` accepts the
    word-by-word comparison with the previous question at 0.75.

    :return: ``(questions, question_indices)`` filtered in step with each other.
    """
    kept_questions, kept_indices = [], []
    previous_words = None
    for question, question_index in zip(questions, question_indices):
        words = question.split()
        is_repeat = previous_words is not None and check_against_truth_threshold(
            [x == y for x, y in zip(previous_words, words)], 0.75
        )
        # Filter by position so both lists always stay aligned (the old code
        # recorded the index in the question reject list, removing nothing).
        if not is_repeat:
            kept_questions.append(question)
            kept_indices.append(question_index)
        # NOTE(review): the listing lost its indentation; updating on every
        # iteration is assumed here - confirm against the original file.
        previous_words = words
    return kept_questions, kept_indices


def comparative_separation(text, text_to_compare_against):
    """
    Split ``text`` into words matching the comparison text position-by-position
    and the remaining words.

    :return: ``(matching_text, excluded_text)``; ``excluded_text`` is ``None``
        when the remainder is empty or a single character.
    """
    text_list = text.split()
    comparative_list = text_to_compare_against.split()
    # zip() stops at the shorter list, so extra words in ``text`` count as
    # non-matching instead of raising IndexError.
    new_text_list = [word for word, other in zip(text_list, comparative_list) if word == other]
    new_text = ' '.join(new_text_list)
    excluded_text = ' '.join(word for word in text_list if word not in new_text_list)
    if len(excluded_text) <= 1:
        excluded_text = None
    return new_text, excluded_text


def clean_up_question(question):
    """
    Split a raw question label into prefix, question and optional statement.

    Every delimiter from ``generate_delimiters(suffix='\\t')`` is normalised to
    ``': '`` before splitting.

    :raises IndexError: if the label has fewer than two components.
    """
    for delimiter in generate_delimiters(suffix='\t'):
        question = question.replace(delimiter, ': ')
    components = [part.strip() for part in question.split(': ')]
    return {
        'prefix': components[0],
        'question': components[1],
        'statement': components[2] if len(components) > 2 else None,
    }


def clean_up_frame(df, statement=None):
    """
    Index a question block by response, keeping only percentage/mean/median rows.

    :param df: block of rows for one question; its first column holds the labels.
    :param statement: if given, the index becomes a (Statement, Response) MultiIndex.
    """
    df = df.set_index(df.columns[0])
    labels = df.index.tolist()
    value_row_names = [x for x in value_storage if x in labels]
    value_row_titles = [x for x in value_row_names if '%' in x or x in ['Median', 'Mean']]
    responses = remove_items_from_list(labels, value_row_names)
    responses = value_row_titles if len(responses) == 0 else responses
    # Copy so the column assignments below do not write into a slice of ``df``.
    df = df[df.index.isin(value_row_titles)].copy()
    if statement is not None:
        statement_array = [statement for _ in responses]
        df['Statement'] = statement_array
        df.index = pd.MultiIndex.from_arrays([statement_array, responses], names=('Statement', 'Response'))
    else:
        df['Responses'] = responses
        df = df.set_index('Responses', drop=True)
    df = df[[x for x in df.columns if x != 'Responses']]
    return df


def get_list_of_fills(ws):
    """Return the 0-based row numbers whose column-A fill is a question colour."""
    return [
        row_idx
        for row_idx, row in enumerate(ws.iter_rows(max_col=1))
        for cell in row
        if cell.fill.start_color.rgb in question_cell_colors
    ]


def df_question_scrubber(df):
    """
    Strip the leading words shared by every index label.

    :return: ``(df, shared_prefix)``; the prefix is ``None`` when nothing is shared.
    :raises AttributeError: if an index label is not a string.
    """
    index = df.index.tolist()
    if not index:
        # Nothing to compare; avoids IndexError on ``split_qs[0]`` below.
        return df, None
    split_qs = [x.split() for x in index]
    split_idx = 0
    for group_idx, group in enumerate(zip(*split_qs)):
        if len(set(group)) != 1:
            split_idx = group_idx
            break
    new_index = [' '.join(words[split_idx:]) for words in split_qs]
    df = df.rename(index=dict(zip(index, new_index)))
    new_q = ' '.join(split_qs[0][:split_idx])
    return df, (new_q or None)


def data_cleanup(data_dict):
    """
    Scrub every frame and re-key entries by the shared prefix when one is found.

    Frames whose index labels are not strings are kept under their original key.
    """
    temp_sheets = {}
    for question, info in data_dict.items():
        try:
            info['frame'], new_q = df_question_scrubber(info['frame'])
        except AttributeError:
            new_q = None
        temp_sheets[question if new_q is None else new_q] = info
    return temp_sheets


def split_excel(file_name):
    """
    Splits original df into new dataframes to be shuffled and reorganized later
    """
    # NOTE(review): this function is cut off in the listing. The visible
    # statements are reproduced unchanged; nothing past the cut is reconstructed.
    og_wb = load_workbook(filename=file_name)
    new_sheets = {}
    for sheet in og_wb.worksheets:
        headers = ['Question Values'] + [x.value for x in sheet[column_header_row]][1:]
        cat_headers = create_mergable_list([x.value for x in sheet[column_header_row - 1]][1:])
        cat_headers = [x.split(': ') for x in cat_headers]
        cat_headers = ['Question Values'] + [x[1] for x in cat_headers]
        df = pd.DataFrame(sheet.values)
        sample_size_indices = df.index[df[0] == 'Sample Size'].tolist()
        sample_sizes = df.loc[sample_size_indices[0]].values.tolist()
        question_indices = get_list_of_fills(sheet)
        questions = [x[0] for x in df.filter(items=question_indices, axis=0).values.tolist()]
        df = df.iloc[first_row_of_data:]
        df.columns = pd.MultiIndex.from_arrays([cat_headers, headers, sample_sizes])
        for index_idx, index in enumerate(question_indices):
            question_values = clean_up_question(questions[index_idx])
            # most_recent_question = questions[index_idx]
            if index_idx == len(question_indices) - 1:
                q_df = df.loc[(index + 1):]
            else:
                next_index = (question_indices[index_idx + 1] - 1)
                q_df = df.loc[(index + 1):next_index]
            q_df = clean_up_frame(q_df, question_values['statement'])
            if new_sheets.get(question_values['question']):
                current_frame = new_sheets[question_values['question']]['frame']
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # pd.concat([current_frame, q_df]) is the replacement.
                new_sheets[question_values['question']]['frame'] = current_frame.append(q_df)
            else:
                new_sheets[question_values['question']] = {'frame': q_df, 'sample': sample_sizes}
    # NOTE(review): placement after the sheet loop is assumed - indentation was
    # lost in the listing; confirm against the original file.
    new_sheets = data_cleanup(new_sheets)
    ...  # listing truncated here in SOURCE
utils.py
Source:utils.py
# NOTE: the listing starts in the middle of a function whose `def` line is not
# visible. The fragment is preserved as a comment rather than reconstructed:
#
#     ...
#     This function returns a list of all the tuples of the form (a_1, ..., a_p)
#     with a_i between 1 and n-1. These tuples serve as multiindices for tensors.
#     """
#     return list(itertools.product(range(n), repeat=p))


def is_multiindex(multiindex, n, c_dimension):
    """
    This function determines if a tuple is a multiindex or not
    according to these rules:
    1. () is a multiindex of length 0 (i.e. if the covariant or contravariant dimension
    is 0, the empty tuple is the only 0-multiindex)
    2. The length of a multiindex must be equal to the c_dimension
    3. Each value in the multiindex varies between 0 and n-1.
    """
    if not isinstance(multiindex, tuple) or len(multiindex) != c_dimension:
        return False
    return all(
        isinstance(value, (int, float)) and 0 <= value < n
        for value in multiindex
    )


def _get_matrix_of_basis_change(basis1, basis2, _dict, jacobian=True):
    """
    This is an internal function. It is used in the change_basis method
    for tensor objects. It computes the matrix that represents the
    identity function from (V, basis1) to (V, basis2). It does so
    using derivatives.

    For example, for the variables
    basis1 = [e0, e1, e2, e3]
    basis2 = [f0, f1, f2, f3]
    _dict = {
        e0: f0 + f1,
        e1: f1,
        e2: f1 + f3,
        e3: f2
    }
    the resulting matrix would be
    [[1, 1, 0, 0],
     [0, 1, 0, 0],
     [0, 1, 0, 1],
     [0, 0, 1, 0]]
    if the jacobian keyword is set to True, and its transpose if
    it is false. The transpose works when you're trying to change
    basis in the algebraic sense (instead of the geometrical sense).

    Raises ValueError when the resulting matrix has determinant 0.
    """
    dim = len(basis1)
    L = sympy.zeros(dim)
    for i in range(dim):
        for j in range(dim):
            if jacobian:
                L[i, j] = _dict[basis1[i]].diff(basis2[j])
            else:
                L[i, j] = _dict[basis1[j]].diff(basis2[i])
    if L.det() == 0:
        raise ValueError("The transformation is not invertible.")
    return L


def _is_valid_key(key, dim, ct_dim, c_dim):
    """
    This is an internal function, it checks whether a given key (i.e. a pair
    of multiindices) is a valid key for certain dimension dim, contravariant dimension
    ct_dim and covariant dimension c_dim. It does so using the is_multiindex function.
    """
    # Shorthand keys may be bare integers; they are simply "not a valid pair"
    # rather than a TypeError from len().
    if not isinstance(key, tuple) or len(key) != 2:
        return False
    a, b = key
    return is_multiindex(a, dim, ct_dim) and is_multiindex(b, dim, c_dim)


def _dict_completer_for_tensor(_dict, _type, dim):
    """
    This function checks that the _dict is in proper form and completes in certain cases.
    Those cases are:
    - If one of the dimensions is 0, it is allowed to put only one multiindex instead
    of a pair.
    - if one of the dimensions is 1, it is allowed to put an integer instead of a
    1-multiindex.

    An empty _dict is completed to a single all-zero key with value 0.
    Raises ValueError for any key that cannot be completed.
    """
    ct_dim = _type[0]
    c_dim = _type[1]
    new_dict = {}

    if _dict == {}:
        return {
            (tuple(0 for _ in range(ct_dim)), tuple(0 for _ in range(c_dim))): 0
        }

    if ct_dim > 0 and c_dim == 0:
        for key, value in _dict.items():
            if _is_valid_key(key, dim, ct_dim, c_dim):
                new_dict[key] = value
            elif ct_dim == 1 and isinstance(key, int):
                new_dict[((key,), ())] = value
            elif is_multiindex(key, dim, ct_dim):
                new_dict[(key, ())] = value
            else:
                raise ValueError(
                    "Can't extend key {} because it isn't a {}-multiindex".format(
                        key, ct_dim
                    )
                )
        return new_dict

    if ct_dim == 0 and c_dim > 0:
        for key, value in _dict.items():
            if _is_valid_key(key, dim, ct_dim, c_dim):
                new_dict[key] = value
            elif c_dim == 1 and isinstance(key, int):
                new_dict[(), (key,)] = value
            elif is_multiindex(key, dim, c_dim):
                new_dict[((), key)] = value
            else:
                raise ValueError(
                    "Can't extend key {} because it isn't a {}-multiindex".format(
                        key, c_dim
                    )
                )
        return new_dict

    if ct_dim == 1 and c_dim > 0:
        for key, value in _dict.items():
            if _is_valid_key(key, dim, ct_dim, c_dim):
                new_dict[key] = value
            elif isinstance(key, tuple) and len(key) == 2:
                i, b = key
                if isinstance(i, int) and isinstance(b, int):
                    new_dict[((i,), (b,))] = value
                elif isinstance(i, int) and is_multiindex(b, dim, c_dim):
                    new_dict[(i,), b] = value
                else:
                    raise ValueError(
                        "{} isn't an integer or {} isn't a {}-multiindex (or int).".format(
                            i, b, c_dim
                        )
                    )
            else:
                raise ValueError("There should only be two things in {}".format(key))
        return new_dict

    if ct_dim > 0 and c_dim == 1:
        for key, value in _dict.items():
            if _is_valid_key(key, dim, ct_dim, c_dim):
                new_dict[key] = value
            elif isinstance(key, tuple) and len(key) == 2:
                a, j = key
                if isinstance(a, int) and isinstance(j, int):
                    new_dict[(a,), (j,)] = value
                elif is_multiindex(a, dim, ct_dim) and isinstance(j, int):
                    new_dict[a, (j,)] = value
                else:
                    raise ValueError(
                        "{} should be an integer and {} should be a {}-multiindex (or int in case 1).".format(
                            j, a, ct_dim
                        )
                    )
            else:
                # Previously such keys were dropped silently; the mirrored
                # branch above raises, so this one does too.
                raise ValueError("There should only be two things in {}".format(key))
        return new_dict

    for key in _dict:
        if not _is_valid_key(key, dim, ct_dim, c_dim):
            raise ValueError(
                "Key {} is not compatible with the dimensions".format(key)
            )
    return _dict


def _symmetry_completer(_dict):
    new_dict = _dict.copy()
    ...  # listing truncated here in SOURCE
pandas.py
Source:pandas.py
...12 :return:13 """14 # index should be of same type15 is_multiindex = lambda idx: isinstance(idx, pd.MultiIndex)16 if is_multiindex(df_a.index) != is_multiindex(df_b.index):17 return False18 # index dtypes should be the same19 get_multiindex_dtypes = lambda idx: [idx.get_level_values(i).dtype for i in range(len(idx.levels))]20 if is_multiindex(df_a.index):21 df_a_column_dtypes = get_multiindex_dtypes(df_a.index)22 df_b_column_dtypes = get_multiindex_dtypes(df_b.index)23 if df_a_column_dtypes != df_b_column_dtypes:24 return False25 else:26 if df_a.index.dtype != df_b.index.dtype:27 return False28 # index names should match29 if df_a.index.names != df_b.index.names:30 return False31 # indexes should be disjunct in some cases32 if indices_must_be_disjunct and not df_a.index.intersection(df_b.index).empty:33 return False34 # column index should be of same type35 if is_multiindex(df_a.columns) != is_multiindex(df_b.columns):36 return False37 # index dtypes should be the same38 if is_multiindex(df_a.columns):39 df_a_column_dtypes = get_multiindex_dtypes(df_a.columns)40 df_b_column_dtypes = get_multiindex_dtypes(df_b.columns)41 if df_a_column_dtypes != df_b_column_dtypes:42 return False43 else:44 if df_a.columns.dtype != df_b.columns.dtype:45 return False46 # column names must match47 if df_a.columns.names != df_b.columns.names:48 return False...
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g., Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!