Best Python code snippet using refurb_python
check_uploaded_files.py
Source:check_uploaded_files.py
"""
These functions make sure that only studies with correctly formatted files are
saved and allowed to be analysed.
"""
import os
import shutil

import numpy as np
import pandas as pd
from werkzeug.utils import secure_filename

from frontend import app
from frontend.view_functions import get_user_folder


# -----------------------------------------------------------------------------
# CHECK FILES MAIN FUNCTION
# -----------------------------------------------------------------------------
def check_files(user_data_folder, files_dict, form):
    """
    Checks if the uploaded files have the correct format.

    Every dataset is opened, reduced to its numeric columns, validated against
    the limits in ``app.config``, median-imputed and written back over the
    uploaded file. On the first failure the study folder is removed with
    ``clear_up_study`` and checking stops.

    :param user_data_folder: folder the file names in ``files_dict`` are
        joined onto.
    :param files_dict: maps ``'dataset1'`` (and ``'dataset2'`` when a second
        dataset is checked) to the uploaded file names.
    :param form: submitted form; ``form.autocorr.data`` and
        ``form.study_name.data`` are read.
    :return: ``(ok, format_errors)`` where ``ok`` is ``True`` only if every
        check passed and ``format_errors`` maps a dataset key to a
        one-element list holding the error message.
    """
    # -------------------------------------------------------------------------
    # SET UP OF VARIABLES
    format_errors = dict()
    # this will show a top panel on the form, informing the user
    format_errors['misformatted'] = ''
    # NOTE(review): the local flag is the negation of the form field, and the
    # second dataset is checked when this flag is falsy -- confirm the
    # meaning of form.autocorr against the form definition.
    autocorr = not form.autocorr.data
    study_folder = secure_filename(form.study_name.data)

    def fail(key, message):
        # Record the error, delete the uploaded study, report failure.
        format_errors[key] = [message]
        clear_up_study(study_folder)
        return False, format_errors

    # -------------------------------------------------------------------------
    # CHECKING DATASETS
    # -------------------------------------------------------------------------
    datasets = ['dataset1']
    samples = []
    if not autocorr:
        datasets.append('dataset2')

    for dataset in datasets:
        # check if we can open the file; a missing key in files_dict is
        # reported the same way as an unreadable file
        try:
            dataset_path = os.path.join(user_data_folder, files_dict[dataset])
            df, sep = open_file(dataset_path)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed
            return fail(dataset, 'Could not open this file.')

        # check if we have enough numeric columns in all data files
        df_numeric = df.select_dtypes(include=[np.number])
        # save row names of datasets for the shared-sample check below
        samples.append(df_numeric.index)
        min_numeric = app.config['MINIMUM_FEATURES']
        if df_numeric.shape[1] < min_numeric:
            return fail(dataset,
                        'Datasets must have at least %s numeric'
                        ' columns.' % min_numeric)

        # check dataset dimensions
        if (df.shape[0] > app.config['MAX_SAMPLES'] or
                df.shape[1] > app.config['MAX_FEATURES']):
            return fail(dataset,
                        'The dimensions of this dataset exceed '
                        'the supported maximum.')

        # impute missing values with median
        df_numeric = df_numeric.fillna(df_numeric.median())
        # save imputed, all-numeric dataset over the uploaded file
        df_numeric.to_csv(dataset_path, sep=sep)

    # if we have two datasets check if we have enough intersecting samples
    min_intersecting = app.config['INTERSECTING_SAMPLES']
    if not autocorr:
        samples_intersect = np.intersect1d(samples[0], samples[1])
        # strictly fewer than the minimum is an error: exactly the minimum
        # satisfies the "at least" wording of the message
        if samples_intersect.shape[0] < min_intersecting:
            # the error is attached to the last dataset that was checked
            return fail(datasets[-1],
                        'Datasets must have at least %s shared '
                        'samples.' % min_intersecting)

    # everything went fine, every file is checked
    return True, format_errors


# -----------------------------------------------------------------------------
# HELPER FUNCTIONS
# -----------------------------------------------------------------------------
def clear_up_study(study_folder):
    """
    Deletes the uploaded files if they are mis-formatted.

    The study folder is looked up inside the folder returned by
    ``get_user_folder()``; nothing happens if it does not exist.

    :param study_folder: name of the study folder to remove.
    """
    user_folder = get_user_folder()
    folder_to_delete = os.path.join(user_folder, study_folder)
    if os.path.exists(folder_to_delete):
        shutil.rmtree(folder_to_delete)


def open_file(file_path, **kwargs):
    """
    Opens files based on their file extension in pandas
    """
    filename, extension = os.path.splitext(file_path)
    # '.txt' files are read as tab separated, everything else as comma
    # separated
    if extension == '.txt':
        sep = '\t'
    else:
        sep = ','
    file = pd.read_csv(file_path, sep=sep, index_col=0, **kwargs)
    # NOTE(review): the listing this module was taken from is truncated at
    # this point. check_files unpacks the result as ``df, sep``, so the
    # missing tail presumably returns ``(file, sep)`` -- restore it from the
    # original file before use.
    ...
Learn to execute automation testing from scratch with LambdaTest Learning Hub. It covers everything from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!