Best Python code snippet using avocado_python
Day_Ahead_Imbalance_Forecast_LEAR_test.py
Source:Day_Ahead_Imbalance_Forecast_LEAR_test.py
"""Day-ahead forecast experiment with the epftoolbox LEAR model.

The script loads the ``All_data_may_final`` dataset, recalibrates a LEAR model
for every day of the test period using only the data snapshot that is
considered available on that day, stores the 24 hourly predictions, and
finally compares the sign of the predictions with the sign of the real values.
"""
from urllib.parse import urljoin
import datetime as datetime
import os
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from epftoolbox.data import read_data
from epftoolbox.evaluation import MAE, sMAPE, RMSE
from epftoolbox.models import LEAR
from sklearn.metrics import confusion_matrix

warnings.filterwarnings("ignore")

#### Activate Lear Model
instance = LEAR()


def plot_forecast_nc(forecast, train, test, mape=None, mase=None):
    """Plot the training series, the actual test series and the forecast.

    ``mape`` and ``mase`` are accepted but not used by the plot.
    """
    plt.figure(figsize=(10, 4), dpi=100)
    plt.plot(train, label='training', color='black')
    plt.plot(test, label='actual', color='black', ls='--')
    plt.plot(forecast, label='forecast', color='orange')
    plt.title('Forecast vs Actuals')
    plt.legend(loc='upper left', fontsize=8)
    plt.show()


def make_dt_index(df, timestamp_col, dt_offset=None):
    """Replace the index of ``df`` (in place) by the parsed ``timestamp_col``.

    The column is parsed with the format ``%Y-%m-%dT%H:%M:%S.%f``, optionally
    shifted by ``dt_offset``, then dropped; the frame is sorted by the new
    index and returned.
    """
    df.index = pd.to_datetime(
        df.loc[:, timestamp_col], format='%Y-%m-%dT%H:%M:%S.%f'
    )
    if dt_offset:
        df.index = df.index + dt_offset
    df.drop(timestamp_col, axis=1, inplace=True)
    df.sort_index(inplace=True)
    return df


def check_duplicate_index(df, verbose=True):
    """Report duplicated index labels and return every row that shares one."""
    dupes = df[df.index.duplicated()]
    print('{} index duplicates'.format(dupes.shape[0]))
    if verbose:
        print('duplicates are:')
        print(dupes.head(3))
    return df[df.index.duplicated(keep=False)]


def drop_duplicate_index(df):
    """Return ``df`` keeping only the first row of each duplicated index label."""
    return df[~df.index.duplicated(keep='first')]


def check_duplicate_rows(df, verbose=True):
    """Report fully duplicated rows and return every row that has a twin."""
    duplicated_bools = df.duplicated()
    print('{} row duplicates'.format(np.sum(duplicated_bools)))
    if verbose:
        # The original evaluated this expression without printing it.
        print(df[duplicated_bools].head(3))
    return df[df.duplicated(keep=False)]


def check_nans(df, verbose=True):
    """Report and return the rows of ``df`` containing at least one NaN."""
    nans = df[df.isnull().any(axis=1)]
    print('{} nan rows'.format(nans.shape[0]))
    if verbose:
        print('nan values are:')
        print(nans.head())
    return nans


def check_index_length(df, freq, verbose=True):
    """Compare the index of ``df`` with a gap-free index of frequency ``freq``.

    Returns ``(missing, ideal)`` when the lengths differ, where ``missing`` is
    the symmetric difference of both indexes, and ``(None, df.index)``
    otherwise.
    """
    # pd.DatetimeIndex(start=..., end=..., freq=...) was removed in pandas 1.0;
    # pd.date_range builds the same regular index.
    ideal = pd.date_range(start=df.index[0], end=df.index[-1], freq=freq)
    ideal_len = ideal.shape[0]
    actual_len = df.shape[0]
    print('ideal index len {} actual {} missing {}'.format(
        ideal_len, actual_len, ideal_len - actual_len))
    if ideal_len == actual_len:
        return None, df.index
    missing = set(df.index).symmetric_difference(set(ideal))
    if verbose:
        print('missing are:')
        print(missing)
    return missing, ideal


def make_df_fill_dt_index(df, freq, method='ffill'):
    """Reindex ``df`` onto the gap-free index and fill the holes with ``method``."""
    missing, ideal = check_index_length(df, freq)
    ideal_idx_df = pd.DataFrame(index=ideal)
    df = pd.concat([ideal_idx_df, df], axis=1)
    return df.fillna(method=method)


def check_dataframe(df, freq, verbose=False):
    """Run all the checks above and return their results in a dict."""
    duplicate_index = check_duplicate_index(df, verbose)
    duplicate_rows = check_duplicate_rows(df, verbose)
    nans = check_nans(df, verbose)
    missing_index, ideal_index = check_index_length(df, freq, verbose)
    return {
        'duplicate_index': duplicate_index,
        'duplicate_rows': duplicate_rows,
        'nans': nans,
        'missing_index': missing_index,
        'ideal_index': ideal_index
    }


df_may_recalibration = pd.read_csv('./datasets/All_data_may_final.csv')
df_may_recalibration['Unnamed: 0'] = pd.to_datetime(df_may_recalibration['Unnamed: 0'])
df_may_recalibration.set_index('Unnamed: 0', drop=True, inplace=True)
df_may_recalibration.index.name = None

path_datasets_folder = os.path.join('.', 'datasets')
path_recalibration_folder = os.path.join('.', 'experimental_files')
calibration_window = 364

##### Load DATASET FROM FOLDER datasets that contains all the data
dataset = 'All_data_may_final'
begin_test_date = datetime.datetime(2022, 3, 28)
end_test_date = datetime.datetime(2022, 5, 31)
years_test = None
forecast_file_name = 'fc_nl' + '_dat' + str(dataset) + '_YT' + str(years_test) + \
                     '_CW' + str(calibration_window) + '.csv'
forecast_file_path = os.path.join(path_recalibration_folder, forecast_file_name)

df_train, df_test = read_data(dataset=dataset, years_test=None, path=path_datasets_folder,
                              begin_test_date=begin_test_date, end_test_date=end_test_date)

# Empty forecast frame (one row per test day, one column per hour) and the
# real values reshaped the same way.
forecast = pd.DataFrame(index=df_test.index[::24], columns=['h' + str(k) for k in range(24)])
forecast_list = []
real_values = df_test.loc[:, ['Price']].values.reshape(-1, 24)
real_values = pd.DataFrame(real_values, index=forecast.index, columns=forecast.columns)
forecast_dates = forecast.index
model = LEAR(calibration_window=calibration_window)
RMSE_LIST = []
MAE_LIST = []
SMAPE_LIST = []
residual_list = []

datetime_published = forecast.resample('7D').last()
sdate = datetime.datetime(2022, 3, 28)
edate = datetime.datetime(2022, 5, 29)
list_datetime_changes = [
    datetime.datetime(2022, 4, 7), datetime.datetime(2022, 4, 14),
    datetime.datetime(2022, 4, 21), datetime.datetime(2022, 4, 28),
    datetime.datetime(2022, 5, 5), datetime.datetime(2022, 5, 12),
    datetime.datetime(2022, 5, 19), datetime.datetime(2022, 5, 29),
]

# Weekly publication dates 2022-04-07 ... 2022-05-26 (the first element of the
# range, 2022-03-31, is skipped).
datetime_published_index = pd.date_range(
    '2022-03-31 00:00:00', '2022-05-29 23:00:00', freq='7D')[1:9]
print(datetime_published_index)

LAST_HOUR = pd.Timedelta(hours=23)
PUBLICATION_LAG = pd.Timedelta(days=4)

# Last day covered by each data snapshot: the start date before the first
# publication, then "publication date - 4 days" for every publication. These
# are exactly the dates the original hard-coded in its nine branches
# (03-28, 04-03, 04-10, ..., 05-22).
last_available_days = [pd.Timestamp(sdate)] + [
    published - PUBLICATION_LAG for published in datetime_published_index
]
# Snapshot k = training data + test data from sdate up to the end of
# last_available_days[k].
data_snapshots = [
    pd.concat([df_train, df_test.loc[sdate:last_day + LAST_HOUR, :]], axis=0)
    for last_day in last_available_days
]
for snapshot in data_snapshots:
    print(snapshot)

index_test = df_test.loc[forecast_dates[0]:forecast_dates[0] + LAST_HOUR, :].index

for date in forecast_dates:
    # Number of publications that already happened on `date` selects the
    # snapshot (0 -> before the first publication, 8 -> after the last one).
    window = int((datetime_published_index <= date).sum())
    last_day = last_available_days[window]
    day_end = date + LAST_HOUR
    print(date)
    data_available = data_snapshots[window]
    print(data_available)

    # Rows of the day being forecast, with the real prices blanked out.
    data_available_forecast = pd.concat([df_train, df_test.loc[date:day_end, :]], axis=0)
    data_available_forecast.loc[date:day_end, 'Price'] = np.nan
    # NOTE(review): kept exactly as in the original - only the last 23 rows
    # are taken and they are re-labelled onto `last_day` with a backfill
    # reindex. Confirm this is the intended way to move the exogenous inputs.
    data_available_last = data_available_forecast.tail(23)
    data_available_last = data_available_last.reindex(
        pd.date_range(last_day, last_day + LAST_HOUR, freq='H'), method="bfill")
    print(data_available_last)

    # Replace the last 24 rows of the snapshot by the rows built above.
    data_available = pd.concat([data_available.iloc[:-24], data_available_last])
    Yp = model.recalibrate_and_forecast_next_day(
        df=data_available, next_day_date=last_day.to_pydatetime(),
        calibration_window=calibration_window)

    # Saving the current prediction
    forecast_list.append(Yp)
    forecast.loc[date, :] = Yp
    # NOTE(review): Series minus DataFrame aligns the Series index with the
    # frame's columns, so this is unlikely to be a per-hour residual. Kept
    # unchanged because `residual_list` is not read in the visible code.
    residuals = data_available.Price - forecast
    residual_list.append(residuals)

    # Computing metrics up-to-current-date
    predicted_so_far = forecast.loc[:date].values.squeeze()
    real_so_far = real_values.loc[:date].values
    mae = np.mean(MAE(predicted_so_far, real_so_far))
    MAE_LIST.append(mae)
    smape = np.mean(sMAPE(predicted_so_far, real_so_far)) * 100
    SMAPE_LIST.append(smape)
    rmse = np.mean(RMSE(predicted_so_far, real_so_far))
    RMSE_LIST.append(rmse)
    print('{} - sMAPE: {:.2f}% | MAE: {:.3f} | RMSE: {:.4f}'.format(str(date)[:10], smape, mae, rmse))
    print(data_available.tail(48))

######## Transform Forecast into dataframe and analyze
forecast.to_csv(forecast_file_path)
print(forecast.head(40))
forecast_df = forecast.copy()
forecast_df['Time'] = forecast.index

mask = (df_may_recalibration.index >= begin_test_date) & \
       (df_may_recalibration.index <= end_test_date + pd.Timedelta(hours=23))
april_may_data = df_may_recalibration.loc[mask]

# Wide (day x hour) -> long (one row per hour) with the prediction in 'Pred'.
forecast_df = (forecast_df.replace(0, np.nan)
               .set_index('Time', append=True)
               .stack()
               .reset_index()
               .rename(columns={0: 'Pred'})
               .drop(columns='level_2'))  # positional axis argument was removed in pandas 2.0
print(forecast_df)
print(april_may_data)
forecast_df = forecast_df.set_index(april_may_data.index)
forecast_df = forecast_df.drop('level_0', axis=1)
forecast_df = forecast_df.drop('Time', axis=1)
print(forecast_df)

plot_forecast_nc(forecast_df["Pred"][forecast_df.index >= '2022-4-01 00:00:00'],
                 df_train["Price"][df_train.index > '2022-5-21 00:00:00'],
                 df_test["Price"][df_test.index >= '2022-4-01 00:00:00'],
                 )
plt.plot(forecast_df)
plt.savefig('./report/LEAR/aa.png', bbox_inches='tight', dpi=300)
plt.show()

# Sign agreement between prediction and real value (1 when >= 0, else 0).
forecast_df['Real'] = april_may_data['Price']
forecast_df["Real_sign"] = [1 if int(x) >= 0 else 0 for x in np.sign(forecast_df["Real"])]
forecast_df["Prediction_sign"] = [1 if int(x) >= 0 else 0 for x in np.sign(forecast_df["Pred"])]
forecast_df.to_csv('Forecast_DA_Balancing_difference_correct.csv')
comparison_column = np.where(forecast_df["Prediction_sign"] == forecast_df["Real_sign"], True, False)
count = np.count_nonzero(comparison_column)
print("We have a total of True values : ", count)
print("Accuracy of True values compared to the total observations :", count / len(comparison_column))

cf_matrix = confusion_matrix(forecast_df["Real_sign"], forecast_df["Prediction_sign"])
ax = sns.heatmap(cf_matrix / np.sum(cf_matrix), annot=True,
                 fmt='.2%', cmap='Blues')
ax.set_title('Seaborn Confusion Matrix with labels\n\n')
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')
## Ticket labels - List must be in alphabetical order
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])
## Display the visualization of the Confusion Matrix.
# plt.savefig(f'./report/LEAR/Confusion_matrix.png',bbox_inches = 'tight', dpi = 300)
# plt.show()
#
#
# plt.plot(RMSE_LIST)
# plt.savefig(f'./report/LEAR/RMSE.png',bbox_inches = 'tight', dpi = 300)
# plt.show()
# plt.plot(MAE_LIST)
# plt.savefig(f'./report/LEAR/MAE.png',bbox_inches = 'tight', dpi = 300)
# plt.show()
# plt.plot(SMAPE_LIST)
# plt.savefig(f'./report/LEAR/SMAPE.png',bbox_inches = 'tight', dpi = 300)
# ... (the listing is truncated here)
funs.py
Source:funs.py
"""Query helpers that feed the dashboard views with sleep/activity/readiness data."""
from dashboard.models import sleep, activity, readiness
import datetime as dt
import pandas as pd
from django.db import connection

_MIDNIGHT_SUFFIX = '00:00:00'
# Positions (within the detailed sleep context list) whose values are
# converted from seconds to an "H:MM" string.
_SLEEP_DURATION_POSITIONS = (1, 2, 5, 6, 7)


def _day_string(days_ago):
    """Return the date ``days_ago`` days before now as 'YYYY-MM-DD 00:00:00'."""
    day = (dt.datetime.now() - dt.timedelta(days=days_ago)).date()
    return day.strftime('%Y-%m-%d %H:%M:%S')


def _score_with_delta(model, table, field, newest_days_ago):
    """Return ``[newest score, newest - previous day's score]`` for ``model``.

    Returns ``['NA', 'NA']`` when either day has no row in ``table``.
    """
    query = f"SELECT * FROM {table} WHERE timestamp=%s"
    try:
        newest = getattr(model.objects.raw(query, [_day_string(newest_days_ago)])[0], field)
        previous = getattr(model.objects.raw(query, [_day_string(newest_days_ago + 1)])[0], field)
    except IndexError:
        # At least one of the two days has not been synchronized yet.
        return ['NA', 'NA']
    return [newest, newest - previous]


def _strip_midnight(timestamp):
    """Remove a trailing '00:00:00' from ``timestamp``; leave other values alone.

    The previous ``rstrip('00:00:00')`` stripped *characters* ('0' and ':'),
    which turned e.g. '... 10:30:00' into '... 10:3'.
    """
    if timestamp.endswith(_MIDNIGHT_SUFFIX):
        return timestamp[:-len(_MIDNIGHT_SUFFIX)]
    return timestamp


def _first_row_as_context(table, date, echo=False):
    """Return the row of ``table`` stamped ``date`` as ``[{'name', 'value'}, ...]``.

    The ``id`` and ``timestamp`` columns are left out. An empty list is
    returned when no row matches. With ``echo`` the selected row (or the empty
    frame) is printed.
    """
    # `date` comes from the caller, so it is passed as a bound parameter
    # instead of being interpolated into the SQL text.
    df = pd.read_sql_query(f"SELECT * FROM {table} WHERE timestamp=%s", connection, params=[date])
    try:
        row = df.iloc[0, :].drop(index=['id', 'timestamp'])
    except IndexError:
        # No data synchronized for that date.
        row = None
    if echo:
        print(df if row is None else row)
    if row is None:
        return []
    return [{'name': key, 'value': value} for key, value in row.to_dict().items()]


def _recent_records(table, filter_string):
    """Return the 10 newest rows of ``table`` as ``{'timestamp', 'data'}`` records.

    ``data`` holds the column named by ``filter_string``.
    """
    df = pd.read_sql_query(f"SELECT * FROM {table} ORDER BY timestamp DESC LIMIT 10", connection)
    df = df.drop(columns=['id'])
    df['timestamp'] = df['timestamp'].apply(_strip_midnight)
    df = df[['timestamp', filter_string]]
    df.columns = ['timestamp', 'data']
    return df.to_dict('records')


def sleep_score():
    """Return ``[today's sleep score, difference to yesterday]`` or ``['NA', 'NA']``."""
    return _score_with_delta(sleep, 'dashboard_sleep', 'sleep_score', 0)


def activity_score():
    """Return ``[today's activity score, difference to yesterday]`` or ``['NA', 'NA']``."""
    return _score_with_delta(activity, 'dashboard_activity', 'activity_score', 0)


def readiness_score():
    """Return ``[readiness score, difference to the day before]`` or ``['NA', 'NA']``.

    Unlike the sleep and activity scores, the newest day queried here is
    yesterday and it is compared with the day before yesterday.
    """
    return _score_with_delta(readiness, 'dashboard_readiness', 'readiness_score', 1)


def sleep_data_detailed(date: str):
    """Return the sleep row of ``date`` as a list of name/value dicts.

    Values at the positions in ``_SLEEP_DURATION_POSITIONS`` are converted
    from seconds to a ``timedelta`` string with the seconds part cut off.
    Returns an empty list when there is no row for ``date``.
    """
    context = _first_row_as_context('dashboard_sleep', date)
    if context:
        for i in _SLEEP_DURATION_POSITIONS:
            seconds = int(context[i]['value'])
            context[i]['value'] = str(dt.timedelta(seconds=seconds))[:-3]
    return context


def get_last_7_sleep(filter_string: str):
    """Return the newest sleep rows (the query limit is 10) for one column."""
    return _recent_records('dashboard_sleep', filter_string)


def activity_data_detailed(date: str):
    """Return the activity row of ``date`` as a list of name/value dicts.

    Returns an empty list when there is no row for ``date``.
    """
    return _first_row_as_context('dashboard_activity', date, echo=True)


def get_last_7_activity(filter_string: str):
    """Return the newest activity rows (the query limit is 10) for one column."""
    return _recent_records('dashboard_activity', filter_string)


def get_readiness_data():
    """Load the readiness table and convert its first 10 rows to records."""
    df = pd.read_sql_query("SELECT * FROM dashboard_readiness", connection)
    df = df.reset_index(drop=True)
    print(df)
    df = df.iloc[:10, :]
    data = df.to_dict('records')
    # NOTE(review): the listing is truncated after this statement; the rest of
    # the function (presumably returning `data`) is not visible - confirm
    # against the real file before relying on this function.
synchronization.py
Source:synchronization.py
"""Small producer/consumer demos built on different threading primitives.

Every ``Doubler*`` class exposes ``generate`` (produce a random integer) and
``report`` (print the integer and its double); they differ only in how the
two sides are synchronized.
"""
import concurrent.futures
import queue
import random
import threading
import time
from collections import deque


class DoublerWithEvents:
    """Hand-over of one value using two events and no locks."""

    def __init__(self):
        self.waiting_for_data = threading.Event()
        self.data_available = threading.Event()
        # Start in the "slot is empty" state.
        self.waiting_for_data.set()
        self.data_available.clear()
        self.data = None

    def generate(self):
        """Wait until the slot is empty, then store a new random value."""
        if self.waiting_for_data.wait():
            self.waiting_for_data.clear()
            self.data = random.randint(0, 10)
            print(f"generated {self.data}")
            self.data_available.set()

    def report(self):
        """Wait until a value is stored, print it doubled and free the slot."""
        if self.data_available.wait():
            self.data_available.clear()
            print(f"{self.data} -> {2 * self.data}")
            self.waiting_for_data.set()


class DoublerWithEventsAndLocks:
    """Same hand-over as above, with one lock per side around the event wait."""

    def __init__(self):
        self.waiting_for_data_lock = threading.Lock()
        self.waiting_for_data = threading.Event()
        self.data_available_lock = threading.Lock()
        self.data_available = threading.Event()
        self.waiting_for_data.set()
        self.data_available.clear()
        self.data = None

    def generate(self):
        """Store a new random value once the slot is empty."""
        # Grab lock first, to avoid multiple generate functions running simultaneously
        with self.waiting_for_data_lock:
            # Wait for waiting_for_data to be set
            if self.waiting_for_data.wait():
                self.data = random.randint(0, 10)
                print(f"Generated {self.data}")
                self.waiting_for_data.clear()  # Note: order is important
                self.data_available.set()

    def report(self):
        """Print the stored value doubled, clear it and free the slot."""
        with self.data_available_lock:
            if self.data_available.wait():
                print(f"{self.data} -> {2 * self.data}")
                self.data = None
                self.data_available.clear()
                self.waiting_for_data.set()


class DoublerWithCondition:
    """Hand-over using a single condition; ``generate`` never waits."""

    def __init__(self):
        self.data_available = threading.Condition()
        self.data = None

    def generate(self):
        """Store a new random value and wake one waiting reporter."""
        with self.data_available:
            data = random.randint(0, 10)
            self.data = data
            print(f"Generated {data}.")
            self.data_available.notify()

    def report(self):
        """Wait for a value, print it doubled and clear it."""
        with self.data_available:
            while self.data is None:
                self.data_available.wait()
            print(f"{self.data} -> {2 * self.data}\n")
            self.data = None


class DoublerWithTwoConditions:
    """Hand-over using one condition per direction.

    ``data`` is only changed while both condition locks are held:
    ``generate`` takes ``data_consumed`` then ``data_available``, ``report``
    takes ``data_available`` then ``data_consumed``.
    """

    def __init__(self):
        self.data_available = threading.Condition()
        self.data_consumed = threading.Condition()
        self.data = None

    def generate(self):
        """Wait until the previous value was consumed, then store a new one."""
        with self.data_consumed:
            while self.data is not None:
                self.data_consumed.wait()
            with self.data_available:
                data = random.randint(0, 10)
                self.data = data
                print(f"Generated {data}.")
                self.data_available.notify()

    def report(self):
        """Wait for a value, print it doubled, clear it and wake a generator."""
        with self.data_available:
            while self.data is None:
                self.data_available.wait()
            print(f"{self.data} -> {2 * self.data}\n")
            with self.data_consumed:
                self.data = None
                self.data_consumed.notify()


class AndGate:
    """Two-input AND gate: each input reads its queue, a barrier joins them."""

    def __init__(self):
        # `report` runs once each time both inputs have reached the barrier.
        self.barrier = threading.Barrier(2, action=self.report)
        self.data1 = queue.Queue()
        self.data2 = queue.Queue()
        self.d1 = None
        self.d2 = None
        self.lock1 = threading.Lock()
        self.lock2 = threading.Lock()

    def input1(self):
        """Read the next value for input 1 and wait for input 2."""
        with self.lock1:
            self.d1 = self.data1.get()
            print(f"input 1 got {self.d1}")
            self.barrier.wait()

    def input2(self):
        """Read the next value for input 2 and wait for input 1."""
        with self.lock2:
            self.d2 = self.data2.get()
            print(f"input 2 got {self.d2}")
            self.barrier.wait()

    def report(self):
        """Print both inputs and their conjunction."""
        print(f"{self.d1} and {self.d2} -> {self.d1 and self.d2}")


class DoublerWithQueue:
    """Hand-over using a queue that holds at most one value."""

    def __init__(self):
        self.data_queue = queue.Queue(maxsize=1)

    def generate(self):
        """Put a new random value into the queue (blocks while it is full)."""
        data = random.randint(0, 10)
        self.data_queue.put(data)
        print(f"Generated {data}\n")

    def report(self):
        """Take a value from the queue (blocks while it is empty) and print it doubled."""
        data = self.data_queue.get()
        print(f"{data} -> {2 * data}\n")


def main():
    """Run 20 generate and 20 report calls in random order on a thread pool."""
    doubler = DoublerWithTwoConditions()
    tasks = [doubler.generate, doubler.report] * 20
    random.shuffle(tasks)
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        for t in tasks:
            executor.submit(t)


def main_and_gate():
    """Run 10 calls of each AND-gate input and feed them random booleans."""
    and_gate = AndGate()
    tasks = [and_gate.input1, and_gate.input2] * 10
    random.shuffle(tasks)
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        for t in tasks:
            executor.submit(t)
        # The queues are fed while the pool is still open: leaving the `with`
        # block waits for all tasks, and they block until they receive data.
        data = [True, False] * 10
        # Named `queues` so the imported `queue` module is not shadowed.
        queues = [and_gate.data1, and_gate.data2] * 10
        random.shuffle(data)
        random.shuffle(queues)
        for q, d in zip(queues, data):
            q.put(d)


if __name__ == "__main__":
    ...  # the body of the entry point is truncated in the listing
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!