Best Python code snippet using autotest_python
LHS_NLP_ML.py
Source:LHS_NLP_ML.py
#!/usr/bin/env python
# coding: utf-8

# In[14]:

import pandas as pd

df_training_tweet = pd.read_csv("/Users/arjunanandapadmanabhan/Downloads/wn22_data/wn22_PA_training_tweets.txt", sep=",")
df_labels = pd.read_csv("/Users/arjunanandapadmanabhan/Downloads/wn22_data/wn22_PA_training_labels.txt", sep=",")
final_df = pd.merge(df_training_tweet, df_labels, on="TweetID")
final_df

df_testing_tweet = pd.read_csv("/Users/arjunanandapadmanabhan/Downloads/wn22_data/wn22_PA_testing_tweets.txt", sep=",")
df_testing_tweet

import re
import nltk
from nltk.corpus import stopwords

def clean_text(df, text_field):
    # Lowercase and strip mentions, digits, non-alphanumerics, URLs, and "rt" prefixes.
    df[text_field] = df[text_field].str.lower()
    df[text_field] = df[text_field].apply(
        lambda x: re.sub(r"(@[A-Za-z0-9]+)|(\d+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", x))
    return df

test_clean = clean_text(df_testing_tweet, "Tweet")
train_clean = clean_text(final_df, "Tweet")
train_clean.Tweet[4]

import pandas as pd
import numpy as np
import nltk
import string
import contractions
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer

# Expand contractions, tokenize, lowercase, and remove punctuation and stopwords.
train_clean['no_contract'] = train_clean['Tweet'].apply(lambda x: [contractions.fix(word) for word in x.split()])
train_clean['Tweet'] = [' '.join(map(str, l)) for l in train_clean['no_contract']]
test_clean['no_contract'] = test_clean['Tweet'].apply(lambda x: [contractions.fix(word) for word in x.split()])
test_clean['Tweet'] = [' '.join(map(str, l)) for l in test_clean['no_contract']]

train_clean['tokenized'] = train_clean['Tweet'].apply(word_tokenize)
train_clean['tokenized'] = train_clean['tokenized'].apply(lambda x: [word.lower() for word in x])
test_clean['tokenized'] = test_clean['Tweet'].apply(word_tokenize)
test_clean['tokenized'] = test_clean['tokenized'].apply(lambda x: [word.lower() for word in x])

punc = string.punctuation
train_clean['no_punc'] = train_clean['tokenized'].apply(lambda x: [word for word in x if word not in punc])
test_clean['no_punc'] = test_clean['tokenized'].apply(lambda x: [word for word in x if word not in punc])

stop_words = set(stopwords.words('english'))
train_clean['tokenized'] = train_clean['no_punc'].apply(lambda x: [word for word in x if word not in stop_words])
test_clean['tokenized'] = test_clean['no_punc'].apply(lambda x: [word for word in x if word not in stop_words])

# POS-tag the tokens, map Penn Treebank tags to WordNet tags, then lemmatize.
train_clean['pos_tags'] = train_clean['tokenized'].apply(nltk.tag.pos_tag)
test_clean['pos_tags'] = test_clean['tokenized'].apply(nltk.tag.pos_tag)

def get_wordnet_pos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

train_clean['wordnet_pos'] = train_clean['pos_tags'].apply(lambda x: [(word, get_wordnet_pos(pos_tag)) for (word, pos_tag) in x])
test_clean['wordnet_pos'] = test_clean['pos_tags'].apply(lambda x: [(word, get_wordnet_pos(pos_tag)) for (word, pos_tag) in x])

wnl = WordNetLemmatizer()
train_clean['lemmatized'] = train_clean['wordnet_pos'].apply(lambda x: [wnl.lemmatize(word, tag) for word, tag in x])
test_clean['lemmatized'] = test_clean['wordnet_pos'].apply(lambda x: [wnl.lemmatize(word, tag) for word, tag in x])
train_clean['Tweet'] = [' '.join(map(str, l)) for l in train_clean['lemmatized']]
test_clean['Tweet'] = [' '.join(map(str, l)) for l in test_clean['lemmatized']]

# Can be used to check the frequency of a term
# d = {}
# for word in train_clean['Tweet']:
#     for item in word.split():
#         if item in d:
#             d[item] = d[item] + 1
#         else:
#             d[item] = 1
# for i, word in enumerate(train_clean['Tweet']):
#     for item in word.split():
#         if d[item] <= 10:
#             train_clean['Tweet'][i] = train_clean['Tweet'][i].replace(item, '')
#             train_clean['Tweet'][i] = re.sub(' +', ' ', train_clean['Tweet'][i])
# train_clean['Tweet'][0]

# Upsample the minority class so both labels are equally represented.
from sklearn.utils import resample

train_majority = train_clean[train_clean.Label == 0]
train_minority = train_clean[train_clean.Label == 1]
train_minority_upsampled = resample(train_minority,
                                    replace=True,
                                    n_samples=len(train_majority),
                                    random_state=123)
train_upsampled = pd.concat([train_minority_upsampled, train_majority])
train_upsampled['Label'].value_counts()

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
# from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
# from sklearn import tree

# Alternative pipelines that were tried and left commented out:
# pipeline_svc = Pipeline([
#     ('vect', CountVectorizer()),
#     ('tfidf', TfidfTransformer()),
#     ('nb', SGDClassifier(learning_rate='constant', eta0=0.96, epsilon=0.0004, max_iter=5000, validation_fraction=0.8, loss='log')),
# ])
# pipeline_svc = Pipeline([
#     ('vect', CountVectorizer()),
#     ('tfidf', TfidfTransformer()),
#     ('nb', MLPClassifier(hidden_layer_sizes=(2,), solver='adam', learning_rate='adaptive', max_iter=1000, epsilon=1e-6, tol=0.0001))
# ])
# pipeline_svc = Pipeline([
#     ('vect', CountVectorizer(ngram_range=(1, 3))),
#     ('tfidf', TfidfTransformer()),
#     ('nb', svm.SVC(kernel='poly', C=2.5, degree=2, coef0=0.18, break_ties=True))
# ])

pipeline_svc = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('nb', svm.SVC(gamma='scale'))
])

# pipeline_svc = Pipeline([
#     ('vect', CountVectorizer()),
#     ('tfidf', TfidfTransformer()),
#     ('nb', GradientBoostingClassifier(n_estimators=10000, subsample=1))
# ])

# Hyperparameter search over the SGDClassifier pipeline, left commented out:
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import f1_score
# from numpy import arange
# ep = [0.001, 0.0002, 0.0003, 0.0004, 0.0007, 0.005, 0.03, 0.01, 0.003, 0.000004, 0.00000006, 0.005]
# max_num = 0.0
# ep_max = 0.0
# et_max = 0.0
# for e in ep:
#     for et in arange(0.01, 1, 0.01):
#         pipeline_svc = Pipeline([
#             ('vect', CountVectorizer()),
#             ('tfidf', TfidfTransformer()),
#             ('nb', SGDClassifier(learning_rate='constant', eta0=et, epsilon=e, max_iter=5000, validation_fraction=0.8, loss='log')),
#         ])
#         X_train, X_test, y_train, y_test = train_test_split(train_upsampled['Tweet'], train_upsampled['Label'], random_state=0)
#         model = pipeline_svc.fit(X_train, y_train)
#         y_predict = model.predict(X_test)
#         x = f1_score(y_test, y_predict)
#         if x > max_num:
#             max_num = x
#             ep_max = e
#             et_max = et
# print(max_num, ep_max, et_max)

# In[124]:

# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(train_upsampled['Tweet'], train_upsampled['Label'], random_state=123)

# In[137]:

X_train = train_upsampled['Tweet']
y_train = train_upsampled['Label']

# In[138]:

model = pipeline_svc.fit(X_train, y_train)
# y_predict = model.predict(X_test)
# from sklearn.metrics import f1_score
# f1_score(y_test, y_predict)

# In[140]:

x_valid = df_testing_tweet['Tweet']
y_predict_1 = model.predict(x_valid)
y_predict_1

# In[141]:

# from sklearn.feature_extraction.text import TfidfVectorizer
# tfidf_vec = TfidfVectorizer()
# x_training = tfidf_vec.fit_transform(X_train)
# x_validation = tfidf_vec.transform(X_test)

# In[142]:

# from sklearn import svm
# model_svm = svm.SVC()
# model_svm.fit(x_training, y_train)

# In[143]:

# pred_svm = model_svm.predict(x_validation)

# In[144]:

# from sklearn.metrics import f1_score
# f1_score(y_test, pred_svm)

# In[145]:

# x_valid = df_testing_tweet['Tweet']
# x_test = tfidf_vec.transform(x_valid)
# y_predict_1 = model_svm.predict(x_test)
# y_predict_1

# In[146]:

pred = pd.DataFrame(y_predict_1)
pred.columns = ['Label']
pred

data = [df_testing_tweet['TweetID'], pred['Label']]
headers = ["TweetID", "Label"]
Final = pd.concat(data, axis=1, keys=headers)
Final.to_csv(r'/Users/arjunanandapadmanabhan/Downloads/wn22_data/output_2.txt', header=True, index=None, sep=',', mode='a')

# Confusion Matrix
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import itertools
import numpy as np

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# Note: y_test and y_predict only exist if the held-out split in the commented-out cells above is used.
classes = list(set(y_test))
cm = confusion_matrix(y_test, y_predict, labels=classes)
...
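The confusion-matrix cell at the end of this notebook assumes a held-out split (y_test) that is only created in the commented-out cells. A minimal evaluation sketch, reusing the pipeline_svc, train_upsampled, plot_confusion_matrix, and plt names from the snippet above (the 25% split ratio is an assumption, not taken from the original), could look like this:

# Hypothetical evaluation sketch (not in the original notebook): hold out part of
# the upsampled data, fit a fresh copy of the pipeline, and reuse plot_confusion_matrix.
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix

X_tr, X_te, y_tr, y_te = train_test_split(
    train_upsampled['Tweet'], train_upsampled['Label'],
    test_size=0.25, random_state=123)          # assumed split ratio

eval_pipe = clone(pipeline_svc).fit(X_tr, y_tr)   # clone avoids overwriting the fitted `model`
y_hat = eval_pipe.predict(X_te)
print("held-out F1:", f1_score(y_te, y_hat))

classes = sorted(set(y_te))
cm = confusion_matrix(y_te, y_hat, labels=classes)
plot_confusion_matrix(cm, classes)
plt.show()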
Ion XGBoost.py
Source:Ion XGBoost.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 4 14:03:53 2020
@author: feichang
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
type(train.shape[0])
train_clean = train

"""
#take a look at the train
plt.figure(figsize=(5,5)); res = 1000
plt.plot(range(0,train.shape[0],res),train.signal[0::res])
for i in range(11): plt.plot([i*500000,i*500000],[-5,12.5],'r')
plt.show()
#take a look at the test
plt.figure(figsize=(10,5)); res = 1000
plt.plot(range(0,test.shape[0],res),test.signal[0::res])
plt.show()
"""

# first, fix the linear drift in segment 2, rows 500000-600000
a = 500000
b = 600000
seg_1 = train.loc[train.index[a:b], 'signal'].values
time_1 = train.loc[train.index[a:b], 'time'].values

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(time_1.reshape(-1, 1), seg_1)
train_clean.loc[train.index[a:b], 'signal'] = train_clean.signal[a:b].values - regressor.coef_ * (train_clean.time.values[a:b] - 50)

"""
plt.figure(figsize=(5,5)); res = 1000
plt.plot(range(0,train_clean.shape[0],res),train.signal[0::res])
for i in range(11): plt.plot([i*500000,i*500000],[-5,12.5],'r')
plt.show()
"""

# then fix the polynomial drift, one 500000-row segment at a time
from sklearn.preprocessing import PolynomialFeatures

a = 0
while a < 4500001:
    b = a + 500000
    seg_2 = train.loc[train.index[a:b], 'signal'].values
    time_2 = train.loc[train.index[a:b], 'time'].values

    poly_reg = PolynomialFeatures(degree=2)
    time_poly = poly_reg.fit_transform(time_2.reshape(-1, 1))
    # define the polynomial regressor
    lin_reg2 = LinearRegression()
    lin_reg2.fit(time_poly, seg_2)
    drift_0 = lin_reg2.predict(time_poly)[0]
    drift = lin_reg2.predict(time_poly) - drift_0
    train_clean.loc[train.index[a:b], 'signal'] = train_clean.signal[a:b].values - drift
    a += 500000

# now the signal data is clean, look:
"""
res = 1000
plt.plot(range(0,train.shape[0],res),train.signal[0::res])
"""

"""
We also need to take the average current of the "phase" into consideration
"""
a = 0
train_clean['Mean'] = 0.
train_clean['stdev'] = 0.
while a < 4500001:
    b = a + 500000
    avg = np.mean(train_clean.signal[a:b].values, dtype='float32')
    std = np.std(train_clean.signal[a:b].values, dtype='float32')
    train_clean.Mean[a:b].values.fill(avg)
    train_clean.stdev[a:b].values.fill(std)
    a += 500000

"""
train_clean.head()
"""

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train_clean[['signal', 'Mean', 'stdev']], train_clean['open_channels'], test_size=0.25, random_state=0)

"""
from sklearn.tree import DecisionTreeClassifier
classifier_1 = DecisionTreeClassifier(random_state = 0, max_depth = 11,
                                      min_samples_split = 32, min_samples_leaf = 5)
classifier_1.fit(X_train, y_train)
"""

from xgboost import XGBClassifier
classifier_1 = XGBClassifier()
classifier_1.fit(X_train, y_train)
prediction = classifier_1.predict(X_test)

from sklearn.metrics import f1_score
F1 = f1_score(y_test, prediction, average='macro')
print('F1 score:', F1)

"""
now it's very close. We are ready to move to the next step
plt.plot(prediction,'red')
plt.plot(train_clean['open_channels'])
"""

"""
now we clean the test data
first take a look
plt.plot(test['signal'])
"""
# every 100000 points is a phase, up to row 1000000
test_clean = test
a = 0
while a < 900001:
    b = a + 100000
    seg_1 = test.loc[test.index[a:b], 'signal'].values
    time_1 = test.loc[test.index[a:b], 'time'].values
    regressor_3 = LinearRegression()
    regressor_3.fit(time_1.reshape(-1, 1), seg_1)
    drift_0 = regressor_3.predict(time_1.reshape(-1, 1))[0]
    drift = regressor_3.predict(time_1.reshape(-1, 1)) - drift_0

    test_clean.loc[test.index[a:b], 'signal'] = test_clean.signal[a:b].values - drift
    a += 100000

"""
take a look
plt.plot(test_clean['signal'])
"""
# rows 1000000 to 1500000 show a polynomial drift
a = 1000000
b = 1500000
seg_2 = test.loc[test.index[a:b], 'signal'].values
time_2 = test.loc[test.index[a:b], 'time'].values

poly_reg = PolynomialFeatures(degree=2)
time_poly = poly_reg.fit_transform(time_2.reshape(-1, 1))
lin_reg2 = LinearRegression()
lin_reg2.fit(time_poly, seg_2)
drift_0 = lin_reg2.predict(time_poly)[0]
drift = lin_reg2.predict(time_poly) - drift_0
test_clean.loc[test_clean.index[a:b], 'signal'] = test_clean.signal[a:b].values - drift + 0.25

"""
take a look
plt.figure(figsize=(20,5)); res = 1000
plt.plot(range(0,test_clean.shape[0],res),test_clean.signal[0::res])
plt.plot(test_clean['signal'])
plt.figure(figsize=(20,5)); res = 1000
plt.plot(pd.read_csv('test.csv')['signal'])
"""

a = 0
test_clean['Mean'] = 0.
test_clean['stdev'] = 0.
while a < 1900001:
    b = a + 100000
    avg = np.mean(test_clean.signal[a:b].values, dtype='float32')
    std = np.std(test_clean.signal[a:b].values, dtype='float32')
    test_clean.Mean[a:b].values.fill(avg)
    test_clean.stdev[a:b].values.fill(std)
    a += 100000

test_clean.head()
prediction = classifier_1.predict(test_clean[['signal', 'Mean', 'stdev']])

"""
Take a look at the mean
plt.plot(test_clean['Mean'])
plt.plot(train_clean['Mean'])
"""
"""
output = pd.DataFrame({'time': test_clean['time'], 'open_channels': prediction})
output.to_csv('my_submission.csv', index=False)
print("Your submission was successfully saved!")
"""
samplesubmission = pd.read_csv('sample_submission.csv', dtype={'time': 'str'})
samplesubmission.info()
output = pd.DataFrame({'time': samplesubmission.time, 'open_channels': prediction})
output.to_csv('submission.csv', index=False)
sub = pd.read_csv('submission.csv')
...
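The per-segment Mean/stdev features above are filled with while loops and chained indexing. A sketch of an alternative construction using a segment id and groupby-transform is shown below; the seg_id helper column is purely illustrative and not part of the original script, and the segment length mirrors the snippet's constants.

# Illustrative alternative (not from the original script): per-segment mean/std
# features via groupby-transform instead of the while loops.
SEG_LEN = 500000                                        # 100000 for the test set, per the snippet
train_clean['seg_id'] = train_clean.index // SEG_LEN    # hypothetical helper column
train_clean['Mean'] = train_clean.groupby('seg_id')['signal'].transform('mean')
train_clean['stdev'] = train_clean.groupby('seg_id')['signal'].transform('std')  # note: pandas std uses ddof=1, unlike np.std above
train_clean = train_clean.drop(columns='seg_id')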
baseline.py
Source:baseline.py
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error


def basic_cleaning(train_feature_path, train_labels_path, test_feature_path, export=False, out_path=None):
    train_features = pd.read_csv(train_feature_path)
    train_labels = pd.read_csv(train_labels_path)
    train_features = pd.merge(train_features, train_labels, on=['city', 'year', 'weekofyear'])
    test_features = pd.read_csv(test_feature_path)
    # convert Kelvin to Celsius
    kelvin_cols = ['reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k',
                   'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k', 'reanalysis_tdtr_k']
    train_features[kelvin_cols] = train_features[kelvin_cols] - 273.15
    test_features[kelvin_cols] = test_features[kelvin_cols] - 273.15
    # drop rows where no temperature data is available at all
    train_clean = train_features.dropna(subset=['reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'station_avg_temp_c'], how='all')
    # keep all test rows; imputation only, since every row is needed for the submission
    test_clean = test_features.copy(deep=True)
    # imputation - temperature data
    train_clean['station_avg_temp_c'].fillna(train_clean['reanalysis_avg_temp_k'], inplace=True)
    train_clean['station_diur_temp_rng_c'].fillna(train_clean['reanalysis_tdtr_k'], inplace=True)
    train_clean['station_max_temp_c'].fillna(train_clean['reanalysis_max_air_temp_k'], inplace=True)
    train_clean['station_min_temp_c'].fillna(train_clean['reanalysis_min_air_temp_k'], inplace=True)
    test_clean['station_avg_temp_c'].fillna(test_clean['reanalysis_avg_temp_k'], inplace=True)
    test_clean['station_diur_temp_rng_c'].fillna(test_clean['reanalysis_tdtr_k'], inplace=True)
    test_clean['station_max_temp_c'].fillna(test_clean['reanalysis_max_air_temp_k'], inplace=True)
    test_clean['station_min_temp_c'].fillna(test_clean['reanalysis_min_air_temp_k'], inplace=True)
    # imputation - vegetation index
    for i in ['ndvi_ne', 'ndvi_sw', 'ndvi_nw', 'ndvi_se']:
        train_clean[i] = train_clean[i].interpolate()
        test_clean[i] = test_clean[i].interpolate()
    # imputation - precipitation level
    train_clean['station_precip_mm'].fillna(train_clean['reanalysis_sat_precip_amt_mm'], inplace=True)
    test_clean['station_precip_mm'].fillna(test_clean['reanalysis_sat_precip_amt_mm'], inplace=True)
    # drop duplicate columns
    train_clean.drop(['precipitation_amt_mm', 'reanalysis_sat_precip_amt_mm'], axis=1, inplace=True)
    test_clean.drop(['precipitation_amt_mm', 'reanalysis_sat_precip_amt_mm'], axis=1, inplace=True)
    # drop unused column
    train_clean.drop('week_start_date', axis=1, inplace=True)
    test_clean.drop('week_start_date', axis=1, inplace=True)
    # encode city as a binary variable
    train_clean['city'] = train_clean['city'].map({'sj': 1, 'iq': 0})
    test_clean['city'] = test_clean['city'].map({'sj': 1, 'iq': 0})
    # interpolate any remaining gaps in the test features
    for i in test_clean.columns:
        test_clean[i] = test_clean[i].interpolate()
    if export:
        train_clean.to_csv('train_' + out_path)
        test_clean.to_csv('test_' + out_path)
    return train_clean, test_clean


def export_submission(test_clean, out_path, model):
    test_clean['total_cases'] = model.predict(test_clean)
    test_clean['total_cases'] = test_clean['total_cases'].astype(int)
    submission = test_clean[['city', 'year', 'weekofyear', 'total_cases']]
    submission['city'] = submission['city'].map({1: 'sj', 0: 'iq'})
    submission.to_csv(out_path, index=False)
    return submission


cleaned = basic_cleaning('../data/dengue_features_train.csv', '../data/dengue_labels_train.csv', '../data/dengue_features_test.csv')
print(cleaned[0].head())
X = cleaned[0].drop('total_cases', axis=1)
Y = cleaned[0]['total_cases']
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42)
dtr = DecisionTreeRegressor(random_state=420)
dtr.fit(X_train, y_train)
rfr = RandomForestRegressor(random_state=420)
rfr.fit(X_train, y_train)
print(mean_absolute_error(y_test, dtr.predict(X_test)))
...
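export_submission is defined above but never called in the snippet. A minimal usage sketch under the same names, using the cleaned test features returned by basic_cleaning and the fitted random forest, might look like this; the output path is an assumption.

# Hypothetical usage of the helpers above: predict on the cleaned test set with the
# fitted random forest and write a submission file (the path is an assumption).
test_features_clean = cleaned[1]
submission = export_submission(test_features_clean, '../data/submission_baseline.csv', rfr)
print(submission.head())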
calculate_bleu_for_bt.py
Source:calculate_bleu_for_bt.py
from nltk.translate.bleu_score import corpus_bleu
from experiments.utils import get_daily_dialog, get_mutual_friends, get_babi_dialog
import os
import argparse
from config import *

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_clean", type=str, required=True)
    parser.add_argument("--dataset_back", type=str, required=True)
    args = parser.parse_args()

    if "dailydialog" in args.dataset_clean.lower():
        test_clean = get_daily_dialog(os.path.join(args.dataset_clean, "test.json"))
        test_translated = get_daily_dialog(os.path.join(args.dataset_back, "test.json"))
        list_of_hypothesis = []
        list_of_references = []
        assert len(test_clean) == len(test_translated)
        for ground_truth, translation in zip(test_clean, test_translated):
            assert len(ground_truth.dialog) == len(translation.dialog)
            for utterance_ground, utterance_translation in zip(ground_truth.dialog, translation.dialog):
                references = [utterance_ground[TEXT].split(" ")]
                list_of_references.append(references)
                hypothesis = utterance_translation[TEXT].split(" ")
                list_of_hypothesis.append(hypothesis)
        print(corpus_bleu(list_of_references, list_of_hypothesis))
    elif "mutualfriends" in args.dataset_clean.lower():
        test_clean = get_mutual_friends(os.path.join(args.dataset_clean, "test.json"))
        test_translated = get_mutual_friends(os.path.join(args.dataset_back, "test.json"))
        list_of_hypothesis = []
        list_of_references = []
        assert len(test_clean) == len(test_translated)
        for ground_truth, translation in zip(test_clean, test_translated):
            assert ground_truth.uuid == translation.uuid
            assert len(ground_truth.dialog) == len(translation.dialog)
            for utterance_ground, utterance_translation in zip(ground_truth.dialog, translation.dialog):
                if utterance_ground["action"] == "message":
                    references = [utterance_ground["data"].split(" ")]
                    list_of_references.append(references)
                    hypothesis = utterance_translation["data"].split(" ")
                    list_of_hypothesis.append(hypothesis)
                    print(utterance_ground, utterance_translation)
        print(corpus_bleu(list_of_references, list_of_hypothesis))
    elif "babi" in args.dataset_clean.lower():
        test_clean = get_babi_dialog(os.path.join(args.dataset_clean, "dialog-babi-task5-full-dialogs-tst.txt"))
        test_translated = get_babi_dialog(os.path.join(args.dataset_back, "dialog-babi-task5-full-dialogs-tst.txt"))
        list_of_hypothesis = []
        list_of_references = []
        assert len(test_clean) == len(test_translated)
        for ground_truth, translation in zip(test_clean, test_translated):
            assert len(ground_truth.dialog) == len(translation.dialog)
            for utterance_ground, utterance_translation in zip(ground_truth.dialog, translation.dialog):
                key = list(utterance_ground.keys())[0]
                if key in ["User", "Bot"]:
                    references = [utterance_ground[key].split(" ")]
                    list_of_references.append(references)
                    hypothesis = utterance_translation[key].split(" ")
                    list_of_hypothesis.append(hypothesis)
...
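For readers unfamiliar with the nesting that nltk's corpus_bleu expects, here is a small self-contained sketch; the sentences are toy examples invented for illustration, not taken from any of the datasets above.

# Toy illustration of the list shapes fed to corpus_bleu in the script above:
# one list of reference token-lists per hypothesis, hypotheses as plain token lists.
from nltk.translate.bleu_score import corpus_bleu

list_of_references = [
    [["the", "cat", "sat", "on", "the", "mat"]],   # reference(s) for hypothesis 1
    [["hello", "there"]],                          # reference(s) for hypothesis 2
]
hypotheses = [
    ["the", "cat", "sat", "on", "the", "mat", "today"],
    ["hello", "there"],
]
print(corpus_bleu(list_of_references, hypotheses))  # single corpus-level BLEU score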