Best Python code snippet using hypothesis
Genetic Algorithm Population Size.py
Source: Genetic Algorithm Population Size.py
import numpy as np
import pandas as pd


class DataSet:
    """One portfolio-selection instance loaded from a file under ``Datasets/``.

    Rows of the data file are classified by their number of
    whitespace-separated fields: one field is the asset count, two fields are
    an asset's expected return and standard deviation, three fields are
    ``i j correlation`` with 1-based asset indices.

    Attributes:
        Asset_File, N, K, epsilon, delta: the constructor arguments (``N`` is
            the number of assets in the data set, ``K`` the number of assets
            in a solution, ``epsilon``/``delta`` the minimum/maximum weight of
            a selected asset).
        number_of_stocks: asset count read from the file.
        returns_deviations: ``(n, 2)`` array of ``[return, deviation]`` rows.
        correlations: list of ``[i, j, correlation]`` rows as floats.
        mu, deviations: the two columns of ``returns_deviations``.
        covariance: correlation entries scaled by both standard deviations,
            only at the positions listed in the file.
        sigma: ``covariance`` mirrored across the diagonal.
        F: ``1 - K * epsilon``, the budget left once every selected asset
            holds ``epsilon``.

    Raises:
        ValueError: if ``K * epsilon > 1`` or ``K * delta < 1``.
    """

    def __init__(self, Asset_File, N, K, epsilon=0.01, delta=1.0):
        self.Asset_File = Asset_File
        self.N = N
        self.K = K
        self.epsilon = epsilon
        self.delta = delta
        self.number_of_stocks = 0
        self.covariance = np.nan

        moments = []
        triples = []
        with open('Datasets/{}'.format(Asset_File), newline='') as datafile:
            for row in datafile:
                # split() without an argument ignores leading/trailing blanks
                # and the line terminator, so " 31 \n" parses cleanly.
                fields = row.split()
                if len(fields) == 1:
                    self.number_of_stocks = int(fields[0])
                elif len(fields) == 2:
                    moments.append([float(value) for value in fields])
                elif len(fields) == 3:
                    triples.append([float(value) for value in fields])

        self.correlations = triples
        self.returns_deviations = np.array(moments, dtype=float).reshape(-1, 2)
        self.mu = self.returns_deviations[:, 0]
        self.deviations = self.returns_deviations[:, 1]

        size = int(self.number_of_stocks)
        listed = np.zeros((size, size))
        for i, j, rho in triples:
            listed[int(i) - 1][int(j) - 1] = rho

        # covariance[i][j] = correlation[i][j] * deviation[i] * deviation[j]
        self.covariance = listed * self.deviations * self.deviations.reshape((-1, 1))
        # Presumably the file lists each pair once (one triangle); mirroring
        # and removing the doubled diagonal yields the symmetric matrix.
        self.sigma = self.covariance + self.covariance.T - np.diag(self.covariance.diagonal())

        if K * epsilon > 1.0:
            raise ValueError("Minimum investment is too large")
        if K * delta < 1.0:
            raise ValueError("Maximum investment is too small")
        self.F = 1.0 - K * epsilon


def _allocate_weights(proportions, data):
    """Convert free proportions of the selected assets into portfolio weights.

    Every asset first receives ``data.epsilon`` and the remaining budget is
    split in proportion to ``proportions``. Assets that would exceed
    ``data.delta`` are pinned to ``data.delta`` and stay pinned while the
    rest of the budget is redistributed among the other assets.
    """
    proportions = np.asarray(proportions, dtype=float)
    weights = data.epsilon + proportions * data.F / proportions.sum()
    capped = np.zeros(proportions.shape[0], dtype=bool)
    while True:
        over = (weights > data.delta) & ~capped
        if not over.any():
            return weights
        capped |= over
        free = ~capped
        n_free = int(free.sum())
        if n_free == 0:
            return np.full(proportions.shape[0], float(data.delta))
        remaining = 1.0 - (data.epsilon * n_free + data.delta * int(capped.sum()))
        free_total = proportions[free].sum()
        if free_total > 0:
            share = proportions / free_total
        else:
            # All uncapped proportions are zero: split the remainder evenly
            # instead of dividing by zero.
            share = np.full(proportions.shape[0], 1.0 / n_free)
        weights = np.where(capped, data.delta, data.epsilon + share * remaining)


def _evaluate(weights, risk_weight, data):
    """Return ``(fitness, obj1, obj2)`` for a full-length weight vector.

    ``obj1`` is ``w' * sigma * w``, ``obj2`` is ``w . mu`` and the fitness is
    ``risk_weight * obj1 - (1 - risk_weight) * obj2`` (lower is better).
    """
    obj1 = np.sum(np.outer(weights, weights) * data.sigma)
    obj2 = np.sum(weights * data.mu)
    return risk_weight * obj1 - (1 - risk_weight) * obj2, obj1, obj2


class Population():
    """Population of candidate portfolios evolved by ``Genetic_Algorithm``.

    Per-individual data is kept in parallel lists (``population_weights``,
    ``population_assets``, ``population_proportions``, ``fitness``, ``Obj1``,
    ``Obj2``); the ``best_*`` attributes describe the member with the lowest
    fitness after the most recent ``Genetic_Algorithm`` call.
    """

    def __init__(self, size):
        self.Population_size = size
        self.population_weights = []      # full-length weight vectors
        self.population_assets = []       # indices of the selected assets
        self.fitness = []
        self.population_proportions = []  # weight above epsilon, per selected asset
        self.best_fitness = 0
        self.best_proportions = 0
        self.best_weights = 0
        self.best_assets = 0
        self.best_covariance = 0
        self.best_return = 0
        self.Obj1 = []
        self.Obj2 = []

    def check_valid_solution(self, weights, proportions, assets, data):
        """Raise ``ValueError`` if a solution violates any constraint.

        Checks, in order: exactly ``data.K`` weights are at least
        ``data.epsilon``; there are ``data.K`` proportions, all within
        ``[0, 1]``; the weights sum to 1; no weight exceeds ``data.delta``;
        ``assets`` holds no duplicates.
        """
        proportions = np.asarray(proportions)
        selected = int(np.sum(weights >= data.epsilon))
        if selected != data.K:
            raise ValueError("Expected exactly " + str(data.K) + " assets but "
                             + str(selected) + " are selected in solution: " + str(weights))
        if np.any(proportions > 1) or np.any(proportions < 0) or len(proportions) != data.K:
            raise ValueError("The values of proportions are not valid: " + str(proportions))
        if not np.isclose(weights.sum(), 1):
            raise ValueError("Proportions don't sum up to 1 (" + str(weights.sum())
                             + ") in solution: " + str(weights))
        if np.any(weights > data.delta):
            raise ValueError("There's at least one proportion larger than delta: " + str(weights))
        if len(np.unique(assets)) != len(assets):
            raise ValueError("Duplicated assets in the portfolio: " + str(assets))

    def create_Population(self, Lambda, l, data):
        """Append ``Population_size`` random valid individuals and score them.

        Each individual holds ``data.K`` distinct assets drawn without
        replacement from ``data.N``; fitness uses ``Lambda[l]``.
        """
        for _ in range(self.Population_size):
            assets = np.random.permutation(data.N)[:data.K]
            held = _allocate_weights(np.random.rand(data.K), data)
            weights = np.zeros(data.N)
            weights[assets] = held
            proportions = held - data.epsilon
            self.check_valid_solution(weights, proportions, assets, data)

            fitness, obj1, obj2 = _evaluate(weights, Lambda[l], data)
            self.population_proportions.append(proportions)
            self.population_weights.append(weights)
            self.population_assets.append(assets.tolist())
            self.fitness.append(fitness)
            self.Obj1.append(obj1)
            self.Obj2.append(obj2)

    def Genetic_Algorithm(self, Lambda, l, data):
        """Breed one child, replace the worst member with it, refresh ``best_*``.

        Two parents are chosen by two binary tournaments, combined by uniform
        crossover, and one gene of the child is scaled by 0.9 or 1.1.

        Returns:
            ``(best_fitness, best_proportions, best_assets, best_weights,
            best_covariance, best_return)``.
        """
        # --- parent selection: best of each of two random pairs -------------
        order = np.random.permutation(self.Population_size).tolist()
        if len(order) < 4:
            # Fewer than four members: reuse them so both tournaments can run.
            order = order * 4
        picked = order[:4]
        fitness_of = self.fitness.__getitem__
        first = min(picked[0:2], key=fitness_of)
        second = min(picked[2:4], key=fitness_of)
        parent_1_assets = self.population_assets[first]
        parent_2_assets = self.population_assets[second]
        share_1 = dict(zip(parent_1_assets, self.population_proportions[first]))
        share_2 = dict(zip(parent_2_assets, self.population_proportions[second]))

        # --- uniform crossover ----------------------------------------------
        child_assets = []
        child_proportions = []
        # Assets held by both parents are inherited; the value comes from
        # either parent with equal probability.
        for asset in [a for a in parent_1_assets if a in share_2]:
            donor = share_1 if np.random.rand() > 0.5 else share_2
            child_assets.append(int(asset))
            child_proportions.append(float(donor[asset]))
        # Remaining assets are paired by position; one of each pair is kept.
        only_1 = [a for a in parent_1_assets if a not in share_2]
        only_2 = [a for a in parent_2_assets if a not in share_1]
        for asset_1, asset_2 in zip(only_1, only_2):
            if np.random.rand() > 0.5:
                child_assets.append(int(asset_1))
                child_proportions.append(float(share_1[asset_1]))
            else:
                child_assets.append(int(asset_2))
                child_proportions.append(float(share_2[asset_2]))

        # A*: parental assets (with their values) that the child did not get.
        in_child = set(child_assets)
        spare_assets = []
        spare_proportions = []
        for pool, donor in ((only_1, share_1), (only_2, share_2)):
            for asset in pool:
                if asset not in in_child:
                    spare_assets.append(int(asset))
                    spare_proportions.append(float(donor[asset]))
        # Fallback candidates for when A* runs dry.
        outsiders = [a for a in np.random.permutation(data.N).tolist() if a not in in_child]

        # --- mutation: scale (epsilon + s) of one gene picked by position ---
        gene = np.random.randint(len(child_proportions))
        factor = 0.9 if np.random.rand() > 0.5 else 1.1
        child_proportions[gene] = factor * (data.epsilon + child_proportions[gene]) - data.epsilon

        # --- repair -----------------------------------------------------------
        # Drop genes whose proportion left [0, 1], keeping both lists aligned.
        survivors = [(a, p) for a, p in zip(child_assets, child_proportions) if 0 <= p <= 1]
        child_assets = [a for a, _ in survivors]
        child_proportions = [p for _, p in survivors]
        in_child = set(child_assets)

        # Too many genes: discard the smallest proportions (by position).
        while len(child_assets) > data.K:
            smallest = int(np.argmin(child_proportions))
            in_child.discard(child_assets.pop(smallest))
            child_proportions.pop(smallest)

        # Too few genes: refill from A*, then from unused assets at proportion 0.
        while len(child_assets) < data.K:
            if spare_assets:
                slot = np.random.randint(len(spare_assets))
                asset = spare_assets.pop(slot)
                proportion = spare_proportions.pop(slot)
            else:
                outsiders = [a for a in outsiders if a not in in_child]
                asset = outsiders.pop(np.random.randint(len(outsiders)))
                proportion = 0.0
            child_assets.append(asset)
            child_proportions.append(proportion)
            in_child.add(asset)

        # All-zero proportions cannot be normalised; nudge one random gene.
        if sum(child_proportions) == 0:
            child_proportions[np.random.randint(len(child_proportions))] = 0.01

        # --- evaluation -------------------------------------------------------
        held = _allocate_weights(child_proportions, data)
        child_weights = np.zeros(data.N)
        child_weights[child_assets] = held
        child_proportions = held - data.epsilon
        self.check_valid_solution(child_weights, child_proportions, child_assets, data)
        child_fitness, obj1, obj2 = _evaluate(child_weights, Lambda[l], data)

        # --- replacement: the child always takes the worst member's slot -----
        worst = int(np.argmax(self.fitness))
        self.fitness[worst] = child_fitness
        self.population_proportions[worst] = child_proportions
        self.population_weights[worst] = child_weights
        self.population_assets[worst] = child_assets
        self.Obj1[worst] = obj1
        self.Obj2[worst] = obj2

        best = int(np.argmin(self.fitness))
        self.best_fitness = self.fitness[best]
        self.best_proportions = self.population_proportions[best]
        self.best_weights = self.population_weights[best]
        self.best_assets = self.population_assets[best]
        self.best_covariance = self.Obj1[best]
        self.best_return = self.Obj2[best]
        return (self.best_fitness, self.best_proportions, self.best_assets,
                self.best_weights, self.best_covariance, self.best_return)


def main():
    """Run the GA for several population sizes on every data file.

    For each file and population size, 30 seeded runs of ``1000 * N`` child
    evaluations are executed and min/max/mean/std of the best fitness, return
    and covariance are written to a CSV file.
    """
    stock_lengths = [31, 85, 89, 98, 225]
    asset_files = ['assets1.txt', 'assets2.txt', 'assets3.txt', 'assets4.txt', 'assets5.txt']
    population_sizes = [1, 10, 50, 100, 200, 500]
    K = 10                      # number of assets to include in the portfolio
    Lambda = np.array([0.5])    # fixed lambda for this experiment
    l = 0
    Runs = 30                   # one run per random seed

    for N, file in zip(stock_lengths, asset_files):
        dataset = DataSet(file, N, K)
        maxEvals = 1000 * N     # child evaluations per run
        for pop_size in population_sizes:
            Results_fitness = []
            Results_weights = []
            Results_assets = []
            Results_Covariances = []
            Results_Returns = []

            for run in range(Runs):
                np.random.seed(run + 12345)
                population = Population(pop_size)
                population.create_Population(Lambda, l, dataset)
                for _ in range(maxEvals):
                    population.Genetic_Algorithm(Lambda, l, dataset)
                Results_fitness.append(population.best_fitness)
                Results_weights.append(population.best_weights)
                Results_assets.append(population.best_assets)
                Results_Covariances.append(population.best_covariance)
                Results_Returns.append(population.best_return)
                print(run)      # progress indicator

            Results_fitness = np.array(Results_fitness)
            Results_Returns = np.array(Results_Returns)
            Results_Covariances = np.array(Results_Covariances)

            def summary(values):
                return [values.min(), values.max(), values.mean(), values.std()]

            stats = pd.DataFrame({
                'F value stats': summary(Results_fitness),
                'Return stats': summary(Results_Returns),
                'Covariance stats': summary(Results_Covariances),
            })
            results = pd.DataFrame({
                'F values': Results_fitness,
                'Returns': Results_Returns,
                'Covariances': Results_Covariances,
            })
            weights = pd.DataFrame(Results_weights, columns=list(range(1, N + 1)))
            col_names = ['asset_{}'.format(i) for i in range(1, K + 1)]
            assets = pd.DataFrame(Results_assets, columns=col_names)
            df_results = pd.concat([results, assets, weights], axis=1)

            stats.to_csv('Generated data/Different Populations/stats_GA_p={}_{}.csv'.format(str(pop_size), file[:-4]), index=False)
            # NOTE(review): the listing this script was recovered from is cut
            # off after the write above; df_results was presumably saved next.
            # Confirm against the original script.


if __name__ == "__main__":
    main()
Genetic Algorithm.py
Source: Genetic Algorithm.py
1import numpy as np2import pandas as pd3class DataSet:4 def __init__(self, Asset_File, N, K, epsilon=0.01, delta=1.0):5 """Loads a dataset and divides its contents into variables """6 self.Asset_File = Asset_File7 self.N = N # Total number of assets in a dataset8 self.K = K9 self.epsilon = epsilon # Min investment10 self.delta = delta # Max investment11 self.number_of_stocks = 012 self.returns_deviations = []13 self.correlations = []14 self.covariance = np.nan15 temp_li_1 = []16 temp_li_2 = []17 # Splitting rows based on what they contain18 with open('Datasets/{}'.format(Asset_File), newline='') as datafile:19 for row in datafile:20 if len(row.split()) == 1: # if row is len of 1 it will be number of assets21 for x in row.split(' '):22 if x == '':23 continue24 self.number_of_stocks = (int(x))25 elif len(row.split()) == 2: # if row is len of 2 it will be the assets return and standard deviation26 for x in row.split(' '):27 if x == '':28 continue29 self.returns_deviations.append(float(x))30 elif len(row.split()) == 3: # if row is len of 3 it will be the correlation between assets31 for x in row.split(' '):32 if x == '':33 continue34 self.correlations.append(float(x))35 # Variable for storing standard deviations of returns36 for i, z in zip(self.returns_deviations[0::2], self.returns_deviations[1::2]):37 temp_li_1.append([i, z])38 self.returns_deviations = temp_li_139 # Variable for storing correlations between assets40 zeros = np.zeros((int(self.number_of_stocks), int(self.number_of_stocks)))41 for x, y, z in zip(self.correlations[0::3], self.correlations[1::3], self.correlations[2::3]):42 temp_li_2.append([x, y, z])43 zeros[int(x)-1][int(y)-1] = z44 self.correlations = temp_li_245 # Creates a matrix of returns and deviations46 self.returns_deviations = np.array(self.returns_deviations)47 # Splitting the data into variables needed for calculation48 self.deviations = self.returns_deviations[:, 1]49 self.mu = self.returns_deviations[:, 0]50 self.covariance = zeros * 
self.deviations * self.deviations.reshape((self.deviations.shape[0], 1))51 self.sigma = self.covariance + self.covariance.T - np.diag(self.covariance.diagonal()) #Fills in the second part of the covariance matrix52 # Making sure constraints on minimum and maximum investments are met53 if K * epsilon > 1.0:54 print("Minimum investment is too large")55 raise ValueError56 if K * delta < 1.0:57 print("Maximum investment is too small")58 raise ValueError59 self.F = 1.0 - K * epsilon60class Population:61 def __init__(self):62 """Population of solutions"""63 self.Population_size = 100 #Arbitrarily chosen64 self.population_weights = [] # Weights of the individuals in the population65 self.population_assets = [] # Assets of individuals in the population66 self.fitness = [] #A list containg the fitness of the individuals in the population67 self.population_proportions=[]68 self.best_fitness = 0 # Best f69 self.best_proportions = 0 # Best proportions by which each asset is in an individual70 self.best_weights = 071 self.best_assets = 072 self.best_covariance = 073 self.best_return = 074 self.Obj1 = []75 self.Obj2 = []76 def check_valid_solution(self, weights, proportions, assets, data):77 """Checks whether a solution is valid given constraints"""78 # Checking whether correct number of solutions has been picked79 if np.sum(weights != 0) != K:80 raise ValueError("More than " + str(K) + " assets selected (", weights.tolist(), ") in solution: " + str(weights))81 # Checking whether number and size of proportions is correct82 if np.any(proportions > 1) or np.any(proportions < 0) or len(proportions) != K:83 raise ValueError("The values of proportions are not valid: " + str(proportions))84 # Checking whether proportions sum up to 185 elif not np.isclose(weights.sum(), 1):86 raise ValueError("Proportions don't sum up to 1 (" + str(weights.sum()) + ") in solution: " + str(weights))87 # Checking whether maximum investment amount has not been exceeded88 elif np.any(weights > 
data.delta):89 raise ValueError("There's at least one proportion larger than delta: " + str(weights))90 # Checking for duplicate assets in a solution91 elif len(np.unique(assets)) != len(assets):92 raise ValueError("Duplicated assets in the portfolio: " + str(assets))93 def create_Population(Population, Lambda, l, data):94 """Initializes random population of solutions"""95 for i in range(Population.Population_size):96 #Initializing individuals in the popuplation97 R = np.random.permutation(N)[:K]98 # Random weights of the 10 assets99 s = np.random.rand(K)100 # Initializes weights101 w = np.zeros(N)102 # Initialized to make sure that the weights sum to 1103 L = s.sum()104 # Making sure that the random weights sum up to 1 given min investment105 w_temp = data.epsilon + s * data.F / L106 # Making sure the highest investment is met107 is_too_large = (w_temp > data.delta)108 # If an investment would be too large the loop would stop109 while is_too_large.sum() > 0:110 # Reversing logic111 is_not_too_large = np.logical_not(is_too_large)112 # Sum of weights113 L = s[is_not_too_large].sum()114 # Calculates temporary F value115 F_temp = 1.0 - (data.epsilon * is_not_too_large.sum() + data.delta * is_too_large.sum())116 # Adding minimal investment and making sure the actual weights sum to 1 given min investment117 w_temp = data.epsilon + s * F_temp / L118 # Implementing Max investment amount119 w_temp[is_too_large] = data.delta120 # Checking for invesments that are too large121 is_too_large = (w_temp > data.delta)122 w[:] = 0123 w[R] = w_temp # Actual weights124 s = w_temp - data.epsilon # Investment proportions125 # Checking whether our solution is valid126 Population.check_valid_solution(w, s, R, data)127 # Adding valid solution to our population128 Population.population_proportions.append(s)129 Population.population_weights.append(w)130 Population.population_assets.append(R.tolist())131 # Calculating fitness of the population132 for i in Population.population_weights:133 
obj1 = np.sum((i * i.reshape((i.shape[0], 1))) * data.sigma)134 obj2 = np.sum(i * data.mu)135 f = Lambda[l] * obj1 - (1 - Lambda[l]) * obj2136 Population.fitness.append(f)137 Population.Obj1.append(obj1) # Covariance138 Population.Obj2.append(obj2) # Expected Return139 def Genetic_Algorithm(Population, Lambda, l, data):140 """Applies the logic of genetic algorithm to the whole population"""141 if Population.Population_size == 1: # Used in case of different population sizes142 picked_individuals = np.random.permutation(Population.Population_size)[:4].tolist()*4143 else:144 # Selecting 4 different individuals from the population145 picked_individuals = np.random.permutation(Population.Population_size)[:4].tolist()146 # Initializing child of the selected individuals147 child_assets = []148 child_proportions = []149 child_weights = np.zeros(N)150 l = 0151 #Pool_1152 pair_1_assets = [Population.population_assets[picked_individuals[0]], Population.population_assets[picked_individuals[1]]]153 pair_1_fitness = [Population.fitness[picked_individuals[0]], Population.fitness[picked_individuals[1]]]154 pair_1_proportions = [Population.population_proportions[picked_individuals[0]], Population.population_proportions[picked_individuals[1]]]155 # Pool_2156 pair_2_assets = [Population.population_assets[picked_individuals[2]], Population.population_assets[picked_individuals[3]]]157 pair_2_fitness = [Population.fitness[picked_individuals[2]], Population.fitness[picked_individuals[3]]]158 pair_2_proportions = [Population.population_proportions[picked_individuals[2]], Population.population_proportions[picked_individuals[3]]]159 # Selecting parents for the uniform crossover160 parent_1_assets = pair_1_assets[pair_1_fitness.index(min(pair_1_fitness))]161 parent_1_proportions = pair_1_proportions[pair_1_fitness.index(min(pair_1_fitness))]162 parent_2_assets = pair_2_assets[pair_2_fitness.index(min(pair_2_fitness))]163 parent_2_proportions = 
pair_2_proportions[pair_2_fitness.index(min(pair_2_fitness))]164 # Looking for same assets in parents and inputting them into child165 common_assets = []166 for i in parent_1_assets:167 if i in parent_2_assets:168 common_assets.append(i)169 child_assets += common_assets170 # Finding out what are the indexes of those assets in parents171 indexes_1 = []172 indexes_2 = []173 for i in common_assets:174 indexes_1.append(parent_1_assets.index(i))175 indexes_2.append(parent_2_assets.index(i))176 # Adding the proportions of same assets to child with 50% chance177 for m, h in zip(indexes_1, indexes_2):178 rand_1 = np.random.rand()179 if rand_1 > 0.5:180 child_proportions.append(parent_1_proportions[m])181 else:182 child_proportions.append(parent_2_proportions[h])183 # Creating new lists with assets that each parent don't have in common184 temp_parent_1_assets = []185 temp_parent_2_assets = []186 for m, h in zip(parent_1_assets, parent_2_assets):187 temp_parent_1_assets.append(m)188 temp_parent_2_assets.append(h)189 for i in common_assets:190 if i in temp_parent_1_assets:191 temp_parent_1_assets.remove(i)192 for i in common_assets:193 if i in temp_parent_2_assets:194 temp_parent_2_assets.remove(i)195 # Adding other assets and their corresponding proportions to the child196 for m, h in zip(temp_parent_1_assets, temp_parent_2_assets):197 rand_2 = np.random.rand()198 if rand_2 > 0.5:199 child_assets.append(m)200 child_proportions.append(parent_1_proportions[parent_1_assets.index(m)])201 else:202 child_assets.append(h)203 child_proportions.append(parent_2_proportions[parent_2_assets.index(h)])204 # Creating A*205 # A* is a set of assets that are in the parents, but are not in the child (together with their associated values)206 parent_minus_child_assets = []207 parent_minus_child_proportions = []208 for m, h in zip(parent_1_assets, parent_2_assets):209 if m not in child_assets:210 parent_minus_child_assets.append(m)211 
parent_minus_child_proportions.append(parent_1_proportions[parent_1_assets.index(m)])212 if h not in child_assets:213 parent_minus_child_assets.append(h)214 parent_minus_child_proportions.append(parent_2_proportions[parent_2_assets.index(h)])215 # Assets that can be potentially added to the child in case parent_minus_child assets (A*) are empty216 other_assets = np.random.permutation(N).tolist()217 for i in other_assets:218 if i in child_assets:219 other_assets.remove(i)220 # Mutation221 mutated_asset = np.random.choice(child_proportions)222 rand_3 = np.random.rand()223 if rand_3 > 0.5:224 child_proportions[child_proportions.index(mutated_asset)] = (0.9 * (data.epsilon + mutated_asset) - data.epsilon) # m=1225 else:226 child_proportions[child_proportions.index(mutated_asset)] = (1.1 * (data.epsilon + mutated_asset) - data.epsilon) # m=2227 mutated_child_proportions = child_proportions228 # Making sure the child does not have two identical assets229 for i in child_assets:230 if child_assets.count(i) > 1:231 mutated_child_proportions.remove(mutated_child_proportions[child_assets.index(i)])232 child_assets.remove(i)233 # Making sure all child proportion are between 0 and 1 (if not they get excluded)234 for i in mutated_child_proportions:235 if i < 0 or i > 1:236 child_assets.remove(child_assets[mutated_child_proportions.index(i)])237 mutated_child_proportions.remove(i)238 # Ensure that child has exactly 10 assets and proportions239 while len(child_assets) > data.K and len(mutated_child_proportions) > data.K:240 child_assets.remove(child_assets.index(min(mutated_child_proportions)))241 mutated_child_proportions.remove(min(mutated_child_proportions))242 # Add assets from A* to child243 while len(child_assets) < data.K and len(mutated_child_proportions) < data.K:244 if len(parent_minus_child_assets) != 0:245 rand_4 = np.random.choice(parent_minus_child_assets)246 child_assets.append(rand_4)247 
mutated_child_proportions.append(parent_minus_child_proportions[parent_minus_child_assets.index(rand_4)])248 parent_minus_child_proportions.remove(parent_minus_child_proportions[parent_minus_child_assets.index(rand_4)])249 parent_minus_child_assets.remove(rand_4)250 for i in mutated_child_proportions:251 if i < 0 or i > 1:252 child_assets.remove(child_assets[mutated_child_proportions.index(i)])253 mutated_child_proportions.remove(i)254 for i in child_assets:255 if child_assets.count(i) > 1:256 mutated_child_proportions.remove(mutated_child_proportions[child_assets.index(i)])257 child_assets.remove(i)258 else: #In case A* is empty259 rand_5=np.random.choice(other_assets)260 child_assets.append(rand_5)261 other_assets.remove(rand_5)262 mutated_child_proportions.append(0)263 for i in mutated_child_proportions:264 if i < 0 or i > 1:265 child_assets.remove(child_assets[mutated_child_proportions.index(i)])266 mutated_child_proportions.remove(i)267 for i in child_assets:268 if child_assets.count(i) > 1:269 mutated_child_proportions.remove(mutated_child_proportions[child_assets.index(i)])270 child_assets.remove(i)271 # Given large amount of iterations and randomness all child proportions could be 0 hence set 1 at random to 0.01272 # Does not influence the overall result as it ist immediately replaced by a stronger individual273 if sum(mutated_child_proportions) == 0:274 mutated_child_proportions[mutated_child_proportions.index(np.random.choice(mutated_child_proportions))]= 0.01275 # Evaluating child276 mutated_child_proportions = np.array(mutated_child_proportions)277 L = mutated_child_proportions.sum()278 w_temp = data.epsilon + mutated_child_proportions * data.F / L279 is_too_large = (w_temp > data.delta)280 while is_too_large.sum() > 0:281 is_not_too_large = np.logical_not(is_too_large)282 L = mutated_child_proportions[is_not_too_large].sum()283 F_temp = 1.0 - (data.epsilon * is_not_too_large.sum() + data.delta * is_too_large.sum())284 w_temp = data.epsilon + 
mutated_child_proportions * F_temp / L285 w_temp[is_too_large] = data.delta286 is_too_large = (w_temp > data.delta)287 # Assigning weights to child288 child_weights[:] = 0289 child_weights[child_assets] = w_temp290 mutated_child_proportions = w_temp - data.epsilon291 # Calculating child fitness292 obj1 = np.sum((child_weights * child_weights.reshape((child_weights.shape[0], 1))) * data.sigma)293 obj2 = np.sum(child_weights * data.mu)294 child_fitness = Lambda[l] * obj1 - (1 - Lambda[l]) * obj2295 # Checking whether child is valid296 Population.check_valid_solution(child_weights, mutated_child_proportions, child_assets, data)297 # Substituting child into the population and removing the weakest member298 index_worst_member = np.argmax(Population.fitness)299 Population.fitness[index_worst_member] = child_fitness300 Population.population_proportions[index_worst_member] = mutated_child_proportions301 Population.population_weights[index_worst_member] = child_weights302 Population.population_assets[index_worst_member] = child_assets303 Population.Obj1[index_worst_member] = obj1304 Population.Obj2[index_worst_member] = obj2305 # Finding the best member of the population306 index_best_member = np.argmin(Population.fitness)307 Population.best_fitness = Population.fitness[index_best_member]308 Population.best_proportions = Population.population_proportions[index_best_member]309 Population.best_weights = Population.population_weights[index_best_member]310 Population.best_assets = Population.population_assets[index_best_member]311 Population.best_covariance = Population.Obj1[index_best_member]312 Population.best_return = Population.Obj2[index_best_member]313 return Population.best_fitness, Population.best_proportions, Population.best_assets, Population.best_weights, Population.best_covariance, Population.best_return314# Iterating through data files315stock_lengths = [31,85,89,98,225]316asset_files = ['assets1.txt', 'assets2.txt', 'assets3.txt', 'assets4.txt', 
'assets5.txt']317for n, file in zip(stock_lengths, asset_files):318 l = 0319 N = n # Total number of assets in data file320 Nvalues = [N]321 Asset_File = file322 K = 10 # Number of assets to include in the portfolio323 E = 50 # Number of different lambda values324 # Initializing variables for collecting data on different lambdas325 lambdas = []326 Results_fitness = []327 Results_weights = []328 Results_assets = []329 Results_proportions = []330 Results_Covariances = []331 Results_Returns = []332 # Initializing the dataset333 dataset = DataSet(Asset_File, N, K)334 nevals = 0 # Counter for the number of iterations335 maxEvals = 1000 * N # Solution evaluations per run336 # Sets a random seed for solution repeatability337 seed = 12345338 np.random.seed(seed)339 # Iterating through different values of lambda340 for e in range(1, E+1):341 Lambda = np.array([(e-1)/(E-1)]) # 50 lambda values equally spaced from 0 to 1342 lambdas.append(Lambda[l])343 # Initializing population344 population = Population()345 population.create_Population(Lambda, l, dataset)346 for i in range(maxEvals):347 population.Genetic_Algorithm(Lambda, l, dataset)348 # Collecting results349 Results_fitness.append(population.best_fitness)350 Results_weights.append(population.best_weights)351 Results_assets.append(population.best_assets)352 Results_Covariances.append(population.best_covariance)353 Results_Returns.append(population.best_return)354 # Tracking which lambda values is being currently calculated355 print(e)356 print("N={0}, Lambda = {1}, f = {2}".format(N, Lambda[l], population.best_fitness))357 Results_fitness = np.array(Results_fitness)358 Results_Returns = np.array(Results_Returns)359 Results_Covariances = np.array(Results_Covariances)360 # Statistics about f values361 f_stats = [Results_fitness.min(), Results_fitness.max(), Results_fitness.mean(), Results_fitness.std()]362 # Statistics about returns363 r_stats = [Results_Returns.min(), Results_Returns.max(), Results_Returns.mean(), 
Results_Returns.std()]364 # Statisitcs of the covariances365 cov_stats = [Results_Covariances.min(), Results_Covariances.max(), Results_Covariances.mean(), Results_Covariances.std()]366 # Statistical values about the F, Cov and R367 stats = pd.DataFrame(f_stats)368 stats[1] = r_stats369 stats[2] = cov_stats370 stats.columns = ['F value stats', 'Return stats', 'Covariance stats']371 # Results for the 50 lambda values372 results = pd.DataFrame(Results_fitness)373 results[1] = Results_Returns374 results[2] = Results_Covariances375 results.columns = ['F values', 'Returns', 'Covariances']376 # Lambdas used377 lambdas = pd.DataFrame(lambdas, columns=['Lambda'])378 # Weights of the best portfolios for the 50 lambda values379 weights = pd.DataFrame(Results_weights, columns=list(range(1, N+1)))380 # Indexes of Assets used in each of the best portfolios for the 50 lambda values381 col_names = ['asset_{}'.format(i) for i in range(1, 11)]382 assets = pd.DataFrame(Results_assets, columns= col_names)383 # Creating CSV files for further analysis384 df_results = pd.concat([lambdas, results, assets, weights], axis=1)385 stats.to_csv('Generated data/Different Lambdas/stats_GA_'+file[:-4]+'.csv', index = False)...
Random Search.py
Source:Random Search.py
import numpy as np
import pandas as pd


class DataSet:
    """A cardinality-constrained portfolio instance read from ``Datasets/``."""

    def __init__(self, Asset_File, N, K, epsilon=0.01, delta=1.0):
        """Load a dataset and derive the quantities used by the search.

        Rows of the file are classified by their number of
        whitespace-separated fields:

        * 1 field  -- the number of assets,
        * 2 fields -- an asset's expected return and standard deviation,
        * 3 fields -- ``i j correlation`` with 1-based asset indices.

        Args:
            Asset_File: File name inside the ``Datasets`` directory.
            N: Total number of assets in the dataset.
            K: Number of assets held in a solution.
            epsilon: Minimum investment per selected asset.
            delta: Maximum investment per selected asset.

        Raises:
            ValueError: If ``K * epsilon > 1`` or ``K * delta < 1``, i.e. the
                investment bounds cannot be met by K assets summing to 1.
        """
        self.Asset_File = Asset_File
        self.N = N  # Total number of assets in a dataset
        self.K = K  # Total number of assets in a solution
        self.epsilon = epsilon  # Min investment
        self.delta = delta  # Max investment
        self.number_of_stocks = 0
        self.covariance = np.nan

        returns_deviations = []
        correlations = []

        # Splitting rows based on what they contain. Tokenising with split()
        # (any whitespace) keeps the field count and the parsed values in
        # agreement even when the file uses tabs or repeated blanks.
        with open('Datasets/{}'.format(Asset_File), newline='') as datafile:
            for row in datafile:
                fields = row.split()
                if len(fields) == 1:
                    self.number_of_stocks = int(fields[0])
                elif len(fields) == 2:
                    returns_deviations.append([float(x) for x in fields])
                elif len(fields) == 3:
                    correlations.append([float(x) for x in fields])

        # [i, j, correlation] triples exactly as read from the file
        self.correlations = correlations

        # Upper-triangular correlation matrix (file indices are 1-based)
        size = int(self.number_of_stocks)
        corr = np.zeros((size, size))
        for i, j, rho in correlations:
            corr[int(i) - 1][int(j) - 1] = rho

        # Matrix with one [return, deviation] row per asset
        self.returns_deviations = np.array(returns_deviations)

        # Splitting the data into variables needed for calculation
        self.deviations = self.returns_deviations[:, 1]
        self.mu = self.returns_deviations[:, 0]
        self.covariance = corr * self.deviations * self.deviations.reshape((self.deviations.shape[0], 1))
        # Mirror the filled triangle so sigma is the full symmetric matrix
        self.sigma = self.covariance + self.covariance.T - np.diag(self.covariance.diagonal())

        # Making sure constraints on minimum and maximum investments are met
        if K * epsilon > 1.0:
            raise ValueError("Minimum investment is too large")
        if K * delta < 1.0:
            raise ValueError("Maximum investment is too small")

        # Share of the budget left once every asset holds epsilon
        self.F = 1.0 - K * epsilon


class Solution:
    """A random candidate portfolio."""

    def __init__(self, N, K):
        """Draw K distinct asset indices out of N and K random proportions."""
        self.Q = np.random.permutation(N)[:K]  # indices of selected assets
        self.s = np.random.rand(K)  # raw proportions in [0, 1)
        self.w = np.zeros(N)  # weights over all N assets
        self.obj1 = np.nan  # filled in by evaluate()
        self.obj2 = np.nan  # filled in by evaluate()


def check_valid_solution(solution, dataset):
    """Raise ValueError if a solution violates any portfolio constraint.

    The number of selected assets is taken from ``dataset.K`` so the check
    does not depend on a module-level ``K``.
    """
    K = dataset.K
    w = solution.w
    # Checking whether correct number of assets has been picked
    if np.sum(w >= dataset.epsilon) != K:
        raise ValueError("Expected " + str(K) + " assets selected, found " + str(np.sum(w > 0.0)) + " in solution: " + str(w))
    # Checking whether number and size of proportions is correct
    elif np.any(solution.s > 1) or np.any(solution.s < 0) or len(solution.s) != K:
        raise ValueError("The values of solution.s are not valid: " + str(solution.s))
    # Checking whether proportions sum up to 1
    elif not np.isclose(w.sum(), 1):
        raise ValueError("Proportions don't sum up to 1 (" + str(w.sum()) + ") in solution: " + str(w))
    # Checking whether maximum investment amount has not been exceeded
    elif np.any(w > dataset.delta):
        raise ValueError("There's at least one proportion larger than delta: " + str(w))
    # Checking for duplicate assets in a solution
    elif len(np.unique(solution.Q)) != len(solution.Q):
        raise ValueError("Duplicated assets in the portfolio: " + str(w))


def evaluate(solution, dataset, l, Lambda, best_value_found, best_solutions):
    """Turn a solution's proportions into weights and score it.

    Writes the weights into ``solution.w``, replaces ``solution.s`` with the
    weights minus ``epsilon``, and stores the two objectives in
    ``solution.obj1`` and ``solution.obj2``. When the weighted objective
    ``f = Lambda[l] * obj1 - (1 - Lambda[l]) * obj2`` beats
    ``best_value_found[l]``, that entry is updated in place and the solution
    is appended to ``best_solutions``.

    Returns:
        Tuple ``(improved, best_value_found[l])``.

    Raises:
        ValueError: If the resulting solution fails check_valid_solution.
    """
    improved = False
    w = solution.w
    # Scale the proportions so the weights sum to 1 with at least epsilon each
    L = solution.s.sum()
    w_temp = dataset.epsilon + solution.s * dataset.F / L
    # Cap weights above delta and redistribute the rest until none exceeds it
    is_too_large = (w_temp > dataset.delta)
    while is_too_large.sum() > 0:
        is_not_too_large = np.logical_not(is_too_large)
        L = solution.s[is_not_too_large].sum()
        # Budget left after the capped assets take delta and the others epsilon
        F_temp = 1.0 - (dataset.epsilon * is_not_too_large.sum() + dataset.delta * is_too_large.sum())
        w_temp = dataset.epsilon + solution.s * F_temp / L
        w_temp[is_too_large] = dataset.delta
        is_too_large = (w_temp > dataset.delta)

    w[:] = 0
    w[solution.Q] = w_temp  # Actual weights
    solution.s = w_temp - dataset.epsilon  # Investment proportions

    check_valid_solution(solution, dataset)

    # obj1 = sum_ij w_i * w_j * sigma_ij, obj2 = sum_i w_i * mu_i
    solution.obj1 = np.sum((w * w.reshape((w.shape[0], 1))) * dataset.sigma)
    solution.obj2 = np.sum(w * dataset.mu)

    f = Lambda[l] * solution.obj1 - (1 - Lambda[l]) * solution.obj2

    # Keep the new solution if it is better than the best seen so far
    if f < best_value_found[l]:
        improved = True
        best_value_found[l] = f
        best_solutions.append(solution)

    return improved, best_value_found[l]


def RandomSearch(maxEvals, Lambda):
    """Sample ``maxEvals`` random solutions and keep the best one.

    Relies on names set by the driver script at module level: ``dataset``
    (the DataSet being solved) and the result lists ``cov``, ``r``,
    ``assets`` and ``weights``, each of which receives one entry per call
    describing the best solution found.

    Args:
        maxEvals: Number of random solutions to evaluate (at least 1).
        Lambda: Scalar weight that trades obj1 against obj2.

    Returns:
        Tuple ``(best_solution, best_f)``.

    Raises:
        ValueError: If ``maxEvals`` is smaller than 1.
    """
    if maxEvals < 1:
        raise ValueError("maxEvals must be at least 1")

    Lambda = np.array([Lambda])
    # Start from +inf for every Lambda so the first evaluated solution is
    # always recorded. (Deriving the start value as Lambda * inf yields nan
    # for Lambda == 0, and starting at 0.0 instead can leave best_solutions
    # empty when no sampled portfolio has a positive return.)
    best_value_found = np.array([np.inf])

    # List of best solutions ever found.
    best_solutions = []

    l = 0
    f = best_value_found[l]
    # Generate and evaluate new solutions until the evaluation budget is spent
    for _ in range(maxEvals):
        candidate = Solution(dataset.N, dataset.K)
        _, f = evaluate(candidate, dataset, l, Lambda, best_value_found, best_solutions)

    # Collecting information on the best solution
    best = best_solutions[-1]
    cov.append(best.obj1)
    r.append(best.obj2)
    assets.append(best.Q)
    weights.append(best.w)

    return best, f


# Names assigned below remain module globals (an ``if`` creates no scope),
# which is how RandomSearch sees ``dataset`` and the result lists.
if __name__ == "__main__":
    # Iterating through data files
    stock_lengths = [31, 85, 89, 98, 225]
    asset_files = ['assets1.txt', 'assets2.txt', 'assets3.txt', 'assets4.txt', 'assets5.txt']
    for n, file in zip(stock_lengths, asset_files):
        N = n  # Total number of assets in data file
        Nvalues = [N]
        K = 10  # Number of assets to include in the portfolio
        E = 50  # Number of different lambda values

        # Initializing variables for collecting data on different lambdas
        Asset_File = file
        cov = []
        r = []
        weights = []
        assets = []
        fvalues = np.empty(E)
        lambvalues = np.empty(E)

        # Initializing the dataset
        dataset = DataSet(Asset_File, N, K, epsilon=0.01)

        maxEvals = 1000 * N  # Maximum solution evaluations

        # Sets a random seed for solution repeatability
        seed = 12345
        np.random.seed(seed)

        # Iterating through different values of lambda
        for e in range(1, E + 1):
            Lambda = np.array([(e - 1) / (E - 1)])  # 50 lambda values equally spaced from 0 to 1
            s, f = RandomSearch(maxEvals, Lambda[0])
            print("N={0}, Lambda = {1}, f = {2}".format(N, Lambda[0], f))
            fvalues[e-1] = f
            lambvalues[e-1] = Lambda[0]

            # Tracking which lambda values is being currently calculated
            print(e)
        # NOTE(review): the listing's indentation was lost; this summary is
        # assumed to run once per data file, after all lambdas -- confirm.
        print("N={0}, mean = {1}, sd = {2}, min = {3}, max = {4}, lamb={5}".format(N, fvalues.mean(), fvalues.std(), fvalues.min(), fvalues.max(), lambvalues))

        # Returns
        r = np.array(r)
        # Weights
        weights = np.array(weights)
        # Covariances
        cov = np.array(cov)
        # Statistics about f values
        f_stats = [fvalues.min(), fvalues.max(), fvalues.mean(), fvalues.std()]
        # Statistics about returns
        r_stats = [r.min(), r.max(), r.mean(), r.std()]
        # Statistics of the covariances
        cov_stats = [cov.min(), cov.max(), cov.mean(), cov.std()]
        # The actual f values
        fs = np.array(fvalues)
        # Lambda values
        ls = np.array(lambvalues)

        # Statistical values about the F, Cov and R
        stats = pd.DataFrame(f_stats)
        stats[1] = r_stats
        stats[2] = cov_stats
        stats.columns = ['F value stats', 'Return stats', 'Covariance stats']

        # Results for the 50 lambda values
        results = pd.DataFrame(fs)
        results[1] = r
        results[2] = cov
        results.columns = ['F values', 'Returns', 'Covariances']

        # Weights of the best portfolios for the 50 lambda values
        weights = pd.DataFrame(weights, columns=list(range(1, Nvalues[0]+1)))

        # Indexes of assets used in each of the best portfolios; one column
        # per held asset, so the count follows K
        col_names = ['asset_{}'.format(i) for i in range(1, K + 1)]
        assets = pd.DataFrame(assets, columns=col_names)

        # Lambdas used
        Lambdas = pd.DataFrame(ls, columns=['Lambda'])

        # Creating CSV files for further analysis
        df_results = pd.concat([Lambdas, results, assets, weights], axis=1)
        stats.to_csv('Generated data/Different Lambdas/stats_RS_'+file[:-4]+'.csv', index=False)
...
Learn to execute automation testing from scratch with LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!