Best Python code snippet using autotest_python
reinforcement_learning_module.py
Source: reinforcement_learning_module.py
import pandas as pd
import enum

from tensorforce import Environment
from tensorforce.agents import Agent
from fe_module import (features_file_path, flash_crash_event_starting_timestamp,
                       flash_crash_event_ending_timestamp)


class Decision(enum.Enum):
    # The agent labels each timestep's market behaviour as NORMAL or ABNORMAL.
    NORMAL = 0
    ABNORMAL = 1


class ExchangeEnvironment(Environment):
    def __init__(self, environment_observations):
        super().__init__()
        self.state_iterator = 0
        self.reward_var = environment_observations['REWARD']
        # Keep only the feature columns; the last two columns carry the reward and label.
        self.environment_observations = environment_observations.iloc[:, :-2]
        self.no_states = self.environment_observations.shape[0]
        self.current_state = {}

    def states(self):
        # Seven float features per timestep.
        state_dict = dict(type='float', shape=(7,))
        return state_dict

    def actions(self):
        # A single binary decision: 0 = NORMAL, 1 = ABNORMAL.
        return {"DECISION": dict(type="int", num_values=2)}

    def close(self):
        super().close()

    def reset(self):
        self.state_iterator = 0
        self.current_state = self.environment_observations.iloc[self.state_iterator]
        return self.current_state

    def max_episode_timesteps(self):
        return self.no_states

    def check_terminal_criteria(self, actions, all_decisions, timestamp, early_terminate=False):
        terminate = False
        cutoff_timestamp = '2022-05-02 07:58:20.119000+00:00'

        if early_terminate:
            # Abort early on a false alarm before the crash, or if nothing has been
            # flagged by the hard-coded cutoff timestamp.
            if pd.to_datetime(timestamp) < pd.to_datetime(flash_crash_event_starting_timestamp) and \
                    actions['DECISION'] == 1:
                terminate = True
            elif pd.to_datetime(timestamp) > pd.to_datetime(cutoff_timestamp) and \
                    sum(all_decisions) == 0:
                terminate = True
        if self.state_iterator == self.no_states - 1:
            terminate = True
        return terminate

    def calculate_reward(self, reward_val, timestamp, decision, terminal):
        if terminal is True and pd.to_datetime(timestamp) < pd.to_datetime(flash_crash_event_ending_timestamp):
            # Episode ended before the crash window closed: flat penalty.
            reward = -200
        else:
            if pd.to_datetime(timestamp) < pd.to_datetime(flash_crash_event_starting_timestamp):
                # Before the crash: reward NORMAL decisions.
                if decision == Decision.NORMAL.value:
                    reward = reward_val
                else:
                    reward = 0

            elif pd.to_datetime(flash_crash_event_starting_timestamp) <= pd.to_datetime(timestamp) <= \
                    pd.to_datetime(flash_crash_event_ending_timestamp):
                # During the crash: penalise NORMAL decisions.
                if decision == Decision.NORMAL.value:
                    reward = -reward_val
                else:
                    reward = 0

            elif pd.to_datetime(timestamp) > pd.to_datetime(flash_crash_event_ending_timestamp):
                # After the crash: penalise lingering ABNORMAL decisions more heavily.
                if decision == Decision.ABNORMAL.value:
                    reward = -3 * reward_val
                else:
                    reward = 0

        return reward

    def report_accuracy(self, all_decisions, y):
        # Split the label vector into the stretches before, during and after the crash.
        y_before = y[:y.index(1)]
        y_during = y[y.index(1): y[y.index(1):].index(0) + len(y_before)]
        y_after = y[::-1][:y[::-1].index(1)]

        accuracy = {}
        true_before = 0
        true_during = 0
        true_after = 0

        for i in range(len(y)):
            if all_decisions[i] == y[i]:
                if i <= len(y_before):
                    true_before += 1
                elif len(y_before) < i < len(y_before) + len(y_during):
                    true_during += 1
                elif i >= len(y_before) + len(y_during):
                    true_after += 1

        accuracy['before'] = true_before / len(y_before) * 100
        accuracy['during'] = true_during / len(y_during) * 100
        accuracy['after'] = true_after / len(y_after) * 100
        accuracy['overall'] = (true_after + true_before + true_during) / len(y) * 100

        print('Accuracy before the crash: ', accuracy['before'])
        print('Accuracy during the crash: ', accuracy['during'])
        print('Accuracy after the crash: ', accuracy['after'])
        print('Overall accuracy: ', accuracy['overall'])

        return accuracy

    def execute(self, all_decisions, actions):
        # Advance the iterator first, so the reward and next state come from the new row.
        timestamp = self.environment_observations.index[self.state_iterator]
        self.state_iterator += 1
        reward_val = self.reward_var.iloc[self.state_iterator]
        next_state = self.environment_observations.iloc[self.state_iterator]
        terminal = self.check_terminal_criteria(
            actions, all_decisions, timestamp, early_terminate=False)
        reward = self.calculate_reward(reward_val,
                                       timestamp, actions['DECISION'], terminal)

        return next_state, terminal, reward


class ExchangeAgent:
    def __init__(self, environment: ExchangeEnvironment):
        # DQN agent; `y` is the module-level label list defined in the __main__ block below.
        self.market_evaluation = Agent.create(
            agent='dqn', environment=environment, batch_size=128, memory=len(y))
        self.episode_actions = {}


def execute_episode_batch(environment: ExchangeEnvironment, agent: ExchangeAgent, episodes):
    decision_reward_df = pd.DataFrame()
    for i in range(episodes):
        episode_length = 0
        state_vector = environment.reset()
        all_timestamps = []
        all_decisions = []
        all_rewards = []
        terminate = False
        while not terminate:
            episode_length += 1
            actions = agent.market_evaluation.act(states=state_vector)
            state_vector, terminate, reward = environment.execute(all_decisions,
                                                                  actions=actions)
            agent.market_evaluation.observe(
                terminal=terminate, reward=reward)
            all_decisions.append(actions['DECISION'])
            all_rewards.append(reward)
            all_timestamps.append(state_vector.name)

        if episode_length == len(y):
            decision_reward_df['Timestamp'] = all_timestamps
            decision_reward_df[f'model/Decision_Episode{i}'] = all_decisions
            decision_reward_df[f'Rewards_Episode{i}'] = all_rewards
            accuracy = environment.report_accuracy(all_decisions, y)
            if accuracy['overall'] > 90:
                decision_reward_df.to_excel('Decision_reward.xlsx')

            if accuracy['before'] == 100 and accuracy['during'] > 70 and accuracy['after'] == 100:
                decision_reward_df.to_excel('Decision_reward.xlsx')
                break


def simulator(environment: ExchangeEnvironment, agent: ExchangeAgent, episodes=100):
    execute_episode_batch(environment, agent, episodes)
    environment.close()


if __name__ == "__main__":
    environment_observations = pd.read_csv(
        features_file_path + 'df_environment.csv', index_col='Timestamp')
    exchange_environment = ExchangeEnvironment(
        environment_observations)
    y = environment_observations['LABEL'][1:].to_list()
    exchange_agent = ExchangeAgent(exchange_environment)
...
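The trickiest piece of reinforcement_learning_module.py is the slicing in report_accuracy, which assumes y is a 0/1 label list containing a single contiguous run of 1s that marks the flash-crash window. A minimal sketch with made-up labels (not the real dataset) shows how the three slices fall out:

# Toy label vector: a single contiguous run of 1s marks the crash window.
y = [0, 0, 0, 1, 1, 1, 0, 0]

y_before = y[:y.index(1)]                                           # [0, 0, 0]
y_during = y[y.index(1): y[y.index(1):].index(0) + len(y_before)]   # [1, 1, 1]
y_after = y[::-1][:y[::-1].index(1)]                                # [0, 0]

print(len(y_before), len(y_during), len(y_after))  # 3 3 2

The per-phase accuracies are then just the share of timesteps in each slice where the agent's decision matches the label.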
base.py
Source: base.py
...
        self.job = job
        self.finished = False
        self.line_buffer = status_lib.line_buffer()
        # create and prime the parser state machine
        self.state = self.state_iterator(self.line_buffer)
        self.state.next()

    def process_lines(self, lines):
        """ Feed 'lines' into the parser state machine, and return
        a list of all the new test results produced."""
        self.line_buffer.put_multiple(lines)
        try:
            return self.state.next()
        except StopIteration:
            msg = ("WARNING: parser was called to process status "
                   "lines after it was end()ed\n"
                   "Current traceback:\n" +
                   traceback.format_exc() +
                   "\nCurrent stack:\n" +
                   "".join(traceback.format_stack()))
            tko_utils.dprint(msg)
            return []

    def end(self, lines=[]):
        """ Feed 'lines' into the parser state machine, signal to the
        state machine that no more lines are forthcoming, and then
        return a list of all the new test results produced."""
        self.line_buffer.put_multiple(lines)
        # run the state machine to clear out the buffer
        self.finished = True
        try:
            return self.state.next()
        except StopIteration:
            msg = ("WARNING: parser was end()ed multiple times\n"
                   "Current traceback:\n" +
                   traceback.format_exc() +
                   "\nCurrent stack:\n" +
                   "".join(traceback.format_stack()))
            tko_utils.dprint(msg)
            return []

    @staticmethod
    def make_job(dir):
        """ Create a new instance of the job model used by the
        parser, given a results directory."""
        raise NotImplementedError

    def state_iterator(self, buffer):
        """ A generator method that implements the actual parser
        state machine. """
...
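The docstrings above describe the pattern base.py relies on: status lines are pushed into a line buffer, and a generator (the "parser state machine") is resumed with next(), yielding whatever new test results it could parse from the buffered lines. The real state_iterator is supplied by the versioned parser subclasses; the sketch below is only an illustration of that generator-driven pattern, with hypothetical LineBuffer and ToyParser classes rather than autotest's actual status_lib/tko code.

import collections


class LineBuffer:
    """Hypothetical stand-in for status_lib.line_buffer()."""

    def __init__(self):
        self._lines = collections.deque()

    def put_multiple(self, lines):
        self._lines.extend(lines)

    def get(self):
        # Return the next buffered line, or None when the buffer is drained.
        return self._lines.popleft() if self._lines else None


class ToyParser:
    """Generator-driven parser in the style base.py's docstrings describe."""

    def __init__(self):
        self.finished = False
        self.line_buffer = LineBuffer()
        # create and prime the parser state machine
        self.state = self.state_iterator(self.line_buffer)
        next(self.state)

    def process_lines(self, lines):
        # Feed lines into the state machine and return the new results.
        self.line_buffer.put_multiple(lines)
        return next(self.state)

    def end(self, lines=()):
        # Signal that no more lines are coming, then drain whatever is left.
        self.line_buffer.put_multiple(lines)
        self.finished = True
        return next(self.state)

    def state_iterator(self, buffer):
        # Each resume drains the buffer and yields the statuses it recognised.
        while not self.finished:
            yield self._drain(buffer)
        # One final pass after end() has been called.
        yield self._drain(buffer)

    @staticmethod
    def _drain(buffer):
        results = []
        line = buffer.get()
        while line is not None:
            if line.startswith(("GOOD", "FAIL")):
                results.append(line.split()[0])
            line = buffer.get()
        return results


parser = ToyParser()
print(parser.process_lines(["GOOD test_a", "INFO noise", "FAIL test_b"]))  # ['GOOD', 'FAIL']
print(parser.end(["GOOD test_c"]))                                         # ['GOOD']

Driving the toy parser mirrors the process_lines()/end() flow in base.py: each call pushes new lines into the buffer and returns the results produced so far, and the generator only finishes after end() marks the stream complete.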