Best Python code snippet using tox_python
run.py
Source: run.py
import os
import sys
import subprocess
import numpy as np
from time import time, sleep
import argparse
import json
import copy
import tensorflow as tf
import gym
import gym_auv
import gym_auv.reporting
import multiprocessing
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy, MlpLstmPolicy, MlpLnLstmPolicy
from stable_baselines.common.vec_env import VecVideoRecorder, DummyVecEnv, SubprocVecEnv
import stable_baselines.ddpg.policies
import stable_baselines.td3.policies
import stable_baselines.sac.policies
from stable_baselines.ddpg import AdaptiveParamNoiseSpec, NormalActionNoise, LnMlpPolicy, OrnsteinUhlenbeckActionNoise
from stable_baselines import PPO2, DDPG, TD3, A2C, ACER, ACKTR, SAC, TRPO
from sklearn.model_selection import ParameterGrid
from shapely import speedups
from stable_baselines.gail import ExpertDataset

speedups.enable()

DIR_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

def _preprocess_custom_envconfig(rawconfig):
    # Interpret the flat "key value key value ..." list as a dict, casting numeric values.
    custom_envconfig = dict(zip(rawconfig[::2], rawconfig[1::2]))
    for key in custom_envconfig:
        try:
            custom_envconfig[key] = float(custom_envconfig[key])
            if (custom_envconfig[key] == int(custom_envconfig[key])):
                custom_envconfig[key] = int(custom_envconfig[key])
        except ValueError:
            pass
    return custom_envconfig

def create_env(env_id, envconfig, test_mode=False, render_mode='2d', pilot=None, verbose=False):
    if pilot:
        env = gym.make(env_id, env_config=envconfig, test_mode=test_mode, render_mode=render_mode, pilot=pilot, verbose=verbose)
        env.seed(0)  # Thomas 04.08.21
    else:
        env = gym.make(env_id, env_config=envconfig, test_mode=test_mode, render_mode=render_mode, verbose=verbose)
        env.seed(0)  # Thomas 04.08.21
    return env

def make_mp_env(env_id, rank, envconfig, seed=0, pilot=None):
    """
    Utility function for multiprocessed env.
    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    """
    def _init():
        env = create_env(env_id, envconfig, pilot=pilot)
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init

def play_scenario(env, recorded_env, args, agent=None):
    # if args.video:
    #     print('Recording enabled')
    #     recorded_env = VecVideoRecorder(env, args.video_dir, record_video_trigger=lambda x: x == 0,
    #         video_length=args.recording_length, name_prefix=args.video_name
    #     )
    from pyglet.window import key
    key_input = np.array([-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    autopilot = False
    # gail_expert_generation = False
    # gail_actions = []
    # gail_observations = []
    # gail_rewards = []
    # gail_num_episodes = 2
    # gail_episode_returns = np.zeros((gail_num_episodes,))
    # gail_episode_starts = []
    # gail_reward_sum = 0
    # gail_ep_idx = 0

    print('Playing scenario: ', env)

    def key_press(k, mod):
        nonlocal autopilot
        if k == key.DOWN: key_input[0] = -1
        if k == key.UP: key_input[0] = 1
        if k == key.LEFT: key_input[1] = 0.5
        if k == key.RIGHT: key_input[1] = -0.5
        if k == key.NUM_2: key_input[2] = -1
        if k == key.NUM_1: key_input[2] = 1
        if k == key.J: key_input[3] = -1
        if k == key.U: key_input[3] = 1
        if k == key.I: key_input[4] = -1
        if k == key.K: key_input[4] = 1
        if k == key.O: key_input[5] = -1
        if k == key.P: key_input[5] = 1
        if k == key.NUM_4: key_input[6] = -1
        if k == key.NUM_3: key_input[6] = 1
        if k == key.A:
            autopilot = not autopilot
            print('Autopilot {}'.format(autopilot))
        # if k == key.E:
        #     gail_expert_generation = not gail_expert_generation
        #     print('gail_expert_generation {}'.format(gail_expert_generation))

    def key_release(k, mod):
        nonlocal restart, quit
        if k == key.R:
            restart = True
            print('Restart')
        if k == key.Q:
            quit = True
            print('quit')
        if k == key.UP: key_input[0] = -1
        if k == key.DOWN: key_input[0] = -1
        if k == key.LEFT and key_input[1] != 0: key_input[1] = 0
        if k == key.RIGHT and key_input[1] != 0: key_input[1] = 0
        if k == key.NUM_2 and key_input[2] != 0: key_input[2] = 0
        if k == key.NUM_1 and key_input[2] != 0: key_input[2] = 0
        if k == key.U and key_input[3] != 0: key_input[3] = 0
        if k == key.J and key_input[3] != 0: key_input[3] = 0
        if k == key.I and key_input[4] != 0: key_input[4] = 0
        if k == key.K and key_input[4] != 0: key_input[4] = 0
        if k == key.O and key_input[5] != 0: key_input[5] = 0
        if k == key.P and key_input[5] != 0: key_input[5] = 0
        if k == key.NUM_4 and key_input[6] != 0: key_input[6] = 0
        if k == key.NUM_3 and key_input[6] != 0: key_input[6] = 0

    viewer = env._viewer2d if args.render in {'both', '2d'} else env._viewer3d
    viewer.window.on_key_press = key_press
    viewer.window.on_key_release = key_release
    try:
        while True:
            t = time()
            restart = False
            t_steps = 0
            quit = False
            if (args.env == 'PathGeneration-v0'):
                a = np.array([5.0, 5.0, 1.0, 1.0])
            elif (args.env == 'PathColavControl-v0'):
                a = np.array([0.0])
            else:
                a = np.array([0.0, 0.0])
            obs = None
            while True:
                t, dt = time(), time()-t
                if args.env == 'PathGeneration-v0':
                    a[0] += key_input[1]
                    a[1] = max(0, key_input[0], a[1] + 0.1*key_input[0])
                    a[2] += 0.1*key_input[2]
                    print('Applied action: ', a)
                    sleep(1)
                elif (args.env == 'PathColavControl-v0'):
                    a[0] = 0.1*key_input[1]
                else:
                    a[0] = key_input[0]
                    a[1] = key_input[1]
                try:
                    env.rewarder.params["lambda"] = np.clip(np.power(10, np.log10(env.rewarder.params["lambda"]) + key_input[2]*0.05), 0, 1)
                    env.rewarder.params["eta"] = np.clip(env.rewarder.params["eta"] + key_input[6]*0.02, 0, 4)
                except KeyError:
                    pass
                if args.render in {'3d', 'both'}:
                    env._viewer3d.camera_height += 0.15*key_input[3]
                    env._viewer3d.camera_height = max(0, env._viewer3d.camera_height)
                    env._viewer3d.camera_distance += 0.3*key_input[4]
                    env._viewer3d.camera_distance = max(1, env._viewer3d.camera_distance)
                    env._viewer3d.camera_angle += 0.3*key_input[5]
                elif args.render == '2d':
                    env._viewer2d.camera_zoom += 0.1*key_input[4]
                    env._viewer2d.camera_zoom = max(0, env._viewer2d.camera_zoom)
                if autopilot and agent is not None:
                    if obs is None:
                        a = np.array([0.0, 0.0])
                    else:
                        a, _ = agent.predict(obs, deterministic=True)
                obs, r, done, info = env.step(a)

                # gail_observations.append(obs)
                # gail_actions.append(a)
                # gail_rewards.append(r)
                # gail_episode_starts.append(done)
                # gail_reward_sum += r
                # if gail_ep_idx >= gail_num_episodes and gail_expert_generation:
                #     break
                if args.verbose > 0:
                    print(', '.join('{:.1f}'.format(x) for x in obs) + '(size {})'.format(len(obs)))
                recorded_env.render()
                t_steps += 1
                if args.save_snapshots and not done:
                    if t_steps % 50 == 0:
                        env.save_latest_episode(save_history=False)
                        for size in (100, 200):#, 300, 400, 500):
                            gym_auv.reporting.plot_trajectory(
                                env, fig_dir='logs/play_results/', fig_prefix=('_t_step_' + str(t_steps) + '_' + str(size)), local=True, size=size
                            )
                if quit: raise KeyboardInterrupt
                if done or restart:
                    # gail_episode_returns[gail_ep_idx] = gail_reward_sum
                    # gail_reward_sum = 0
                    # gail_ep_idx += 1
                    break

            env.seed(np.random.randint(1000))
            env.save_latest_episode()
            gym_auv.reporting.report(env, report_dir='logs/play_results/')
            gym_auv.reporting.plot_trajectory(env, fig_dir='logs/play_results/')
            env.reset(save_history=False)

            # if gail_ep_idx >= gail_num_episodes and gail_expert_generation:
            #     gail_observations = np.concatenate(gail_observations).reshape((-1,) + env.observation_space.shape)
            #     gail_actions = np.concatenate(gail_actions).reshape((-1,) + env.action_space.shape)
            #     gail_rewards = np.array(gail_rewards)
            #     gail_episode_starts = np.array(gail_episode_starts[:-1])
            #     gail_numpy_dict = {
            #         'actions': gail_actions,
            #         'obs': gail_observations,
            #         'rewards': gail_rewards,
            #         'episode_returns': gail_episode_returns,
            #         'episode_starts': gail_episode_starts
            #     }
            #     np.savez('gail_expert', **gail_numpy_dict)

    except KeyboardInterrupt:
        pass

def main(args):
    envconfig_string = args.envconfig
    custom_envconfig = _preprocess_custom_envconfig(args.envconfig) if args.envconfig is not None else {}
    env_id = 'gym_auv:' + args.env
    env_name = env_id.split(':')[-1] if ':' in env_id else env_id
    envconfig = gym_auv.SCENARIOS[env_name]['config'] if env_name in gym_auv.SCENARIOS else {}
    envconfig.update(custom_envconfig)
    #NUM_CPU = multiprocessing.cpu_count()
    NUM_CPU = 8
    EXPERIMENT_ID = str(int(time())) + args.algo.lower()
    model = {
        'ppo': PPO2,
        'ddpg': DDPG,
        'td3': TD3,
        'a2c': A2C,
        'acer': ACER,
        'acktr': ACKTR,
        'sac': SAC,
        'trpo': TRPO
    }[args.algo.lower()]
    if args.mode == 'play':
        agent = model.load(args.agent) if args.agent is not None else None
        envconfig_play = envconfig.copy()
        envconfig_play['show_indicators'] = True
        #envconfig_play['autocamera3d'] = False
        env = create_env(env_id, envconfig_play, test_mode=True, render_mode=args.render, pilot=args.pilot, verbose=True)
        print('Created environment instance')
        if args.scenario:
            env.load(args.scenario)
        vec_env = DummyVecEnv([lambda: env])
        recorded_env = VecVideoRecorder(vec_env, args.video_dir, record_video_trigger=lambda x: x==0,
            video_length=args.recording_length, name_prefix=(args.env if args.video_name == 'auto' else args.video_name)
        )
        print(args.video_dir, args.video_name)
        play_scenario(env, recorded_env, args, agent=agent)
        recorded_env.env.close()
    elif (args.mode == 'enjoy'):
        agent = model.load(args.agent)
        figure_folder = os.path.join(DIR_PATH, 'logs', 'enjoys', args.env, EXPERIMENT_ID)
        os.makedirs(figure_folder, exist_ok=True)
        scenario_folder = os.path.join(figure_folder, 'scenarios')
        os.makedirs(scenario_folder, exist_ok=True)
        video_folder = os.path.join(DIR_PATH, 'logs', 'videos', args.env, EXPERIMENT_ID)
        os.makedirs(video_folder, exist_ok=True)

        env = create_env(env_id, envconfig, test_mode=True, render_mode=args.render, pilot=args.pilot)
        if args.scenario:
            env.load(args.scenario)
        vec_env = DummyVecEnv([lambda: env])
        recorded_env = VecVideoRecorder(vec_env, video_folder, record_video_trigger=lambda x: x==0,
            video_length=args.recording_length, name_prefix=(args.env if args.video_name == 'auto' else args.video_name)
        )
        obs = recorded_env.reset()
        state = None
        t_steps = 0
        ep_number = 1
        done = [False for _ in range(vec_env.num_envs)]
        for _ in range(args.recording_length):
            if args.recurrent:
                action, _states = agent.predict(observation=obs, state=state, mask=done, deterministic=not args.stochastic)
                state = _states
            else:
                action, _states = agent.predict(obs, deterministic=not args.stochastic)
            obs, reward, done, info = recorded_env.step(action)
            recorded_env.render()
            t_steps += 1

            if t_steps % 800 == 0 or done:
                if not done:
                    env.save_latest_episode(save_history=False)
                gym_auv.reporting.plot_trajectory(env, fig_dir=scenario_folder, fig_prefix=(args.env + '_ep{}_step{}'.format(ep_number, t_steps)))
                gym_auv.reporting.plot_trajectory(env, fig_dir=scenario_folder, fig_prefix=(args.env + '_ep{}_step{}_local'.format(ep_number, t_steps)), local=True)
                if done:
                    ep_number += 1
        recorded_env.close()
    elif (args.mode == 'train'):
        figure_folder = os.path.join(DIR_PATH, 'logs', 'figures', args.env, EXPERIMENT_ID)
        os.makedirs(figure_folder, exist_ok=True)
        scenario_folder = os.path.join(figure_folder, 'scenarios')
        os.makedirs(scenario_folder, exist_ok=True)
        video_folder = os.path.join(DIR_PATH, 'logs', 'videos', args.env, EXPERIMENT_ID)
        recording_length = 8000
        os.makedirs(video_folder, exist_ok=True)
        agent_folder = os.path.join(DIR_PATH, 'logs', 'agents', args.env, EXPERIMENT_ID)
        os.makedirs(agent_folder, exist_ok=True)
        tensorboard_log = os.path.join(DIR_PATH, 'logs', 'tensorboard', args.env, EXPERIMENT_ID)
        tensorboard_port = 6006
        if args.nomp or model == DDPG or model == TD3 or model == SAC or model == TRPO:
            num_cpu = 1
            vec_env = DummyVecEnv([lambda: create_env(env_id, envconfig, pilot=args.pilot)])
        else:
            num_cpu = NUM_CPU
            vec_env = SubprocVecEnv([make_mp_env(env_id, i, envconfig, pilot=args.pilot) for i in range(num_cpu)])
        if (args.agent is not None):
            agent = model.load(args.agent)
            agent.set_env(vec_env)
        else:
            if (model == PPO2):
                if args.recurrent:
                    hyperparams = {
                        # 'n_steps': 1024,
                        # 'nminibatches': 32,
                        # 'lam': 0.95,
                        # 'gamma': 0.99,
                        # 'noptepochs': 10,
                        # 'ent_coef': 0.0,
                        # 'learning_rate': 0.0003,
                        # 'cliprange': 0.2,
                        'n_steps': 1024,
                        'nminibatches': 1,
                        'lam': 0.98,
                        'gamma': 0.999,
                        'noptepochs': 4,
                        'ent_coef': 0.01,
                        'learning_rate': 2e-3,
                    }
                    class CustomLSTMPolicy(MlpLstmPolicy):
                        def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, n_lstm=256, reuse=False, **_kwargs):
                            super().__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, n_lstm, reuse,
                                net_arch=[256, 256, 'lstm', dict(vf=[64], pi=[64])],
                                **_kwargs)
                    agent = PPO2(CustomLSTMPolicy,
                        vec_env, verbose=True, tensorboard_log=tensorboard_log,
                        **hyperparams
                    )
                else:
                    hyperparams = {
                        # 'n_steps': 1024,
                        # 'nminibatches': 32,
                        # 'lam': 0.95,
                        # 'gamma': 0.99,
                        # 'noptepochs': 10,
                        # 'ent_coef': 0.0,
                        # 'learning_rate': 0.0003,
                        # 'cliprange': 0.2,
                        'n_steps': 1024, # Default 128
                        'nminibatches': 32, # Default 4
                        'lam': 0.98, # Default 0.95
                        'gamma': 0.999, # Default 0.99
                        'noptepochs': 4, # Default 4
                        'ent_coef': 0.01, # Default 0.01
                        'learning_rate': 2e-4, # Default 2.5e-4
                    }
                    #policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[64, 64, 64])
                    #policy_kwargs = dict(net_arch=[64, 64, 64])
                    #layers = [256, 128, 64]
                    layers = [64, 64]
                    policy_kwargs = dict(net_arch = [dict(vf=layers, pi=layers)])
                    agent = PPO2(MlpPolicy,
                        vec_env, verbose=True, tensorboard_log=tensorboard_log,
                        **hyperparams, policy_kwargs=policy_kwargs
                    )
                #dataset = ExpertDataset(expert_path='gail_expert.npz', traj_limitation=1, batch_size=128)
                #print('Pretraining {} agent on "{}"'.format(args.algo.upper(), env_id))
                #agent.pretrain(dataset, n_epochs=1000)
                #print('Done pretraining {} agent on "{}"'.format(args.algo.upper(), env_id))
            elif (model == DDPG):
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'memory_limit': 50000,
                #     'normalize_observations': True,
                #     'normalize_returns': False,
                #     'gamma': 0.98,
                #     'actor_lr': 0.00156,
                #     'critic_lr': 0.00156,
                #     'batch_size': 256,
                #     'param_noise': AdaptiveParamNoiseSpec(initial_stddev=0.1, desired_action_stddev=0.1)
                # }
                hyperparams = {
                    'memory_limit': 1000000, # Default None (DEPRECATED: use buffer_size instead: 50000)
                    'normalize_observations': True, # Default False
                    'normalize_returns': False, # Default False
                    'gamma': 0.98, # Default 0.99
                    'actor_lr': 0.00156, # Default 0.0001
                    'critic_lr': 0.00156, # Default 0.001
                    'batch_size': 256, # Default 128
                    # OpenAI Baselines aim for action_space_stddev = 0.2 for continuous dense cases.
                    #'param_noise': AdaptiveParamNoiseSpec(initial_stddev=0.287, desired_action_stddev=0.287)
                    # DDPG Paper recommends to add this:
                    # OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))????
                    # As action noise to encourage exploration.
                    'action_noise': OrnsteinUhlenbeckActionNoise(mean=np.zeros(2), sigma=float(0.5) * np.ones(2))
                }
                agent = DDPG(MlpPolicy,
                    vec_env, verbose=True, tensorboard_log=tensorboard_log, **hyperparams
                )
            elif (model == TD3):
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'batch_size': 256,
                #     'buffer_size': 50000,
                #     'learning_starts': 1000
                # }
                hyperparams = {
                    'buffer_size': 1000000, # Default 50000
                    'train_freq': 1000, # Default 100
                    'gradient_steps': 1000, # Default 100
                    'learning_starts': 10000 # Default 100
                }
                action_noise = NormalActionNoise(mean=np.zeros(2), sigma=0.1*np.ones(2))
                agent = TD3(stable_baselines.td3.MlpPolicy,
                    vec_env, verbose=True, tensorboard_log=tensorboard_log, action_noise=action_noise, **hyperparams
                )
            elif model == A2C:
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'n_steps': 5,
                #     'gamma': 0.995,
                #     'ent_coef': 0.00001,
                #     'learning_rate': 0.00083,
                #     'lr_schedule': 'linear'
                # }
                # layers = [256, 128, 64]
                hyperparams = {
                    'n_steps': 16, # Default 5
                    'gamma': 0.99, # Default 0.99
                    'ent_coef': 0.001, # Default 0.01
                    'learning_rate': 2e-4, # Default 7e-4
                    'lr_schedule': 'linear' # Default 'constant' (learning rate updates)
                }
                layers = [64, 64]
                policy_kwargs = dict(net_arch = [dict(vf=layers, pi=layers)])
                agent = A2C(MlpPolicy,
                    vec_env, verbose=True, tensorboard_log=tensorboard_log,
                    **hyperparams, policy_kwargs=policy_kwargs
                )
            elif model == ACER:
                agent = ACER(MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log)
            elif model == ACKTR:
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'gamma': 0.99,
                #     'n_steps': 16,
                #     'ent_coef': 0.0,
                #     'learning_rate': 0.06,
                #     'lr_schedule': 'constant'
                # }
                # agent = ACKTR(MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log, **hyperparams)
                agent = ACKTR(MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log)
            elif model == SAC:
                # rl-baselines-zoo inspired:
                # hyperparams = {
                #     'batch_size': 256,
                #     'learning_starts': 1000
                # }
                # agent = SAC(stable_baselines.sac.MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log, **hyperparams)
                agent = SAC(stable_baselines.sac.MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log)
            elif model == TRPO:
                agent = TRPO(MlpPolicy, vec_env, verbose=True, tensorboard_log=tensorboard_log)
        print('Training {} agent on "{}"'.format(args.algo.upper(), env_id))
        n_updates = 0
        n_episodes = 0
        def callback(_locals, _globals):
            nonlocal n_updates
            nonlocal n_episodes
            sys.stdout.write('Training update: {}\r'.format(n_updates))
            sys.stdout.flush()
            _self = _locals['self']
            vec_env = _self.get_env()
            class Struct(object): pass
            report_env = Struct()
            report_env.history = []
            report_env.config = envconfig
            report_env.nsensors = report_env.config["n_sensors_per_sector"]*report_env.config["n_sectors"]
            report_env.sensor_angle = 2*np.pi/(report_env.nsensors + 1)
            report_env.last_episode = vec_env.get_attr('last_episode')[0]
            report_env.config = vec_env.get_attr('config')[0]
            report_env.obstacles = vec_env.get_attr('obstacles')[0]
            env_histories = vec_env.get_attr('history')
            for episode in range(max(map(len, env_histories))):
                for env_idx in range(len(env_histories)):
                    if (episode < len(env_histories[env_idx])):
                        report_env.history.append(env_histories[env_idx][episode])
            report_env.episode = len(report_env.history) + 1
            total_t_steps = _self.get_env().get_attr('total_t_steps')[0]*num_cpu
            agent_filepath = os.path.join(agent_folder, str(total_t_steps) + '.pkl')
            if model == PPO2:
                recording_criteria = n_updates % 10 == 0
                report_criteria = True
                _self.save(agent_filepath)
            elif model == A2C or model == ACER or model == ACKTR: # or model == TRPO:
                save_criteria = n_updates % 100 == 0
                recording_criteria = n_updates % 1000 == 0
                report_criteria = True
                if save_criteria:
                    _self.save(agent_filepath)
            elif model == DDPG or model == TD3 or model == SAC or model == TRPO:
                save_criteria = n_updates % 10000 == 0
                recording_criteria = n_updates % 50000 == 0
                report_criteria = report_env.episode > n_episodes
                if save_criteria:
                    _self.save(agent_filepath)
            if report_env.last_episode is not None and len(report_env.history) > 0 and report_criteria:
                try:
                    #gym_auv.reporting.plot_trajectory(report_env, fig_dir=scenario_folder, fig_prefix=args.env + '_ep_{}'.format(report_env.episode))
                    gym_auv.reporting.report(report_env, report_dir=figure_folder)
                    #vec_env.env_method('save', os.path.join(scenario_folder, '_ep_{}'.format(report_env.episode)))
                except OSError as e:
                    print("Ignoring reporting OSError:")
                    print(repr(e))
            if recording_criteria:
                if args.pilot:
                    cmd = 'python run.py enjoy {} --agent "{}" --video-dir "{}" --video-name "{}" --recording-length {} --algo {} --pilot {} --envconfig {}{}'.format(
                        args.env, agent_filepath, video_folder, args.env + '-' + str(total_t_steps), recording_length, args.algo, args.pilot, envconfig_string,
                        ' --recurrent' if args.recurrent else ''
                    )
                else:
                    cmd = 'python run.py enjoy {} --agent "{}" --video-dir "{}" --video-name "{}" --recording-length {} --algo {} --envconfig {}{}'.format(
                        args.env, agent_filepath, video_folder, args.env + '-' + str(total_t_steps), recording_length, args.algo, envconfig_string,
                        ' --recurrent' if args.recurrent else ''
                    )
                subprocess.Popen(cmd)

            n_episodes = report_env.episode
            n_updates += 1

        agent.learn(
            total_timesteps=1500000,
            tb_log_name='log',
            callback=callback
        )
    elif (args.mode in ['policyplot', 'vectorfieldplot', 'streamlinesplot']):
        figure_folder = os.path.join(DIR_PATH, 'logs', 'plots', args.env, EXPERIMENT_ID)
        os.makedirs(figure_folder, exist_ok=True)
        agent = PPO2.load(args.agent)
        if args.testvals:
            testvals = json.load(open(args.testvals, 'r'))
            valuegrid = list(ParameterGrid(testvals))
            for valuedict in valuegrid:
                customconfig = envconfig.copy()
                customconfig.update(valuedict)
                env = create_env(env_id, envconfig, test_mode=True, pilot=args.pilot)
                valuedict_str = '_'.join((key + '-' + str(val) for key, val in valuedict.items()))
                print('Running {} test for {}...'.format(args.mode, valuedict_str))

                if args.mode == 'policyplot':
                    gym_auv.reporting.plot_actions(env, agent, fig_dir=figure_folder, fig_prefix=valuedict_str)
                elif args.mode == 'vectorfieldplot':
                    gym_auv.reporting.plot_vector_field(env, agent, fig_dir=figure_folder, fig_prefix=valuedict_str)
                elif args.mode == 'streamlinesplot':
                    gym_auv.reporting.plot_streamlines(env, agent, fig_dir=figure_folder, fig_prefix=valuedict_str)
        else:
            env = create_env(env_id, envconfig, test_mode=True, pilot=args.pilot)
            with open(os.path.join(figure_folder, 'config.json'), 'w') as f:
                json.dump(env.config, f)
            if args.mode == 'policyplot':
                gym_auv.reporting.plot_actions(env, agent, fig_dir=figure_folder)
            elif args.mode == 'vectorfieldplot':
                gym_auv.reporting.plot_vector_field(env, agent, fig_dir=figure_folder)
            elif args.mode == 'streamlinesplot':
                gym_auv.reporting.plot_streamlines(env, agent, fig_dir=figure_folder)
        print('Output folder: ', figure_folder)
    elif args.mode == 'test':
        figure_folder = os.path.join(DIR_PATH, 'logs', 'tests', args.env, EXPERIMENT_ID)
        scenario_folder = os.path.join(figure_folder, 'scenarios')
        video_folder = os.path.join(figure_folder, 'videos')
        os.makedirs(figure_folder, exist_ok=True)
        os.makedirs(scenario_folder, exist_ok=True)
        os.makedirs(video_folder, exist_ok=True)
        if not args.onlyplot:
            agent = model.load(args.agent)
        def create_test_env(video_name_prefix, envconfig=envconfig):
            print('Creating test environment: ' + env_id)
            env = create_env(env_id, envconfig, test_mode=True, render_mode=args.render if args.video else None, pilot=args.pilot)
            vec_env = DummyVecEnv([lambda: env])
            if args.video:
                video_length = min(500, args.recording_length)
                recorded_env = VecVideoRecorder(vec_env, video_folder, record_video_trigger=lambda x: (x%video_length) == 0,
                    video_length=video_length, name_prefix=video_name_prefix
                )
            active_env = recorded_env if args.video else vec_env
            return env, active_env
        failed_tests = []
        def run_test(id, reset=True, report_dir=figure_folder, scenario=None, max_t_steps=None, env=None, active_env=None):
            nonlocal failed_tests
            if env is None or active_env is None:
                env, active_env = create_test_env(video_name_prefix=args.env + '_' + id)
            if scenario is not None:
                obs = active_env.reset()
                env.load(args.scenario)
                print('Loaded', args.scenario)
            else:
                if reset:
                    obs = active_env.reset()
                else:
                    obs = env.observe()
            gym_auv.reporting.plot_scenario(env, fig_dir=scenario_folder, fig_postfix=id, show=args.onlyplot)
            if args.onlyplot:
                return
            cumulative_reward = 0
            t_steps = 0
            if max_t_steps is None:
                done = False
            else:
                done = t_steps > max_t_steps
            while not done:
                start_time = time()
                action, _states = agent.predict(obs, deterministic=not args.stochastic)
                obs, reward, done, info = active_env.step(action)
                if args.video:
                    active_env.render()
                t_steps += 1
                cumulative_reward += reward[0]
                report_msg = '{:<20}{:<20}{:<20.2f}{:<20.2%}{:0.1f}fps\r'.format(
                    id, t_steps, cumulative_reward, info[0]['progress'], 1/(time() - start_time))
                sys.stdout.write(report_msg)
                sys.stdout.flush()
                #if max_t_steps:
                #    if t_steps >= max_t_steps:
                #        done = True
                if args.save_snapshots and t_steps % 1000 == 0 and not done:
                    env.save_latest_episode(save_history=False)
                    for size in (20, 50, 100, 200, 300, 400, 500):
                        gym_auv.reporting.plot_trajectory(
                            env, fig_dir=scenario_folder, fig_prefix=(args.env + '_t_step_' + str(t_steps) + '_' + str(size) + '_' + id), local=True, size=size
                        )
                elif done:
                    gym_auv.reporting.plot_trajectory(env, fig_dir=scenario_folder, fig_prefix=(args.env + '_' + id))
            env.close()
            gym_auv.reporting.report(env, report_dir=report_dir, lastn=-1)
            #gym_auv.reporting.plot_trajectory(env, fig_dir=scenario_folder, fig_prefix=(args.env + '_' + id))
            #env.save(os.path.join(scenario_folder, id))
            if env.collision:
                failed_tests.append(id)
                with open(os.path.join(figure_folder, 'failures.txt'), 'w') as f:
                    f.write(', '.join(map(str, failed_tests)))
            return copy.deepcopy(env.last_episode)
        set_global_seeds(0) # Thomas 04.08.21 --- Ensuring deterministic order of environments for fair intercomparison and plots.
        print('Testing scenario "{}" for {} episodes.\n '.format(args.env, args.episodes))
        report_msg_header = '{:<20}{:<20}{:<20}{:<20}{:<20}{:<20}{:<20}'.format('Episode', 'Timesteps', 'Cum. Reward', 'Progress', 'Collisions', 'CT-Error [m]', 'H-Error [deg]')
        print(report_msg_header)
        print('-'*len(report_msg_header))
        if args.testvals:
            testvals = json.load(open(args.testvals, 'r'))
            valuegrid = list(ParameterGrid(testvals))
        if args.scenario:
            if args.testvals:
                episode_dict = {}
                for valuedict in valuegrid:
                    customconfig = envconfig.copy()
                    customconfig.update(valuedict)
                    env, active_env = create_test_env(envconfig=customconfig)
                    valuedict_str = '_'.join((key + '-' + str(val) for key, val in valuedict.items()))
                    colorval = -np.log10(valuedict['reward_lambda']) #should be general

                    rep_subfolder = os.path.join(figure_folder, valuedict_str)
                    os.makedirs(rep_subfolder, exist_ok=True)
                    for episode in range(args.episodes):
                        last_episode = run_test(valuedict_str + '_ep' + str(episode), report_dir=rep_subfolder)
                    episode_dict[valuedict_str] = [last_episode, colorval]
                print('Plotting all')
                gym_auv.reporting.plot_trajectory(env, fig_dir=scenario_folder, fig_prefix=(args.env + '_all_agents'), episode_dict=episode_dict)
            else:
                run_test("ep0", reset=True, scenario=args.scenario, max_t_steps=5000)
        else:
            if args.testvals:
                episode_dict = {}
                agent_index = 1
                for valuedict in valuegrid:
                    customconfig = envconfig.copy()
                    customconfig.update(valuedict)
                    env, active_env = create_test_env(envconfig=customconfig)
                    valuedict_str = '_'.join((key + '-' + str(val) for key, val in valuedict.items()))
                    colorval = np.log10(valuedict['reward_lambda']) #should be general

                    rep_subfolder = os.path.join(figure_folder, valuedict_str)
                    os.makedirs(rep_subfolder, exist_ok=True)
                    for episode in range(args.episodes):
                        last_episode = run_test(valuedict_str + '_ep' + str(episode), report_dir=rep_subfolder)
                    episode_dict['Agent ' + str(agent_index)] = [last_episode, colorval]
                    agent_index += 1

                gym_auv.reporting.plot_trajectory(env, fig_dir=figure_folder, fig_prefix=(args.env + '_all_agents'), episode_dict=episode_dict)
            else:
                env, active_env = create_test_env(video_name_prefix=args.env)
                for episode in range(args.episodes):
                    run_test('ep' + str(episode), env=env, active_env=active_env, max_t_steps=10000)
        if args.video and active_env:
            active_env.close()

if __name__ == '__main__':
    print("WARNING: DETERMINISTIC SEED ACTIVATED")
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'mode',
        help='Which program mode to run.',
        choices=['play', 'train', 'enjoy', 'test', 'policyplot', 'vectorfieldplot', 'streamlinesplot'],
    )
    parser.add_argument(
        'env',
        help='Name of the gym environment to run.',
        choices=gym_auv.SCENARIOS.keys()
    )
    parser.add_argument(
        '--agent',
        help='Path to the RL agent to simulate.',
    )
    parser.add_argument(
        '--video-dir',
        help='Dir for output video.',
        default='logs/videos/'
    )
    parser.add_argument(
        '--video-name',
        help='Name of output video.',
        default='auto'
    )
    parser.add_argument(
        '--algo',
        help='RL algorithm to use.',
        default='ppo',
        choices=['ppo', 'ddpg', 'td3', 'a2c', 'acer', 'acktr', 'sac', 'trpo']
    )
    parser.add_argument(
        '--render',
        help='Rendering mode to use.',
        default='2d',
        choices=['2d', '3d', 'both'] # 'both' currently broken
    )
    parser.add_argument(
        '--recording-length',
        help='Timesteps to simulate in enjoy mode.',
        type=int,
        default=2000
    )
    parser.add_argument(
        '--episodes',
        help='Number of episodes to simulate in test mode.',
        type=int,
        default=1
    )
    parser.add_argument(
        '--video',
        help='Record video for test mode.',
        action='store_true'
    )
    parser.add_argument(
        '--onlyplot',
        help='Skip simulations, only plot scenario.',
        action='store_true'
    )
    parser.add_argument(
        '--scenario',
        help='Path to scenario file containing environment data to be loaded.',
    )
    parser.add_argument(
        '--verbose',
        help='Print debugging information.',
        action='store_true'
    )
    parser.add_argument(
        '--envconfig',
        help='Override environment config parameters.',
        nargs='*'
    )
    parser.add_argument(
        '--nomp',
        help='Only use single CPU core for training.',
        action='store_true'
    )
    parser.add_argument(
        '--stochastic',
        help='Use stochastic actions.',
        action='store_true'
    )
    parser.add_argument(
        '--recurrent',
        help='Use RNN for policy network.',
        action='store_true'
    )
    parser.add_argument(
        '--pilot',
        help='If training in a controller environment, this is the pilot agent to control.',
    )
    parser.add_argument(
        '--testvals',
        help='Path to JSON file containing config values to test.',
    )
    parser.add_argument(
        '--save-snapshots',
        help='Save snapshots of the vessel trajectory on a fixed interval.',
    )
    args = parser.parse_args()
    from win10toast import ToastNotifier
    toaster = ToastNotifier()
    try:
        main(args)
        toaster.show_toast("run.py", "Program is done", duration=10)
    except Exception as e:
        toaster.show_toast("run.py", "Program has crashed", duration=10)
        raise e
...
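run.py is driven entirely by its argparse interface: the two positional arguments select a program mode ('play', 'train', 'enjoy', 'test', 'policyplot', 'vectorfieldplot', 'streamlinesplot') and a scenario name validated against gym_auv.SCENARIOS, while flags such as --algo, --agent, --recording-length and --envconfig tune the run. As a rough illustration only (the agent path below is a placeholder, not taken from the listing; 'PathColavControl-v0' is one of the scenario names the script itself refers to), invocations might look like:

python run.py train PathColavControl-v0 --algo ppo
python run.py enjoy PathColavControl-v0 --algo ppo --agent logs/agents/PathColavControl-v0/example.pkl --recording-length 2000

Note that training uses 8 subprocess environments by default and falls back to a single process when --nomp is given or when an off-policy algorithm (DDPG, TD3, SAC, TRPO) is selected.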
train.py
Source: train.py
import gym
import gym_turbine
import os
from time import time
import multiprocessing
import argparse
import numpy as np
import json
from gym_turbine import reporting
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.callbacks import BaseCallback, CallbackList, CheckpointCallback

hyperparams = {
    'n_steps': 1024,
    'nminibatches': 256,
    'learning_rate': 1e-5,
    'lam': 0.95,
    'gamma': 0.99,
    'noptepochs': 4,
    'cliprange': 0.2,
    'ent_coef': 0.01,
}

class ReportingCallback(BaseCallback):
    """
    Callback for reporting training
    :param report_dir: Path to the folder where the report will be saved.
    :param verbose:
    """
    def __init__(self, report_dir: str, verbose: int = 0):
        super(ReportingCallback, self).__init__(verbose)
        self.report_dir = report_dir
        self.verbose = verbose

    def _on_step(self) -> bool:
        # check if env is done, if yes report it to csv file
        done_array = np.array(self.locals.get("done") if self.locals.get("done") is not None else self.locals.get("dones"))
        if np.sum(done_array).item() > 0:
            env_histories = self.training_env.get_attr('history')
            class Struct(object): pass
            report_env = Struct()
            report_env.history = []
            for env_idx in range(len(done_array)):
                if done_array[env_idx]:
                    report_env.history.append(env_histories[env_idx])
            reporting.report(env=report_env, report_dir=self.report_dir)
            if self.verbose:
                print("reported episode to file")
        return True

    def _on_training_end(self) -> None:
        """
        This event is triggered before exiting the `learn()` method.
        """
        vec_env = self.training_env
        env_histories = vec_env.get_attr('total_history')
        class Struct(object): pass
        report_env = Struct()
        report_env.history = []
        for episode in range(max(map(len, env_histories))):
            for env_idx in range(len(env_histories)):
                if (episode < len(env_histories[env_idx])):
                    report_env.history.append(env_histories[env_idx][episode])
        if len(report_env.history) > 0:
            training_data = reporting.format_history(report_env, lastn=-1)
            reporting.make_summary_file(training_data, self.report_dir)
            if self.verbose:
                print("Made summary file of training")

class TensorboardCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.
    """
    def __init__(self, verbose=0):
        super(TensorboardCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        done_array = np.array(self.locals.get("done") if self.locals.get("done") is not None else self.locals.get("dones"))
        if np.sum(done_array).item():
            history = self.training_env.get_attr('history')
            for env_idx in range(len(done_array)):
                if done_array[env_idx]:
                    self.logger.record_mean('custom/reward', history[env_idx]['reward'])
                    self.logger.record_mean('custom/crashed', history[env_idx]['crashed'])
        return True

if __name__ == '__main__':
    NUM_CPUs = multiprocessing.cpu_count()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--timesteps',
        type=int,
        default=500000,
        help='Number of timesteps to train the agent. Default=500000',
    )
    parser.add_argument(
        '--agent',
        help='Path to the RL agent to continue training from.',
    )
    parser.add_argument(
        '--note',
        type=str,
        default=None,
        help="Note with additional info about training"
    )
    parser.add_argument(
        '--no_reporting',
        help='Skip reporting to increase framerate',
        action='store_true'
    )
    args = parser.parse_args()

    # Define necessary directories
    EXPERIMENT_ID = str(int(time())) + 'ppo'
    agents_dir = os.path.join('logs', EXPERIMENT_ID, 'agents')
    os.makedirs(agents_dir, exist_ok=True)
    report_dir = os.path.join('logs', EXPERIMENT_ID, 'training_report')
    tensorboard_log = os.path.join('logs', EXPERIMENT_ID, 'tensorboard')

    # Make environment (NUM_CPUs parallel envs)
    env = make_vec_env('TurbineStab-v0', n_envs=NUM_CPUs, vec_env_cls=SubprocVecEnv)

    # Write note and config to Note.txt file
    with open(os.path.join('logs', EXPERIMENT_ID, "Note.txt"), "a") as file_object:
        file_object.write("env_config: " + json.dumps(env.get_attr('config')[0]))
        if args.note:
            file_object.write(args.note)

    # Callback to save model at checkpoints during training
    checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=agents_dir)
    # Callback to report training to file
    reporting_callback = ReportingCallback(report_dir=report_dir, verbose=True)
    # Callback to report additional values to tensorboard
    tensorboard_callback = TensorboardCallback(verbose=True)
    # Create the callback list
    if args.no_reporting:
        callback = CallbackList([checkpoint_callback, tensorboard_callback])
    else:
        callback = CallbackList([checkpoint_callback, reporting_callback, tensorboard_callback])

    if (args.agent is not None):
        agent = PPO.load(args.agent, env=env, verbose=True, tensorboard_log=tensorboard_log)
    else:
        agent = PPO('MlpPolicy', env, verbose=True, tensorboard_log=tensorboard_log)
    agent.learn(total_timesteps=args.timesteps, callback=callback)

    # Save trained agent
    agent_path = os.path.join(agents_dir, "last_model_" + str(args.timesteps))
    agent.save(agent_path)
...
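train.py has a much smaller surface: it always trains a Stable-Baselines3 PPO agent on 'TurbineStab-v0' across one subprocess environment per CPU core, and only exposes --timesteps, --agent, --note and --no_reporting (the hyperparams dict at the top of the file is defined but never passed to the PPO constructor in this listing). Purely as an illustration, with a made-up note string and checkpoint path, it might be invoked as:

python train.py --timesteps 500000 --note "baseline PPO run"
python train.py --agent logs/1620000000ppo/agents/last_model_500000.zip --timesteps 200000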
show_env.py
Source: show_env.py
...
    extra = [e for e in env_conf if e not in ignore] if all_envs else []
    if description and default:
        report.line("default environments:")
    max_length = max(len(env) for env in (default + extra) or [""])

    def report_env(e):
        if description:
            text = env_conf[e].description or "[no description]"
            msg = "{} -> {}".format(e.ljust(max_length), text).strip()
        else:
            msg = e
        report.line(msg)

    for e in default:
        report_env(e)
    if all_envs and extra:
        if description:
            if default:
                report.line("")
            report.line("additional environments:")
        for e in extra:
...
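This last snippet appears to be the environment-listing helper from tox itself: report_env prints one "name -> description" line per environment, left-justified to the longest name, falling back to "[no description]" when a tox.ini entry has no description. Against a hypothetical tox.ini, a listing command along the lines of tox -av (list all environments with descriptions) would therefore produce output shaped roughly like this (environment names and descriptions invented for illustration):

default environments:
py38 -> run the test suite with pytest
lint -> run static analysis and style checks

additional environments:
docs -> build the HTML documentation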