The following 50 code examples, extracted from open-source Python projects, illustrate how to use gym.wrappers.Monitor().
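Before the project examples, here is a minimal sketch of the common pattern: wrap an environment with Monitor so that episode statistics (and optionally videos) are written to a results directory. The environment id, the output directory, and the video_callable=False setting below are illustrative choices, not taken from any of the projects that follow.

import gym
from gym import wrappers

# Wrap the environment with Monitor.
# force=True overwrites any previous results in the directory;
# video_callable=False disables video recording (episode stats are still logged).
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-monitor', force=True, video_callable=False)

observation = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy, for illustration only
    observation, reward, done, info = env.step(action)

env.close()  # flushes the monitor files to disk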
def test(args, env, agent):
    if args.record:
        if 'env' in vars(args):
            env = wrappers.Monitor(env, './videos/' + args.env + str(time()) + '/')
        else:
            env = wrappers.Monitor(env, './videos/' + str(time()) + '/')
    test_rewards = []
    test_start = time()
    test_steps = 0
    for iteration in range(1, 1 + args.n_test_iter):
        state = env.reset()
        iter_rewards = 0.0
        done = False
        while not done:
            test_steps += 1
            action, _ = agent.forward(state)
            state, reward, done, _ = env.step(action)
            iter_rewards += reward
        test_rewards.append(iter_rewards)
    print_stats('Test', test_rewards, args.n_test_iter,
                time() - test_start, test_steps, 0, agent)
    return test_rewards
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4,
             no_op=30, monitoring_path=None):
    assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
    assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
    if isinstance(repeat_action, int):
        assert repeat_action >= 1, "repeat_action should be >= 1"
    elif isinstance(repeat_action, tuple):
        assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
        assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

    super(GymEnvironment, self).__init__()

    self._state_builder = state_builder
    self._env = gym.make(env_name)
    self._env.env.frameskip = repeat_action
    self._no_op = max(0, no_op)
    self._done = True

    if monitoring_path is not None:
        self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
def __init__(self, master, thread_id, clip_gradients=True):
    super(A3CThread, self).__init__(name=thread_id)
    self.thread_id = thread_id
    self.clip_gradients = clip_gradients
    self.env = make_environment(master.env_name)
    self.master = master
    self.config = master.config
    if thread_id == 0 and self.master.monitor:
        self.env = wrappers.Monitor(self.env, master.monitor_path, force=True,
                                    video_callable=(None if self.master.video else False))

    # Only used (and overwritten) by agents that use an RNN
    self.initial_features = None

    # Build actor and critic networks
    with tf.variable_scope("t{}_net".format(self.thread_id)):
        self.action, self.value, self.actor_states, self.critic_states, self.actions_taken, \
            self.losses, self.adv, self.r, self.n_steps = self.build_networks()
        self.sync_net = self.create_sync_net_op()
        inc_step = self.master.global_step.assign_add(self.n_steps)
        self.train_op = tf.group(self.make_trainer(), inc_step)

    # Write the summary of each thread in a different directory
    self.writer = tf.summary.FileWriter(
        os.path.join(self.master.monitor_path, "thread" + str(self.thread_id)),
        self.master.session.graph)

    self.runner = RunnerThread(self.env, self, 20, thread_id == 0 and self.master.video)
def __init__(self, env, monitor_path, video=True, **usercfg):
    super(SarsaFA, self).__init__()
    self.env = env
    self.env = wrappers.Monitor(self.env, monitor_path, force=True,
                                video_callable=(None if video else False))
    m = usercfg.get("m", 10)  # Number of tilings
    self.config = dict(
        m=m,
        n_x_tiles=9,
        n_y_tiles=9,
        Lambda=0.9,
        epsilon=0,  # fully greedy in this case
        alpha=(0.05 * (0.5 / m)),
        gamma=1,
        n_iter=1000,
        # Maximum number of allowed steps per episode, as determined (for this environment) by the gym library
        steps_per_episode=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps")
    )
    self.config.update(usercfg)
    O = env.observation_space
    self.x_low, self.y_low = O.low
    self.x_high, self.y_high = O.high
    self.nA = env.action_space.n
    self.policy = EGreedy(self.config["epsilon"])
    self.function_approximation = TileCoding(self.x_low, self.x_high, self.y_low, self.y_high,
                                             m, self.config["n_x_tiles"], self.config["n_y_tiles"], self.nA)
def __init__(self, env, monitor_path, video=True, **usercfg):
    super(Karpathy, self).__init__(**usercfg)
    self.env = wrappers.Monitor(env, monitor_path, force=True,
                                video_callable=(None if video else False))
    self.nA = self.env.action_space.n
    # Default configuration. Can be overwritten using keyword arguments.
    self.config.update(dict(
        # timesteps_per_batch=10000,
        # n_iter=100,
        episode_max_length=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps"),
        gamma=0.99,
        learning_rate=0.05,
        batch_size=10,  # Amount of episodes after which to adapt gradients
        decay_rate=0.99,  # Used for RMSProp
        n_hidden_units=20,
        draw_frequency=50,  # Draw a plot every 50 episodes
        repeat_n_actions=1
    ))
    self.config.update(usercfg)

    self.build_network()
def __init__(self, env, monitor_path, video=True, **usercfg):
    super(KarpathyCNN, self).__init__(**usercfg)
    self.env = wrappers.Monitor(env, monitor_path, force=True,
                                video_callable=(None if video else False))
    self.nA = env.action_space.n
    self.monitor_path = monitor_path
    # Default configuration. Can be overwritten using keyword arguments.
    self.config.update(
        dict(
            # timesteps_per_batch=10000,
            # n_iter=100,
            n_hidden_units=200,
            learning_rate=1e-3,
            batch_size=10,  # Amount of episodes after which to adapt gradients
            gamma=0.99,  # Discount past rewards by a percentage
            decay=0.99,  # Decay of RMSProp optimizer
            epsilon=1e-9,  # Epsilon of RMSProp optimizer
            draw_frequency=50  # Draw a plot every 50 episodes
        )
    )
    self.config.update(usercfg)
    self.build_network()
    if self.config["save_model"]:
        tf.add_to_collection("action", self.action)
        tf.add_to_collection("states", self.states)
        self.saver = tf.train.Saver()
def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()
def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert done == False

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done == True
        assert env.episode_id == 1

        env.close()
def main():
    episodeCount = 20
    stepsPerEpisode = 100

    env = gym.make("CartPole-v0")
    env = wrappers.Monitor(env, "/tmp/cartpole-experiment-1")
    for episode in range(episodeCount):
        observation = env.reset()
        for t in range(stepsPerEpisode):
            env.render()
            print(observation)
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))

            if record:
                env.close()
                return
def _create_env(self, monitor_dir, record_freq=None, max_episode_steps=None, **kwargs):
    monitor_path = os.path.join(self.log_dir, monitor_dir)
    env = gym.make(self.env_name)

    if max_episode_steps is not None:
        env._max_episode_steps = max_episode_steps

    monitored_env = wrappers.Monitor(
        env=env,
        directory=monitor_path,
        resume=True,
        video_callable=lambda x: record_freq is not None and x % record_freq == True)

    if self.env_wrapper is not None:
        env = self.env_wrapper.wrap_env(monitored_env)
    else:
        env = monitored_env

    return monitored_env, env
def __init__(self, name, log_dir, obs_f=None, reward_f=None, clamp_actions=False, monitor=False):
    self._env = gym.make(name)
    self.log_dir = log_dir
    if monitor:
        self._env = wrappers.Monitor(self._env, log_dir, force=True)
    self.obs_f = obs_f
    self.reward_f = reward_f
    self.clamp_actions = clamp_actions
    self.monitor = monitor
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
def get_env(env_id, seed):
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = './tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env
def get_custom_env(env_id, seed):
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = './tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_custom(env)

    return env
def get_env():
    env = gym.make('nesgym/NekketsuSoccerPK-v0')
    env = nesgym.wrap_nes_env(env)
    expt_dir = '/tmp/soccer/'
    env = wrappers.Monitor(env, os.path.join(expt_dir, "gym"), force=True)
    return env
def get_env(task, seed):
    env_id = task.env_id

    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env
def test(self, just_one=True):
    """ This is for test-time evaluation. No training is done here. By default, iterate
    through every snapshot. If `just_one` is true, this only runs one set of weights,
    to ensure that we record right away since OpenAI will only record subsets and less
    frequently. Changing the loop over snapshots is also needed.
    """
    os.makedirs(self.args.directory + '/videos')
    self.env = wrappers.Monitor(self.env, self.args.directory + '/videos', force=True)
    headdir = self.args.directory + '/snapshots/'
    snapshots = os.listdir(headdir)
    snapshots.sort()
    num_rollouts = 10
    if just_one:
        num_rollouts = 1

    for sn in snapshots:
        print("\n***** Currently on snapshot {} *****".format(sn))

        ### Add your own criteria here.
        # if "800" not in sn:
        #     continue
        ###

        with open(headdir + sn, 'rb') as f:
            weights = pickle.load(f)
        self.sess.run(self.set_params_op, feed_dict={self.new_weights_v: weights})

        returns = []
        for i in range(num_rollouts):
            returns.append(self._compute_return(test=True))
        print("mean: \t{}".format(np.mean(returns)))
        print("std: \t{}".format(np.std(returns)))
        print("max: \t{}".format(np.max(returns)))
        print("min: \t{}".format(np.min(returns)))
        print("returns:\n{}".format(returns))
def main():
    args = parser.parse_args()
    env = make_environment(args.environment)
    runner = ModelRunner(env, args.model_directory, args.save_directory, n_iter=args.iterations)
    try:
        runner.env = wrappers.Monitor(runner.env, args.save_directory, video_callable=False, force=True)
        runner.run()
    except KeyboardInterrupt:
        pass
def __init__(self, env, monitor_path, video=True, **usercfg):
    super(A2C, self).__init__(**usercfg)
    self.monitor_path = monitor_path
    self.env = wrappers.Monitor(env, monitor_path, force=True,
                                video_callable=(None if video else False))
    self.env_runner = EnvRunner(self.env, self, usercfg)
    self.config.update(dict(
        timesteps_per_batch=10000,
        trajectories_per_batch=10,
        batch_update="timesteps",
        n_iter=100,
        gamma=0.99,
        actor_learning_rate=0.01,
        critic_learning_rate=0.05,
        actor_n_hidden=20,
        critic_n_hidden=20,
        repeat_n_actions=1,
        save_model=False
    ))
    self.config.update(usercfg)
    self.build_networks()
    init = tf.global_variables_initializer()

    # Launch the graph.
    self.session = tf.Session()
    self.session.run(init)
    if self.config["save_model"]:
        tf.add_to_collection("action", self.action)
        tf.add_to_collection("states", self.states)
        self.saver = tf.train.Saver()
    self.rewards = tf.placeholder("float", name="Rewards")
    self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
    summary_actor_loss = tf.summary.scalar("Actor_loss", self.summary_actor_loss)
    summary_critic_loss = tf.summary.scalar("Critic_loss", self.summary_critic_loss)
    summary_rewards = tf.summary.scalar("Rewards", self.rewards)
    summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
    self.summary_op = tf.summary.merge([summary_actor_loss, summary_critic_loss,
                                        summary_rewards, summary_episode_lengths])
    self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "summaries"),
                                        self.session.graph)
    return
def __init__(self, env, monitor_path, video=True, **usercfg):
    super(REINFORCE, self).__init__(**usercfg)
    self.env = wrappers.Monitor(env, monitor_path, force=True,
                                video_callable=(None if video else False))
    self.env_runner = EnvRunner(self.env, self, usercfg)
    self.monitor_path = monitor_path
    # Default configuration. Can be overwritten using keyword arguments.
    self.config.update(dict(
        batch_update="timesteps",
        timesteps_per_batch=1000,
        n_iter=100,
        gamma=0.99,  # Discount past rewards by a percentage
        decay=0.9,  # Decay of RMSProp optimizer
        epsilon=1e-9,  # Epsilon of RMSProp optimizer
        learning_rate=0.05,
        n_hidden_units=20,
        repeat_n_actions=1,
        save_model=False
    ))
    self.config.update(usercfg)

    self.build_network()
    self.make_trainer()

    init = tf.global_variables_initializer()

    # Launch the graph.
    self.session = tf.Session()
    self.session.run(init)
    if self.config["save_model"]:
        tf.add_to_collection("action", self.action)
        tf.add_to_collection("states", self.states)
        self.saver = tf.train.Saver()
    self.rewards = tf.placeholder("float", name="Rewards")
    self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
    summary_loss = tf.summary.scalar("Loss", self.summary_loss)
    summary_rewards = tf.summary.scalar("Rewards", self.rewards)
    summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
    self.summary_op = tf.summary.merge([summary_loss, summary_rewards, summary_episode_lengths])
    self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "task0"), self.session.graph)
def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5},
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 100},
        ])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0',
                                                      results['data_sources'],
                                                      results['initial_reset_timestamps'],
                                                      results['episode_lengths'],
                                                      results['episode_rewards'],
                                                      results['episode_types'],
                                                      results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp)
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
def test_write_upon_reset_true():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=True)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
def test_semisuper_succeeds():
    """Regression test. Ensure that this can write"""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(env, temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
def test_no_monitor_reset_unless_done():
    def assert_reset_raises(env):
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
def create_env(conf, monitor_on=False):
    env = gym.make(conf['env'])
    if conf['monitor_dir'] != '' and monitor_on:
        env = wrappers.Monitor(env, conf['monitor_dir'], force=True)
    if conf['use_atari_wrapper']:
        env = AtariRescale42x42Wrapper(env)
    env = NormalizeWrapper(env)
    return env
def create_env(conf):
    env = gym.make(conf['env'])
    if conf['monitor_dir']:
        env = wrappers.Monitor(env, conf['monitor_dir'], force=True)
    if conf['use_atari_wrapper']:
        env = AtariRescale42x42Wrapper(env)
    env = NormalizeWrapper(env)
    return env
def get_demo_data(env):
    # env = wrappers.Monitor(env, '/tmp/CartPole-v0', force=True)
    # agent.restore_model()
    with tf.variable_scope('get_demo_data'):
        agent = DQfDDDQN(env, DDQNConfig())

    e = 0
    while True:
        done = False
        score = 0  # sum of reward in one episode
        state = env.reset()
        demo = []
        while done is False:
            action = agent.egreedy_action(state)  # e-greedy action for train
            next_state, reward, done, _ = env.step(action)
            score += reward
            reward = reward if not done or score == 499 else -100
            agent.perceive([state, action, reward, next_state, done, 0.0])  # 0. means it is not a demo data
            demo.append([state, action, reward, next_state, done, 1.0])  # record the data that could be expert-data
            agent.train_Q_network(update=False)
            state = next_state
        if done:
            if score == 500:  # expert demo data
                demo = set_n_step(demo, Config.trajectory_n)
                agent.demo_buffer.extend(demo)
            agent.sess.run(agent.update_target_net)
            print("episode:", e, " score:", score, " demo_buffer:", len(agent.demo_buffer),
                  " memory length:", len(agent.replay_buffer), " epsilon:", agent.epsilon)
            if len(agent.demo_buffer) >= Config.demo_buffer_size:
                agent.demo_buffer = deque(itertools.islice(agent.demo_buffer, 0, Config.demo_buffer_size))
                break
        e += 1
    with open(Config.DEMO_DATA_PATH, 'wb') as f:
        pickle.dump(agent.demo_buffer, f, protocol=2)
def init_environment(self, name='CartPole-v0', monitor=False):
    self.env = gym.make(name)
    if monitor:
        self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False)
    self.n_states = self.env.observation_space.shape[0]
    self.n_actions = self.env.action_space.n

    # Experience replay
    self.replay = []
def __init__(self, game_name, histoy_length, render=False):
    self.env = gym.make(game_name)
    # self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
    self.render = render
    self.n_actions = self.env.action_space.n
    self.n_observation = len(self.env.observation_space.high)

    self.resize_width = 80
    self.resize_height = 80

    self.histoy_length = histoy_length  # One state contains 'histoy_length' frames
    self.state_buffer = deque()  # Buffer keeps 'histoy_length-1' frames

    self.show_game_info()
def __init__(self, game_name, histoy_length, render=False):
    self.env = gym.make(game_name)
    # self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
    self.render = render
    self.n_actions = self.env.action_space.n
    self.n_observation = len(self.env.observation_space.high)

    self.histoy_length = histoy_length  # One state contains 'histoy_length' observations
    self.state_buffer = deque()  # Buffer keeps 'histoy_length-1' observations

    self.show_game_info()