import time
from shutil import copyfile
import pandas as pd
import tracemalloc
import numpy as np
import pickle
import os
class Experiment(object):
    """Optional instrumentation for running an experiment.

    Runs a simulation between an arbitrary openAI Gym environment and an
    algorithm, saving a dataset of (reward, time, space) complexity across
    each episode, and optionally saves trajectory information.

    Attributes:
        seed: random seed set to allow reproducibility
        dirPath: (string) location to store the data files
        nEps: (int) number of episodes for the simulation
        deBug: (bool) when true, progress information is printed to the
            command line
        env: (openAI env) the environment to run the simulations on
        epLen: (int) the maximum number of steps in each episode
        num_iters: (int) the number of iterations of (nEps, epLen) pairs to
            iterate over with the environment
        save_trajectory: (bool) when true, every step is recorded
        render_flag: (bool) when true, the environment is rendered
        agent: (or_suite.agent.Agent) the algorithm to run the experiments with
        data: (np.array) one row per episode with the columns
            (episode, iteration, epReward, memory, log of elapsed seconds)
        pickle: (bool) when true, ``save_data`` also pickles the agent's
            ``tree_list`` or ``qVals`` attribute, if it has one
        trajectory: (list) per-step records; only created when
            ``save_trajectory`` is true
    """

    def __init__(self, env, agent, dict):
        '''
        Args:
            env: (openAI env) the environment to run the simulations on
            agent: (or_suite.agent.Agent) an algorithm to run the experiments with
            dict: a dictionary containing the arguments to send for the
                experiment, with the keys:
                seed: random seed passed to ``np.random.seed``
                dirPath: (string) location to store the data files
                nEps: (int) number of episodes for the simulation
                deBug: (bool) when true, prints information to the command line
                epLen: (int) the length of each episode
                numIters: (int) the number of iterations of (nEps, epLen)
                    pairs to iterate over with the environment
                saveTrajectory: (bool) when true, saves the entire trajectory
                render: (bool) when true, renders the simulations
                pickle: (bool) when true, saves agent data to a pickle file

        Raises:
            KeyError: if any of the keys listed above is missing.
        '''
        # The parameter name shadows the builtin but is part of the public
        # signature, so it is kept and only aliased locally.
        config = dict

        self.seed = config['seed']
        self.dirPath = config['dirPath']
        self.deBug = config['deBug']
        self.nEps = config['nEps']
        self.env = env
        self.epLen = config['epLen']
        self.num_iters = config['numIters']
        self.save_trajectory = config['saveTrajectory']
        self.render_flag = config['render']
        self.agent = agent

        # One row per (iteration, episode) pair; filled in by run().
        self.data = np.zeros([self.nEps * self.num_iters, 5])
        self.pickle = config['pickle']

        if self.save_trajectory:  # initializes the list to save the trajectory
            self.trajectory = []

        np.random.seed(self.seed)  # sets seed for experiment

    def run(self):
        '''
        Runs the simulations between an environment and an algorithm.

        For every iteration the agent is reset and reconfigured from the
        environment, then ``nEps`` episodes are played. Each episode adds one
        row to ``self.data``.
        '''
        index = 0
        for i in range(self.num_iters):  # loops over the number of iterations
            # resets algorithm, then updates it based on the environment
            self.agent.reset()
            self.agent.update_config(self.env, self.env.get_config())

            for ep in range(self.nEps):  # loops over the episodes
                epReward, current, elapsed = self._run_episode(i, ep)

                # Logging to the dataset
                self.data[index, 0] = ep
                self.data[index, 1] = i
                self.data[index, 2] = epReward
                self.data[index, 3] = current
                # the time column stores the log of the elapsed seconds
                self.data[index, 4] = np.log(elapsed)
                index += 1

        # Closed once, after every iteration has finished, so the environment
        # is never reset again after it has been closed.
        self.env.close()

    def _run_episode(self, i, ep):
        '''
        Plays a single episode and measures its time and memory usage.

        Args:
            i: (int) index of the current iteration
            ep: (int) index of the current episode within the iteration

        Returns:
            tuple (epReward, current, elapsed): the summed reward, the memory
            currently traced by tracemalloc when the episode ended, and the
            elapsed time in seconds.
        '''
        if self.deBug:
            print(f'Episode : {ep}')

        # Reset the environment
        self.env.reset()
        if self.render_flag:  # optionally renders the environment
            self.env.render()

        oldState = self.env.state  # obtains old state
        # Pre-set so the final debug print works even if no step is taken.
        newState = oldState
        epReward = 0

        # updates agent policy based on episode
        self.agent.update_policy(ep)

        done = False
        h = 0

        # perf_counter is monotonic, so the elapsed time cannot be negative
        start_time = time.perf_counter()
        tracemalloc.start()
        try:
            # repeats until the episode is finished or epLen steps were taken
            while (not done) and h < self.epLen:
                # f-strings are used for the debug output because
                # '%s' % value fails when value is a tuple.
                if self.deBug:
                    print(f'state : {oldState}')

                # algorithm picks an action
                action = self.agent.pick_action(oldState, h)
                if self.deBug:
                    print(f'action : {action}')

                # steps based on the action chosen by the algorithm
                newState, reward, done, info = self.env.step(action)
                epReward += reward

                if self.deBug:
                    print(f'new state: {newState}')
                    print(f'reward: {reward}')
                    print(f'epReward so far: {epReward}')
                    print(f'Info: {info}')

                self.agent.update_obs(
                    oldState, action, reward, newState, h, info)

                if self.save_trajectory:  # saves trajectory step if desired
                    self.trajectory.append({'iter': i,
                                            'episode': ep,
                                            'step': h,
                                            'oldState': oldState,
                                            'action': action,
                                            'reward': reward,
                                            'newState': newState,
                                            'info': info})

                oldState = newState
                h += 1

                if self.render_flag:  # optionally renders the environment
                    self.env.render()

            # collects memory usage before tracing is switched off
            current, _ = tracemalloc.get_traced_memory()
        finally:
            # tracing is stopped even if the environment or agent raised
            tracemalloc.stop()
        elapsed = time.perf_counter() - start_time

        if self.deBug:
            print(f'final state: {newState}')

        return epReward, current, elapsed

    @staticmethod
    def _dump_pickle(obj, filename):
        '''Pickles obj into filename, closing the file even on failure.'''
        with open(filename, 'wb') as outfile:
            pickle.dump(obj, outfile)

    def save_data(self):
        '''
        Saves the acquired dataset to the noted location.

        Writes ``data.csv`` into ``dirPath`` (creating the directory when
        needed), ``trajectory.obj`` when trajectories were recorded, and
        ``agent.obj`` when pickling is enabled and the agent has a
        ``tree_list`` or ``qVals`` attribute.

        Returns:
            dataframe corresponding to the saved data
        '''
        if self.deBug:
            print(self.data)

        dir_path = self.dirPath
        data_loc = 'data.csv'
        traj_loc = 'trajectory.obj'
        agent_loc = 'agent.obj'

        data_filename = os.path.join(dir_path, data_loc)
        traj_filename = os.path.join(dir_path, traj_loc)
        agent_filename = os.path.join(dir_path, agent_loc)

        dt = pd.DataFrame(self.data, columns=[
            'episode', 'iteration', 'epReward', 'memory', 'time'])
        # keeps only the rows holding at least one non-zero value
        dt = dt[(dt.T != 0).any()]

        print('Writing to file ' + data_loc)

        # creates the directory first when it does not exist yet
        os.makedirs(dir_path, exist_ok=True)

        # saves the collected dataset
        dt.to_csv(data_filename, index=False,
                  float_format='%.5f', mode='w')

        if self.save_trajectory:  # saves trajectory to filename
            self._dump_pickle(self.trajectory, traj_filename)

        if self.pickle:
            # tree_list takes precedence over qVals when both are present
            if hasattr(self.agent, 'tree_list'):
                self._dump_pickle(self.agent.tree_list, agent_filename)
            elif hasattr(self.agent, 'qVals'):
                self._dump_pickle(self.agent.qVals, agent_filename)

        return dt