# Source code for or_suite.experiment.sb_experiment

import time
from shutil import copyfile
import pandas as pd
import tracemalloc
import numpy as np
import pickle
import os
from stable_baselines3.common.monitor import Monitor
from or_suite.experiment.trajectory_callback import *


class SB_Experiment(object):
    """Optional instrumentation for running an experiment.

    Runs a simulation between an arbitrary openAI Gym environment and a
    STABLE BASELINES ALGORITHM, saving a dataset of (reward, time, space)
    complexity across each episode, and optionally saves trajectory
    information.

    Attributes:
        seed: random seed set to allow reproducibility
        dirPath: (string) location to store the data files
        nEps: (int) number of episodes for the simulation
        deBug: (bool) flag intended to enable printing of debug information;
            stored as given
        env: (openAI env) the environment to run the simulations on; ``run``
            calls ``get_episode_rewards()`` and ``get_episode_times()`` on it
        epLen: (int) the length of each episode
        num_iters: (int) the number of iterations of (nEps, epLen) pairs to
            iterate over with the environment
        save_trajectory: (bool) when true, a TrajectoryCallback is attached
            to every ``learn`` call and its trajectory is saved
        render_flag: (bool) stored as given; not used by the methods of this
            class
        model: (stable baselines algorithm) an algorithm to run the
            experiments with
        data: metrics along the sample paths; a zero np.array of shape
            (nEps * num_iters, 5) until ``run`` replaces it with a
            pd.DataFrame with columns episode, iteration, epReward, time,
            memory
        trajectory: (list) trajectory information; only set when
            save_trajectory is true
        callback: (TrajectoryCallback) only set when save_trajectory is true
    """

    def __init__(self, env, model, dict):
        '''
        Args:
            env: (openAI env) the environment to run the simulations on
            model: (stable baselines algorithm) an algorithm to run the
                experiments with
            dict: a dictionary containing the arguments to send for the
                experiment. The following keys are read:

                - seed: random seed passed to np.random.seed
                - dirPath: (string) location to store the data files
                - deBug: (bool) debug-printing flag
                - nEps: (int) number of episodes for the simulation
                - epLen: (int) the length of each episode
                - numIters: (int) the number of iterations of (nEps, epLen)
                  pairs to iterate over with the environment
                - saveTrajectory: (bool) when set to true saves the entire
                  trajectory information
                - render: (bool) when set to true renders the simulations
                  TODO: Feature not implemented
        '''
        self.seed = dict['seed']
        self.dirPath = dict['dirPath']
        self.deBug = dict['deBug']
        self.nEps = dict['nEps']
        self.env = env
        self.epLen = dict['epLen']
        self.num_iters = dict['numIters']
        self.save_trajectory = dict['saveTrajectory']
        self.render_flag = dict['render']
        self.data = np.zeros([dict['nEps'] * self.num_iters, 5])
        self.model = model

        # The callback (and the list it fills) only exist when trajectories
        # are requested; every later use must be guarded by the same flag.
        if self.save_trajectory:
            self.trajectory = []
            self.callback = TrajectoryCallback(verbose=0)

        # Seeds NumPy's global random state.
        np.random.seed(self.seed)

    def run(self):
        '''
        Runs the simulations between an environment and an algorithm.

        Calls ``model.learn`` once per iteration with
        ``epLen * nEps`` timesteps, records the traced memory after each
        call, and finally builds ``self.data`` as a DataFrame with columns
        episode, iteration, epReward, time (natural log of the per-episode
        time difference) and memory.
        '''
        episodes = []
        iterations = []
        rewards = []
        memory = []
        total_timesteps = self.epLen * self.nEps

        print(f'New Experiment Run')
        for i in range(self.num_iters):  # loops over all the iterations
            print(f'Iteration: {i}')
            tracemalloc.start()  # starts tracing memory allocations
            try:
                # Learns over all of the episodes. The callback is only
                # created when save_trajectory is set, so it must only be
                # touched in that case.
                if self.save_trajectory:
                    self.model.learn(total_timesteps=total_timesteps,
                                     callback=self.callback)
                    self.callback.update_iter()
                else:
                    self.model.learn(total_timesteps=total_timesteps)

                # collects memory information
                current, _ = tracemalloc.get_traced_memory()
            finally:
                # Stop tracing even if learn() raised.
                tracemalloc.stop()

            # appends data to dataset: one row per episode of this iteration
            episodes = np.append(episodes, np.arange(0, self.nEps))
            iterations = np.append(iterations, np.full(self.nEps, i))
            memory = np.append(memory, np.full(self.nEps, current))

        # save trajectory info
        if self.save_trajectory:
            self.trajectory = self.callback.trajectory

        # rewards are kept cumulatively so we save it out of the loop
        rewards = np.append(rewards, self.env.get_episode_rewards())

        # Times are calculated cumulatively so need to calculate the per
        # iteration time complexity: difference of consecutive entries,
        # with an implicit 0 before the first one.
        orig_times = [0.] + list(self.env.get_episode_times())
        times = np.diff(orig_times)

        # Combining data in dataframe. The lengths are printed first because
        # a mismatch makes the DataFrame constructor raise.
        print(len(episodes), len(iterations), len(
            rewards), len(times), len(memory))

        self.data = pd.DataFrame({'episode': episodes,
                                  'iteration': iterations,
                                  'epReward': rewards,
                                  'time': np.log(times),
                                  'memory': memory})

    # Saves the data to the file location provided to the algorithm
    def save_data(self):
        '''
        Saves the acquired dataset to the noted location.

        Writes ``data.csv`` into ``self.dirPath`` (creating the directory if
        needed) and, when save_trajectory is set, pickles the trajectory to
        ``trajectory.obj`` in the same directory.

        Returns:
            dataframe corresponding to the saved data
        '''
        dir_path = self.dirPath
        data_loc = 'data.csv'
        traj_loc = 'trajectory.obj'

        # Drop rows in which every column is zero.
        dt = self.data
        dt = dt[(dt != 0).any(axis=1)]

        data_filename = os.path.join(dir_path, data_loc)
        traj_filename = os.path.join(dir_path, traj_loc)

        print('Writing to file ' + dir_path + data_loc)

        os.makedirs(dir_path, exist_ok=True)
        dt.to_csv(data_filename, index=False,
                  float_format='%.5f', mode='w')

        if self.save_trajectory:
            # saves trajectory to filename
            with open(traj_filename, 'wb') as outfile:
                pickle.dump(self.trajectory, outfile)

        return dt