# Source code for or_suite.experiment.experiment

import time
from shutil import copyfile
import pandas as pd
import tracemalloc
import numpy as np
import pickle
import os


class Experiment(object):
    """Optional instrumentation for running an experiment.

    Runs a simulation between an environment and an algorithm, recording one
    row of (episode, iteration, reward, memory, log-time) metrics per episode
    and, optionally, the full step-by-step trajectory.

    Attributes:
        seed: random seed passed to ``np.random.seed`` for reproducibility
        dirPath: (string) location to store the data files
        nEps: (int) number of episodes for the simulation
        deBug: (bool) when true, prints step information to the command line
        env: the environment to run the simulations on
        epLen: (int) the maximum number of steps in each episode
        num_iters: (int) the number of iterations of (nEps, epLen) pairs to
            iterate over with the environment
        save_trajectory: (bool) when true, saves the entire trajectory
        render_flag: (bool) when true, renders the simulations
        agent: the algorithm to run the experiments with
        data: (np.array) array of shape (nEps * num_iters, 5) whose columns
            are episode, iteration, episode reward, traced memory and the
            natural log of the elapsed episode time
        trajectory: (list) per-step records; only created when
            save_trajectory is true
        pickle: (bool) when true, save_data also pickles agent information
    """

    def __init__(self, env, agent, dict):
        '''
        Args:
            env: the environment to run the simulations on
            agent: the algorithm to run the experiments with
            dict: a dictionary containing the arguments to send for the experiment, including:
                seed: random seed used to seed numpy's global generator
                dirPath: (string) location to store the data files
                nEps: (int) number of episodes for the simulation
                deBug: (bool) boolean, when set to true causes the algorithm to print information to the command line
                epLen: (int) the length of each episode
                numIters: (int) the number of iterations of (nEps, epLen) pairs to iterate over with the environment
                saveTrajectory: (bool) boolean, when set to true saves the entire trajectory information
                render: (bool) boolean, when set to true renders the simulations
                pickle: (bool) when set to true saves data to a pickle file
        '''
        # The parameter name shadows the builtin but is kept for backward
        # compatibility with keyword callers; use a local alias internally.
        config = dict

        self.seed = config['seed']
        self.dirPath = config['dirPath']
        self.deBug = config['deBug']
        self.nEps = config['nEps']
        self.env = env
        self.epLen = config['epLen']
        self.num_iters = config['numIters']
        self.save_trajectory = config['saveTrajectory']
        self.render_flag = config['render']
        self.agent = agent
        # one row per (iteration, episode) pair, five metric columns
        self.data = np.zeros([config['nEps'] * self.num_iters, 5])
        self.pickle = config['pickle']

        if self.save_trajectory:  # initializes the list to save the trajectory
            self.trajectory = []

        np.random.seed(self.seed)  # sets seed for experiment

    @staticmethod
    def _dump_pickle(filename, obj):
        '''Pickles obj to filename, closing the file even if pickling fails.'''
        with open(filename, 'wb') as outfile:
            pickle.dump(obj, outfile)

    # Runs the experiment
    def run(self):
        '''
            Runs the simulations between an environment and an algorithm.

            For every iteration the agent is reset and reconfigured from the
            environment, then nEps episodes of at most epLen steps are played.
            One row of metrics per episode is written into self.data, and each
            step is appended to self.trajectory when save_trajectory is set.
        '''

        index = 0  # next free row of self.data

        for i in range(self.num_iters):  # loops over the number of iterations
            self.agent.reset()  # resets algorithm
            # updates agent configuration based on environment
            self.agent.update_config(self.env, self.env.get_config())

            for ep in range(self.nEps):  # loops over the episodes
                if self.deBug:
                    print('Episode : %s' % (ep,))

                # Reset the environment
                self.env.reset()

                if self.render_flag:  # optionally renders the environment
                    self.env.render()

                oldState = self.env.state  # obtains starting state
                epReward = 0

                # updates agent policy based on episode
                self.agent.update_policy(ep)

                done = False
                h = 0

                # starts time and memory tracker; perf_counter is used because
                # it cannot step backwards, which would make the logged
                # duration undefined
                start_time = time.perf_counter()
                tracemalloc.start()

                # repeats until episode is finished
                while (not done) and h < self.epLen:
                    # values are wrapped in 1-tuples so tuple-valued states
                    # and actions format correctly with '%s'
                    if self.deBug:
                        print('state : %s' % (oldState,))

                    # algorithm picks an action
                    action = self.agent.pick_action(oldState, h)
                    if self.deBug:
                        print('action : %s' % (action,))

                    # steps based on the action chosen by the algorithm
                    newState, reward, done, info = self.env.step(action)
                    epReward += reward

                    if self.deBug:
                        print('new state: %s' % (newState,))
                        print('reward: %s' % (reward,))
                        print('epReward so far: %s' % (epReward,))
                        print(f'Info: {info}')

                    self.agent.update_obs(
                        oldState, action, reward, newState, h, info)

                    if self.save_trajectory:  # saves trajectory step if desired
                        self.trajectory.append({'iter': i,
                                                'episode': ep,
                                                'step': h,
                                                'oldState': oldState,
                                                'action': action,
                                                'reward': reward,
                                                'newState': newState,
                                                'info': info})

                    oldState = newState
                    h = h + 1
                    if self.render_flag:  # optionally renders the environment
                        self.env.render()

                # collects memory / time usage
                current, _ = tracemalloc.get_traced_memory()
                tracemalloc.stop()
                end_time = time.perf_counter()

                if self.deBug:
                    # oldState equals the last newState after any step, and is
                    # still defined when the episode ran for zero steps
                    print('final state: %s' % (oldState,))

                # Logging to dataframe
                self.data[index, 0] = ep
                self.data[index, 1] = i
                self.data[index, 2] = epReward
                self.data[index, 3] = current
                # time is stored on a natural-log scale
                self.data[index, 4] = np.log(end_time - start_time)

                index += 1

            # NOTE(review): the environment is closed after every iteration
            # and reset again by the next one — confirm this is intended.
            self.env.close()

    # Saves the data to the file location provided to the algorithm
    def save_data(self):
        '''
            Saves the acquired dataset to the noted location.

            Writes data.csv into dirPath (creating the directory if needed),
            trajectory.obj when save_trajectory is set, and agent.obj when
            pickle is set and the agent has a tree_list or qVals attribute.

            Returns:
                dataframe corresponding to the saved data
        '''

        if self.deBug:
            print(self.data)

        dir_path = self.dirPath

        data_loc = 'data.csv'
        traj_loc = 'trajectory.obj'
        agent_loc = 'agent.obj'

        data_filename = os.path.join(dir_path, data_loc)
        traj_filename = os.path.join(dir_path, traj_loc)
        agent_filename = os.path.join(dir_path, agent_loc)

        dt = pd.DataFrame(self.data, columns=[
                          'episode', 'iteration', 'epReward', 'memory', 'time'])
        # drops rows in which every column is zero
        dt = dt[(dt != 0).any(axis=1)]

        print('Writing to file ' + data_loc)

        # creates the directory when missing; a no-op when it already exists
        os.makedirs(dir_path, exist_ok=True)

        # saves the collected dataset
        dt.to_csv(data_filename, index=False,
                  float_format='%.5f', mode='w')

        if self.save_trajectory:  # saves trajectory to filename
            self._dump_pickle(traj_filename, self.trajectory)

        if self.pickle:
            # only agents exposing one of these attributes are pickled;
            # tree_list takes precedence over qVals
            if hasattr(self.agent, 'tree_list'):
                self._dump_pickle(agent_filename, self.agent.tree_list)
            elif hasattr(self.agent, 'qVals'):
                self._dump_pickle(agent_filename, self.agent.qVals)

        return dt