# Source code for or_suite.experiment.trajectory_callback

import time
from shutil import copyfile
import pandas as pd
import tracemalloc
import numpy as np
import pickle
import os
from stable_baselines3.common.monitor import Monitor
#from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback


class TrajectoryCallback(BaseCallback):
    """
    Custom callback for recording trajectory information for a
    StableBaselines experiment.

    Args:
        verbose: (int) 1 to print trajectory information to console, 0 not to print

    Attributes:
        verbose: (int) 1 to print information, 0 to not print
        trajectory: (list) A list of dictionaries to hold the trajectory information
        iteration_number: (int) A running count of which iteration training is on
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.verbose = verbose
        self.trajectory = []
        self.iteration_number = 0

    def _on_step(self) -> bool:
        """
        After each step of training, saves the trajectory information to a dictionary.

        Trajectory information is iteration, episode, step, oldState, action,
        reward, newState, info. Only the entry at index 0 of each batched
        local (rewards, clipped_actions, infos, new_obs) is recorded.

        Returns:
            bool: always True, so this callback never asks training to stop.
        """
        # Get values from local variables
        reward = self.locals["rewards"][0]
        action = self.locals["clipped_actions"][0]
        info = self.locals["infos"][0]
        next_state = self.locals["new_obs"][0]

        # If nothing has been recorded yet, the environment's starting state
        # is the previous state; otherwise chain from the last recorded step.
        # NOTE(review): this also chains across an iteration boundary -- if the
        # environment is reset between iterations, the first 'oldState' of the
        # new iteration would be stale. Confirm against the training loop.
        if self.trajectory:
            prev_state = self.trajectory[-1]["newState"]
        else:
            prev_state = self.locals["env"].get_attr("starting_state")[0]

        # Total number of steps in an episode
        num_steps = self.locals["n_rollout_steps"]

        # Current episode number: steps recorded so far in this iteration
        # (total recorded minus those attributed to earlier iterations),
        # divided by the episode length and truncated.
        steps_this_iteration = (
            len(self.trajectory)
            - self.iteration_number * self.locals["total_timesteps"]
        )
        episode = int(steps_this_iteration / num_steps)
        step = self.locals["n_steps"]

        # iteration is done in outside loop for training, so it is not taken
        # from self.locals
        iteration = self.iteration_number

        # create dictionary for this time step
        step_dict = {
            'iter': iteration,
            'episode': episode,
            'step': step,
            'oldState': prev_state,
            'action': action,
            'reward': reward,
            'newState': next_state,
            'info': info,
        }

        # append this time step to list of trajectory info for all timesteps
        self.trajectory.append(step_dict)

        # print out information
        if self.verbose:
            print(f'Iteration: {iteration}')
            print(f'Episode: {episode}')
            print(f'Step: {step}')
            print(f'Old State: {prev_state}')
            print(f'Action: {action}')
            print(f'Reward: {reward}')
            print(f'New State: {next_state}')
            print(f'Info: {info}')
            print('\n')

        return True

    def update_iter(self):
        """
        Updates the iteration number.

        This is called by sb_experiment.py to manually increment the
        iteration number in its training loop.
        """
        self.iteration_number += 1