# Source code for or_suite.agents.ambulance.command_line_metric

import numpy as np
import sys
from .. import Agent


class commandLineAgent(Agent):
    """Agent whose actions are typed in by a human on the command line.

    For every decision the user is asked, once per ambulance, for a location
    in the interval [0, 1]; the entered locations form the action.  Only works
    with the metric ambulance environment.

    Methods:
        reset() : clears data and call_locs, which record what has occurred
            so far in the environment
        update_obs(obs, action, reward, newObs, timestep, info) : records the
            newest state and the newest call arrival
        update_policy(k) : no-op, because the user chooses every action
        greedy(state, timestep, epsilon) : prompts the user for a location
            for each ambulance and returns them as the action
        pick_action(state, step) : returns the action chosen by the user

    Attributes:
        epLen: (int) number of time steps to run the experiment for
        data: (float list list) a list of all the states of the environment
            observed so far
        call_locs: (float list) the locations of all calls observed so far
    """

    def __init__(self, epLen):
        """
        Args:
            epLen: (int) number of time steps to run the experiment for
        """
        self.epLen = epLen
        self.data = []
        self.call_locs = []

    def reset(self):
        """Reset data and call_locs to empty lists."""
        self.data = []
        self.call_locs = []

    def update_obs(self, obs, action, reward, newObs, timestep, info):
        """Record the latest observation and the latest call arrival.

        Appends newObs, the most recently observed state, to data and the
        most recent call arrival, found in info['arrival'], to call_locs.
        The remaining arguments are accepted but not used.
        """
        # Most recent state observed in the environment.
        self.data.append(newObs)
        # Most recent arrival location observed.
        self.call_locs.append(info['arrival'])

    def update_policy(self, k):
        """Update internal policy based upon records.

        Intentionally does nothing: the action is chosen by the user, so
        there is no policy to update.  (No self-assignment of `greedy` is
        performed here; binding the method onto the instance would only
        create a needless self-reference cycle.)
        """
        return

    def greedy(self, state, timestep, epsilon=0):
        """Prompt the user for a new location for each ambulance.

        Each ambulance is asked about in turn, and the prompt is repeated
        until the user enters a number between 0 and 1 inclusive.

        Args:
            state: current state; only its length is used, as the number of
                ambulances to position
            timestep: unused
            epsilon: unused

        Returns:
            numpy float32 array with one entry per ambulance holding the
            locations entered by the user.
        """
        num_ambulance = len(state)
        action = np.zeros(num_ambulance, dtype=np.float32)

        for ambulance in range(num_ambulance):
            prompt = "Where do you want to position ambulance {}? (choose a number between 0 and 1)\n".format(
                ambulance + 1)
            while True:
                try:
                    new_loc = float(input(prompt))
                    # The chained comparison is False for NaN, so "nan" is
                    # rejected along with out-of-range values; a test of the
                    # form `x < 0 or x > 1` would let NaN through.
                    if not 0 <= new_loc <= 1:
                        raise ValueError
                except ValueError:
                    print("Please enter a number between 0 and 1")
                else:
                    action[ambulance] = new_loc
                    break

        return action

    def pick_action(self, state, step):
        """Return the action for this step, as entered by the user."""
        return self.greedy(state, step)