# Source code for or_suite.envs.airline_revenue_management.airline_env

import gym
import numpy as np
import sys
import copy
import math

from .. import env_configs


class AirlineRevenueEnvironment(gym.Env):
    """
    An environment representing the airline revenue management problem.

    At every timestep at most one customer arrives. The agent submits a
    binary vector saying which customer classes it is willing to accept; if
    the arriving customer's class is accepted (and the action is feasible),
    the resources that class consumes are subtracted from the state and the
    class revenue is paid out as reward.

    Attributes:
        A: The 2-D float array representing the resource consumption
            (indexed as ``A[:, j]`` for customer class ``j``).
        f: The float array representing the revenue per class.
        P: The float array representing the distribution over arrivals
            (indexed as ``P[timestep, :]``).
        epLen: The int number of time steps to run the experiment for.
        starting_state: The float array representing the number of available
            seats on each flight.
        state: The array of currently available seats on each flight.
        timestep: The int timestep the current episode is on.
        action_space: (gym.spaces.MultiBinary) Actions must be binary arrays
            of the length of the number of customer classes.
        observation_space: (gym.spaces.MultiDiscrete) One entry per flight,
            each ranging over ``0 .. starting_state[i]``.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, config):
        """
        Initializes model parameters based on a configuration dictionary.

        Args:
            config: A dictionary with the following keys:
                A: The 2-D float array representing the resource consumption.
                f: The float array representing the revenue per class.
                P: The float array representing the distribution over
                    arrivals.
                epLen: The int number of time steps to run the experiment
                    for.
                starting_state: The float array representing the number of
                    available seats on each flight.
        """
        self.config = config
        self.A = config['A']  # resource consumption
        self.f = config['f']  # revenue per class
        self.P = config['P']  # distribution over arrivals
        self.epLen = config['epLen']  # length of episode
        self.starting_state = config['starting_state']  # starting state

        # One binary accept/reject decision per customer class.
        self.action_space = gym.spaces.MultiBinary(self.A.shape[1])
        # "+ 1" so that every count from 0 up to the starting capacity is a
        # valid observation for each flight.
        self.observation_space = gym.spaces.MultiDiscrete(
            np.asarray(self.starting_state) + 1)

        # np.array (not np.asarray) copies, so the live state never shares
        # memory with the configured starting state.
        self.state = np.array(self.starting_state)
        self.timestep = 0

    def get_config(self):
        """Returns the configuration dictionary the environment was built from."""
        return self.config

    def reset(self):
        """Reinitializes variables and returns the starting state."""
        # Copy so that in-place edits of the returned observation cannot
        # corrupt starting_state for later episodes.
        self.state = np.array(self.starting_state)
        self.timestep = 0
        return self.state

    def step(self, action):
        """
        Move one step in the environment.

        Args:
            action: A binary array with one entry per customer class; a 1
                means an arrival of that class would be accepted.

        Returns:
            A tuple ``(state, reward, episode_over, info)`` where ``state``
            is the updated array of available seats, ``reward`` is the float
            revenue collected this step, ``episode_over`` is True once
            ``epLen`` steps have been taken, and ``info`` is a dictionary
            whose ``'customer'`` entry is the sampled arrival (equal to the
            number of classes when nobody arrived).
        """
        assert self.action_space.contains(action)

        num_classes = self.A.shape[1]

        # Sample customer arrival. The extra last outcome means "no arrival"
        # and takes the leftover probability mass; it is clamped at zero so
        # that floating point round-off in P cannot yield a (tiny) negative
        # probability, which np.random.choice would reject.
        arrival_probs = np.copy(self.P[self.timestep, :])
        pDist = np.append(arrival_probs, max(0.0, 1 - np.sum(arrival_probs)))
        customer = np.random.choice(range(num_classes + 1), 1, p=pDist)[0]

        # Check if valid action: every accepted class must, on its own, be
        # servable from the current state. Column j of `remaining` is the
        # state after serving class j if it is accepted (else the unchanged
        # state).
        accepted = np.asarray(action)
        remaining = np.asarray(self.state)[:, None] - self.A * accepted
        valid = not np.any(remaining < 0)

        # Given a valid action, serve the arrival if its class was accepted.
        newState = np.copy(self.state)
        reward = 0.
        if customer != num_classes and valid and action[customer] == 1:
            newState = newState - self.A[:, customer]
            reward = float(self.f[customer])
        self.state = newState

        self.timestep += 1
        episode_over = self.timestep == self.epLen

        return self.state, reward, episode_over, {'customer': customer}