# Source code for or_suite.envs.airline_revenue_management.airline_env

import gym
import numpy as np
import sys
import copy
import math

from .. import env_configs


class AirlineRevenueEnvironment(gym.Env):
    """
    An environment representing the airline revenue management problem.

    At every timestep at most one customer arrives, drawn from the arrival
    distribution of that timestep. The action states, for every customer
    class, whether a customer of that class would be accepted. An accepted
    arrival consumes seats according to its column of ``A`` and earns the
    revenue of its class.

    Attributes:
        config: The dictionary the environment was constructed from.
        A: The 2-D array representing the resource consumption; column ``j``
            is subtracted from the state when a class-``j`` customer is
            accepted.
        f: The array representing the revenue per class.
        P: The 2-D array representing the distribution over arrivals; row
            ``t`` holds the per-class arrival probabilities at timestep ``t``.
            Whatever mass is missing from 1 is the probability of no arrival.
        epLen: The int number of time steps to run the experiment for.
        starting_state: The array representing the number of available seats
            on each flight.
        state: The array of seats currently available on each flight.
        timestep: The int timestep the current episode is on.
        action_space: (gym.spaces.MultiBinary) Actions must be binary arrays
            of the length of the number of customer classes.
        observation_space: (gym.spaces.MultiDiscrete) States must be integer
            arrays of the length of the number of flights, each entry between
            0 and the starting number of seats on that flight.

    """
    metadata = {'render.modes': ['human']}

    def __init__(self, config):
        """
        Initializes model parameters based on a configuration dictionary.

        Args:
            config: A dictionary with the following keys:

                A: The 2-D float array representing the resource consumption.
                f: The float array representing the revenue per class.
                P: The float array representing the distribution over arrivals.
                epLen: The int number of time steps to run the experiment for.
                starting_state: The float array representing the number of
                    available seats on each flight.
        """
        self.config = config
        self.A = config['A']  # resource consumption
        self.f = config['f']  # revenue per class
        self.P = config['P']  # distribution over arrivals
        self.epLen = config['epLen']  # length of episode
        self.starting_state = config['starting_state']  # starting state

        # One binary accept/reject decision per customer class (column of A).
        self.action_space = gym.spaces.MultiBinary(self.A.shape[1])

        # Each flight can hold anywhere from 0 up to its starting capacity,
        # hence starting_state + 1 discrete values per flight.
        self.observation_space = gym.spaces.MultiDiscrete(
            np.asarray(self.starting_state) + 1)

        # np.array copies, so in-place edits of the returned state by a caller
        # can never leak back into starting_state / the config.
        self.state = np.array(self.starting_state)
        self.timestep = 0

    def get_config(self):
        """Returns the configuration dictionary the environment was built from."""
        return self.config

    def reset(self):
        """Reinitializes variables and returns the starting state."""
        # Fresh copy for the same reason as in __init__: the starting state
        # must survive any in-place modification of the returned array.
        self.state = np.array(self.starting_state)
        self.timestep = 0
        return self.state

    def step(self, action):
        """
        Move one step in the environment.

        Args:
            action: A binary array with one entry per customer class; entry
                ``j`` equal to 1 means a class-``j`` arrival is accepted.

        Returns:
            A tuple ``(state, reward, episode_over, info)``:

            state: The array of seats available on each flight after the step.
            reward: The float revenue earned in this step (0. if nobody
                arrived, the arrival was rejected, or the action was invalid).
            episode_over: True once ``epLen`` steps have been taken.
            info: A dictionary whose ``'customer'`` entry is the index of the
                sampled customer class, or the number of classes if no
                customer arrived.
        """
        assert self.action_space.contains(action)

        num_classes = self.A.shape[1]

        # Sample customer arrival. The extra last outcome means "no arrival".
        # Its probability is clipped at zero because floating-point round-off
        # in the sum can push it marginally below zero, which would make
        # np.random.choice reject an otherwise valid distribution. Genuinely
        # invalid distributions still fail its sum-to-one check.
        arrival_probs = self.P[self.timestep, :]
        no_arrival_prob = max(0., 1 - np.sum(arrival_probs))
        pDist = np.append(np.copy(arrival_probs), no_arrival_prob)
        customer = np.random.choice(
            np.arange(num_classes + 1), 1, p=pDist)[0]

        # The action is valid only if every class it accepts could actually
        # be served from the current state: broadcasting compares the state
        # against each accepted column of A at once.
        accepted_usage = self.A * np.asarray(action)
        valid = not np.any(self.state[:, np.newaxis] - accepted_usage < 0)

        # Given a valid action, serve the arrival if its class was accepted.
        newState = np.copy(self.state)
        reward = 0.
        if customer != num_classes and valid and action[customer] == 1:
            newState = newState - self.A[:, customer]
            reward = float(self.f[customer])

        self.state = newState
        self.timestep += 1
        episode_over = self.timestep == self.epLen
        return self.state, reward, episode_over, {'customer': customer}