# Source code for or_suite.envs.resource_allocation.resource_allocation_discrete

"""Discrete Sequential Resource Allocation Problem for n locations with K commodities.

Currently the reward is the Nash Social Welfare; in the future, more options
for determining a fair allocation will be integrated. """
# UNFINISHED
import numpy as np
import gym
from gym import spaces
import math
from .. import env_configs


class DiscreteResourceAllocationEnvironment(gym.Env):
    """Discrete sequential resource allocation environment (gym interface).

    The state is a 1-D array whose first ``num_commodities`` entries are the
    remaining budget and whose remaining ``num_types`` entries are the number
    of people of each type at the current location.  An action is a flattened
    ``(num_types, num_commodities)`` allocation matrix with integer entries.

    Attributes:
        config: The configuration dictionary the environment was built from.
        weight_matrix: ``config['weight_matrix']``; every row is a type vector.
        num_types: Number of rows of ``weight_matrix``.
        num_commodities: ``config['K']``.
        epLen: ``config['num_rounds']``, the length of an episode.
        budget: Result of calling ``config['init_budget']``; only used to
            size the action and observation spaces.
        type_dist: ``config['type_dist']``, called with a timestep.
        utility_function: ``config['utility_function']``, called with one row
            of the allocation matrix and the matching row of ``weight_matrix``.
        starting_state: State every episode starts from.
        state: Current state.
        timestep: Number of steps taken in the current episode.
    """

    # Because of google colab, we cannot implement the GUI ('human' render mode)
    metadata = {'render.modes': ['human']}

    def __init__(self, config=env_configs.resource_allocation_default_config):
        """Initializes the Discrete Sequential Resource Allocation Environment.

        Args:
            config: Dictionary with the following keys.
                weight_matrix: Weights predefining the commodity needs for
                    each type, every row is a type vector.
                K: Number of commodities.
                num_rounds: Number of agents (also the length of an episode).
                init_budget: Callable returning the amount of each commodity
                    the principal begins with.
                type_dist: Function determining the number of people of each
                    type at a location.
                utility_function: Utility function; given an allocation x and
                    a type theta, u(x, theta) is how good the fit is.
        """
        super().__init__()

        self.config = config
        self.weight_matrix = config['weight_matrix']
        self.num_types = config['weight_matrix'].shape[0]
        self.num_commodities = config['K']
        self.epLen = config['num_rounds']
        self.budget = config['init_budget']()
        self.type_dist = config['type_dist']
        self.utility_function = config['utility_function']

        # Build the starting state from the same budget sample that sizes the
        # spaces, so the two cannot disagree if init_budget is stochastic.
        # Entries are truncated to Python ints, as the spaces are discrete.
        self.starting_state = np.array(
            [int(x) for x in self.budget]
            + [int(y) for y in self.type_dist(0)])

        # Work on a copy so in-place edits of the observation by a caller can
        # never corrupt the state later episodes are reset to.
        self.state = self.starting_state.copy()
        self.timestep = 0

        # Action space will be choosing Kxn-dimensional allocation matrix
        # (represented as a vector)
        self.action_space = self._build_action_space()

        # First K entries of observation space is the remaining budget, next
        # is the number of each type at the location
        self.observation_space = spaces.MultiDiscrete(
            [round(max(self.budget) + 1)
             for _ in range(self.num_commodities + self.num_types)])

    def _build_action_space(self):
        """Returns the MultiDiscrete action space sized from ``self.budget``."""
        # NOTE(review): the per-entry size is round(max(budget)) while the
        # observation space uses max(budget) + 1; if MultiDiscrete sizes are
        # exclusive upper bounds this forbids allocating the full budget in a
        # single entry -- confirm whether that is intended.
        size = round(max(self.budget))
        return spaces.MultiDiscrete(
            [size] * (self.num_commodities * self.num_types))

    def reset(self):
        """Resets the environment to the start of an episode.

        Important: the observation must be a numpy array

        Returns:
            np.array: the starting state.
        """
        # Initialize the timestep
        self.timestep = 0
        # Copy so that callers mutating the observation leave starting_state
        # untouched.
        self.state = self.starting_state.copy()
        # NOTE(review): the budget is re-drawn here but the state still starts
        # from starting_state; with a stochastic init_budget the two differ.
        self.budget = self.config['init_budget']()
        self.action_space = self._build_action_space()

        return self.state

    def get_config(self):
        """Returns the configuration dictionary the environment was built from."""
        return self.config

    def step(self, action):
        """Move one step in the environment.

        Args:
            action: The chosen allocation, flattened; it is reshaped to a
                (num_types, num_commodities) matrix in which row ``theta`` is
                the amount of each commodity given to each person of type
                ``theta``.  Every element must be an integer.

        Returns:
            np.array, float, bool, dict:
            state: np.array; the new state (remaining budget followed by the
                new type counts).
            reward: float; the reward.
            done: bool; the flag for end of the episode.
            info: dict; ``{'type': new_type}``.

        Raises:
            AssertionError: if the action is not in the action space or has a
                non-integer element.
        """
        assert self.action_space.contains(action)
        # assert that each element of action is an integer; any NumPy integer
        # width is accepted, not only np.int64
        for a in action:
            assert isinstance(a, (int, np.integer))

        # subdividing state of (b,N) into the two components
        old_budget = self.state[:self.num_commodities]
        old_type = self.state[self.num_commodities:]

        # reshaping the allocation into a matrix
        allocation = np.reshape(
            np.array(action), (self.num_types, self.num_commodities))

        # budget left after serving every person of every type
        remaining = old_budget - np.matmul(old_type, allocation)

        # determines if the allocation is valid, i.e. algorithm is able to
        # allocate the allocation to each of the types, based on the number
        # of people of each type (small tolerance for round-off)
        if np.min(remaining) >= -.0005:
            # average of the log-utilities, weighted by the type counts
            reward = (1/np.sum(old_type))*sum(
                old_type[theta]*np.log(self.utility_function(
                    allocation[theta, :], self.weight_matrix[theta, :]))
                for theta in range(self.num_types))
            # updates the budget by the old budget and the allocation given
            new_budget = remaining
            done = bool(self.timestep == self.epLen - 1)
        else:
            # algorithm is allocating more than the budget: fixed penalty,
            # budget left untouched and the episode ends
            print('Out of Budget!')
            reward = -100
            done = True
            new_budget = old_budget

        new_type = self.type_dist(self.timestep)
        info = {'type': new_type}

        self.state = np.concatenate([new_budget, new_type])
        # NOTE(review): self.budget is not updated by step, so this rebuilds
        # an identically sized space -- confirm whether the remaining budget
        # was meant to be used here.
        self.action_space = self._build_action_space()

        self.timestep += 1
        return self.state, float(reward), done, info

    def render(self, mode='console'):
        """Does nothing for 'console'; any other mode is not implemented.

        Raises:
            NotImplementedError: if ``mode`` is not 'console'.
        """
        if mode != 'console':
            raise NotImplementedError()

    def close(self):
        """No resources to release."""
        pass