Source code for or_suite.agents.resource_allocation.fixed_threshold

import numpy as np
import cvxpy as cp
from .. import Agent


[docs]class fixedThresholdAgent(Agent): """ Fixed Guardrail provides lower thresholds on budget distribution calculated by solving the primal-dual paradigm of Eisenberg-Gale Convex Progam Methods: generate_cvxpy_solver() : Creates a generic solver to solve the offline resource allocation problem. get_lower_upper_sol(init_size) : Uses solver to get the lower threshold on budget distribution get_expected_endowments(N=1000) : MCM for estimating Expectation of type distribution using N realizations. reset() : resets bounds of agent to reflect upper and lower bounds of metric space. update_config(env, config) : Updates environment configuration dictionary. update_obs(obs, action, reward, newObs, timestep, info) : Add observation to records. update_policy(k) : Update internal policy based upon records. pick_action(state, step) : move agent to midpoint or perturb current dimension Attributes: num_types (int) : Number of types num_resources (int) : Number of commodities budget_remaining (int) : Amount of each commodity the principal begins with. scale (int) : Hyperparameter to be used in calculating threshold epLen (int) : Number of locations (also the length of an episode). data (list) : All data observed so far first_allocation_done (bool) : Flag that if false, gets upper and lower thresh conf_const (int) : Hyperparameter for confidence bound exp_endowments (list) : Matrix containing expected proportion of endowments for location t stdev_endowments (list) : Matrix describing variance of exp_endowments prob (cvxpy object) : CVXPY problem object solver (lambda function) : Function that solves the problem given data lower_sol (np.array) : Matrix of lower threshold upper_sol (np.array) : Matrix of upper threshold """
[docs] def __init__(self, epLen, env_config): ''' Initialize fixed_threshold agent Args: epLen: number of steps env_config: parameters used in initialization of environment scale: hyperparameter to be used in calculating threshold ''' self.env_config = env_config self.num_types = env_config['weight_matrix'].shape[0] self.num_resources = self.env_config['weight_matrix'].shape[1] self.budget_remaining = np.copy(self.env_config['init_budget']()) self.epLen = epLen self.data = [] self.first_allocation_done = False self.conf_const = 2 self.from_data = env_config['from_data'] self.exp_endowments, self.stdev_endowments = self.get_expected_endowments() self.prob, self.solver = self.generate_cvxpy_solver() self.lower_sol = np.zeros((self.num_types, self.num_resources))
[docs] def generate_cvxpy_solver(self): """ Creates a generic solver to solve the offline resource allocation problem Returns: prob - CVXPY problem object solver - function that solves the problem given data """ num_types = self.num_types num_resources = self.num_resources x = cp.Variable(shape=(num_types, num_resources)) sizes = cp.Parameter(num_types, nonneg=True) weights = cp.Parameter((num_types, num_resources), nonneg=True) budget = cp.Parameter(num_resources, nonneg=True) objective = cp.Maximize( cp.log(cp.sum(cp.multiply(x, weights), axis=1)) @ sizes) constraints = [] constraints += [0 <= x] for i in range(num_resources): constraints += [x[:, i] @ sizes <= budget[i]] prob = cp.Problem(objective, constraints) def solver(true_sizes, true_weights, true_budget): sizes.value = true_sizes weights.value = true_weights budget.value = true_budget prob.solve() return prob.value, np.around(x.value, 5) return prob, solver
[docs] def get_lower_upper_sol(self, init_sizes): """ Uses solver to get the lower threshold """ budget = self.env_config['init_budget']() weights = self.env_config['weight_matrix'] n = self.env_config['num_rounds'] tot_size = np.sum(self.exp_endowments[:, 1:], axis=1) future_size = init_sizes + tot_size conf_bnd = self.conf_const * np.sqrt(np.max(self.stdev_endowments, axis=1) * np.mean(self.exp_endowments, axis=1)*(n-1)) lower_exp_size = future_size * \ (1 + np.max(conf_bnd / future_size)) _, lower_sol = self.solver(lower_exp_size, weights, budget) return lower_sol
[docs] def get_expected_endowments(self, N=1000): """ Monte Carlo Method for estimating Expectation of type distribution using N realizations Only need to run this once to get expectations for all locations Returns: rel_exp_endowments - matrix containing expected proportion of endowments for location t """ num_types = self.env_config['weight_matrix'].shape[0] exp_size = np.zeros((num_types, self.env_config['num_rounds'])) var_size = np.zeros((num_types, self.env_config['num_rounds'])) for t in range(self.env_config['num_rounds']): cur_list = [] for _ in range(N): obs_size = self.env_config['type_dist'](t) exp_size[:, t] += obs_size cur_list.append(obs_size) exp_size[:, t] = (1/N)*exp_size[:, t] var_size[:, t] = np.var(np.asarray(cur_list), axis=0) return exp_size, np.sqrt(var_size)
[docs] def reset(self): ''' Resets data matrix to be empty ''' self.budget_remaining = np.copy(self.env_config['init_budget']()) self.data = []
[docs] def update_config(self, env, config): '''Updates environment configuration dictionary''' self.env_config = config return
[docs] def update_obs(self, obs, action, reward, newObs, timestep, info): '''Add observation to records''' self.data.append(newObs) return
[docs] def update_policy(self, k): '''Update internal policy based upon records''' self.budget_remaining = np.copy(self.env_config['init_budget']())
[docs] def pick_action(self, state, step): ''' Returns allocation of resources based on calculated upper and lower solutions Args: state : vector with first K entries denoting remaining budget, and remaining n entires denoting the number of people of each type that appears step : timestep Returns: matrix where each row is a K-dimensional vector denoting how much of each commodity is given to each type ''' if step == 0: self.current_budget = np.copy(self.env_config['init_budget']()) if self.from_data: mean, stdev = self.env_config['type_dist'](-2) self.exp_endowments = np.transpose(mean) self.stdev_endowments = np.transpose(stdev) sizes = state[self.num_resources:] self.lower_sol = self.get_lower_upper_sol(sizes) if np.all(self.budget_remaining == 0): pass budget_remaining = state[:self.num_resources] sizes = state[self.num_resources:] lower_thresh = self.lower_sol resource_index = budget_remaining - \ np.matmul(sizes, self.lower_sol) > 0 action = resource_index * lower_thresh + \ (1 - resource_index) * \ np.array([budget_remaining / np.sum(sizes), ]*self.num_types) action = np.array([list(map(lambda x: max(x, 0.0), values)) for values in action]) self.budget_remaining = budget_remaining - \ np.matmul(sizes, action) return action