Source code for or_suite.envs.ridesharing.rideshare_graph

"""Implementation of an RL environment in a discrete graph space.

A ridesharing environment over a simple graph. An agent interacts through the
environment by choosing a non-zero node to service a given rideshare request.
"""

import numpy as np
from numpy.random import default_rng
import gym
from gym import spaces
import networkx as nx
import math

from .. import env_configs


[docs]class RideshareGraphEnvironment(gym.Env): """Custom Rideshare Graph Environment that follows gym interface. This is a simple env where the requests are uniformly distributed across nodes. Attributes: config: A dictionary containing the initial configuration of the rideshare graph environment. epLen: An integer representing the total number of time steps. graph: An object containing nodes and edges; each edge has a travel time. num_nodes: An integer count of the number of nodes in the graph. starting_state: A vector representing the initial state of the environment; the first K elements represent the number of cars at each node, and the final 2 elements represent the current request that needs to be satisfied, i.e. node i to node j. state: A vector representing the state of the environment; the first K elements represent the number of cars at each node, and the final 2 elements represent the current request that needs to be satisfied, i.e. node i to node j. timestep: An integer representing the current timestep of the model. num_cars: An integer representing the number of cars in the model. lengths: A 2-dimensional symmetric array containing the distances between each pair of nodes. request_dist: A vector consisting of the distribution used for selecting nodes when generating requests. reward: A lambda function to generate the reward. reward_fail: A lambda function to generate the reward when the RL agent fails; i.e. when a request is not satisfied. action_space: A discrete set of values the action can have; in this case the action space is an integer within {0..K-1}. observation_space: A multidiscrete that represents all possible values of the state; i.e. all possible values for the amount of cars at each node and all possible nodes for any request. """
[docs] def __init__(self, config=env_configs.rideshare_graph_default_config): """Inits RideshareGraphEnvironment with the given configuration. Args: config: A dictionary containing the initial configuration of the rideshare graph environment. """ self.config = config self.epLen = config['epLen'] self.graph = nx.Graph(config['edges']) self.num_nodes = self.graph.number_of_nodes() self.timestep = 0 self.num_cars = config['num_cars'] self.lengths = self.find_lengths(self.graph, self.num_nodes) self.request_dist = config['request_dist'] self.reward = config['reward'] self.reward_denied = config['reward_denied'] self.reward_fail = config['reward_fail'] self.cost = config['cost'] self.fare = config['fare'] self.velocity = config['velocity'] self.max_dist = np.max(self.lengths.flatten()) self.gamma = config['gamma'] self.d_threshold = config['d_threshold'] self.action_space = spaces.Discrete(self.num_nodes) self.num_transit = 0 self.in_transit = np.zeros((self.num_cars, 2), dtype=int) self.travel_time = config['travel_time'] if self.travel_time: vec = [self.num_cars for _ in range(self.num_nodes)] + [self.num_nodes for _ in range(self.num_cars)] + [ self.epLen for _ in range(self.num_cars)] + [self.num_cars] + [self.num_nodes, self.num_nodes] self.starting_state = np.asarray(np.concatenate( (self.config['starting_state'], self.in_transit.flatten(), [self.num_transit], self.request_dist(0, self.num_nodes)))) else: vec = [self.num_cars+1 for _ in range( self.num_nodes)] + [self.num_nodes, self.num_nodes] self.starting_state = np.asarray(np.concatenate( (self.config['starting_state'], self.request_dist(0, self.num_nodes)))) self.observation_space = spaces.MultiDiscrete(vec) self.state = self.starting_state
[docs] def reset(self): """Reinitializes variables and returns the starting state.""" self.timestep = 0 self.state = self.starting_state return self.state
[docs] def get_config(self): """Returns the configuration for the current environment.""" return self.config
[docs] def fulfill_req(self, state, dispatch, sink): """Update the state to represent a car moving from source to sink. Args: dispatch: An integer representing the dispatched node for the rideshare request. sink: An integer representing the destination node of the rideshare request. """ state[dispatch] -= 1 state[sink] += 1
def step_in_transit(self, state): for i in range(state[-3]): state[self.num_nodes + 2 * i + 1] -= 1 # When the transit is complete if state[self.num_nodes + 2 * i + 1] <= 0: # Make the car who completed transit available again transit_arrival = state[self.num_nodes + 2 * i] state[transit_arrival] += 1 # Removing & Compressing down for i in range(state[-3] - 1, -1, -1): if state[self.num_nodes + 2 * i + 1] <= 0: if i == state[-3] - 1: state[self.num_nodes + 2 * i] = 0 state[self.num_nodes + 2 * i + 1] = 0 else: state[self.num_nodes + 2 * i] = state[self.num_nodes + 2 * state[-3] - 2] state[self.num_nodes + 2 * i + 1] = state[self.num_nodes + 2 * state[-3] - 1] state[self.num_nodes + 2 * state[-3] - 2] = 0 state[self.num_nodes + 2 * state[-3] - 1] = 0 state[-3] -= 1
[docs] def find_lengths(self, graph, num_nodes): """Find the lengths between each pair of nodes in [graph]. Given a graph, find_lengths first calculates the pairwise shortest distance between all the nodes, which is stored in a (symmetric) matrix. Args: graph: An object containing nodes and edges; each edge has a travel time. num_nodes: An integer representing the number of nodes in the graph. Returns: A 2-dimensional symmetric array containing the distances between each pair of nodes. """ dict_lengths = dict(nx.all_pairs_dijkstra_path_length( graph, cutoff=None, weight='travel_time')) lengths = np.zeros((num_nodes, num_nodes)) for node1 in range(num_nodes): for node2 in range(num_nodes): lengths[node1, node2] = dict_lengths[node1][node2] return lengths
[docs] def step(self, action): """Move one step in the environment. Args: action: An Integer representing the node selected by the agent to service the request. Returns: A 3-tuple consisting of the following elements: - An updated representation of the state, including updated car locations resulting from the previous dispatch and a new ride request, - An integer reward value based on the action, - A boolean indicating whether or not the model has reached the limit timestep. """ assert self.action_space.contains(action) done = False accepted = False source = self.state[-2] sink = self.state[-1] newState = np.copy(self.state) dispatch_dist = self.lengths[action, source] service_dist = self.lengths[source, sink] reward_range = self.reward( self.fare, self.cost, 0, self.max_dist) - self.reward_fail(self.max_dist, self.cost) max_fail_reward = self.reward_fail(self.max_dist, self.cost) # If there is a car at the location the agent chose if newState[action] > 0: exp = np.exp(self.gamma*(dispatch_dist-self.d_threshold)) prob = 1 / (1 + exp) accept = np.random.binomial(1, prob) # print("prob: " + str(prob)) # print("accept: " + str(accept)) if accept == 1: # print('accept service') self.fulfill_req(newState, action, sink) reward = self.reward(self.fare, self.cost, dispatch_dist, service_dist) reward = (reward - max_fail_reward) / reward_range accepted = True else: # print('decline service') reward = (self.reward_denied() - max_fail_reward) / reward_range else: reward = 0 # updating the state with a new rideshare request new_request = self.request_dist(self.timestep, self.num_nodes) newState[-2] = new_request[0] newState[-1] = new_request[1] # reducing remaining time for cars in transit or completing the transit if self.travel_time: self.step_in_transit(newState) if self.timestep == self.epLen - 1: done = True self.state = newState self.timestep += 1 return self.state, np.float64(reward), done, {'request': new_request, 'acceptance': accepted}