# Source code for or_suite.envs.oil_discovery.oil_problem

"""An oil discovery environment over the metric space [0,1]^k.

Here the agent interacts with the environment by picking a location to travel to,
paying a cost of travel, and receiving a reward at the new location."""

import numpy as np
import gym
from gym import spaces
import math

from .. import env_configs


class OilEnvironment(gym.Env):
    """
    An oil discovery problem on the metric space [0,1]^k for some power k.

    The state space and the action space have the same dimension: the state
    is the agent's current location and an action is the location the agent
    wants to move to.

    Methods:
        reset() : Resets the timestep and puts the agent back at the starting state.
        get_config() : Returns the config dictionary used to initialize the environment.
        step(action) : Moves the agent and returns (state, reward, done, info).
        render(mode) : Not overridden here; inherited from gym.Env.
        close() : Not overridden here; inherited from gym.Env.

    Attributes:
        config: The config dictionary the environment was built from.
        epLen: The (int) number of time steps to run the experiment for.
        dim: The (int) dimension k of the state and action spaces.
        oil_prob (lambda function): A function taking a state, action and timestep as input and
            returning the reward (before travel cost) for moving the agent to that location.
        cost_param (float): The parameter regulating the cost for moving the agent from one location to another.
        noise_variance (lambda function): A function taking a state, action and timestep as input and
            returning the variance of the Gaussian noise added to the agent's new location.
        starting_state: The starting location of the agent, exactly as given in the config.
        state: The current location of the agent (float32 array).
        timestep: The (int) index of the current step within the episode.
        action_space: (Gym.spaces Box) Actions must be the location to move the agent.
        observation_space: (Gym.spaces Box) The location of the agent.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, config=env_configs.oil_environment_default_config):
        """
        Build the environment from a config dictionary.

        Args:
            config: A dictionary with the keys 'epLen', 'dim', 'starting_state',
                'oil_prob', 'cost_param' and 'noise_variance'.
        """
        self.config = config
        self.epLen = config['epLen']
        self.dim = config['dim']
        self.starting_state = config['starting_state']
        self.timestep = 0
        self.oil_prob = config['oil_prob']
        self.cost_param = config['cost_param']
        self.noise_variance = config['noise_variance']

        # Work on a float32 copy so the state matches the dtype of the
        # observation space and never aliases the object stored in the config.
        self.state = np.array(self.starting_state, dtype=np.float32)

        self.observation_space = spaces.Box(low=0, high=1,
                                            shape=(self.dim,), dtype=np.float32)
        self.action_space = spaces.Box(low=0, high=1,
                                       shape=(self.dim,), dtype=np.float32)

    def reset(self):
        """
        Reset the environment to its original settings.

        Returns:
            The starting state as a fresh float32 array; mutating it does not
            affect the stored starting state.
        """
        self.timestep = 0
        self.state = np.array(self.starting_state, dtype=np.float32)
        return self.state

    def get_config(self):
        """Return the config dictionary used to initialize the environment."""
        return self.config

    def step(self, action):
        """
        Move one step in the environment.

        Args:
            action: The location to move the agent to; must be contained in
                the action space.
        Returns:
            state, reward, done, info:
            state: float32 array; the new (noisy, clipped to [0,1]) location.

            reward: the oil reward minus the travel cost, clipped to [0,1].

            done: bool; the flag for end of the episode.

            info: dict; always empty.
        """
        if isinstance(action, np.ndarray):
            action = action.astype(np.float32)
        assert self.action_space.contains(action), (
            "action %r is not contained in the action space %r"
            % (action, self.action_space))

        # Reward: oil found minus an L1 travel cost, kept inside [0, 1].
        travel_cost = self.cost_param * np.sum(np.abs(self.state - action))
        reward = min(1.0, max(self.oil_prob(self.state, action, self.timestep)
                              - travel_cost, 0))

        # The agent lands at the chosen location plus zero-mean Gaussian
        # noise, clipped so that it stays inside the unit cube.
        noise_std = np.sqrt(self.noise_variance(
            self.state, action, self.timestep))
        noise = np.random.normal(np.zeros(self.dim), noise_std)
        newState = np.clip(action + noise, 0, 1).astype(np.float32)

        # '>=' keeps the flag raised if the caller steps past the last step.
        done = bool(self.timestep >= self.epLen - 1)

        self.state = newState
        self.timestep += 1

        return self.state, reward, done, {}