Commit b9ef23a

update samples from Release-59 as a part of SDK release
1 parent 7e2c1ca commit b9ef23a

File tree

13 files changed: +1758 −1 lines


how-to-use-azureml/reinforcement-learning/README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -35,6 +35,7 @@ Using these samples, you will learn how to do the following.
 | [cartpole_sc.ipynb](cartpole-on-single-compute/cartpole_sc.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Cluster (single node) |
 | [pong_rllib.ipynb](atari-on-distributed-compute/pong_rllib.ipynb) | Notebook for distributed training of Pong agent using RLlib on multiple compute targets |
 | [minecraft.ipynb](minecraft-on-distributed-compute/minecraft.ipynb) | Notebook to train an agent to navigate through a lava maze in the Minecraft game |
+| [particle.ipynb](multiagent-particle-envs/particle.ipynb) | Notebook to train policies in a multiagent cooperative navigation scenario based on OpenAI's Particle environments |
 
 ## Prerequisites
```

Lines changed: 60 additions & 0 deletions

```dockerfile
FROM mcr.microsoft.com/azureml/base:openmpi3.1.2-ubuntu18.04

# Install some basic utilities
RUN apt-get update && apt-get install -y \
    curl \
    ca-certificates \
    sudo \
    cpio \
    git \
    bzip2 \
    libx11-6 \
    tmux \
    htop \
    gcc \
    xvfb \
    python-opengl \
    x11-xserver-utils \
    ffmpeg \
    mesa-utils \
    nano \
    vim \
    rsync \
    && rm -rf /var/lib/apt/lists/*

# Install Python 3.7
RUN conda install python==3.7

# Create a working directory
RUN mkdir /app
WORKDIR /app

# Install required pip packages
RUN pip install --upgrade pip setuptools && pip install --upgrade \
    pandas \
    matplotlib \
    psutil \
    numpy \
    scipy \
    gym \
    azureml-defaults \
    tensorboardX \
    tensorflow==1.15 \
    tensorflow-probability==0.8.0 \
    onnxruntime \
    tf2onnx \
    cloudpickle==1.2.0 \
    tabulate \
    dm_tree \
    lz4 \
    opencv-python \
    ray==0.8.3 \
    ray[rllib]==0.8.3 \
    ray[tune]==0.8.3

# Install particle
RUN git clone https://github.com/openai/multiagent-particle-envs.git
COPY patch_files/* multiagent-particle-envs/multiagent/
RUN cd multiagent-particle-envs && \
    pip install -e . && \
    pip install --upgrade pyglet==1.3.2
```
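Assuming this file is saved as `Dockerfile` next to the `patch_files/` directory that the `COPY` step expects, a local build and import smoke test might look like the sketch below; the image tag is illustrative, not part of the commit:

```shell
# Build the training image; patch_files/ must sit alongside the Dockerfile.
docker build -t particle-rllib:local .

# Smoke test: verify the pinned Ray 0.8.3 and the patched particle package import.
docker run --rm particle-rllib:local \
    python -c "import ray, multiagent; print(ray.__version__)"
```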
Lines changed: 70 additions & 0 deletions

```python
# MIT License

# Copyright (c) 2018 OpenAI

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
import gym


class MultiDiscrete(gym.Space):
    """
    - The multi-discrete action space consists of a series of discrete action spaces with different
      parameters
    - It can be adapted to both a Discrete action space or a continuous (Box) action space
    - It is useful to represent game controllers or keyboards where each key can be represented as
      a discrete action space
    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action
      space where the discrete action space can take any integers from `min` to `max` (both inclusive)
    Note: A value of 0 always need to represent the NOOP action.
    e.g. Nintendo Game Controller
    - Can be conceptualized as 3 discrete action spaces:
        1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
        2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
        3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
    - Can be initialized as
        MultiDiscrete([ [0,4], [0,1], [0,1] ])
    """
    def __init__(self, array_of_param_array):
        self.low = np.array([x[0] for x in array_of_param_array])
        self.high = np.array([x[1] for x in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        """ Returns an array with one sample from each discrete action space """
        # For each row: round(random .* (max - min) + min, 0)
        # random_array = prng.np_random.rand(self.num_discrete_space)
        random_array = np.random.RandomState().rand(self.num_discrete_space)
        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]

    def contains(self, x):
        return len(x) == self.num_discrete_space \
            and (np.array(x) >= self.low).all() \
            and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        return self.num_discrete_space

    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)

    def __eq__(self, other):
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
```
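As a quick sanity check, the sampling logic in `sample()` can be exercised standalone. The sketch below uses numpy only (no gym dependency) and reuses the [min, max] bounds from the Nintendo-controller example in the class docstring, with a fixed seed for reproducibility:

```python
import numpy as np

# Bounds from the docstring example: arrow keys [0, 4], buttons A and B [0, 1].
low = np.array([0, 0, 0])
high = np.array([4, 1, 1])

# Same formula as MultiDiscrete.sample(): floor(random * (max - min + 1) + min),
# which yields an integer in [min, max] inclusive for each component.
rng = np.random.RandomState(0)
sample = [int(x) for x in np.floor((high - low + 1.0) * rng.rand(3) + low)]
print(sample)  # [2, 1, 1] with this seed

# Every component falls inside its [min, max] range, matching contains().
assert all(int(lo) <= s <= int(hi) for lo, s, hi in zip(low, sample, high))
```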

0 commit comments
