
Commit f62a3f8

Accurate resource management for global slot manager (mars-project#2732)
1 parent 5bdd17b commit f62a3f8

22 files changed: +463 -293 lines changed

mars/_resource.pyx

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cdef class Resource:
    cdef readonly:
        float num_cpus
        float num_gpus
        float num_mem_bytes

    def __init__(self, float num_cpus=0, float num_gpus=0, float num_mem_bytes=0):
        self.num_cpus = num_cpus
        self.num_gpus = num_gpus
        self.num_mem_bytes = num_mem_bytes

    def __eq__(self, Resource other):
        return self.num_mem_bytes == other.num_mem_bytes and \
               self.num_gpus == other.num_gpus and \
               self.num_cpus == other.num_cpus

    def __gt__(self, Resource other):
        return not self.__le__(other)

    def __le__(self, Resource other):
        # memory first, then gpu, cpu last
        return self.num_mem_bytes <= other.num_mem_bytes and \
               self.num_gpus <= other.num_gpus and \
               self.num_cpus <= other.num_cpus

    def __add__(self, Resource other):
        return Resource(num_cpus=self.num_cpus + other.num_cpus,
                        num_gpus=self.num_gpus + other.num_gpus,
                        num_mem_bytes=self.num_mem_bytes + other.num_mem_bytes)
    def __sub__(self, Resource other):
        return Resource(num_cpus=self.num_cpus - other.num_cpus,
                        num_gpus=self.num_gpus - other.num_gpus,
                        num_mem_bytes=self.num_mem_bytes - other.num_mem_bytes)
    def __neg__(self):
        return Resource(num_cpus=-self.num_cpus, num_gpus=-self.num_gpus, num_mem_bytes=-self.num_mem_bytes)

    def __repr__(self):
        return f"Resource(num_cpus={self.num_cpus}, num_gpus={self.num_gpus}, num_mem_bytes={self.num_mem_bytes})"

ZeroResource = Resource(num_cpus=0, num_gpus=0, num_mem_bytes=0)
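
For readers skimming the diff: the Resource value type above supports component-wise arithmetic, and its ordering is satisfied only when memory, GPU and CPU all fit. Below is a minimal illustrative sketch (not part of the commit) of how the class behaves, assuming the compiled extension is importable through the mars.resource re-export introduced in the next file:

# Illustrative usage only; assumes the compiled `mars._resource` extension
# is available through the `mars.resource` re-export shown below.
from mars.resource import Resource, ZeroResource

total = Resource(num_cpus=8, num_gpus=1, num_mem_bytes=16 * 1024**3)
used = Resource(num_cpus=6, num_mem_bytes=4 * 1024**3)
request = Resource(num_cpus=4)

remaining = total - used          # component-wise subtraction
print(remaining)                  # Resource(num_cpus=2.0, num_gpus=1.0, num_mem_bytes=...)

# `<=` holds only if memory, GPU and CPU all fit; `>` is defined as `not <=`,
# so a request that exceeds any single component counts as "too large".
print(used + request <= total)    # False: 6 + 4 CPUs exceed the 8 available
print(used + request > total)     # True, which is what the admission check relies on
print(ZeroResource <= remaining)  # True: nothing is over-committed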

mars/resource.py

Lines changed: 4 additions & 0 deletions
@@ -25,8 +25,12 @@
 import psutil
 
 from .lib import nvutils
+from ._resource import Resource, ZeroResource
 from .utils import get_bool_environ
 
+Resource = Resource
+ZeroResource = ZeroResource
+
 logger = logging.getLogger(__name__)
 
 CGROUP_CPU_STAT_FILE = "/sys/fs/cgroup/cpuacct/cpuacct.usage"

mars/services/scheduling/api/oscar.py

Lines changed: 3 additions & 3 deletions
@@ -135,11 +135,11 @@ async def finish_subtasks(
 class MockSchedulingAPI(SchedulingAPI):
     @classmethod
     async def create(cls: Type[APIType], session_id: str, address: str) -> APIType:
-        from ..supervisor import GlobalSlotManagerActor, AutoscalerActor
+        from ..supervisor import GlobalResourceManagerActor, AutoscalerActor
 
         await mo.create_actor(
-            GlobalSlotManagerActor,
-            uid=GlobalSlotManagerActor.default_uid(),
+            GlobalResourceManagerActor,
+            uid=GlobalResourceManagerActor.default_uid(),
             address=address,
         )
         await mo.create_actor(

mars/services/scheduling/supervisor/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 
 from .assigner import AssignerActor
 from .autoscale import AutoscalerActor
-from .globalslot import GlobalSlotManagerActor
+from .globalresource import GlobalResourceManagerActor
 from .manager import SubtaskManagerActor
 from .queueing import SubtaskQueueingActor
 from .service import SchedulingSupervisorService

mars/services/scheduling/supervisor/autoscale.py

Lines changed: 10 additions & 7 deletions
@@ -36,7 +36,7 @@ def __init__(self, autoscale_conf: Dict[str, Any]):
         self._autoscale_conf = autoscale_conf
         self._cluster_api = None
         self.queueing_refs = dict()
-        self.global_slot_ref = None
+        self.global_resource_ref = None
         self._dynamic_workers: Set[str] = set()
 
     async def __post_create__(self):
@@ -46,10 +46,10 @@ async def __post_create__(self):
             strategy_cls = getattr(importlib.import_module(module), name)
         else:
             strategy_cls = PendingTaskBacklogStrategy
-        from ..supervisor import GlobalSlotManagerActor
+        from ..supervisor import GlobalResourceManagerActor
 
-        self.global_slot_ref = await mo.actor_ref(
-            GlobalSlotManagerActor.default_uid(), address=self.address
+        self.global_resource_ref = await mo.actor_ref(
+            GlobalResourceManagerActor.default_uid(), address=self.address
         )
         self._cluster_api = await ClusterAPI.create(self.address)
         self._strategy = await strategy_cls.create(self._autoscale_conf, self)
@@ -114,14 +114,17 @@ async def release_workers(self, addresses: List[str]):
         )
         # Ensure global_slot_manager get latest bands timely, so that we can invoke `wait_band_idle`
         # to ensure there won't be new tasks scheduled to the stopping worker.
-        await self.global_slot_ref.refresh_bands()
+        await self.global_resource_ref.refresh_bands()
         excluded_bands = set(b for bands in workers_bands.values() for b in bands)
 
         async def release_worker(address):
             logger.info("Start to release worker %s.", address)
             worker_bands = workers_bands[address]
             await asyncio.gather(
-                *[self.global_slot_ref.wait_band_idle(band) for band in worker_bands]
+                *[
+                    self.global_resource_ref.wait_band_idle(band)
+                    for band in worker_bands
+                ]
             )
             await self._migrate_data_of_bands(worker_bands, excluded_bands)
             await self._cluster_api.release_worker(address)
@@ -353,7 +356,7 @@ async def _scale_out(self, queueing_refs):
 
     async def _scale_in(self):
         idle_bands = set(
-            await self._autoscaler.global_slot_ref.get_idle_bands(
+            await self._autoscaler.global_resource_ref.get_idle_bands(
                 self._worker_idle_timeout
             )
         )
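
The hunks above switch the autoscaler from slot counting to the resource manager: before a worker is stopped, it refreshes the band list and then blocks until every band of that worker has drained. A condensed, illustrative sketch of that interaction follows; `wait_worker_idle` is a hypothetical helper mirroring `release_workers` above, not part of the commit.

# Illustrative sketch of the scale-in handshake; `wait_worker_idle` is a
# hypothetical helper, not part of this commit.
import asyncio

from mars import oscar as mo
from mars.services.scheduling.supervisor import GlobalResourceManagerActor


async def wait_worker_idle(supervisor_address: str, worker_bands):
    ref = await mo.actor_ref(
        GlobalResourceManagerActor.default_uid(), address=supervisor_address
    )
    # Pull the latest band list so newly joined bands are tracked before waiting.
    await ref.refresh_bands()
    # `wait_band_idle` resolves once a band's used resources drop back to zero.
    await asyncio.gather(*[ref.wait_band_idle(band) for band in worker_bands])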

mars/services/scheduling/supervisor/globalresource.py

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
# Copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import logging
import time
from collections import defaultdict
from typing import List, DefaultDict, Dict, Tuple

from .... import oscar as mo
from ....resource import Resource, ZeroResource
from ....typing import BandType

logger = logging.getLogger(__name__)


class GlobalResourceManagerActor(mo.Actor):
    # {(address, resource_type): {(session_id, subtask_id): Resource(...)}}
    _band_stid_resources: DefaultDict[BandType, Dict[Tuple[str, str], Resource]]
    _band_used_resources: Dict[BandType, Resource]
    _band_total_resources: Dict[BandType, Resource]

    def __init__(self):
        self._band_stid_resources = defaultdict(dict)
        self._band_used_resources = defaultdict(lambda: ZeroResource)
        self._band_idle_start_time = dict()
        self._band_idle_events = dict()
        self._band_total_resources = dict()
        self._cluster_api = None
        self._band_watch_task = None

    async def __post_create__(self):
        from ...cluster.api import ClusterAPI

        self._cluster_api = await ClusterAPI.create(self.address)

        async def watch_bands():
            async for bands in self._cluster_api.watch_all_bands():
                old_bands = set(self._band_total_resources.keys())
                await self._refresh_bands(bands)
                new_bands = set(bands.keys()) - old_bands
                for band in new_bands:
                    self._update_band_usage(band, ZeroResource)

        self._band_watch_task = asyncio.create_task(watch_bands())

    async def __pre_destroy__(self):
        self._band_watch_task.cancel()

    async def refresh_bands(self):
        bands = await self._cluster_api.get_all_bands()
        await self._refresh_bands(bands)

    async def _refresh_bands(self, bands):
        # TODO add `num_mem_bytes` after supported report worker memory
        band_total_resources = {}
        for band, slot in bands.items():
            if band[1].startswith("gpu"):
                band_total_resources[band] = Resource(num_gpus=slot)
            elif band[1].startswith("numa"):
                band_total_resources[band] = Resource(num_cpus=slot)
            else:
                raise NotImplementedError(f"Unsupported band type {band}")
        self._band_total_resources = band_total_resources

    @mo.extensible
    async def apply_subtask_resources(
        self,
        band: BandType,
        session_id: str,
        subtask_ids: List[str],
        subtask_resources: List[Resource],
    ) -> List[str]:
        if (
            not self._band_total_resources or band not in self._band_total_resources
        ):  # pragma: no cover
            await self.refresh_bands()
        idx = 0
        # only ready bands will pass
        if band in self._band_total_resources:
            total_resource = self._band_total_resources[band]
            for stid, subtask_resource in zip(subtask_ids, subtask_resources):
                band_used_resource = self._band_used_resources[band]
                if band_used_resource + subtask_resource > total_resource:
                    break
                self._band_stid_resources[band][(session_id, stid)] = subtask_resource
                self._update_band_usage(band, subtask_resource)
                idx += 1
        if idx == 0:
            logger.debug(
                "No resources available, status: %r, request: %r",
                self._band_used_resources,
                subtask_resources,
            )
        return subtask_ids[:idx]

    @mo.extensible
    def update_subtask_resources(
        self, band: BandType, session_id: str, subtask_id: str, resource: Resource
    ):
        session_subtask_id = (session_id, subtask_id)
        subtask_resources = self._band_stid_resources[band]
        if session_subtask_id not in subtask_resources:
            return

        resource_delta = resource - subtask_resources[session_subtask_id]
        subtask_resources[session_subtask_id] = resource
        self._update_band_usage(band, resource_delta)

    @mo.extensible
    def release_subtask_resource(
        self, band: BandType, session_id: str, subtask_id: str
    ):
        # todo ensure slots released when subtasks ends in all means
        resource_delta = self._band_stid_resources[band].pop(
            (session_id, subtask_id), ZeroResource
        )
        self._update_band_usage(band, -resource_delta)

    def _update_band_usage(self, band: BandType, band_usage_delta: Resource):
        self._band_used_resources[band] += band_usage_delta
        # some code path doesn't call `apply_subtask_resources`
        band_total_resource = self._band_total_resources.get(band)
        if (
            band_total_resource is not None
            and self._band_used_resources[band] > band_total_resource
        ):  # pragma: no cover
            raise Exception(
                f"Resource exceed: band used resource {self._band_used_resources[band]} "
                f"band total resource {self._band_total_resources[band]}"
            )
        if self._band_used_resources[band] <= ZeroResource:
            self._band_used_resources.pop(band)
            self._band_idle_start_time[band] = time.time()
            if band in self._band_idle_events:
                self._band_idle_events.pop(band).set()
        else:
            self._band_idle_start_time[band] = -1

    def get_used_resources(self) -> Dict[BandType, Resource]:
        return self._band_used_resources

    def get_remaining_resources(self) -> Dict[BandType, Resource]:
        resources = {}
        for band, resource in self._band_total_resources.items():
            used_resource = self.get_used_resources()[band]
            resources[band] = resource - used_resource
        return resources

    async def get_idle_bands(self, idle_duration: int):
        """Return a band list which all bands has been idle for at least `idle_duration` seconds."""
        now = time.time()
        idle_bands = []
        for band in self._band_total_resources.keys():
            idle_start_time = self._band_idle_start_time.get(band)
            if idle_start_time is None:  # pragma: no cover
                # skip new requested band for this round scale in.
                self._band_idle_start_time[band] = now
            elif idle_start_time > 0 and now >= idle_start_time + idle_duration:
                idle_bands.append(band)
        return idle_bands

    async def wait_band_idle(self, band: BandType):
        if self._band_idle_start_time[band] <= 0:
            if band in self._band_idle_events:
                event = self._band_idle_events[band]
            else:
                event = asyncio.Event()
                self._band_idle_events[band] = event
            return event.wait()
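
Taken together, the actor above is the admission-control point for subtask scheduling: callers apply resources per band, get back only the subtask ids that fit, and release them when subtasks finish, which in turn drives the idle bookkeeping the autoscaler relies on. Below is a rough, illustrative round-trip, assuming the actor has already been created on the supervisor (as MockSchedulingAPI does above) and that a band such as ("127.0.0.1:12345", "numa-0") has been reported by the cluster service; `run_one_subtask` is a hypothetical helper, not part of the commit.

# Illustrative only; `run_one_subtask` is a hypothetical helper and the
# actual submission step is elided.
from mars import oscar as mo
from mars.resource import Resource
from mars.services.scheduling.supervisor import GlobalResourceManagerActor


async def run_one_subtask(supervisor_address: str, band, session_id, subtask_id):
    ref = await mo.actor_ref(
        GlobalResourceManagerActor.default_uid(), address=supervisor_address
    )
    # Ask for one CPU on the band; only the subtask ids that fit are returned,
    # so an empty result means the subtask has to stay queued.
    granted = await ref.apply_subtask_resources(
        band, session_id, [subtask_id], [Resource(num_cpus=1)]
    )
    if not granted:
        return False
    try:
        ...  # submit the subtask to the worker band here
    finally:
        # Give the resources back; when band usage reaches zero the band is
        # marked idle and any `wait_band_idle` waiters are woken up.
        await ref.release_subtask_resource(band, session_id, subtask_id)
    return True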
