Skip to content

Commit cfb23ef

Browse files
Author: Xuye (Chris) Qin
Stop recording all mapper meta (mars-project#2900)
1 parent 17754ba commit cfb23ef

File tree

18 files changed: 784 additions and 135 deletions.

.codacy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ exclude_paths:
88
- 'bin'
99
- 'docs'
1010
- 'mars/_version.py'
11-
- 'mars/lib'
11+
- 'mars/lib/**'
1212
- 'mars/web/static'
1313
- 'misc'
1414
- 'versioneer.py'

.codecov.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ ignore:
2828
- "mars/lib/enum.py"
2929
- "mars/lib/six.py"
3030
- "mars/lib/nvutils.py"
31+
- "mars/lib/ordered_set.py"
3132
- "mars/lib/tblib"
3233
- "mars/lib/uhashring"
3334
- "mars/lib/version.py"

LICENSE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ MIT License
259259
- sqlalchemy:1.4.0
260260
- async-lru:1.0.2
261261
- setuptools:55.0.0
262+
- bloom-filter2:2.0
263+
- ordered-set:4.1.0
262264
- react:17.0.0
263265
- react-dom:17.0.0
264266
- react-router-dom:5.2.0
@@ -267,7 +269,6 @@ MIT License
267269
- dagre-d3:0.6.4
268270
- cytoscape:3.19.1
269271
- cytoscape-dagre:2.3.2
270-
- bloom-filter2: 2.0
271272

272273

273274
ISC License

asv_bench/benchmarks/graph_assigner.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ def setup(self):
3333
df2 = md.DataFrame(
3434
mt.random.rand(num_rows, 4, chunk_size=10), columns=list("abcd")
3535
)
36-
merged_df = df1.merge(df2, left_on="a", right_on="a")
36+
merged_df = df1.merge(
37+
df2, left_on="a", right_on="a", auto_merge="none", bloom_filter=False
38+
)
3739
graph = TileableGraph([merged_df.data])
3840
next(TileableGraphBuilder(graph).build())
3941
self.chunk_graph = next(ChunkGraphBuilder(graph, fuse_enabled=False).build())
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 1999-2022 Alibaba Group Holding Ltd.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import mars
16+
import mars.tensor as mt
17+
from mars.learn.model_selection import train_test_split
18+
19+
20+
class ModelSelectionSuite:
21+
"""
22+
Benchmark learn model selection.
23+
"""
24+
25+
def setup(self):
26+
self._session = mars.new_session()
27+
28+
def teardown(self):
29+
self._session.stop_server()
30+
31+
def time_train_test_split(self):
32+
t = mt.random.rand(10_000, 10, chunk_size=200)
33+
train_test_split(t, test_size=0.3, session=self._session)

mars/dataframe/utils.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,16 +1363,18 @@ def _concat_chunks(merge_chunks: List[ChunkType], output_index: int):
13631363
n_split = []
13641364
out_chunks = []
13651365
for chunk, chunk_memory_size in zip(df_or_series.chunks, memory_sizes):
1366-
if acc_memory_size + chunk_memory_size > to_merge_size:
1367-
if len(to_merge_chunks) > 0:
1368-
# adding current chunk would exceed the maximum,
1369-
# concat previous chunks
1370-
merged_chunk = _concat_chunks(to_merge_chunks, len(n_split))
1371-
out_chunks.append(merged_chunk)
1372-
n_split.append(merged_chunk.shape[0])
1373-
# reset
1374-
acc_memory_size = 0
1375-
to_merge_chunks = []
1366+
if (
1367+
acc_memory_size + chunk_memory_size > to_merge_size
1368+
and len(to_merge_chunks) > 0
1369+
):
1370+
# adding current chunk would exceed the maximum,
1371+
# concat previous chunks
1372+
merged_chunk = _concat_chunks(to_merge_chunks, len(n_split))
1373+
out_chunks.append(merged_chunk)
1374+
n_split.append(merged_chunk.shape[0])
1375+
# reset
1376+
acc_memory_size = 0
1377+
to_merge_chunks = []
13761378

13771379
to_merge_chunks.append(chunk)
13781380
acc_memory_size += chunk_memory_size

0 commit comments

Comments
 (0)