Skip to content

Commit 0fd9079

Browse files
committed
support compute_splits, parall_scan, agg, group_by
1 parent 6cb50ab commit 0fd9079

22 files changed

+8012
-835
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ TableStore SDK for Python 版本记录
33

44
Python SDK的版本号遵循 `Semantic Versioning <http://semver.org/>`_ 规则。
55

6+
Version 5.2.0
7+
-------------
8+
9+
- Support ParallelScan API
10+
- Support Max/Min/Avg/Sum/Count/DistinctCount
11+
- Support GroupBy API
12+
613
Version 4.3.5
714
-------------
815

README.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ Github安装
6969
- `批量读(从多张表,一次性读出多行数据) <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/batch_get_row.py>`_
7070
- `范围扫描(给定一个范围,扫描出该范围内的所有数据) <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/get_range.py>`_
7171
- `主键自增列(主键自动生成一个递增ID) <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/pk_auto_incr.py>`_
72-
- `多元索引 <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/search_index.py>`_
72+
- `Search <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/search_index.py>`_
73+
- `Parallel Scan <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/parallel_scan.py>`_
74+
- `Max/Min/Sum/Avg/Count/DistinctCount <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/agg.py>`_
75+
- `GroupBy <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/group_by.py>`_
7376
- `全局二级索引 <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/secondary_index_operations.py>`_
7477
- `局部事务(提交事务) <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/transaction_and_commit.py>`_
7578
- `局部事务(舍弃事务) <https://github.com/aliyun/aliyun-tablestore-python-sdk/blob/master/examples/transaction_and_abort.py>`_

examples/agg.py

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
# -*- coding: utf8 -*-
2+
3+
from example_config import *
4+
from tablestore import *
5+
import time
6+
import json
7+
8+
table_name = 'AggExampleTable'
9+
index_name = 'search_index_agg'
10+
client = None
11+
12+
"""
13+
TABLE DATA:
14+
15+
PK1 | PK2 | k | t | g | ka | la | l | b | d | n.t | nk | nl |
16+
---------------------------------------------------------------------------------------------------------------
17+
0 | pk_0 | key000 | this is 0 | 30,114 | ["a", "0"] | [-1, 0] | 0 | True | 0.1 | nested 0 | key000 | 0 |
18+
1 | pk_1 | key001 | this is 1 | 30,114.05 | ["a", "1"] | [-1, 1] | 1 | False | 0.1 | nested 1 | key001 | 1 |
19+
2 | pk_2 | key002 | this is 2 | 30,114.10 | ["a", "2"] | [-1, 2] | 2 | True | 0.1 | nested 2 | key002 | 2 |
20+
3 | pk_3 | key003 | this is 3 | 30,114.15 | ["a", "3"] | [-1, 3] | 3 | False | 0.1 | nested 3 | key003 | 3 |
21+
4 | pk_4 | key004 | this is 4 | 30,114.20 | ["a", "4"] | [-1, 4] | 4 | True | 0.1 | nested 4 | key004 | 4 |
22+
5 | pk_5 | key005 | this is 5 | 30,114.25 | ["a", "5"] | [-1, 5] | | False | 0.1 | nested 5 | key005 | 5 |
23+
6 | pk_6 | key006 | this is 6 | 30,114.30 | ["a", "6"] | [-1, 6] | 6 | True | 0.1 | nested 6 | key006 | 6 |
24+
7 | pk_7 | key007 | this is 7 | 30,114.35 | ["a", "7"] | [-1, 7] | 7 | False | 0.1 | nested 7 | key007 | 7 |
25+
8 | pk_8 | key008 | this is 8 | 30,114.40 | ["a", "8"] | [-1, 8] | 8 | True | 0.1 | nested 8 | key008 | 8 |
26+
9 | pk_9 | key009 | this is 9 | 30,114.45 | ["a", "9"] | [-1, 9] | 9 | False | 0.1 | nested 9 | key009 | 9 |
27+
"""
28+
29+
"""
30+
31+
Sample 1:
32+
33+
SQL: SELECT MAX(l) as max FROM AggExampleTable.search_index_agg WHERE d = 0.1
34+
35+
Result: max: 9.0
36+
37+
"""
38+
def max_agg(table_name, index_name):
39+
print('**** Begin Sample 1 ****\n')
40+
41+
query = TermQuery('d', 0.1)
42+
agg = Max('l', name = 'max')
43+
44+
search_response = client.search(table_name, index_name,
45+
SearchQuery(query, next_token = None, limit=0, aggs=[agg]),
46+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
47+
48+
for agg_result in search_response.agg_results:
49+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
50+
51+
"""
52+
53+
Sample 1.1:
54+
55+
SQL: SELECT MAX(ifnull(l, 100)) as max FROM AggExampleTable.search_index_agg WHERE d = 0.1
56+
57+
Result: max: 100
58+
59+
"""
60+
def max_agg(table_name, index_name):
61+
print('**** Begin Sample 1.1 ****\n')
62+
63+
query = TermQuery('d', 0.1)
64+
agg = Max('l', missing_value = 100, name = 'max')
65+
66+
search_response = client.search(table_name, index_name,
67+
SearchQuery(query, next_token = None, limit=0, aggs=[agg]),
68+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
69+
70+
for agg_result in search_response.agg_results:
71+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
72+
73+
"""
74+
75+
Sample 2:
76+
77+
SQL: SELECT MIN(l) as min FROM AggExampleTable.search_index_agg WHERE d = 0.1
78+
79+
Result: min: 0
80+
81+
"""
82+
def min_agg(table_name, index_name):
83+
print('**** Begin Sample 2 ****\n')
84+
85+
query = TermQuery('d', 0.1)
86+
agg = Min('l', name = 'min')
87+
88+
search_response = client.search(table_name, index_name,
89+
SearchQuery(query, next_token = None, limit=0, aggs=[agg]),
90+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
91+
92+
for agg_result in search_response.agg_results:
93+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
94+
95+
96+
"""
97+
98+
Sample 3:
99+
100+
SQL: SELECT AVG(l) as avg FROM AggExampleTable.search_index_agg WHERE d = 0.1
101+
102+
Result: avg: 4.0
103+
104+
"""
105+
def avg_agg(table_name, index_name):
106+
print('**** Begin Sample 3 ****\n')
107+
108+
query = TermQuery('d', 0.1)
109+
agg = Avg('l', name = 'avg')
110+
111+
search_response = client.search(table_name, index_name,
112+
SearchQuery(query, next_token = None, limit=2, aggs=[agg]),
113+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
114+
115+
for agg_result in search_response.agg_results:
116+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
117+
118+
"""
119+
120+
Sample 4:
121+
122+
SQL: SELECT SUM(l) as sum FROM AggExampleTable.search_index_agg WHERE d = 0.1
123+
124+
Result: sum: 40
125+
126+
"""
127+
def sum_agg(table_name, index_name):
128+
print('**** Begin Sample 4 ****\n')
129+
130+
query = TermQuery('d', 0.1)
131+
agg = Sum('l', name = 'sum')
132+
133+
search_response = client.search(table_name, index_name,
134+
SearchQuery(query, next_token = None, limit=2, aggs=[agg]),
135+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
136+
137+
for agg_result in search_response.agg_results:
138+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
139+
140+
"""
141+
142+
Sample 5:
143+
144+
SQL: SELECT COUNT(l) as count FROM AggExampleTable.search_index_agg WHERE d = 0.1
145+
146+
Result: count: 9
147+
148+
"""
149+
def count_agg(table_name, index_name):
150+
print('**** Begin Sample 5 ****\n')
151+
152+
query = TermQuery('d', 0.1)
153+
agg = Count('l', name = 'count')
154+
155+
search_response = client.search(table_name, index_name,
156+
SearchQuery(query, next_token = None, limit=2, aggs=[agg]),
157+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
158+
159+
for agg_result in search_response.agg_results:
160+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
161+
162+
"""
163+
164+
Sample 6:
165+
166+
SQL: SELECT DISTINCT COUNT(l) as dcount FROM AggExampleTable.search_index_agg WHERE d = 0.1
167+
168+
Result: dcount: 9
169+
170+
"""
171+
def distinct_count_agg(table_name, index_name):
172+
print('**** Begin Sample 6 ****\n')
173+
174+
query = TermQuery('d', 0.1)
175+
agg = DistinctCount('l', name = 'dcount')
176+
177+
search_response = client.search(table_name, index_name,
178+
SearchQuery(query, next_token = None, limit=2, aggs=[agg]),
179+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
180+
181+
for agg_result in search_response.agg_results:
182+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
183+
184+
"""
185+
186+
Sample 7:
187+
188+
SQL: SELECT TOP 3 FROM AggExampleTable.search_index_agg WHERE d = 0.1
189+
190+
Result:
191+
([('PK1', 0), ('PK2', 'pk_0')])
192+
([('PK1', 1), ('PK2', 'pk_1')])
193+
([('PK1', 2), ('PK2', 'pk_2')])
194+
195+
"""
196+
def top_rows_agg(table_name, index_name):
197+
print('**** Begin Sample 7 ****\n')
198+
199+
query = TermQuery('d', 0.1)
200+
agg = TopRows(limit = 3, sort = Sort([PrimaryKeySort()]))
201+
202+
search_response = client.search(table_name, index_name,
203+
SearchQuery(query, next_token = None, limit=0, aggs=[agg]),
204+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.NONE))
205+
206+
for agg_result in search_response.agg_results:
207+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
208+
209+
"""
210+
211+
Sample 8:
212+
213+
SQL: SELECT SUM(l) as s1, SUM(n.nl) as s2, COUNT(l) as c1 FROM AggExampleTable.search_index_agg WHERE d = 0.1
214+
215+
Result:
216+
s1 : 40
217+
s2 : 45
218+
c1 : 9
219+
220+
"""
221+
def multiple_agg(table_name, index_name):
222+
print('**** Begin Sample 8 ****\n')
223+
224+
query = TermQuery('d', 0.1)
225+
agg1 = Sum('l', name = 's1')
226+
agg2 = Sum('n.nl', name = 's2')
227+
agg3 = Count('l', name = 'c1')
228+
229+
search_response = client.search(table_name, index_name,
230+
SearchQuery(query, next_token = None, limit = 0, aggs = [agg1, agg2, agg3]),
231+
columns_to_get = ColumnsToGet(return_type = ColumnReturnType.ALL_FROM_INDEX))
232+
233+
for agg_result in search_response.agg_results:
234+
print('{\n"name":"%s",\n"value":%s\n}\n' % (agg_result.name, str(agg_result.value)))
235+
236+
def prepare_data(rows_count):
237+
print ('Begin prepare data: %d' % rows_count)
238+
for i in range(rows_count):
239+
pk = [('PK1', i), ('PK2', 'pk_' + str(i % 10))]
240+
lj = i / 100
241+
li = i % 100
242+
cols = [('k', 'key%03d' % i), ('t', 'this is ' + str(i)),
243+
('g', '%f,%f' % (30.0 + 0.05 * lj, 114.0 + 0.05 * li)), ('ka', '["a", "%d"]' % i),
244+
('la', '[-1, %d]' % i), ('l', i),
245+
('b', i % 2 == 0), ('d', 0.1),
246+
('n', json.dumps([{'nk':'key%03d' % i, 'nl':i, 'nt':'nested ' + str(i)}]))]
247+
248+
if i == 5:
249+
cols.remove(('l', 5))
250+
client.put_row(table_name, Row(pk, cols))
251+
252+
print ('End prepare data.')
253+
print ('Wait for data sync to search index.')
254+
time.sleep(10)
255+
256+
def prepare_table():
257+
table_meta = TableMeta(table_name, [('PK1', 'INTEGER'), ('PK2', 'STRING')])
258+
259+
table_options = TableOptions()
260+
reserved_throughput = ReservedThroughput(CapacityUnit(0, 0))
261+
client.create_table(table_meta, table_options, reserved_throughput)
262+
263+
def prepare_index(index_name):
264+
field_a = FieldSchema('k', FieldType.KEYWORD, index=True, enable_sort_and_agg=True, store=True)
265+
field_b = FieldSchema('t', FieldType.TEXT, index=True, store=True, analyzer=AnalyzerType.SINGLEWORD)
266+
field_c = FieldSchema('g', FieldType.GEOPOINT, index=True, store=True)
267+
field_d = FieldSchema('ka', FieldType.KEYWORD, index=True, is_array=True, store=True)
268+
field_e = FieldSchema('la', FieldType.LONG, index=True, is_array=True, store=True)
269+
field_f = FieldSchema('l', FieldType.LONG, index=True, store=True)
270+
field_g = FieldSchema('b', FieldType.BOOLEAN, index=True, store=True)
271+
field_h = FieldSchema('d', FieldType.DOUBLE, index=True, store=True)
272+
273+
field_n = FieldSchema('n', FieldType.NESTED, sub_field_schemas=[
274+
FieldSchema('nk', FieldType.KEYWORD, index=True, store=True),
275+
FieldSchema('nl', FieldType.LONG, index=True, store=True),
276+
FieldSchema('nt', FieldType.TEXT, index=True, store=True),
277+
])
278+
279+
fields = [field_a, field_b, field_c, field_d, field_e, field_f, field_g, field_h]
280+
fields.append(field_n)
281+
index_setting = IndexSetting(routing_fields=['PK1'])
282+
index_sort = None
283+
index_meta = SearchIndexMeta(fields, index_setting=index_setting, index_sort=index_sort) # default with index sort
284+
client.create_search_index(table_name, index_name, index_meta)
285+
286+
def delete_table():
287+
try:
288+
client.delete_table(table_name)
289+
except:
290+
pass
291+
292+
def delete_search_index(index_name):
293+
try:
294+
client.delete_search_index(table_name, index_name)
295+
except:
296+
pass
297+
298+
if __name__ == '__main__':
299+
client = OTSClient(OTS_ENDPOINT, OTS_ID, OTS_SECRET, OTS_INSTANCE)
300+
delete_search_index(index_name)
301+
delete_table()
302+
303+
prepare_table()
304+
prepare_index(index_name)
305+
prepare_data(10)
306+
307+
time.sleep(30)
308+
309+
# perform queries
310+
max_agg(table_name, index_name)
311+
min_agg(table_name, index_name)
312+
sum_agg(table_name, index_name)
313+
avg_agg(table_name, index_name)
314+
count_agg(table_name, index_name)
315+
distinct_count_agg(table_name, index_name)
316+
top_rows_agg(table_name, index_name)
317+
multiple_agg(table_name, index_name)
318+
319+
delete_search_index(index_name)
320+
delete_table()

0 commit comments

Comments
 (0)