Skip to content

Commit 7cf92c1

Browse files
committed
支持过滤不必要的后缀,提升统计的准确率
1 parent 77396ee commit 7cf92c1

File tree

3 files changed

+39
-15
lines changed

3 files changed

+39
-15
lines changed

bin/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ def __init__(self, config_file):
2020
self.urls_pv_threshold = int(all_config.get('filter', 'urls_pv_threshold'))
2121
self.urls_pv_threshold_time = int(all_config.get('filter', 'urls_pv_threshold_time'))
2222
self.urls_pv_threshold_min = int(all_config.get('filter', 'urls_pv_threshold_min'))
23+
24+
self.ignore_url_suffix = all_config.get('filter', 'ignore_url_suffix').split(',')
25+
2326
self.fixed_parameter_keys = all_config.get('filter', 'fixed_parameter_keys').split(',')
2427
self.custom_parameters_list = all_config.get('filter', 'custom_parameters').split(',')
2528
self.custom_keys = []

bin/start.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def is_ignore_url(url):
7474
return True
7575

7676

77-
def get_new_url(origin_url):
77+
def get_new_url_with_parameters(origin_url):
7878
if len(origin_url.split('?')) == 1:
7979
return origin_url
8080
url_front = origin_url.split('?')[0]
@@ -112,6 +112,36 @@ def get_new_url_for_always_parameters(origin_url):
112112
return new_url
113113

114114

115+
def ignore_url_suffix(origin_url, suffixes=None):
    """Strip a configured suffix from the path part of a URL.

    Only the path (the text before the first '?') is examined; the query
    string, if any, is returned unchanged and still separated from the path
    by '?'. At most one suffix is removed: the first entry of ``suffixes``
    that the path ends with.

    :param origin_url: request URL as found in the log line, optionally
        followed by '?' and a query string.
    :param suffixes: iterable of suffix strings to strip (e.g. ['.json']).
        Defaults to ``config.ignore_url_suffix`` when omitted.
    :return: the URL with the matching trailing suffix removed from its path.
    """
    if suffixes is None:
        suffixes = config.ignore_url_suffix

    # partition() keeps the separator, so the '?' is restored on output and
    # any further '?' characters stay inside the query untouched.
    uri, separator, query = origin_url.partition('?')

    for suffix in suffixes:
        suffix = suffix.strip()
        # An empty entry (empty config value, trailing comma) would match
        # every path via endswith('') and must be skipped.
        if suffix and uri.endswith(suffix):
            # Slice off only the trailing occurrence; splitting on the suffix
            # would truncate at an earlier occurrence inside the path.
            uri = uri[:-len(suffix)]
            break

    return uri + separator + query
131+
132+
133+
def get_url(match, log_format):
    """Extract the URL used for statistics from a matched log line.

    The raw URL is read from the regex group named by
    ``log_format['url_index']`` and passed through ``ignore_url_suffix``.
    It is then normalised according to the configuration:

    * ``config.is_with_parameters`` set: ``get_new_url_with_parameters``;
    * otherwise, ``config.always_parameter_keys`` set:
      ``get_new_url_for_always_parameters``;
    * otherwise: the path only, with the query string dropped.

    :param match: regex match object for one log line.
    :param log_format: mapping that provides the 'url_index' group index.
    :return: the normalised URL string.
    """
    origin_url = ignore_url_suffix(match.group(log_format.get('url_index')))
    if config.is_with_parameters:
        return get_new_url_with_parameters(origin_url)
    if config.always_parameter_keys:
        return get_new_url_for_always_parameters(origin_url)
    # Work on the URL string itself: passing it to match.group() treats the
    # URL text as a group name and raises IndexError. Suffix handling is
    # already done by ignore_url_suffix, so only the query is dropped here.
    return origin_url.split('?')[0]
143+
144+
115145
def parse_log_file(target_file, log_format):
116146
# 用户IP
117147
hosts = []
@@ -147,13 +177,7 @@ def parse_log_file(target_file, log_format):
147177
match = pattern.match(line)
148178
if match is None:
149179
continue
150-
if config.is_with_parameters:
151-
url = get_new_url(match.group(log_format.get('url_index')))
152-
else:
153-
if config.always_parameter_keys:
154-
url = get_new_url_for_always_parameters(match.group(log_format.get('url_index')))
155-
else:
156-
url = match.group(log_format.get('url_index')).split('?')[0]
180+
url = get_url(match, log_format)
157181
if is_ignore_url(url):
158182
continue
159183
if match.group(log_format.get('method_index')) not in config.support_method:
@@ -228,13 +252,7 @@ def parse_log_file(target_file, log_format):
228252
if match is None:
229253
continue
230254
method = match.group(log_format.get('method_index'))
231-
if config.is_with_parameters:
232-
url = get_new_url(match.group(log_format.get('url_index')))
233-
else:
234-
if config.always_parameter_keys:
235-
url = get_new_url_for_always_parameters(match.group(log_format.get('url_index')))
236-
else:
237-
url = match.group(log_format.get('url_index')).split('?')[0]
255+
url = get_url(match, log_format)
238256
for url_data in url_data_list:
239257
if url_data.url == ' '.join([method, url]):
240258
url_data.time.append(match.group(log_format.get('time_index')))

conf/config.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ urls_pv_threshold=1000
1919
urls_pv_threshold_time=600
2020
urls_pv_threshold_min=500
2121

22+
# 统计时忽略的url后缀(多个后缀用逗号分隔),如请求是/customer/get/list.json,将会重写为/customer/get/list进行统计
23+
ignore_url_suffix=.json
24+
2225
# 固定的参数,当is_with_parameters=1时,不会替换以下key的值
2326
fixed_parameter_keys=action,submitType,reportType
2427
# 自定义的参数转换

0 commit comments

Comments
 (0)