Skip to content

Commit 93f72b8

Browse files
committed
移除了协议的展示,优化内存使用
1 parent df58f8e commit 93f72b8

File tree

5 files changed

+55
-48
lines changed

5 files changed

+55
-48
lines changed

bin/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def __init__(self, config_file):
1616
self.support_method = all_config.get('filter', 'support_method').split(',')
1717
self.is_with_parameters = int(all_config.get('filter', 'is_with_parameters'))
1818
self.urls_most_number = int(all_config.get('filter', 'urls_most_number'))
19+
self.urls_pv_threshold = int(all_config.get('filter', 'urls_pv_threshold'))
1920
self.fixed_parameter_keys = all_config.get('filter', 'fixed_parameter_keys').split(',')
2021
self.custom_parameters_list = all_config.get('filter', 'custom_parameters').split(',')
2122
self.custom_keys = []

bin/start.py

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515

1616
class URLData:
17-
1817
def __init__(self, url=None, pv=None, ratio=None, peak=None):
1918
self.url = url
2019
self.pv = pv
@@ -30,23 +29,23 @@ def parse_log_format():
3029
log_format_list = config.log_format.split()
3130
for item in log_format_list:
3231
if item == 'ip':
33-
log_format_index.setdefault('ip_index', log_format_list.index(item)+1)
32+
log_format_index.setdefault('ip_index', log_format_list.index(item) + 1)
3433
if item == 'real_ip':
35-
log_format_index.setdefault('real_ip_index', log_format_list.index(item)+1)
34+
log_format_index.setdefault('real_ip_index', log_format_list.index(item) + 1)
3635
if item == 'datetime':
37-
log_format_index.setdefault('time_index', log_format_list.index(item)+1)
36+
log_format_index.setdefault('time_index', log_format_list.index(item) + 1)
3837
if item == 'url':
39-
log_format_index.setdefault('url_index', log_format_list.index(item)+1)
38+
log_format_index.setdefault('url_index', log_format_list.index(item) + 1)
4039
if item == 'method':
41-
log_format_index.setdefault('method_index', log_format_list.index(item)+1)
40+
log_format_index.setdefault('method_index', log_format_list.index(item) + 1)
4241
if item == 'protocol':
43-
log_format_index.setdefault('protocol_index', log_format_list.index(item)+1)
42+
log_format_index.setdefault('protocol_index', log_format_list.index(item) + 1)
4443
if item == 'cost':
45-
log_format_index.setdefault('cost_time_index', log_format_list.index(item)+1)
44+
log_format_index.setdefault('cost_time_index', log_format_list.index(item) + 1)
4645
if 'real_ip_index' in log_format_index.keys():
47-
log_format_index.setdefault('host_index', log_format_list.index('real_ip')+1)
46+
log_format_index.setdefault('host_index', log_format_list.index('real_ip') + 1)
4847
else:
49-
log_format_index.setdefault('host_index', log_format_list.index('ip')+1)
48+
log_format_index.setdefault('host_index', log_format_list.index('ip') + 1)
5049
return log_format_index
5150

5251

@@ -113,7 +112,7 @@ def parse_log_file(target_file, log_format):
113112
pattern = re.compile(config.log_pattern)
114113

115114
# 第一次读取整个文件,获取对应的请求时间、请求URL、请求方法、用户IP、请求响应时间等数据
116-
with open('../data/'+target_file, 'r') as f:
115+
with open('../data/' + target_file, 'r') as f:
117116
for line in f:
118117
match = pattern.match(line)
119118
if match is None:
@@ -138,17 +137,19 @@ def parse_log_file(target_file, log_format):
138137
method_counts['post'] += 1
139138
if method == 'GET':
140139
method_counts['get'] += 1
141-
protocol = match.group(log_format.get('protocol_index'))
142-
urls.append(method+' '+url+' '+protocol)
140+
urls.append(method + ' ' + url)
143141
if 'cost_time_index' in log_format.keys():
144-
cost_time_list.append({'time': log_time, 'cost_time': int(float(match.group(log_format.get('cost_time_index')))*1000)})
142+
if cost_time_flag:
143+
cost_time_list.append({'time': log_time, 'cost_time': int(float(match.group(log_format.get('cost_time_index'))) * 1000)})
144+
else:
145+
cost_time_list.append({'time': '', 'cost_time': int(float(match.group(log_format.get('cost_time_index'))) * 1000)})
145146

146147
# 计算PV、UV、平均请求数、GET/POST占比
147148
pv = len(times)
148149
uv = len(set(hosts))
149-
response_avg = int(pv/len(set(times)))
150-
method_counts['post_percentile'] = int(method_counts['post']*100/pv)
151-
method_counts['get_percentile'] = int(method_counts['get']*100/pv)
150+
response_avg = int(pv / len(set(times)))
151+
method_counts['post_percentile'] = int(method_counts['post'] * 100 / pv)
152+
method_counts['get_percentile'] = int(method_counts['get'] * 100 / pv)
152153

153154
# 获取每小时、每分钟、每秒的请求数量
154155
hours_counter = Counter(hours)
@@ -167,11 +168,12 @@ def parse_log_file(target_file, log_format):
167168
# 计算请求占比
168169
url_data_list = []
169170
for item in urls_most_common:
170-
ratio = '%0.3f' % float(item[1]*100/float(pv))
171-
url_data_list.append(URLData(url=item[0], pv=item[1], ratio=ratio))
171+
if item[1] >= config.urls_pv_threshold:
172+
ratio = '%0.3f' % float(item[1] * 100 / float(pv))
173+
url_data_list.append(URLData(url=item[0], pv=item[1], ratio=ratio))
172174

173175
# 第二次读取文件,以获取特定请求的访问时间及响应时间
174-
with open('../data/'+target_file, 'r') as f:
176+
with open('../data/' + target_file, 'r') as f:
175177
for line in f:
176178
match = pattern.match(line)
177179
if match is None:
@@ -181,9 +183,8 @@ def parse_log_file(target_file, log_format):
181183
url = get_new_url(match.group(log_format.get('url_index')))
182184
else:
183185
url = match.group(log_format.get('url_index')).split('?')[0]
184-
protocol = match.group(log_format.get('protocol_index'))
185186
for url_data in url_data_list:
186-
if url_data.url == method+' '+url+' '+protocol:
187+
if url_data.url == ' '.join([method, url]):
187188
url_data.time.append(match.group(log_format.get('time_index')))
188189
if 'cost_time_index' in log_format.keys():
189190
url_data.cost.append(float(match.group(log_format.get('cost_time_index'))))
@@ -233,27 +234,27 @@ def parse_log_file(target_file, log_format):
233234
if cost_time_list:
234235
total_cost_time_pv = float(len(cost_time_list))
235236
if cost_time_range['r1']:
236-
cost_time_range_percentile['r1p'] = '%0.3f' % float(cost_time_range['r1']*100/total_cost_time_pv)
237+
cost_time_range_percentile['r1p'] = '%0.3f' % float(cost_time_range['r1'] * 100 / total_cost_time_pv)
237238
if cost_time_range['r2']:
238-
cost_time_range_percentile['r2p'] = '%0.3f' % float(cost_time_range['r2']*100/total_cost_time_pv)
239+
cost_time_range_percentile['r2p'] = '%0.3f' % float(cost_time_range['r2'] * 100 / total_cost_time_pv)
239240
if cost_time_range['r3']:
240-
cost_time_range_percentile['r3p'] = '%0.3f' % float(cost_time_range['r3']*100/total_cost_time_pv)
241+
cost_time_range_percentile['r3p'] = '%0.3f' % float(cost_time_range['r3'] * 100 / total_cost_time_pv)
241242
if cost_time_range['r4']:
242-
cost_time_range_percentile['r4p'] = '%0.3f' % float(cost_time_range['r4']*100/total_cost_time_pv)
243+
cost_time_range_percentile['r4p'] = '%0.3f' % float(cost_time_range['r4'] * 100 / total_cost_time_pv)
243244
if cost_time_range['r5']:
244-
cost_time_range_percentile['r5p'] = '%0.3f' % float(cost_time_range['r5']*100/total_cost_time_pv)
245+
cost_time_range_percentile['r5p'] = '%0.3f' % float(cost_time_range['r5'] * 100 / total_cost_time_pv)
245246
if cost_time_range['r6']:
246-
cost_time_range_percentile['r6p'] = '%0.3f' % float(cost_time_range['r6']*100/total_cost_time_pv)
247+
cost_time_range_percentile['r6p'] = '%0.3f' % float(cost_time_range['r6'] * 100 / total_cost_time_pv)
247248
if cost_time_range['r7']:
248-
cost_time_range_percentile['r7p'] = '%0.3f' % float(cost_time_range['r7']*100/total_cost_time_pv)
249+
cost_time_range_percentile['r7p'] = '%0.3f' % float(cost_time_range['r7'] * 100 / total_cost_time_pv)
249250
if cost_time_range['r8']:
250-
cost_time_range_percentile['r8p'] = '%0.3f' % float(cost_time_range['r8']*100/total_cost_time_pv)
251+
cost_time_range_percentile['r8p'] = '%0.3f' % float(cost_time_range['r8'] * 100 / total_cost_time_pv)
251252
if cost_time_range['r9']:
252-
cost_time_range_percentile['r9p'] = '%0.3f' % float(cost_time_range['r9']*100/total_cost_time_pv)
253+
cost_time_range_percentile['r9p'] = '%0.3f' % float(cost_time_range['r9'] * 100 / total_cost_time_pv)
253254
if cost_time_range['r10']:
254-
cost_time_range_percentile['r10p'] = '%0.3f' % float(cost_time_range['r10']*100/total_cost_time_pv)
255+
cost_time_range_percentile['r10p'] = '%0.3f' % float(cost_time_range['r10'] * 100 / total_cost_time_pv)
255256
if cost_time_range['r11']:
256-
cost_time_range_percentile['r11p'] = '%0.3f' % float(cost_time_range['r11']*100/total_cost_time_pv)
257+
cost_time_range_percentile['r11p'] = '%0.3f' % float(cost_time_range['r11'] * 100 / total_cost_time_pv)
257258

258259
total_data = {'pv': pv, 'uv': uv, 'response_avg': response_avg, 'response_peak': response_peak,
259260
'response_peak_time': response_peak_time, 'url_data_list': url_data_list,
@@ -282,22 +283,22 @@ def parse_log_file_with_goaccess(target_file):
282283

283284

284285
def main():
285-
286286
log_format = parse_log_format()
287287

288288
result_files = [result_file.replace('.html', '') for result_file in get_dir_files('../result/report/')]
289289
target_files = sorted([data_file for data_file in get_dir_files('../data') if data_file not in result_files])
290290

291291
for target_file in target_files:
292-
print datetime.datetime.now(), ' Start parse file : '+target_file
292+
print datetime.datetime.now(), ' Start parse file : ' + target_file
293293

294294
parse_log_file(target_file, log_format)
295295
if config.goaccess_flag:
296296
parse_log_file_with_goaccess(target_file)
297297

298-
print datetime.datetime.now(), ' End parse file: '+target_file
298+
print datetime.datetime.now(), ' End parse file: ' + target_file
299299

300300
update_index_html()
301301

302+
302303
if __name__ == '__main__':
303304
main()

bin/templates/report.html

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ <h1 align="center"><a href="https://github.com/JeffXue/web-log-parser" target="g
9696
<th rowspan="2"><strong>比例</strong></th>
9797
<th rowspan="2"><strong>每秒最大处<br/>理消息数量</strong></th>
9898
<th rowspan="2"><strong>Method</strong></th>
99-
<th rowspan="2"><strong>Protocol</strong></th>
10099
<th colspan="4"><strong>耗时(秒)</strong></th>
101100
</tr>
102101
<tr>
@@ -112,7 +111,6 @@ <h1 align="center"><a href="https://github.com/JeffXue/web-log-parser" target="g
112111
<th><strong>比例</strong></th>
113112
<th><strong>每秒最大处<br/>理消息数量</strong></th>
114113
<th><strong>Method</strong></th>
115-
<th><strong>Protocol</strong></th>
116114
</tr>
117115
{% endif %}
118116

@@ -128,7 +126,6 @@ <h1 align="center"><a href="https://github.com/JeffXue/web-log-parser" target="g
128126
<td><strong>{{ url_data.ratio }}%</strong></td>
129127
<td><strong>{{ url_data.peak }}</strong></td>
130128
<td><strong>{{ url_data.url.split()[0].replace('"', '') }}</strong></td>
131-
<td><strong>{{ url_data.url.split()[2].replace('"', '') }}</strong></td>
132129
<td><strong>{{ url_data.cost_time['avg'] }}</strong></td>
133130
<td><strong>{{ url_data.cost_time['p9'] }}</strong></td>
134131
<td><strong>{{ url_data.cost_time['p5'] }}</strong></td>
@@ -143,7 +140,6 @@ <h1 align="center"><a href="https://github.com/JeffXue/web-log-parser" target="g
143140
<td><strong>{{ url_data.ratio }}%</strong></td>
144141
<td><strong>{{ url_data.peak }}</strong></td>
145142
<td><strong>{{ url_data.url.split()[0].replace('"', '') }}</strong></td>
146-
<td><strong>{{ url_data.url.split()[2].replace('"', '') }}</strong></td>
147143
</tr>
148144
{% endif %}
149145
{% endfor %}

bin/templates/url.html

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@
1010
<th>序号</th>
1111
<th colspan="11">URL</th>
1212
<th>Method</th>
13-
<th>Protocol</th>
1413
</tr>
1514
{% for url_data in url_datas %}
1615
<tr>
1716
<td>{{ loop.index }}</td>
1817
<td colspan="11">{{ url_data.split()[1]|replace("&amp;", "&")|wordwrap(width=70, break_long_words=True, wrapstring="<br/>")|safe }}</td>
1918
<td>{{ url_data.split()[0].replace('"', '') }}</td>
20-
<td>{{ url_data.split()[2].replace('"', '') }}</td>
2119
</tr>
2220
{% endfor %}
2321
</table>

conf/config.ini

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,32 @@
11
[format]
2-
log-pattern=(\S+)\s-\s-\s\[([^]]+)\s\S+]\s"(\w+)\s(\S+)\s([^"]+)"\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s"([^"]+)"\s"([^"]+)"\s"([^"]+)"\s(\S+)\s"([^"]+)"\s(\S+).*
3-
log-format=ip datetime method url protocol status business_status instance_id length referer agent real_ip cost host hostname
2+
#log-pattern=(\S+)\s-\s-\s\[([^]]+)\s\S+]\s"(\w+)\s(\S+)\s([^"]+)"\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s"([^"]+)"\s"([^"]+)"\s"([^"]+)"\s(\S+)\s"([^"]+)"\s(\S+).*
3+
#log-format=ip datetime method url protocol status business_status instance_id length referer agent real_ip cost host hostname
4+
5+
log-pattern=(\S+)\s\S+\s(\S+)\s(\S+)\s(\d+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)
6+
log-format=datetime method url status protocol business_status cost host hostname real_ip
47

58
[filter]
69
support_method=POST,GET
7-
is_with_parameters=1
8-
urls_most_number=100
10+
is_with_parameters=0
11+
urls_most_number=200
12+
urls_pv_threshold=1000
913
fixed_parameter_keys=action,submitType,reportType
1014
custom_parameters=t={timeStamp},v={timeStamp},_={timeStamp}
11-
ignore_urls=/slb.html,/server-status,/httpstatus.html
15+
ignore_urls=/slb.html,/server-status,/httpstatus.html,/server-status-dinghuo/,/server-status-dinghuo
1216
static-file=css,CSS,dae,DAE,eot,EOT,gif,GIF,ico,ICO,jpeg,JPEG,jpg,JPG,js,JS,map,MAP,mp3,MP3,pdf,PDF,png,PNG,svg,SVG,swf,SWF,ttf,TTF,txt,TXT,woff,WOFF
1317

1418
[report]
19+
# 是否开启每秒PV曲线图
1520
second_line_flag=0
21+
# 是否开启耗时占比分布图
1622
cost_time_percentile_flag=1
23+
# 是否开启耗时分布图
1724
cost_time_flag=0
18-
cost_time_threshold=0.200
25+
# 耗时阈值,超过该值的请求会标红
26+
cost_time_threshold=0.500
27+
# 是否上传数据
28+
upload_flag=1
29+
upload_url=http://127.0.0.1:8000/log/upload/
1930

2031
[goaccess]
2132
goaccess_flag=0

0 commit comments

Comments
 (0)