Skip to content

Commit 51e60b6

Browse files
committed
! improve show-duplicate-java-classes: output the duplicate ratio and the number of classes in each class path oldratlee#59
1 parent 352a288 commit 51e60b6

File tree

2 files changed

+53
-46
lines changed

2 files changed

+53
-46
lines changed

bin/show-duplicate-java-classes

Lines changed: 30 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -84,13 +84,17 @@ def print_box_message(msg):
8484
print('=' * 80)
8585

8686

87+
def str_len(x):
88+
return len(str(x))
89+
90+
8791
def list_jar_file_under_lib_dirs(lib_dirs, recursive):
8892
jar_files = set()
8993

90-
idx_str_max_len = len(str(len(lib_dirs)))
94+
max_idx_str_len = str_len(len(lib_dirs))
9195
for idx, lib_dir in enumerate(lib_dirs, start=1):
9296
print_responsive_message('list jar file under lib dir(%*s/%s): %s' % (
93-
idx_str_max_len, idx, len(lib_dirs), lib_dir))
97+
max_idx_str_len, idx, len(lib_dirs), lib_dir))
9498

9599
if not exists(lib_dir):
96100
print_error('WARN: lib dir %s not exists, ignored!' % lib_dir)
@@ -126,7 +130,7 @@ def list_class_under_jar_file(jar_file, recursive, progress):
126130
if recursive:
127131
index_marker = ' #%3s' % index
128132
print_responsive_message('list class under jar file(%*s/%s%s): %s' % (
129-
len(str(progress[1])), progress[0], progress[1], index_marker, jar_jar_path))
133+
str_len(progress[1]), progress[0], progress[1], index_marker, jar_jar_path))
130134

131135
ret = {}
132136
classes = {f for f in zf.namelist() if f.lower().endswith('.class')}
@@ -155,7 +159,7 @@ def list_class_under_jar_file(jar_file, recursive, progress):
155159

156160
def list_class_under_class_dir(class_dir, progress):
157161
print_responsive_message('list class under class dir(%*s/%s): %s' % (
158-
len(str(progress[1])), progress[0], progress[1], class_dir))
162+
str_len(progress[1]), progress[0], progress[1], class_dir))
159163

160164
if not exists(class_dir):
161165
print_error('WARN: class dir %s not exists, ignored!' % class_dir)
@@ -215,7 +219,7 @@ def find_duplicate_classes(class_to_class_paths):
215219
return class_paths_to_duplicate_classes
216220

217221

218-
def print_duplicate_classes_info(class_paths_to_duplicate_classes):
222+
def print_duplicate_classes_info(class_paths_to_duplicate_classes, class_path_to_classes):
219223
if not class_paths_to_duplicate_classes:
220224
print('COOL! No duplicate classes found!')
221225
return
@@ -244,34 +248,37 @@ def print_duplicate_classes_info(class_paths_to_duplicate_classes):
244248
# use - operator of number key for reverse sort key
245249
class_paths_to_duplicate_classes.sort(key=lambda item: (-len(item[0]), -len(item[1]), item[0]))
246250

247-
idx_str_max_len = len(str(len(class_paths_to_duplicate_classes)))
251+
max_idx_str_len = str_len(len(class_paths_to_duplicate_classes))
248252
for idx, (class_paths, classes) in enumerate(class_paths_to_duplicate_classes, start=1):
249-
print('[%*s] found %s duplicate classes in %s class paths:' % (
250-
idx_str_max_len, idx, len(classes), len(class_paths)))
253+
duplicate_ratio = len(classes) / min((len(class_path_to_classes[cp]) for cp in class_paths))
254+
print('[%*s] found %s(%.3g%%) duplicate classes in %s class paths:' % (
255+
max_idx_str_len, idx, len(classes), duplicate_ratio * 100, len(class_paths)))
251256

252-
class_path_idx_str_max_len = len(str(len(class_paths)))
257+
max_class_path_idx_str_len = str_len(len(class_paths))
258+
max_classes_count_str_len = str_len(max(len(class_path_to_classes[cp]) for cp in class_paths))
253259
for i, cp in enumerate(class_paths, start=1):
254-
print(' %*s: %s' % (class_path_idx_str_max_len, i, cp))
260+
print(' %*s: (contain %*s classes) %s' % (
261+
max_class_path_idx_str_len, i, max_classes_count_str_len, len(class_path_to_classes[cp]), cp))
255262

256263
print_box_message('Duplicate classes detail info:')
257264
for idx, (class_paths, classes) in enumerate(class_paths_to_duplicate_classes, start=1):
258265
print('[%*s] found %s duplicate classes in %s class paths %s :' % (
259-
idx_str_max_len, idx,
260-
len(classes), len(class_paths), ' '.join(class_paths)))
266+
max_idx_str_len, idx, len(classes), len(class_paths), ' '.join(class_paths)))
261267

262-
class_idx_str_max_len = len(str(len(classes)))
268+
max_class_idx_str_len = str_len(len(classes))
263269
for i, c in enumerate(classes, start=1):
264-
print(' %*s: %s' % (class_idx_str_max_len, i, c))
265-
270+
print(' %*s: %s' % (max_class_idx_str_len, i, c))
266271

267-
def print_class_paths_info(class_paths):
268-
class_paths = sorted(class_paths)
269272

270-
print_box_message('Find in %s class paths:' % len(class_paths))
273+
def print_class_paths_info(class_path_to_classes):
274+
max_idx_str_len = str_len(len(class_path_to_classes))
275+
max_classes_count_str_len = str_len(max(len(classes) for classes in class_path_to_classes.values()))
271276

272-
idx_str_max_len = len(str(len(class_paths)))
273-
for idx, class_path in enumerate(class_paths, start=1):
274-
print('%*s: %s' % (idx_str_max_len, idx, class_path))
277+
class_path_to_classes = sorted(class_path_to_classes.items(), key=lambda item: item[0])
278+
print_box_message('Find in %s class paths:' % len(class_path_to_classes))
279+
for idx, (cp, classes) in enumerate(class_path_to_classes, start=1):
280+
print('%*s: (contain %*s classes) %s' % (
281+
max_idx_str_len, idx, max_classes_count_str_len, len(classes), cp))
275282

276283

277284
def main():
@@ -311,8 +318,8 @@ def main():
311318
class_paths_to_duplicate_classes = find_duplicate_classes(class_to_class_paths)
312319

313320
clear_responsive_message()
314-
print_duplicate_classes_info(class_paths_to_duplicate_classes)
315-
print_class_paths_info(class_path_to_classes.keys())
321+
print_duplicate_classes_info(class_paths_to_duplicate_classes, class_path_to_classes)
322+
print_class_paths_info(class_path_to_classes)
316323

317324
return int(bool(class_paths_to_duplicate_classes))
318325

docs/java.md

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -356,27 +356,27 @@ COOL! No duplicate classes found!
356356
================================================================================
357357
Find in 150 class paths:
358358
================================================================================
359-
1: WEB-INF/lib/aopalliance-1.0.jar
360-
2: WEB-INF/lib/asm-3.2.jar
361-
3: WEB-INF/lib/aspectjrt-1.6.1.jar
362-
4: WEB-INF/lib/aspectjweaver-1.6.6.jar
359+
1: (contain 9 classes) WEB-INF/lib/aopalliance-1.0.jar
360+
2: (contain 25 classes) WEB-INF/lib/asm-5.0.4.jar
361+
3: (contain 313 classes) WEB-INF/lib/aviator-5.0.0.jar
362+
4: (contain 687 classes) WEB-INF/lib/cassandra-0.6.1.jar
363363
...
364364
365365
$ show-duplicate-java-classes -c WEB-INF/classes WEB-INF/lib
366366
Found 1272 duplicate classes in 345 class paths and 9 class path sets:
367-
[1] found 188 duplicate classes in 3 class paths:
368-
1: WEB-INF/lib/jdom-2.0.2.jar
369-
2: WEB-INF/lib/jdom2-2.0.6.jar
370-
3: WEB-INF/lib/jdom2-2.0.8.jar
371-
[2] found 150 duplicate classes in 2 class paths:
372-
1: WEB-INF/lib/netty-all-4.0.35.Final.jar
373-
2: WEB-INF/lib/netty-common-4.1.31.Final.jar
374-
[3] found 148 duplicate classes in 2 class paths:
375-
1: WEB-INF/lib/netty-all-4.0.35.Final.jar
376-
2: WEB-INF/lib/netty-handler-4.1.31.Final.jar
377-
[4] found 103 duplicate classes in 2 class paths:
378-
1: WEB-INF/lib/hessian-3.0.14.bugfix-tae3.jar
379-
2: WEB-INF/lib/hessian-4.0.38.jar
367+
[1] found 188(100%) duplicate classes in 3 class paths:
368+
1: (contain 188 classes) WEB-INF/lib/jdom-2.0.2.jar
369+
2: (contain 195 classes) WEB-INF/lib/jdom2-2.0.6.jar
370+
3: (contain 195 classes) WEB-INF/lib/jdom2-2.0.8.jar
371+
[2] found 150(33.8%) duplicate classes in 2 class paths:
372+
1: (contain 1385 classes) WEB-INF/lib/netty-all-4.0.35.Final.jar
373+
2: (contain 444 classes) WEB-INF/lib/netty-common-4.1.31.Final.jar
374+
[3] found 148(55.4%) duplicate classes in 2 class paths:
375+
1: (contain 1385 classes) WEB-INF/lib/netty-all-4.0.35.Final.jar
376+
2: (contain 267 classes) WEB-INF/lib/netty-handler-4.1.31.Final.jar
377+
[4] found 103(82.4%) duplicate classes in 2 class paths:
378+
1: (contain 125 classes) WEB-INF/lib/hessian-3.0.14.bugfix.jar
379+
2: (contain 275 classes) WEB-INF/lib/hessian-4.0.38.jar
380380
...
381381
382382
================================================================================
@@ -401,12 +401,12 @@ Duplicate classes detail info:
401401
================================================================================
402402
Find in 232 class paths:
403403
================================================================================
404-
1: WEB-INF/classes
405-
2: WEB-INF/lib/HikariCP-2.7.8.jar
406-
3: WEB-INF/lib/accessors-smart-1.2.jar
407-
4: WEB-INF/lib/alimonitor-jmonitor-1.1.3.jar
408-
5: WEB-INF/lib/aopalliance-1.0.jar
409-
6: WEB-INF/lib/asm-5.0.4.jar
404+
1: (contain 42 classes) WEB-INF/classes
405+
2: (contain 70 classes) WEB-INF/lib/HikariCP-2.7.8.jar
406+
3: (contain 13 classes) WEB-INF/lib/accessors-smart-1.2.jar
407+
4: (contain 9 classes) WEB-INF/lib/aopalliance-1.0.jar
408+
5: (contain 25 classes) WEB-INF/lib/asm-5.0.4.jar
409+
6: (contain 313 classes) WEB-INF/lib/aviator-5.0.0.jar
410410
...
411411
```
412412

0 commit comments

Comments
 (0)