Skip to content

Commit fcff5c5

Browse files
committed
+ show-duplicate-java-classes: support finding jar files in lib sub-dirs and inside jar files recursively
1 parent 6896589 commit fcff5c5

File tree

2 files changed

+80
-19
lines changed

2 files changed

+80
-19
lines changed

bin/show-duplicate-java-classes

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22
# -*- coding: utf-8 -*-
33
# @Function
4-
# Find duplicate class among java libs.
4+
# Find duplicate classes among java lib dirs and class dirs.
55
#
66
# @Usage
77
# $ show-duplicate-java-classes # find jars from current dir
@@ -17,6 +17,7 @@ __author__ = 'tg123'
1717
import re
1818
import sys
1919
from glob import glob
20+
from io import BytesIO
2021
from optparse import OptionParser
2122
from os import walk
2223
from os.path import relpath, isdir
@@ -34,23 +35,52 @@ def print_box_message(msg):
3435
print('=' * 80)
3536

3637

37-
def list_jar_file_under_lib_dirs(lib_dirs):
38+
def list_jar_file_under_lib_dirs(lib_dirs, recursive):
3839
jar_files = set()
3940
for lib_dir in lib_dirs:
4041
if isdir(lib_dir):
41-
jar_files |= {f for f in glob(lib_dir + '/*.jar')}
42+
if recursive:
43+
jar_files |= {
44+
dir_path + '/' + filename
45+
for dir_path, _, file_names in walk(lib_dir)
46+
for filename in file_names if filename.lower().endswith('.jar')
47+
}
48+
else:
49+
jar_files |= {f for f in glob(lib_dir + '/*.jar')}
4250
else:
4351
jar_files.add(lib_dir)
4452
return jar_files
4553

4654

47-
def list_class_under_jar_file(jar_file):
55+
def list_class_under_jar_file(jar_file, recursive):
56+
"""
57+
:return: dict mapping each (possibly nested) jar path, joined by '!' (e.g. 'a.jar!b.jar!c.jar'), to the set of .class entries in that jar
58+
"""
59+
60+
def list_zip_in_zip(jar_paths, zf):
61+
ret = {}
62+
classes = {f for f in zf.namelist() if f.lower().endswith('.class')}
63+
ret[jar_paths] = classes
64+
if not recursive:
65+
return ret
66+
67+
jars_in_jar = {f for f in zf.namelist() if f.lower().endswith('.jar')}
68+
for jar in jars_in_jar:
69+
zip_paths = jar_paths + '!' + jar
70+
try:
71+
with ZipFile(BytesIO(zf.read(jar))) as f:
72+
ret.update(list_zip_in_zip(zip_paths, f))
73+
except BadZipfile as e:
74+
print('WARN: %s is bad zip file(%s), ignored!' % (zip_paths, e), file=sys.stderr)
75+
76+
return ret
77+
4878
try:
49-
with ZipFile(jar_file) as zf:
50-
return {f for f in zf.namelist() if f.lower().endswith('.class')}
51-
except BadZipfile:
52-
print('WARN: %s is bad zip file, ignored!' % jar_file, file=sys.stderr)
53-
return set()
79+
with ZipFile(jar_file) as zip_file:
80+
return list_zip_in_zip(jar_file, zip_file)
81+
except BadZipfile as error:
82+
print('WARN: %s is bad zip file(%s), ignored!' % (jar_file, error), file=sys.stderr)
83+
return {}
5484

5585

5686
def list_class_under_class_dir(class_dir):
@@ -63,20 +93,24 @@ def list_class_under_class_dir(class_dir):
6393
# biz functions
6494
################################################################################
6595

66-
def build_index__class_to_class_paths(jar_files, class_dirs):
96+
def build_index__class_to_class_paths(jar_files, class_dirs, recursive_jar):
6797
class_to_class_paths = {}
98+
class_paths = set()
6899

69100
# list all classes in jar files
70101
for jar_file in jar_files:
71-
for class_file in list_class_under_jar_file(jar_file):
72-
class_to_class_paths.setdefault(class_file, set()).add(jar_file)
102+
for jar_jar_path, classes in list_class_under_jar_file(jar_file, recursive=recursive_jar).items():
103+
class_paths.add(jar_jar_path)
104+
for clazz in classes:
105+
class_to_class_paths.setdefault(clazz, set()).add(jar_jar_path)
73106

74107
# list all classes in class dirs
75108
for class_dir in class_dirs:
109+
class_paths.add(class_dir)
76110
for class_file in list_class_under_class_dir(class_dir):
77111
class_to_class_paths.setdefault(class_file, set()).add(class_dir)
78112

79-
return class_to_class_paths, jar_files | set(class_dirs)
113+
return class_to_class_paths, class_paths
80114

81115

82116
_java9_module_file_pattern = re.compile(r'(^|.*/)module-info\.class$')
@@ -136,16 +170,31 @@ def print_class_paths_info(class_paths):
136170

137171

138172
def main():
139-
option_parser = OptionParser('usage: %prog '
140-
'[-c class-dir1 [-c class-dir2] ...] '
141-
'[lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]')
142-
option_parser.add_option('-c', '--class-dir', dest='class_dirs', default=[], action='append', help='add class dir')
173+
option_parser = OptionParser(
174+
usage='%prog [OPTION]...'
175+
' [-c class-dir1 [-c class-dir2] ...]'
176+
' [lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]'
177+
'\nFind duplicate classes among java lib dirs and class dirs.'
178+
'\n\nExamples:'
179+
'\n %prog # find jars from current dir'
180+
'\n %prog path/to/lib_dir1 /path/to/lib_dir2'
181+
'\n %prog -c path/to/class_dir1 -c /path/to/class_dir2'
182+
)
183+
option_parser.add_option('-L', '--recursive-lib', dest='recursive_lib', default=False,
184+
action='store_true', help='find jars in the sub-directories of lib dir')
185+
option_parser.add_option('-J', '--recursive-jar', dest='recursive_jar', default=False,
186+
action='store_true', help='find jars in the jar file')
187+
option_parser.add_option('-c', '--class-dir', dest='class_dirs', default=[],
188+
action='append', help='add class dir')
143189
options, lib_dirs = option_parser.parse_args()
190+
recursive_lib = options.recursive_lib
191+
recursive_jar = options.recursive_jar
144192
if not options.class_dirs and not lib_dirs:
145193
lib_dirs = ['.']
146194

147195
class_to_class_paths, class_paths = build_index__class_to_class_paths(
148-
list_jar_file_under_lib_dirs(lib_dirs), options.class_dirs)
196+
list_jar_file_under_lib_dirs(lib_dirs, recursive=recursive_lib),
197+
options.class_dirs, recursive_jar=recursive_jar)
149198

150199
class_paths_to_duplicate_classes = find_duplicate_classes(class_to_class_paths)
151200

docs/java.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,10 @@ show-duplicate-java-classes
243243
244244
# 查找多个指定目录下所有Jar中的重复类
245245
show-duplicate-java-classes path/to/lib_dir1 /path/to/lib_dir2
246+
# 通过 -L 选项,查找子目录中的Jar文件
247+
show-duplicate-java-classes -L path/to/lib_dir1
248+
# 通过 -J 选项,查找Jar文件中的Jar文件(即查找FatJar中包含的Jar)
249+
show-duplicate-java-classes -J path/to/lib_dir1
246250
247251
# 查找多个指定Class目录下的重复类。 Class目录 通过 -c 选项指定
248252
show-duplicate-java-classes -c path/to/class_dir1 -c /path/to/class_dir2
@@ -252,10 +256,18 @@ show-duplicate-java-classes path/to/lib_dir1 /path/to/lib_dir2 -c path/to/class_
252256
253257
# 帮助信息
254258
$ show-duplicate-java-classes -h
255-
Usage: show-duplicate-java-classes [-c class-dir1 [-c class-dir2] ...] [lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]
259+
Usage: show-duplicate-java-classes [OPTION]... [-c class-dir1 [-c class-dir2] ...] [lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]
260+
Find duplicate classes among java lib dirs and class dirs.
261+
262+
Examples:
263+
show-duplicate-java-classes # find jars from current dir
264+
show-duplicate-java-classes path/to/lib_dir1 /path/to/lib_dir2
265+
show-duplicate-java-classes -c path/to/class_dir1 -c /path/to/class_dir2
256266
257267
Options:
258268
-h, --help show this help message and exit
269+
-L, --recursive-lib find jars in the sub-directories of lib dir
270+
-J, --recursive-jar find jars in the jar file
259271
-c CLASS_DIRS, --class-dir=CLASS_DIRS
260272
add class dir
261273
```

0 commit comments

Comments
 (0)