11#!/usr/bin/env python3
22# -*- coding: utf-8 -*-
33# @Function
# Find duplicate classes among java lib dirs and class dirs.
55#
66# @Usage
77# $ show-duplicate-java-classes # find jars from current dir
@@ -17,6 +17,7 @@ __author__ = 'tg123'
1717import re
1818import sys
1919from glob import glob
20+ from io import BytesIO
2021from optparse import OptionParser
2122from os import walk
2223from os .path import relpath , isdir
@@ -34,23 +35,52 @@ def print_box_message(msg):
3435 print ('=' * 80 )
3536
3637
def list_jar_file_under_lib_dirs(lib_dirs, recursive=False):
    """
    Collect the jar files designated by the given lib dirs / jar file paths.

    :param lib_dirs: iterable of paths; a path that is a directory is scanned
        for jar files, any other path is taken as a jar file itself
        (it is not checked for existence here).
    :param recursive: if true, also descend into the sub-directories of each
        lib dir; otherwise only ``*.jar`` files directly inside it are taken.
        Defaults to false so that one-argument callers keep working.
    :return: set of jar file paths
    """
    # local import: at file level only `relpath` and `isdir` are visibly imported from os.path
    from os.path import join

    jar_files = set()
    for lib_dir in lib_dirs:
        if not isdir(lib_dir):
            # not a directory: treat the argument itself as a jar file
            jar_files.add(lib_dir)
        elif recursive:
            # join() instead of `dir_path + '/' + name`: avoids a doubled separator
            # when lib_dir ends with one, and yields the same path shape as glob() below.
            # NOTE: the suffix test here ignores case, the glob pattern below does not lower-case names.
            jar_files.update(
                join(dir_path, file_name)
                for dir_path, _, file_names in walk(lib_dir)
                for file_name in file_names
                if file_name.lower().endswith('.jar')
            )
        else:
            jar_files.update(glob(lib_dir + '/*.jar'))
    return jar_files
4553
4654
def list_class_under_jar_file(jar_file, recursive=False):
    """
    List the class files in a jar file, optionally descending into nested jars.

    :param jar_file: path of the jar (zip) file
    :param recursive: if true, entries whose name ends with ``.jar`` are opened
        as jars themselves, to any depth. Defaults to false so that
        one-argument callers keep working.
    :return: map: jar_jar_path('a.jar!b.jar!c.jar') -> classes;
        an empty map if ``jar_file`` itself is a bad zip file.
        A bad nested jar is reported on stderr and left out of the map.
    """

    def list_zip_in_zip(jar_paths, zf):
        # read the entry list once; it is used for both the class and the jar scan
        entry_names = zf.namelist()
        ret = {jar_paths: {name for name in entry_names if name.lower().endswith('.class')}}
        if not recursive:
            return ret

        for nested_jar in {name for name in entry_names if name.lower().endswith('.jar')}:
            zip_paths = jar_paths + '!' + nested_jar
            try:
                # the nested jar is read into memory as a whole and opened from there
                with ZipFile(BytesIO(zf.read(nested_jar))) as nested_zf:
                    ret.update(list_zip_in_zip(zip_paths, nested_zf))
            except BadZipfile as e:
                print('WARN: %s is bad zip file(%s), ignored!' % (zip_paths, e), file=sys.stderr)

        return ret

    try:
        with ZipFile(jar_file) as zip_file:
            return list_zip_in_zip(jar_file, zip_file)
    except BadZipfile as error:
        print('WARN: %s is bad zip file(%s), ignored!' % (jar_file, error), file=sys.stderr)
        return {}
5484
5585
5686def list_class_under_class_dir (class_dir ):
@@ -63,20 +93,24 @@ def list_class_under_class_dir(class_dir):
6393# biz functions
6494################################################################################
6595
def build_index__class_to_class_paths(jar_files, class_dirs, recursive_jar=False):
    """
    Build the index from class file to the class paths that contain it.

    :param jar_files: iterable of jar file paths
    :param class_dirs: iterable of class dir paths
    :param recursive_jar: passed to ``list_class_under_jar_file`` as ``recursive``
        (whether jars inside a jar are indexed too). Defaults to false so that
        two-argument callers keep working.
    :return: tuple ``(class_to_class_paths, class_paths)``:
        ``class_to_class_paths`` maps a class file to the set of class paths
        (jar path such as ``a.jar!b.jar``, or class dir) where it was found;
        ``class_paths`` is the set of all class paths that were indexed.
    """
    class_to_class_paths = {}
    class_paths = set()

    # list all classes in jar files;
    # the class path is the jar path reported by the lister, not the jar file argument,
    # so only jars that were actually listed end up in class_paths
    for jar_file in jar_files:
        listed = list_class_under_jar_file(jar_file, recursive=recursive_jar)
        for jar_jar_path, classes in listed.items():
            class_paths.add(jar_jar_path)
            for clazz in classes:
                class_to_class_paths.setdefault(clazz, set()).add(jar_jar_path)

    # list all classes in class dirs
    for class_dir in class_dirs:
        class_paths.add(class_dir)
        for class_file in list_class_under_class_dir(class_dir):
            class_to_class_paths.setdefault(class_file, set()).add(class_dir)

    return class_to_class_paths, class_paths
80114
81115
82116_java9_module_file_pattern = re .compile (r'(^|.*/)module-info\.class$' )
@@ -136,16 +170,31 @@ def print_class_paths_info(class_paths):
136170
137171
138172def main ():
139- option_parser = OptionParser ('usage: %prog '
140- '[-c class-dir1 [-c class-dir2] ...] '
141- '[lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]' )
142- option_parser .add_option ('-c' , '--class-dir' , dest = 'class_dirs' , default = [], action = 'append' , help = 'add class dir' )
173+ option_parser = OptionParser (
174+ usage = '%prog [OPTION]...'
175+ ' [-c class-dir1 [-c class-dir2] ...]'
176+ ' [lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]'
177+ '\n Find duplicate classes among java lib dirs and class dirs.'
178+ '\n \n Examples:'
179+ '\n %prog # find jars from current dir'
180+ '\n %prog path/to/lib_dir1 /path/to/lib_dir2'
181+ '\n %prog -c path/to/class_dir1 -c /path/to/class_dir2'
182+ )
183+ option_parser .add_option ('-L' , '--recursive-lib' , dest = 'recursive_lib' , default = False ,
184+ action = 'store_true' , help = 'find jars in the sub-directories of lib dir' )
185+ option_parser .add_option ('-J' , '--recursive-jar' , dest = 'recursive_jar' , default = False ,
186+ action = 'store_true' , help = 'find jars in the jar file' )
187+ option_parser .add_option ('-c' , '--class-dir' , dest = 'class_dirs' , default = [],
188+ action = 'append' , help = 'add class dir' )
143189 options , lib_dirs = option_parser .parse_args ()
190+ recursive_lib = options .recursive_lib
191+ recursive_jar = options .recursive_jar
144192 if not options .class_dirs and not lib_dirs :
145193 lib_dirs = ['.' ]
146194
147195 class_to_class_paths , class_paths = build_index__class_to_class_paths (
148- list_jar_file_under_lib_dirs (lib_dirs ), options .class_dirs )
196+ list_jar_file_under_lib_dirs (lib_dirs , recursive = recursive_lib ),
197+ options .class_dirs , recursive_jar = recursive_jar )
149198
150199 class_paths_to_duplicate_classes = find_duplicate_classes (class_to_class_paths )
151200
0 commit comments