@@ -87,53 +87,97 @@ func (a PipAnalyzer) getPackages(image pkgutil.Image) (map[string]map[string]uti
8787 for i := 0 ; i < len (contents ); i ++ {
8888 c := contents [i ]
8989 fileName := c .Name ()
90- // check if package
91- packageDir := regexp .MustCompile ("^([a-z|A-Z|0-9|_]+)-(([0-9]+?\\ .){2,3})(dist-info|egg-info)$" )
92- packageMatch := packageDir .FindStringSubmatch (fileName )
93- if len (packageMatch ) != 0 {
94- packageName := packageMatch [1 ]
95- version := packageMatch [2 ][:len (packageMatch [2 ])- 1 ]
96-
97- // First, try and use the "top_level.txt",
98- // Many egg packages contains a "top_level.txt" file describing the directories containing the
99- // required code. Combining the sizes of each of these directories should give the total size.
100- var size int64
101- topLevelReader , err := os .Open (filepath .Join (pythonPath , fileName , "top_level.txt" ))
102- if err == nil {
103- scanner := bufio .NewScanner (topLevelReader )
104- scanner .Split (bufio .ScanLines )
105- for scanner .Scan () {
106- // check if directory exists first, then retrieve size
107- contentPath := filepath .Join (pythonPath , scanner .Text ())
108- if _ , err := os .Stat (contentPath ); err == nil {
109- size = size + pkgutil .GetSize (contentPath )
110- } else if _ , err := os .Stat (contentPath + ".py" ); err == nil {
111- // sometimes the top level content is just a single python file; try this too
112- size = size + pkgutil .GetSize (contentPath + ".py" )
113- }
90+ var metadata * os.File
91+ var err error
92+ if strings .HasSuffix (fileName , "egg-info" ) {
93+ // egg directory
94+ metadata , err = os .Open (filepath .Join (pythonPath , fileName , "PKG-INFO" ))
95+ if err != nil {
96+ logrus .Debugf ("unable to open PKG-INFO for egg %s" , fileName )
97+ }
98+ } else if strings .HasSuffix (fileName , "dist-info" ) {
99+ // wheel directory
100+ metadata , err = os .Open (filepath .Join (pythonPath , fileName , "METADATA" ))
101+ if err != nil {
102+ logrus .Debugf ("unable to open METADATA for wheel %s" , fileName )
103+ }
104+ } else {
105+ // no match
106+ continue
107+ }
108+
109+ var line , packageName , version string
110+ if metadata == nil {
111+ // unable to open metadata file: try reading the package itself
112+ mPath := filepath .Join (pythonPath , fileName )
113+ metadata , err = os .Open (mPath )
114+ fInfo , _ := os .Stat (mPath )
115+ if err != nil || fInfo .IsDir () {
116+ // if this also doesn't work, the package doesn't have the correct metadata structure
117+ // try and parse the name using a regex anyway
118+ logrus .Debugf ("failed to locate package metadata: attempting to infer package name" )
119+ packageDir := regexp .MustCompile ("^([a-z|A-Z|0-9|_]+)-(([0-9]+?\\ .){2,3})(dist-info|egg-info)$" )
120+ packageMatch := packageDir .FindStringSubmatch (fileName )
121+ if len (packageMatch ) != 0 {
122+ packageName = packageMatch [1 ]
123+ version = packageMatch [2 ][:len (packageMatch [2 ])- 1 ]
114124 }
115- } else {
116- // if we didn't find a top_level.txt, we'll try the previous alphabetical directory entry heuristic
117- logrus .Infof ("unable to use top_level.txt: falling back to previous alphabetical directory entry heuristic..." )
118-
119- // Retrieves size for actual package/script corresponding to each dist-info metadata directory
120- // by taking the file entry alphabetically before it (for a package) or after it (for a script)
121- // var size int64
122- if i - 1 >= 0 && contents [i - 1 ].Name () == packageName {
123- packagePath := filepath .Join (pythonPath , packageName )
124- size = pkgutil .GetSize (packagePath )
125- } else if i + 1 < len (contents ) && contents [i + 1 ].Name () == packageName + ".py" {
126- size = contents [i + 1 ].Size ()
127- } else {
128- logrus .Errorf ("Could not find Python package %s for corresponding metadata info" , packageName )
129- continue
125+ }
126+ }
127+
128+ if metadata != nil {
129+ scanner := bufio .NewScanner (metadata )
130+ scanner .Split (bufio .ScanLines )
131+ for scanner .Scan () {
132+ line = scanner .Text ()
133+ if strings .HasPrefix (line , "Name" ) {
134+ packageName = strings .Split (line , ": " )[1 ]
135+ // next line is always the version
136+ scanner .Scan ()
137+ version = strings .Split (scanner .Text (), ": " )[1 ]
138+ break
130139 }
131140 }
141+ }
132142
133- currPackage := util.PackageInfo {Version : version , Size : size }
134- mapPath := strings .Replace (pythonPath , path , "" , 1 )
135- addToMap (packages , packageName , mapPath , currPackage )
143+ // First, try and use the "top_level.txt",
144+ // Many egg packages contain a "top_level.txt" file describing the directories containing the
145+ // required code. Combining the sizes of each of these directories should give the total size.
146+ var size int64
147+ topLevelReader , err := os .Open (filepath .Join (pythonPath , fileName , "top_level.txt" ))
148+ if err == nil {
149+ scanner := bufio .NewScanner (topLevelReader )
150+ scanner .Split (bufio .ScanLines )
151+ for scanner .Scan () {
152+ // check if directory exists first, then retrieve size
153+ contentPath := filepath .Join (pythonPath , scanner .Text ())
154+ if _ , err := os .Stat (contentPath ); err == nil {
155+ size = size + pkgutil .GetSize (contentPath )
156+ } else if _ , err := os .Stat (contentPath + ".py" ); err == nil {
157+ // sometimes the top level content is just a single python file; try this too
158+ size = size + pkgutil .GetSize (contentPath + ".py" )
159+ }
160+ }
161+ } else {
162+ logrus .Debugf ("unable to use top_level.txt: falling back to alphabetical directory entry heuristic..." )
163+
164+ // Retrieves size for actual package/script corresponding to each dist-info metadata directory
165+ // by examining the file entries directly before and after it
166+ if i - 1 >= 0 && strings .Contains (contents [i - 1 ].Name (), packageName ) {
167+ packagePath := filepath .Join (pythonPath , contents [i - 1 ].Name ())
168+ size = pkgutil .GetSize (packagePath )
169+ } else if i + 1 < len (contents ) && strings .Contains (contents [i + 1 ].Name (), packageName ) {
170+ packagePath := filepath .Join (pythonPath , contents [i + 1 ].Name ())
171+ size = pkgutil .GetSize (packagePath )
172+ } else {
173+ logrus .Errorf ("failed to locate python package for corresponding package metadata %s" , packageName )
174+ continue
175+ }
136176 }
177+
178+ currPackage := util.PackageInfo {Version : version , Size : size }
179+ mapPath := strings .Replace (pythonPath , path , "" , 1 )
180+ addToMap (packages , packageName , mapPath , currPackage )
137181 }
138182 }
139183
0 commit comments