11import os
2+ import re
23
34from ..constants import IS_WINDOWS_PLATFORM
4- from .fnmatch import fnmatch
5+ from fnmatch import fnmatch
6+ from itertools import chain
57from .utils import create_archive
68
79
810def tar (path , exclude = None , dockerfile = None , fileobj = None , gzip = False ):
911 root = os .path .abspath (path )
1012 exclude = exclude or []
11-
1213 return create_archive (
1314 files = sorted (exclude_paths (root , exclude , dockerfile = dockerfile )),
1415 root = root , fileobj = fileobj , gzip = gzip
1516 )
1617
1718
19+ _SEP = re .compile ('/|\\ \\ ' ) if IS_WINDOWS_PLATFORM else re .compile ('/' )
20+
21+
1822def exclude_paths (root , patterns , dockerfile = None ):
1923 """
2024 Given a root directory path and a list of .dockerignore patterns, return
@@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
2327
2428 All paths returned are relative to the root.
2529 """
30+
2631 if dockerfile is None :
2732 dockerfile = 'Dockerfile'
2833
29- patterns = [p .lstrip ('/' ) for p in patterns ]
30- exceptions = [p for p in patterns if p .startswith ('!' )]
31-
32- include_patterns = [p [1 :] for p in exceptions ]
33- include_patterns += [dockerfile , '.dockerignore' ]
34-
35- exclude_patterns = list (set (patterns ) - set (exceptions ))
36-
37- paths = get_paths (root , exclude_patterns , include_patterns ,
38- has_exceptions = len (exceptions ) > 0 )
39-
40- return set (paths ).union (
41- # If the Dockerfile is in a subdirectory that is excluded, get_paths
42- # will not descend into it and the file will be skipped. This ensures
43- # it doesn't happen.
44- set ([dockerfile .replace ('/' , os .path .sep )])
45- if os .path .exists (os .path .join (root , dockerfile )) else set ()
46- )
47-
48-
49- def should_include (path , exclude_patterns , include_patterns , root ):
50- """
51- Given a path, a list of exclude patterns, and a list of inclusion patterns:
52-
53- 1. Returns True if the path doesn't match any exclusion pattern
54- 2. Returns False if the path matches an exclusion pattern and doesn't match
55- an inclusion pattern
56- 3. Returns true if the path matches an exclusion pattern and matches an
57- inclusion pattern
58- """
59- for pattern in exclude_patterns :
60- if match_path (path , pattern ):
61- for pattern in include_patterns :
62- if match_path (path , pattern ):
63- return True
64- if os .path .isabs (pattern ) and match_path (
65- os .path .join (root , path ), pattern ):
66- return True
67- return False
68- return True
69-
70-
71- def should_check_directory (directory_path , exclude_patterns , include_patterns ,
72- root ):
34+ def normalize (p ):
35+ # Leading and trailing slashes are not relevant. Yes,
36+ # "foo.py/" must exclude the "foo.py" regular file. "."
37+ # components are not relevant either, even if the whole
38+ # pattern is only ".", as the Docker reference states: "For
39+ # historical reasons, the pattern . is ignored."
40+ split = [pt for pt in re .split (_SEP , p ) if pt and pt != '.' ]
41+ # ".." component must be cleared with the potential previous
42+ # component, regardless of whether it exists: "A preprocessing
43+ # step [...] eliminates . and .. elements using Go's
44+ # filepath.".
45+ i = 0
46+ while i < len (split ):
47+ if split [i ] == '..' :
48+ del split [i ]
49+ if i > 0 :
50+ del split [i - 1 ]
51+ i -= 1
52+ else :
53+ i += 1
54+ return split
55+
56+ patterns = (
57+ (True , normalize (p [1 :]))
58+ if p .startswith ('!' ) else
59+ (False , normalize (p ))
60+ for p in patterns )
61+ patterns = list (reversed (list (chain (
62+ # Exclude empty patterns such as "." or the empty string.
63+ filter (lambda p : p [1 ], patterns ),
64+ # Always include the Dockerfile and .dockerignore
65+ [(True , dockerfile .split ('/' )), (True , ['.dockerignore' ])]))))
66+ return set (walk (root , patterns ))
67+
68+
69+ def walk (root , patterns , default = True ):
7370 """
74- Given a directory path, a list of exclude patterns, and a list of inclusion
75- patterns:
76-
77- 1. Returns True if the directory path should be included according to
78- should_include.
79- 2. Returns True if the directory path is the prefix for an inclusion
80- pattern
81- 3. Returns False otherwise
71+ A collection of file lying below root that should be included according to
72+ patterns.
8273 """
8374
84- # To account for exception rules, check directories if their path is a
85- # a prefix to an inclusion pattern. This logic conforms with the current
86- # docker logic (2016-10-27):
87- # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
88-
89- def normalize_path (path ):
90- return path .replace (os .path .sep , '/' )
91-
92- path_with_slash = normalize_path (directory_path ) + '/'
93- possible_child_patterns = [
94- pattern for pattern in map (normalize_path , include_patterns )
95- if (pattern + '/' ).startswith (path_with_slash )
96- ]
97- directory_included = should_include (
98- directory_path , exclude_patterns , include_patterns , root
99- )
100- return directory_included or len (possible_child_patterns ) > 0
101-
102-
103- def get_paths (root , exclude_patterns , include_patterns , has_exceptions = False ):
104- paths = []
105-
106- for parent , dirs , files in os .walk (root , topdown = True , followlinks = False ):
107- parent = os .path .relpath (parent , root )
108- if parent == '.' :
109- parent = ''
110-
111- # Remove excluded patterns from the list of directories to traverse
112- # by mutating the dirs we're iterating over.
113- # This looks strange, but is considered the correct way to skip
114- # traversal. See https://docs.python.org/2/library/os.html#os.walk
115- dirs [:] = [
116- d for d in dirs if should_check_directory (
117- os .path .join (parent , d ), exclude_patterns , include_patterns ,
118- root
119- )
120- ]
121-
122- for path in dirs :
123- if should_include (os .path .join (parent , path ),
124- exclude_patterns , include_patterns , root ):
125- paths .append (os .path .join (parent , path ))
126-
127- for path in files :
128- if should_include (os .path .join (parent , path ),
129- exclude_patterns , include_patterns , root ):
130- paths .append (os .path .join (parent , path ))
131-
132- return paths
133-
134-
135- def match_path (path , pattern ):
136-
137- pattern = pattern .rstrip ('/' + os .path .sep )
138- if pattern and not os .path .isabs (pattern ):
139- pattern = os .path .relpath (pattern )
140-
141- pattern_components = pattern .split (os .path .sep )
142- if len (pattern_components ) == 1 and IS_WINDOWS_PLATFORM :
143- pattern_components = pattern .split ('/' )
144-
145- if '**' not in pattern :
146- path_components = path .split (os .path .sep )[:len (pattern_components )]
147- else :
148- path_components = path .split (os .path .sep )
149- return fnmatch ('/' .join (path_components ), '/' .join (pattern_components ))
75+ def match (p ):
76+ if p [1 ][0 ] == '**' :
77+ rec = (p [0 ], p [1 ][1 :])
78+ return [p ] + (match (rec ) if rec [1 ] else [rec ])
79+ elif fnmatch (f , p [1 ][0 ]):
80+ return [(p [0 ], p [1 ][1 :])]
81+ else :
82+ return []
83+
84+ for f in os .listdir (root ):
85+ cur = os .path .join (root , f )
86+ # The patterns if recursing in that directory.
87+ sub = list (chain (* (match (p ) for p in patterns )))
88+ # Whether this file is explicitely included / excluded.
89+ hit = next ((p [0 ] for p in sub if not p [1 ]), None )
90+ # Whether this file is implicitely included / excluded.
91+ matched = default if hit is None else hit
92+ sub = list (filter (lambda p : p [1 ], sub ))
93+ if os .path .isdir (cur ):
94+ # Entirely skip directories if there are no chance any subfile will
95+ # be included.
96+ if all (not p [0 ] for p in sub ) and not matched :
97+ continue
98+ # I think this would greatly speed up dockerignore handling by not
99+ # recursing into directories we are sure would be entirely
100+ # included, and only yielding the directory itself, which will be
101+ # recursively archived anyway. However the current unit test expect
102+ # the full list of subfiles and I'm not 100% sure it would make no
103+ # difference yet.
104+ # if all(p[0] for p in sub) and matched:
105+ # yield f
106+ # continue
107+ children = False
108+ for r in (os .path .join (f , p ) for p in walk (cur , sub , matched )):
109+ yield r
110+ children = True
111+ # The current unit tests expect directories only under those
112+ # conditions. It might be simplifiable though.
113+ if (not sub or not children ) and hit or hit is None and default :
114+ yield f
115+ elif matched :
116+ yield f
0 commit comments