From c3413bfb226c1661e37c7fa14909bb1812d40337 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Tue, 26 Mar 2013 23:03:20 -0500 Subject: [PATCH 01/19] ensure_binary_mode: python3 and python2/win32 compatibility Python-3 needs binary mode so that it doesn't try to read into unicode strings. Python-2 just uses bytes on Linux, but needs the mode set to binary on Windows. The smudge filter must also read binary because we can have files with a "managed" extension that is not actually managed by git-fat. In that case, we get raw binary data on stdin. It will not match our cookie, but we must not corrupt its contents in the working tree, thus we have to treat it as binary. --- git-fat | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/git-fat b/git-fat index 7edb7ba..f4224f4 100755 --- a/git-fat +++ b/git-fat @@ -47,6 +47,17 @@ def verbose_stderr(*args, **kwargs): def verbose_ignore(*args, **kwargs): pass +def ensure_binary_mode(stream): + try: # Attempt the Python-3 way, also needed to handle unicode + return stream.detach() + except: + pass + if sys.platform == "win32": + # Fall back to Python-2 way, only needed on Windows + import msvcrt + msvcrt.setmode(stream.fileno(), os.O_BINARY) + return stream + def mkdir_p(path): import errno try: @@ -198,7 +209,7 @@ class GitFat(object): return False, None # read file try: - digest, bytes = self.decode_stream(open(fname)) + digest, bytes = self.decode_stream(open(fname, 'rb')) except IOError: return False, None if isinstance(digest, str): @@ -258,10 +269,16 @@ class GitFat(object): version of the file on stdin and produces the "clean" (repository) version on stdout. ''' self.setup() + # Set stdin and stdout to binary mode + sys.stdin = ensure_binary_mode(sys.stdin) + sys.stdout = ensure_binary_mode(sys.stdout) self.filter_clean(sys.stdin, sys.stdout) def cmd_filter_smudge(self): self.setup() + # Ensure streams are treated as binary + sys.stdin = ensure_binary_mode(sys.stdin) + sys.stdout = ensure_binary_mode(sys.stdout) result, bytes = self.decode_stream(sys.stdin) if isinstance(result, str): # We got a digest objfile = os.path.join(self.objdir, result) From 1a345c46956a41385ba3e2b39608f57ae106bb9d Mon Sep 17 00:00:00 2001 From: Will Kelleher Date: Fri, 28 Mar 2014 12:25:31 -0500 Subject: [PATCH 02/19] Abstract the backend and add S3 implementation --- README.md | 11 ++++ git-fat | 182 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 157 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 3889ad2..8e1c7fe 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,17 @@ look like this: remote = your.remote-host.org:/share/fat-store sshuser = fat +## S3 remote + +Edit your .gitfat file and add the following: + + [s3] + bucket={bucketname} + key={access_key_id} + secret={secret_access_key} + +And then you're done. 
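As a rough illustration of how the S3 backend introduced in this patch consumes those three settings, here is a minimal boto sketch (not part of the patch; the helper name, digest and local path are placeholders, and git-fat itself wraps this logic in the `S3Backend` class shown further down):

    # Sketch only: fetch one git-fat object from S3 using the .gitfat [s3] settings.
    from boto.s3.connection import S3Connection
    from boto.s3.key import Key

    def fetch_one(bucket_name, access_key, secret_key, digest, localfile):
        conn = S3Connection(access_key, secret_key)  # key/secret from .gitfat
        bkt = conn.get_bucket(bucket_name)           # bucket from .gitfat
        k = Key(bkt)
        k.key = digest                               # objects are named by their SHA1 digest
        k.get_contents_to_filename(localfile)        # lands in .git/fat/objects/<digest>
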
+ # A worked example Before we start, let's turn on verbose reporting so we can see what's diff --git a/git-fat b/git-fat index f4224f4..69d7314 100755 --- a/git-fat +++ b/git-fat @@ -14,6 +14,8 @@ import itertools import threading import time import collections +from boto.s3.connection import S3Connection +from boto.s3.key import Key try: from subprocess import check_output @@ -127,6 +129,126 @@ def gitconfig_set(name, value, file=None): args += [name, value] p = subprocess.check_call(args) +try: + import fish + class S3Counter: + def __init__(self): + self.fish = None + def __call__(self, complete, total): + if self.fish is None: + self.fish = fish.ProgressFish(total=total/1024) + self.fish.animate(amount=complete/1024) +except ImportError: + class S3Counter: + def __init__(self): + self.count = 0 + def __call__(self, complete, total): + if complete * 10 / total > self.count: + self.count += 1 + sys.stdout.write('.') + sys.stdout.flush() + +class RsyncBackend(object): + + def __init__(self,remote,ssh_port,ssh_user,options,objdir): + self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore + self.remote = remote + self.ssh_port = ssh_port + self.ssh_user = ssh_user + self.options = options + self.objdir = objdir + + def get_rsync_command(self,push): + (remote, ssh_port, ssh_user, options) = self.get_rsync() + if push: + self.verbose('Pushing to %s' % (remote)) + else: + self.verbose('Pulling from %s' % (remote)) + + cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-'] + rshopts = '' + if ssh_user: + rshopts += ' -l ' + self.ssh_user + if ssh_port: + rshopts += ' -p ' + self.ssh_port + if rshopts: + cmd.append('--rsh=ssh' + rshopts) + if options: + cmd += self.options.split(' ') + if push: + cmd += [self.objdir + '/', self.remote + '/'] + else: + cmd += [self.remote + '/', self.objdir + '/'] + return cmd + + def pull(self,files): + cmd = self.get_rsync_command(push=False) + self.verbose('Executing: %s' % ' '.join(cmd)) + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) + p.communicate(input='\x00'.join(files)) + + def push(self,files): + cmd = self.get_rsync_command(push=True) + self.verbose('Executing: %s' % ' '.join(cmd)) + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) + p.communicate(input='\x00'.join(files)) + +class S3Backend(object): + + def __init__(self,bucket,key,secret,objdir): + self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore + self.bucket = bucket + self.key = key + self.secret = secret + self.objdir = objdir + + def get_bucket(self): + conn = S3Connection(self.key, self.secret) + bkt = conn.get_bucket(self.bucket) + return bkt + + def pull(self,files): + bkt = self.get_bucket() + for file in files: + localfile = os.path.abspath(os.path.join(self.objdir,file)) + if os.path.isfile(localfile): + self.verbose('Object %s already exists, skipping.' % file) + else: + self.verbose('Getting object %s from s3 bucket %s' % (file,self.bucket)) + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + try: + k.get_contents_to_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during download, make sure the partial + # download is removed. + os.remove(localfile) + raise + + def push(self,files): + bkt = self.get_bucket() + for file in files: + k = bkt.get_key(file) + if bkt.get_key(file): + self.verbose('Object %s already exists in bucket %s, skipping.' 
% (file,self.bucket)) + else: + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) + try: + k.set_contents_from_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during upload, delete the partially uploaded + # remote object. Otherwise we'll have problems later. + k.delete() + raise + class GitFat(object): DecodeError = RuntimeError def __init__(self): @@ -142,39 +264,33 @@ class GitFat(object): return len(enc(hashlib.sha1('dummy').hexdigest(), 5)) self.magiclen = magiclen(self.encode) # Current version self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions + self.backend = self.get_backend(self.objdir) def setup(self): mkdir_p(self.objdir) - def get_rsync(self): + def get_backend(self,objdir): + """ + Parse the .gitfat config file and pick the first supported backend + to use. Currently supports rsync and s3. + """ cfgpath = os.path.join(self.gitroot,'.gitfat') - remote = gitconfig_get('rsync.remote', file=cfgpath) - ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) - ssh_user = gitconfig_get('rsync.sshuser', file=cfgpath) - options = gitconfig_get('rsync.options', file=cfgpath) - if remote is None: - raise RuntimeError('No rsync.remote in %s' % cfgpath) - return remote, ssh_port, ssh_user, options - def get_rsync_command(self,push): - (remote, ssh_port, ssh_user, options) = self.get_rsync() - if push: - self.verbose('Pushing to %s' % (remote)) + if gitconfig_get('rsync.remote', file=cfgpath): + remote = gitconfig_get('rsync.remote', file=cfgpath) + ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) + ssh_user = gitconfig_get('rsync.sshuser', file=cfgpath) + options = gitconfig_get('rsync.options', file=cfgpath) + return RsyncBackend(remote,ssh_port,ssh_user,options,objdir) + elif gitconfig_get('s3.bucket', file=cfgpath): + bucket = gitconfig_get('s3.bucket', file=cfgpath) + key = gitconfig_get('s3.key', file=cfgpath) + if key is None: + raise RuntimeError('No s3.key in %s' % cfgpath) + secret = gitconfig_get('s3.secret', file=cfgpath) + if secret is None: + raise RuntimeError('No s3.secret in %s' % cfgpath) + return S3Backend(bucket,key,secret,objdir) else: - self.verbose('Pulling from %s' % (remote)) + raise RuntimeError('No supported backends specified in %s' % cfgpath) - cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-'] - rshopts = '' - if ssh_user: - rshopts += ' -l ' + ssh_user - if ssh_port: - rshopts += ' -p ' + ssh_port - if rshopts: - cmd.append('--rsh=ssh' + rshopts) - if options: - cmd += options.split(' ') - if push: - cmd += [self.objdir + '/', remote + '/'] - else: - cmd += [remote + '/', self.objdir + '/'] - return cmd def revparse(self, revname): return subprocess.check_output(['git', 'rev-parse', revname]).strip() def encode_v1(self, digest, bytes): @@ -356,10 +472,7 @@ class GitFat(object): # (includes history). Finer-grained pushing would be useful. 
pushall = '--all' in args files = self.referenced_objects(all=pushall) & self.catalog_objects() - cmd = self.get_rsync_command(push=True) - self.verbose('Executing: %s' % ' '.join(cmd)) - p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + self.backend.push(files) def checkout(self, show_orphans=False): 'Update any stale files in the present working tree' for digest, fname in self.orphan_files(): @@ -390,10 +503,7 @@ class GitFat(object): if rev: refargs['rev'] = rev files = self.filter_objects(refargs, self.parse_pull_patterns(args)) - cmd = self.get_rsync_command(push=False) - self.verbose('Executing: %s' % ' '.join(cmd)) - p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + self.backend.pull(files) self.checkout() def parse_pull_patterns(self, args): From 6986a3a1c91616c8b96ccda4199a4d7ab333b662 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Wed, 27 Mar 2013 11:19:54 -0500 Subject: [PATCH 03/19] encoding: use uninterpreted bytes whenever possible Git is encoding-agnostic in the sense that it interprets file contents, commit messages and paths as binary. In the case of paths, this means that the non-NUL bytes returned from readdir(2) are stored and later passed to lstat(2) and creat(2). See git-commit(1) for details. To be compatible with Git's mode of operation, we also use raw bytes whenever possible. hashlib's hexdigest returns Python 'str', which we immediately encode as ASCII so that it can be used with path component and cleaned bytes to be committed. Renamed variable 'bytes' to 'bytecount' due to conflict with type Includes contributions from: Stephen Miller --- git-fat | 107 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 53 deletions(-) diff --git a/git-fat b/git-fat index 69d7314..42f44f4 100755 --- a/git-fat +++ b/git-fat @@ -76,10 +76,10 @@ def umask(): return old def readblocks(stream): - bytes = 0 + bytecount = 0 while True: data = stream.read(BLOCK_SIZE) - bytes += len(data) + bytecount += len(data) if not data: break yield data @@ -165,33 +165,31 @@ class RsyncBackend(object): else: self.verbose('Pulling from %s' % (remote)) - cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-'] - rshopts = '' - if ssh_user: - rshopts += ' -l ' + self.ssh_user - if ssh_port: - rshopts += ' -p ' + self.ssh_port + cmd = [b'rsync', b'--progress', b'--ignore-existing', b'--from0', b'--files-from=-'] + rshopts = b'' + if self.ssh_user: + rshopts += b' -l ' + self.ssh_user + if self.ssh_port: + rshopts += b' -p ' + self.ssh_port if rshopts: - cmd.append('--rsh=ssh' + rshopts) - if options: - cmd += self.options.split(' ') + cmd.append(b'--rsh=ssh' + rshopts) if push: - cmd += [self.objdir + '/', self.remote + '/'] + cmd += [self.objdir + b'/', self.remote + b'/'] else: - cmd += [self.remote + '/', self.objdir + '/'] + cmd += [self.remote + b'/', self.objdir + b'/'] return cmd def pull(self,files): cmd = self.get_rsync_command(push=False) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + p.communicate(input=b'\x00'.join(files)) def push(self,files): cmd = self.get_rsync_command(push=True) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + p.communicate(input=b'\x00'.join(files)) class S3Backend(object): @@ -255,13 +253,13 @@ class GitFat(object): self.verbose = 
verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip() - self.objdir = os.path.join(self.gitdir, 'fat', 'objects') + self.objdir = os.path.join(self.gitdir, b'fat', b'objects') if os.environ.get('GIT_FAT_VERSION') == '1': self.encode = self.encode_v1 else: self.encode = self.encode_v2 def magiclen(enc): - return len(enc(hashlib.sha1('dummy').hexdigest(), 5)) + return len(enc(hashlib.sha1(b'dummy').hexdigest().encode('ASCII'), 5)) self.magiclen = magiclen(self.encode) # Current version self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions self.backend = self.get_backend(self.objdir) @@ -272,7 +270,7 @@ class GitFat(object): Parse the .gitfat config file and pick the first supported backend to use. Currently supports rsync and s3. """ - cfgpath = os.path.join(self.gitroot,'.gitfat') + cfgpath = os.path.join(self.gitroot,b'.gitfat') if gitconfig_get('rsync.remote', file=cfgpath): remote = gitconfig_get('rsync.remote', file=cfgpath) ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) @@ -293,23 +291,23 @@ class GitFat(object): def revparse(self, revname): return subprocess.check_output(['git', 'rev-parse', revname]).strip() - def encode_v1(self, digest, bytes): + def encode_v1(self, digest, bytecount): 'Produce legacy representation of file to be stored in repository.' - return '#$# git-fat %s\n' % (digest,) - def encode_v2(self, digest, bytes): + return (b'#$# git-fat ' + digest + b'\n') + def encode_v2(self, digest, bytecount): 'Produce representation of file to be stored in repository. 20 characters can hold 64-bit integers.' - return '#$# git-fat %s %20d\n' % (digest, bytes) - def decode(self, string, noraise=False): - cookie = '#$# git-fat ' - if string.startswith(cookie): - parts = string[len(cookie):].split() + return (b'#$# git-fat ' + digest + (' %20d\n' % (bytecount,)).encode('ASCII')) + def decode(self, bstring, noraise=False): + cookie = b'#$# git-fat ' + if bstring.startswith(cookie): + parts = bstring[len(cookie):].split() digest = parts[0] - bytes = int(parts[1]) if len(parts) > 1 else None - return digest, bytes + bytecount = int(parts[1]) if len(parts) > 1 else None + return digest, int(bytecount) elif noraise: return None, None else: - raise GitFat.DecodeError('Could not decode %s' % (string)) + raise GitFat.DecodeError('Could not decode %s' % repr(bstring)) def decode_stream(self, stream): 'Return digest if git-fat cache, otherwise return iterator over entire file contents' preamble = stream.read(self.magiclen) @@ -325,13 +323,13 @@ class GitFat(object): return False, None # read file try: - digest, bytes = self.decode_stream(open(fname, 'rb')) + digest, bytecount = self.decode_stream(open(fname, 'rb')) except IOError: return False, None - if isinstance(digest, str): - return digest, bytes + if isinstance(digest, bytes): + return digest, bytecount else: - return None, bytes + return None, bytecount def decode_clean(self, body): ''' Attempt to decode version in working tree. The tree version could be changed to have a more @@ -339,16 +337,17 @@ class GitFat(object): version decodes successfully, it indicates that the fat data is not currently available in this repository. 
''' - digest, bytes = self.decode(body, noraise=True) + digest, bytecount = self.decode(body, noraise=True) return digest def filter_clean(self, instream, outstreamclean): h = hashlib.new('sha1') - bytes = 0 - fd, tmpname = tempfile.mkstemp(dir=self.objdir) + bytecount = 0 + # mkstemp requires 'str' rather than native filesystem bytes + fd, tmpname = tempfile.mkstemp(dir=self.objdir.decode(sys.getfilesystemencoding())) try: ishanging = False cached = False # changes to True when file is cached - with os.fdopen(fd, 'w') as cache: + with os.fdopen(fd, 'wb') as cache: outstream = cache blockiter = readblocks(instream) firstblock = True @@ -359,10 +358,10 @@ class GitFat(object): outstream = outstreamclean firstblock = False h.update(block) - bytes += len(block) + bytecount += len(block) outstream.write(block) outstream.flush() - digest = h.hexdigest() + digest = h.hexdigest().encode('ASCII') objfile = os.path.join(self.objdir, digest) if not ishanging: if os.path.exists(objfile): @@ -374,7 +373,7 @@ class GitFat(object): os.rename(tmpname, objfile) self.verbose('git-fat filter-clean: caching to %s' % objfile) cached = True - outstreamclean.write(self.encode(digest, bytes)) + outstreamclean.write(self.encode(digest, bytecount)) finally: if not cached: os.remove(tmpname) @@ -395,16 +394,16 @@ class GitFat(object): # Ensure streams are treated as binary sys.stdin = ensure_binary_mode(sys.stdin) sys.stdout = ensure_binary_mode(sys.stdout) - result, bytes = self.decode_stream(sys.stdin) - if isinstance(result, str): # We got a digest + result, bytecount = self.decode_stream(sys.stdin) + if isinstance(result, bytes): # We got a digest objfile = os.path.join(self.objdir, result) try: - cat(open(objfile), sys.stdout) self.verbose('git-fat filter-smudge: restoring from %s' % objfile) - except IOError: # file not found + cat(open(objfile, 'rb'), sys.stdout) + except IOError: # file not found self.verbose('git-fat filter-smudge: fat object missing %s' % objfile) - sys.stdout.write(self.encode(result, bytes)) # could leave a better notice about how to recover this file - else: # We have an iterable over the original input. + sys.stdout.write(self.encode(result, bytecount)) # could leave a better notice about how to recover this file + else: # We have an iterable over the original input. 
self.verbose('git-fat filter-smudge: not a managed file') cat_iter(result, sys.stdout) def catalog_objects(self): @@ -419,13 +418,13 @@ class GitFat(object): p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) def cut_sha1hash(input, output): for line in input: - output.write(line.split()[0] + '\n') + output.write(line.split()[0] + b'\n') output.close() cut_thread = threading.Thread(target=cut_sha1hash, args=(p1.stdout, p2.stdin)) cut_thread.start() for line in p2.stdout: objhash, objtype, size = line.split() - if objtype == 'blob' and int(size) in self.magiclens: + if objtype == b'blob' and int(size) in self.magiclens: try: fathash = self.decode(subprocess.check_output(['git', 'cat-file', '-p', objhash]))[0] referenced.add(fathash) @@ -571,6 +570,7 @@ class GitFat(object): time1 = time.time() self.verbose('%d of %d blobs are >= %d bytes [elapsed %.3fs]' % (numlarge, numblobs, threshsize, time1-time0)) def cmd_find(self, args): + # FIXME: Need input validation here maxsize = int(args[0]) blobsizes = dict(self.gen_large_blobs('--all', maxsize)) time0 = time.time() @@ -591,14 +591,15 @@ class GitFat(object): revlist.wait() difftree.wait() def cmd_index_filter(self, args): + # FIXME: Need input validation here manage_gitattributes = '--manage-gitattributes' in args filelist = set(f.strip() for f in open(args[0]).readlines()) lsfiles = subprocess.Popen(['git', 'ls-files', '-s'], stdout=subprocess.PIPE) updateindex = subprocess.Popen(['git', 'update-index', '--index-info'], stdin=subprocess.PIPE) - for line in lsfiles.stdout: - mode, sep, tail = line.partition(' ') - blobhash, sep, tail = tail.partition(' ') - stageno, sep, tail = tail.partition('\t') + for line in lsfiles.stdout.read(): + mode, sep, tail = line.partition(b' ') + blobhash, sep, tail = tail.partition(b' ') + stageno, sep, tail = tail.partition(b'\t') filename = tail.strip() if filename not in filelist: continue @@ -606,7 +607,7 @@ class GitFat(object): # skip symbolic links continue # This file will contain the hash of the cleaned object - hashfile = os.path.join(self.gitdir, 'fat', 'index-filter', blobhash) + hashfile = os.path.join(self.gitdir, b'fat', b'index-filter', blobhash) try: cleanedobj = open(hashfile).read().rstrip() except IOError: From 3372983bffa3a1fb490c1791312b91b108e5f11d Mon Sep 17 00:00:00 2001 From: Will Kelleher Date: Tue, 8 Apr 2014 18:10:11 -0500 Subject: [PATCH 04/19] Continue to function if boto is not installed --- git-fat | 122 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 58 deletions(-) diff --git a/git-fat b/git-fat index 42f44f4..0a20289 100755 --- a/git-fat +++ b/git-fat @@ -14,8 +14,6 @@ import itertools import threading import time import collections -from boto.s3.connection import S3Connection -from boto.s3.key import Key try: from subprocess import check_output @@ -42,6 +40,70 @@ except ImportError: return output subprocess.check_output = backport_check_output +try: + from boto.s3.connection import S3Connection + from boto.s3.key import Key + class S3Backend(object): + + def __init__(self,bucket,key,secret,objdir): + self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore + self.bucket = bucket + self.key = key + self.secret = secret + self.objdir = objdir + + def get_bucket(self): + conn = S3Connection(self.key, self.secret) + bkt = conn.get_bucket(self.bucket) + return bkt + + def pull(self,files): + bkt = self.get_bucket() + for file in files: + localfile = 
os.path.abspath(os.path.join(self.objdir,file)) + if os.path.isfile(localfile): + self.verbose('Object %s already exists, skipping.' % file) + else: + self.verbose('Getting object %s from s3 bucket %s' % (file,self.bucket)) + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + try: + k.get_contents_to_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during download, make sure the partial + # download is removed. + os.remove(localfile) + raise + + def push(self,files): + bkt = self.get_bucket() + for file in files: + k = bkt.get_key(file) + if bkt.get_key(file): + self.verbose('Object %s already exists in bucket %s, skipping.' % (file,self.bucket)) + else: + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) + try: + k.set_contents_from_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during upload, delete the partially uploaded + # remote object. Otherwise we'll have problems later. + k.delete() + raise +except ImportError: + class S3Backend(object): + + def __init__(self,bucket,key,secret,objdir): + raise RuntimeError("S3Backend requires boto.") + BLOCK_SIZE = 4096 def verbose_stderr(*args, **kwargs): @@ -191,62 +253,6 @@ class RsyncBackend(object): p = subprocess.Popen(cmd, stdin=subprocess.PIPE) p.communicate(input=b'\x00'.join(files)) -class S3Backend(object): - - def __init__(self,bucket,key,secret,objdir): - self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore - self.bucket = bucket - self.key = key - self.secret = secret - self.objdir = objdir - - def get_bucket(self): - conn = S3Connection(self.key, self.secret) - bkt = conn.get_bucket(self.bucket) - return bkt - - def pull(self,files): - bkt = self.get_bucket() - for file in files: - localfile = os.path.abspath(os.path.join(self.objdir,file)) - if os.path.isfile(localfile): - self.verbose('Object %s already exists, skipping.' % file) - else: - self.verbose('Getting object %s from s3 bucket %s' % (file,self.bucket)) - k = Key(bkt) - k.key = file - localfile = os.path.abspath(os.path.join(self.objdir,file)) - try: - k.get_contents_to_filename(localfile, - cb=S3Counter(), - num_cb=500) - except KeyboardInterrupt: - # If we cancel during download, make sure the partial - # download is removed. - os.remove(localfile) - raise - - def push(self,files): - bkt = self.get_bucket() - for file in files: - k = bkt.get_key(file) - if bkt.get_key(file): - self.verbose('Object %s already exists in bucket %s, skipping.' % (file,self.bucket)) - else: - k = Key(bkt) - k.key = file - localfile = os.path.abspath(os.path.join(self.objdir,file)) - self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) - try: - k.set_contents_from_filename(localfile, - cb=S3Counter(), - num_cb=500) - except KeyboardInterrupt: - # If we cancel during upload, delete the partially uploaded - # remote object. Otherwise we'll have problems later. 
- k.delete() - raise - class GitFat(object): DecodeError = RuntimeError def __init__(self): From 8a9775f8bdce69d5776fd906c17d37e0fe8d6080 Mon Sep 17 00:00:00 2001 From: Dennis Lin Date: Wed, 9 Apr 2014 12:13:02 -0500 Subject: [PATCH 05/19] s3: Try to use environment variables if s3.{key,bucket} are undefined --- git-fat | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/git-fat b/git-fat index 0a20289..03df2ce 100755 --- a/git-fat +++ b/git-fat @@ -287,10 +287,16 @@ class GitFat(object): bucket = gitconfig_get('s3.bucket', file=cfgpath) key = gitconfig_get('s3.key', file=cfgpath) if key is None: - raise RuntimeError('No s3.key in %s' % cfgpath) + try: + key = os.environ['AWS_ACCESS_KEY_ID'] + except KeyError: + raise RuntimeError('No s3.key in %s' % cfgpath) secret = gitconfig_get('s3.secret', file=cfgpath) if secret is None: - raise RuntimeError('No s3.secret in %s' % cfgpath) + try: + secret = os.environ['AWS_SECRET_ACCESS_KEY'] + except KeyError: + raise RuntimeError('No s3.secret in %s' % cfgpath) return S3Backend(bucket,key,secret,objdir) else: raise RuntimeError('No supported backends specified in %s' % cfgpath) From 7122b5d43065b9e23a0262fe6ef9d2e8d62878a2 Mon Sep 17 00:00:00 2001 From: zelonght Date: Tue, 6 May 2014 11:23:15 +0700 Subject: [PATCH 06/19] Skip parsing rev when '--' is specified --- git-fat | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git-fat b/git-fat index 03df2ce..b6b4752 100755 --- a/git-fat +++ b/git-fat @@ -508,6 +508,8 @@ class GitFat(object): if '--all' in args: refargs['all'] = True for arg in args: + if arg == '--': + break if arg.startswith('-') or len(arg) != 40: continue rev = self.revparse(arg) From 296412c5158b52d2db8d09ab703a7fad7ae3454f Mon Sep 17 00:00:00 2001 From: Dennis Lin Date: Thu, 19 Feb 2015 14:30:15 -0600 Subject: [PATCH 07/19] Unbreak the rsync backend This is the minimum necessary to get things to work again. We probably could use a more sophisticated mechanism that allows for falling back to different backends. --- git-fat | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/git-fat b/git-fat index b6b4752..f34ab5a 100755 --- a/git-fat +++ b/git-fat @@ -211,7 +211,6 @@ except ImportError: sys.stdout.flush() class RsyncBackend(object): - def __init__(self,remote,ssh_port,ssh_user,options,objdir): self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore self.remote = remote @@ -221,13 +220,13 @@ class RsyncBackend(object): self.objdir = objdir def get_rsync_command(self,push): - (remote, ssh_port, ssh_user, options) = self.get_rsync() if push: - self.verbose('Pushing to %s' % (remote)) + self.verbose('Pushing to %s' % (self.remote)) else: - self.verbose('Pulling from %s' % (remote)) + self.verbose('Pulling from %s' % (self.remote)) - cmd = [b'rsync', b'--progress', b'--ignore-existing', b'--from0', b'--files-from=-'] + cmd = [b'rsync', b'--progress', b'--ignore-existing', b'--from0', + b'--files-from=-'] rshopts = b'' if self.ssh_user: rshopts += b' -l ' + self.ssh_user From 06bd6d855054c164acfba204d57f18835f128fb5 Mon Sep 17 00:00:00 2001 From: Yu Feng Date: Wed, 11 Mar 2015 17:35:25 -0700 Subject: [PATCH 08/19] workaround connection reset by peer. 
See: https://github.com/boto/boto/issues/2207#issuecomment-60682869 Also incorporated touch-up fixes to address the comments by zelonght --- git-fat | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/git-fat b/git-fat index f34ab5a..08a658d 100755 --- a/git-fat +++ b/git-fat @@ -41,6 +41,7 @@ except ImportError: subprocess.check_output = backport_check_output try: + import boto from boto.s3.connection import S3Connection from boto.s3.key import Key class S3Backend(object): @@ -55,6 +56,11 @@ try: def get_bucket(self): conn = S3Connection(self.key, self.secret) bkt = conn.get_bucket(self.bucket) + bkt_location = bkt.get_location() + if bkt_location: + self.verbose("Setting connection to region %s" % bkt_location) + conn = boto.s3.connect_to_region(bkt_location, aws_access_key_id=self.key, aws_secret_access_key=self.secret) + bkt = conn.get_bucket(self.bucket) return bkt def pull(self,files): @@ -205,11 +211,13 @@ except ImportError: def __init__(self): self.count = 0 def __call__(self, complete, total): - if complete * 10 / total > self.count: + if complete * 10 > self.count * total: self.count += 1 sys.stdout.write('.') - sys.stdout.flush() - + if complete == total: + sys.stdout.write('\n') + sys.stdout.flush() + class RsyncBackend(object): def __init__(self,remote,ssh_port,ssh_user,options,objdir): self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore From 0df193f5f25e02528f46571cdc32fe3e1017d589 Mon Sep 17 00:00:00 2001 From: Dennis Lin Date: Fri, 5 Feb 2016 12:08:20 -0600 Subject: [PATCH 09/19] s3: Ignore missing files instead of erroring out This way, we download as many files as we can. --- git-fat | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/git-fat b/git-fat index 03df2ce..1e3dcc4 100755 --- a/git-fat +++ b/git-fat @@ -43,6 +43,7 @@ except ImportError: try: from boto.s3.connection import S3Connection from boto.s3.key import Key + import boto class S3Backend(object): def __init__(self,bucket,key,secret,objdir): @@ -77,6 +78,12 @@ try: # download is removed. os.remove(localfile) raise + except boto.exception.S3ResponseError as e: + # Swallow missing files + if e.status == 404: + print("Unable to find: %s" % file) + else: + raise def push(self,files): bkt = self.get_bucket() From 4eb4cae11bda8a821d1c6934fcb52f1a246433a1 Mon Sep 17 00:00:00 2001 From: quangnt Date: Wed, 24 Aug 2016 10:26:49 +0700 Subject: [PATCH 10/19] UE-2075: Free space for Jenkins/Report to add new sequences --- README.md | 1 + git-fat | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e1c7fe..9af2c19 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Edit your .gitfat file and add the following: bucket={bucketname} key={access_key_id} secret={secret_access_key} + objdir={location_to_store_git_fat_object} And then you're done. 
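The git-fat hunk below consumes this setting as `s3.objdir`, falling back to the default store when it is unset or the directory does not exist. A condensed sketch of that resolution (assuming a `gitconfig_get` helper equivalent in spirit to git-fat's own, which shells out to `git config`):

    import os
    import subprocess

    def gitconfig_get(name, file=None):
        # Read one key via `git config`, optionally from a specific config file.
        args = ['git', 'config']
        if file is not None:
            args += ['--file', file]
        args += ['--get', name]
        try:
            return subprocess.check_output(args).strip()
        except subprocess.CalledProcessError:
            return None  # key not present

    def resolve_objdir(gitroot, gitdir):
        cfgpath = os.path.join(gitroot, '.gitfat')
        objdir = gitconfig_get('s3.objdir', file=cfgpath)
        if objdir and os.path.exists(objdir):
            return objdir                                  # user-chosen object store
        return os.path.join(gitdir, 'fat', 'objects')      # default inside .git
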
diff --git a/git-fat b/git-fat index a72419a..8cde66f 100755 --- a/git-fat +++ b/git-fat @@ -273,7 +273,12 @@ class GitFat(object): self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip() - self.objdir = os.path.join(self.gitdir, b'fat', b'objects') + cfgpath = os.path.join(self.gitroot,b'.gitfat') + objdir = gitconfig_get('s3.objdir', file=cfgpath) + if objdir and os.path.exists(objdir): + self.objdir = objdir + else: + self.objdir = os.path.join(self.gitdir, b'fat', b'objects') if os.environ.get('GIT_FAT_VERSION') == '1': self.encode = self.encode_v1 else: From 7a1ec1408fac4f902d0596c494294ce66874c590 Mon Sep 17 00:00:00 2001 From: quangnt Date: Mon, 3 Jul 2017 14:14:55 +0700 Subject: [PATCH 11/19] Fix limitations of large file size ( >4GBs) in Windows OS --- git-fat | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/git-fat b/git-fat index 8cde66f..7babe4d 100755 --- a/git-fat +++ b/git-fat @@ -118,7 +118,7 @@ except ImportError: raise RuntimeError("S3Backend requires boto.") BLOCK_SIZE = 4096 - +LARGE_FILE_SIZE = 4294967296 # consider > 4GBs is large file size(Current limitation in windows) def verbose_stderr(*args, **kwargs): return print(*args, file=sys.stderr, **kwargs) def verbose_ignore(*args, **kwargs): @@ -373,14 +373,16 @@ class GitFat(object): def filter_clean(self, instream, outstreamclean): h = hashlib.new('sha1') bytecount = 0 + adjusted_lfs = False # mkstemp requires 'str' rather than native filesystem bytes fd, tmpname = tempfile.mkstemp(dir=self.objdir.decode(sys.getfilesystemencoding())) try: ishanging = False cached = False # changes to True when file is cached + # Process for large file size (> 4GBS) in Windows + (instream, adjusted_lfs) = self.detect_and_adjust_std_for_lfs(instream) with os.fdopen(fd, 'wb') as cache: outstream = cache - blockiter = readblocks(instream) firstblock = True for block in readblocks(instream): if firstblock: @@ -406,6 +408,8 @@ class GitFat(object): cached = True outstreamclean.write(self.encode(digest, bytecount)) finally: + if adjusted_lfs: + instream.close() if not cached: os.remove(tmpname) @@ -428,12 +432,18 @@ class GitFat(object): result, bytecount = self.decode_stream(sys.stdin) if isinstance(result, bytes): # We got a digest objfile = os.path.join(self.objdir, result) + adjusted_lfs = False try: self.verbose('git-fat filter-smudge: restoring from %s' % objfile) + # Process for large file size in Windows + (sys.stdout, adjusted_lfs) = self.detect_and_adjust_std_for_lfs(sys.stdout, objfile) cat(open(objfile, 'rb'), sys.stdout) except IOError: # file not found self.verbose('git-fat filter-smudge: fat object missing %s' % objfile) sys.stdout.write(self.encode(result, bytecount)) # could leave a better notice about how to recover this file + finally: + if adjusted_lfs: + sys.stdout.close() else: # We have an iterable over the original input. 
self.verbose('git-fat filter-smudge: not a managed file') cat_iter(result, sys.stdout) @@ -569,9 +579,15 @@ class GitFat(object): if gitconfig_get('filter.fat.clean') or gitconfig_get('filter.fat.smudge'): print('Git fat already configured, check configuration in .git/config') else: - gitconfig_set('filter.fat.clean', 'git-fat filter-clean') - gitconfig_set('filter.fat.smudge', 'git-fat filter-smudge') + filter_clean = 'git-fat filter-clean' + filter_smudge = 'git-fat filter-smudge' + if sys.platform == 'win32': + filter_clean = filter_clean + ' %f' + filter_smudge = filter_smudge + ' %f' + gitconfig_set('filter.fat.clean', filter_clean) + gitconfig_set('filter.fat.smudge', filter_smudge) print('Initialized git fat') + def gen_large_blobs(self, revs, threshsize): """Build dict of all blobs""" time0 = time.time() @@ -673,6 +689,34 @@ class GitFat(object): lsfiles.wait() updateindex.wait() + def detect_and_adjust_std_for_lfs(self, std, objfile = None): + """ + Detect and attempt to adjust the stdin/stdout using filestream for large files. + Return: A Tuple includes: + - Boolean indicating stdin/stdout is sdjusted or not. + - file object after adjust for large file size + """ + if sys.platform == 'win32' and len(sys.argv) > 2: + real_file = os.path.abspath(sys.argv[2]) + if std is sys.stdin: + file_check_lfs = real_file + elif std is sys.stdout: + file_check_lfs = objfile + if os.path.getsize(file_check_lfs) >= LARGE_FILE_SIZE: + std = self.ensure_std_lfs(std, real_file) + return (std, True) + else: + return (std, False) + + def ensure_std_lfs(self, std, filepath): + """ + Ensure std working with large file. + """ + if std is sys.stdin: + std = open(filepath, 'rb') + elif std is sys.stdout: + std = open(filepath, 'wb') + return std if __name__ == '__main__': fat = GitFat() From 8a4f77c20634502a6dc4809ea54fcaf39a17175b Mon Sep 17 00:00:00 2001 From: quangnt Date: Fri, 21 Jul 2017 10:51:35 +0700 Subject: [PATCH 12/19] Note to work with large file size in Windows OS --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 9af2c19..ff66ad1 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,9 @@ Some people recommend checking binaries into different repositories or even not * [git-media](https://github.com/schacon/git-media) adopts a similar approach to `git-fat`, but with a different synchronization philosophy and with many Ruby dependencies. # Installation and configuration + +Note: In Windows OS, to make this work with large file size (>4GBs), requires to install windows os, git and python 64 bits versions. + Place `git-fat` in your `PATH`. Edit `.gitattributes` to regard any desired extensions as fat files. 
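A typical `.gitattributes` for that might look like the following (the extensions are only examples; `fat` is the filter name that the `filter.fat.clean`/`filter.fat.smudge` settings written by `git fat init` refer to):

    *.png filter=fat -crlf
    *.zip filter=fat -crlf
    *.iso filter=fat -crlf
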
From c026fcf51ed3aca6027401bcd8b68cb6c25bd254 Mon Sep 17 00:00:00 2001 From: quangnt Date: Thu, 27 Jul 2017 11:09:11 +0700 Subject: [PATCH 13/19] [UE-2513] Fix limitation upload s3 >5gbs file --- git-fat | 54 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/git-fat b/git-fat index 7babe4d..8b88054 100755 --- a/git-fat +++ b/git-fat @@ -45,6 +45,9 @@ try: from boto.s3.connection import S3Connection from boto.s3.key import Key import boto + import math + from filechunkio import FileChunkIO + class S3Backend(object): def __init__(self,bucket,key,secret,objdir): @@ -102,15 +105,43 @@ try: k.key = file localfile = os.path.abspath(os.path.join(self.objdir,file)) self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) - try: - k.set_contents_from_filename(localfile, - cb=S3Counter(), - num_cb=500) - except KeyboardInterrupt: - # If we cancel during upload, delete the partially uploaded - # remote object. Otherwise we'll have problems later. - k.delete() - raise + # New code to process > 5GBs file size + filesize = os.path.getsize(localfile) + if filesize <= S3_SINGLE_PART_UPLOAD_LIMITATION: # For filesize < 5GBs (limitation of S3 single upload) + try: + k.set_contents_from_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during upload, delete the partially uploaded + # remote object. Otherwise we'll have problems later. + k.delete() + raise + else: # Large file size > 5GBs (use multipart upload) + multi_part = bkt.initiate_multipart_upload(file) + try: + chunks_count = int(math.ceil(filesize / float(S3_MULTI_PARTS_CHUNK_SIZE))) + for i in range(chunks_count): + part_num = i + 1 + offset = i * S3_MULTI_PARTS_CHUNK_SIZE + remaining_bytes = filesize - offset + bytes = min([S3_MULTI_PARTS_CHUNK_SIZE, remaining_bytes]) + self.verbose("**Try uploading part %s of %s" %(str(part_num), str(chunks_count))) + with FileChunkIO(localfile, 'r', offset=offset, bytes=bytes) as fp: + multi_part.upload_part_from_file(fp=fp, part_num=part_num, size=bytes) + if len(multi_part.get_all_parts()) == chunks_count: + multi_part.complete_upload() + self.verbose("Finish uploading file") + else: + multi_part.cancel_upload() + self.verbose("Fail to upload file") + except KeyboardInterrupt: + # If we cancel during upload, delete the partially uploaded + # remote object. Otherwise we'll have problems later. 
+ multi_part.cancel_upload() + k.delete() + raise + except ImportError: class S3Backend(object): @@ -119,6 +150,9 @@ except ImportError: BLOCK_SIZE = 4096 LARGE_FILE_SIZE = 4294967296 # consider > 4GBs is large file size(Current limitation in windows) +S3_SINGLE_PART_UPLOAD_LIMITATION = 5100273664 # 5*1024*1024*1024 (5GBs) * 0.95(SafeMargin): AWS S3 has limitation of 5GBs when upload single part +S3_MULTI_PARTS_CHUNK_SIZE = 419430400 # 4*100*1024*1024 (400MBs) + def verbose_stderr(*args, **kwargs): return print(*args, file=sys.stderr, **kwargs) def verbose_ignore(*args, **kwargs): @@ -587,7 +621,7 @@ class GitFat(object): gitconfig_set('filter.fat.clean', filter_clean) gitconfig_set('filter.fat.smudge', filter_smudge) print('Initialized git fat') - + def gen_large_blobs(self, revs, threshsize): """Build dict of all blobs""" time0 = time.time() From b83c4072a2070bc975d14408277f64220c2c2fce Mon Sep 17 00:00:00 2001 From: quangnt Date: Tue, 26 Sep 2017 16:19:14 +0700 Subject: [PATCH 14/19] Add note to install required libraries --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ff66ad1..a42564a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,11 @@ Some people recommend checking binaries into different repositories or even not # Installation and configuration -Note: In Windows OS, to make this work with large file size (>4GBs), requires to install windows os, git and python 64 bits versions. +Note: +* In Windows OS, to make this work with large file size (>4GBs), requires to install windows os, git and python 64 bits versions. +* Require following python libraries: + - [boto](https://pypi.python.org/pypi/boto). Using `pip install boto` + - [filechunkio](https://pypi.python.org/pypi/filechunkio). Using `pip install filechunkio` Place `git-fat` in your `PATH`. From 33e66d17a95966bb6c2ea8c62a447a13c9f1b81a Mon Sep 17 00:00:00 2001 From: zelonght Date: Wed, 9 May 2018 21:58:28 +0700 Subject: [PATCH 15/19] Update error for requirement of S3BackEnd --- git-fat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-fat b/git-fat index 8b88054..2ed592b 100755 --- a/git-fat +++ b/git-fat @@ -146,7 +146,7 @@ except ImportError: class S3Backend(object): def __init__(self,bucket,key,secret,objdir): - raise RuntimeError("S3Backend requires boto.") + raise RuntimeError("S3Backend requires boto and filechunkio.") BLOCK_SIZE = 4096 LARGE_FILE_SIZE = 4294967296 # consider > 4GBs is large file size(Current limitation in windows) From e04f14baf26a74a6bef4b887fd9574332aeb14cc Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Sun, 21 Jan 2018 11:34:05 -0700 Subject: [PATCH 16/19] Use '.' 
instead of '' to match any pathspec; needed by git >=2.16 --- git-fat | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git-fat b/git-fat index 2ed592b..65bee5d 100755 --- a/git-fat +++ b/git-fat @@ -512,6 +512,8 @@ class GitFat(object): def orphan_files(self, patterns=[]): 'generator for all orphan placeholders in the working tree' + if not patterns or patterns == ['']: + patterns = ['.'] for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split('\x00')[:-1]: digest = self.decode_file(fname)[0] if digest: From d5dd944746baa735c07db8df282d9c81cefdba6a Mon Sep 17 00:00:00 2001 From: loi-personify Date: Tue, 6 Nov 2018 19:37:48 -0800 Subject: [PATCH 17/19] Add instruction about timezone --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a42564a..484b195 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Note: * Require following python libraries: - [boto](https://pypi.python.org/pypi/boto). Using `pip install boto` - [filechunkio](https://pypi.python.org/pypi/filechunkio). Using `pip install filechunkio` +* Make sure your date-time and timezone is correct. Place `git-fat` in your `PATH`. From a5861b9ef5ccebec954f8dc5ccf26caa72ab562e Mon Sep 17 00:00:00 2001 From: Peter Khoa Date: Wed, 16 Mar 2022 15:53:20 +0700 Subject: [PATCH 18/19] DEVOPS-499: Use different environment variables for S3 ID --- git-fat | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/git-fat b/git-fat index 65bee5d..74ae596 100755 --- a/git-fat +++ b/git-fat @@ -321,10 +321,10 @@ class GitFat(object): return len(enc(hashlib.sha1(b'dummy').hexdigest().encode('ASCII'), 5)) self.magiclen = magiclen(self.encode) # Current version self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions - self.backend = self.get_backend(self.objdir) + self.backend = self.get_backend() def setup(self): mkdir_p(self.objdir) - def get_backend(self,objdir): + def get_backend(self): """ Parse the .gitfat config file and pick the first supported backend to use. Currently supports rsync and s3. 
@@ -335,22 +335,20 @@ class GitFat(object): ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) ssh_user = gitconfig_get('rsync.sshuser', file=cfgpath) options = gitconfig_get('rsync.options', file=cfgpath) - return RsyncBackend(remote,ssh_port,ssh_user,options,objdir) + return RsyncBackend(remote,ssh_port,ssh_user,options,self.objdir) elif gitconfig_get('s3.bucket', file=cfgpath): bucket = gitconfig_get('s3.bucket', file=cfgpath) key = gitconfig_get('s3.key', file=cfgpath) if key is None: - try: - key = os.environ['AWS_ACCESS_KEY_ID'] - except KeyError: - raise RuntimeError('No s3.key in %s' % cfgpath) + key = os.getenv('GITFAT_S3_ACCESS_KEY_ID', os.getenv('AWS_ACCESS_KEY_ID')) + if key is None: + raise RuntimeError('No s3.key in both environment variables and %s' % cfgpath) secret = gitconfig_get('s3.secret', file=cfgpath) if secret is None: - try: - secret = os.environ['AWS_SECRET_ACCESS_KEY'] - except KeyError: - raise RuntimeError('No s3.secret in %s' % cfgpath) - return S3Backend(bucket,key,secret,objdir) + secret = os.getenv('GITFAT_S3_SECRET_ACCESS_KEY', os.getenv('AWS_SECRET_ACCESS_KEY')) + if secret is None: + raise RuntimeError('No s3.secret in both environment variables and %s' % cfgpath) + return S3Backend(bucket,key,secret,self.objdir) else: raise RuntimeError('No supported backends specified in %s' % cfgpath) From 3de5f58e13eecf9ca4804d53bea1284648c3077f Mon Sep 17 00:00:00 2001 From: Peter Khoa Date: Fri, 18 Mar 2022 14:54:27 +0700 Subject: [PATCH 19/19] DEVOPS-499: Auto-correct S3 key and secret based on environment variables in master of Jenkins (#20) --- git-fat | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/git-fat b/git-fat index 74ae596..9f64310 100755 --- a/git-fat +++ b/git-fat @@ -338,16 +338,25 @@ class GitFat(object): return RsyncBackend(remote,ssh_port,ssh_user,options,self.objdir) elif gitconfig_get('s3.bucket', file=cfgpath): bucket = gitconfig_get('s3.bucket', file=cfgpath) + + # For S3 key key = gitconfig_get('s3.key', file=cfgpath) + key_env = os.getenv('GITFAT_S3_ACCESS_KEY_ID') + if key_env is not None and key != key_env: + gitconfig_set('s3.key', key_env, file=cfgpath) + key = key_env if key is None: - key = os.getenv('GITFAT_S3_ACCESS_KEY_ID', os.getenv('AWS_ACCESS_KEY_ID')) - if key is None: - raise RuntimeError('No s3.key in both environment variables and %s' % cfgpath) - secret = gitconfig_get('s3.secret', file=cfgpath) + raise RuntimeError('No s3.key in both environment variables and %s' % cfgpath) + + # For S3 secret + secret = gitconfig_get('s3.secret', file=cfgpath) + secret_env = os.getenv('GITFAT_S3_SECRET_ACCESS_KEY') + if secret_env is not None and secret != secret_env: + gitconfig_set('s3.secret', secret_env, file=cfgpath) + secret = secret_env if secret is None: - secret = os.getenv('GITFAT_S3_SECRET_ACCESS_KEY', os.getenv('AWS_SECRET_ACCESS_KEY')) - if secret is None: - raise RuntimeError('No s3.secret in both environment variables and %s' % cfgpath) + raise RuntimeError('No s3.secret in both environment variables and %s' % cfgpath) + return S3Backend(bucket,key,secret,self.objdir) else: raise RuntimeError('No supported backends specified in %s' % cfgpath)
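
With these last two patches applied, the S3 credential lookup effectively works as sketched below (simplified to the access key; the secret follows the same pattern, and `gitconfig_get`/`gitconfig_set` stand in for git-fat's config helpers):

    import os

    def resolve_s3_key(cfgpath, gitconfig_get, gitconfig_set):
        # Value currently recorded in the .gitfat config, if any.
        key = gitconfig_get('s3.key', file=cfgpath)
        # GITFAT_S3_ACCESS_KEY_ID takes precedence and is written back to
        # .gitfat so later invocations (e.g. the clean/smudge filters) agree.
        key_env = os.getenv('GITFAT_S3_ACCESS_KEY_ID')
        if key_env is not None and key != key_env:
            gitconfig_set('s3.key', key_env, file=cfgpath)
            key = key_env
        if key is None:
            raise RuntimeError('No s3.key in both environment variables and %s' % cfgpath)
        return key

Writing the environment value back into `.gitfat` is presumably what lets a CI master (the Jenkins case named in the commit subject) rotate credentials without editing every checkout's config by hand.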