From c3413bfb226c1661e37c7fa14909bb1812d40337 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Tue, 26 Mar 2013 23:03:20 -0500 Subject: [PATCH 1/5] ensure_binary_mode: python3 and python2/win32 compatibility Python-3 needs binary mode so that it doesn't try to read into unicode strings. Python-2 just uses bytes on Linux, but needs the mode set to binary on Windows. The smudge filter must also read binary because we can have files with a "managed" extension that is not actually managed by git-fat. In that case, we get raw binary data on stdin. It will not match our cookie, but we must not corrupt its contents in the working tree, thus we have to treat it as binary. --- git-fat | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/git-fat b/git-fat index 7edb7ba..f4224f4 100755 --- a/git-fat +++ b/git-fat @@ -47,6 +47,17 @@ def verbose_stderr(*args, **kwargs): def verbose_ignore(*args, **kwargs): pass +def ensure_binary_mode(stream): + try: # Attempt the Python-3 way, also needed to handle unicode + return stream.detach() + except: + pass + if sys.platform == "win32": + # Fall back to Python-2 way, only needed on Windows + import msvcrt + msvcrt.setmode(stream.fileno(), os.O_BINARY) + return stream + def mkdir_p(path): import errno try: @@ -198,7 +209,7 @@ class GitFat(object): return False, None # read file try: - digest, bytes = self.decode_stream(open(fname)) + digest, bytes = self.decode_stream(open(fname, 'rb')) except IOError: return False, None if isinstance(digest, str): @@ -258,10 +269,16 @@ class GitFat(object): version of the file on stdin and produces the "clean" (repository) version on stdout. ''' self.setup() + # Set stdin and stdout to binary mode + sys.stdin = ensure_binary_mode(sys.stdin) + sys.stdout = ensure_binary_mode(sys.stdout) self.filter_clean(sys.stdin, sys.stdout) def cmd_filter_smudge(self): self.setup() + # Ensure streams are treated as binary + sys.stdin = ensure_binary_mode(sys.stdin) + sys.stdout = ensure_binary_mode(sys.stdout) result, bytes = self.decode_stream(sys.stdin) if isinstance(result, str): # We got a digest objfile = os.path.join(self.objdir, result) From 1a345c46956a41385ba3e2b39608f57ae106bb9d Mon Sep 17 00:00:00 2001 From: Will Kelleher Date: Fri, 28 Mar 2014 12:25:31 -0500 Subject: [PATCH 2/5] Abstract the backend and add S3 implementation --- README.md | 11 ++++ git-fat | 182 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 157 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 3889ad2..8e1c7fe 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,17 @@ look like this: remote = your.remote-host.org:/share/fat-store sshuser = fat +## S3 remote + +Edit your .gitfat file and add the following: + + [s3] + bucket={bucketname} + key={access_key_id} + secret={secret_access_key} + +And then you're done. + # A worked example Before we start, let's turn on verbose reporting so we can see what's diff --git a/git-fat b/git-fat index f4224f4..69d7314 100755 --- a/git-fat +++ b/git-fat @@ -14,6 +14,8 @@ import itertools import threading import time import collections +from boto.s3.connection import S3Connection +from boto.s3.key import Key try: from subprocess import check_output @@ -127,6 +129,126 @@ def gitconfig_set(name, value, file=None): args += [name, value] p = subprocess.check_call(args) +try: + import fish + class S3Counter: + def __init__(self): + self.fish = None + def __call__(self, complete, total): + if self.fish is None: + self.fish = fish.ProgressFish(total=total/1024) + self.fish.animate(amount=complete/1024) +except ImportError: + class S3Counter: + def __init__(self): + self.count = 0 + def __call__(self, complete, total): + if complete * 10 / total > self.count: + self.count += 1 + sys.stdout.write('.') + sys.stdout.flush() + +class RsyncBackend(object): + + def __init__(self,remote,ssh_port,ssh_user,options,objdir): + self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore + self.remote = remote + self.ssh_port = ssh_port + self.ssh_user = ssh_user + self.options = options + self.objdir = objdir + + def get_rsync_command(self,push): + (remote, ssh_port, ssh_user, options) = self.get_rsync() + if push: + self.verbose('Pushing to %s' % (remote)) + else: + self.verbose('Pulling from %s' % (remote)) + + cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-'] + rshopts = '' + if ssh_user: + rshopts += ' -l ' + self.ssh_user + if ssh_port: + rshopts += ' -p ' + self.ssh_port + if rshopts: + cmd.append('--rsh=ssh' + rshopts) + if options: + cmd += self.options.split(' ') + if push: + cmd += [self.objdir + '/', self.remote + '/'] + else: + cmd += [self.remote + '/', self.objdir + '/'] + return cmd + + def pull(self,files): + cmd = self.get_rsync_command(push=False) + self.verbose('Executing: %s' % ' '.join(cmd)) + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) + p.communicate(input='\x00'.join(files)) + + def push(self,files): + cmd = self.get_rsync_command(push=True) + self.verbose('Executing: %s' % ' '.join(cmd)) + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) + p.communicate(input='\x00'.join(files)) + +class S3Backend(object): + + def __init__(self,bucket,key,secret,objdir): + self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore + self.bucket = bucket + self.key = key + self.secret = secret + self.objdir = objdir + + def get_bucket(self): + conn = S3Connection(self.key, self.secret) + bkt = conn.get_bucket(self.bucket) + return bkt + + def pull(self,files): + bkt = self.get_bucket() + for file in files: + localfile = os.path.abspath(os.path.join(self.objdir,file)) + if os.path.isfile(localfile): + self.verbose('Object %s already exists, skipping.' % file) + else: + self.verbose('Getting object %s from s3 bucket %s' % (file,self.bucket)) + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + try: + k.get_contents_to_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during download, make sure the partial + # download is removed. + os.remove(localfile) + raise + + def push(self,files): + bkt = self.get_bucket() + for file in files: + k = bkt.get_key(file) + if bkt.get_key(file): + self.verbose('Object %s already exists in bucket %s, skipping.' % (file,self.bucket)) + else: + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) + try: + k.set_contents_from_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during upload, delete the partially uploaded + # remote object. Otherwise we'll have problems later. + k.delete() + raise + class GitFat(object): DecodeError = RuntimeError def __init__(self): @@ -142,39 +264,33 @@ class GitFat(object): return len(enc(hashlib.sha1('dummy').hexdigest(), 5)) self.magiclen = magiclen(self.encode) # Current version self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions + self.backend = self.get_backend(self.objdir) def setup(self): mkdir_p(self.objdir) - def get_rsync(self): + def get_backend(self,objdir): + """ + Parse the .gitfat config file and pick the first supported backend + to use. Currently supports rsync and s3. + """ cfgpath = os.path.join(self.gitroot,'.gitfat') - remote = gitconfig_get('rsync.remote', file=cfgpath) - ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) - ssh_user = gitconfig_get('rsync.sshuser', file=cfgpath) - options = gitconfig_get('rsync.options', file=cfgpath) - if remote is None: - raise RuntimeError('No rsync.remote in %s' % cfgpath) - return remote, ssh_port, ssh_user, options - def get_rsync_command(self,push): - (remote, ssh_port, ssh_user, options) = self.get_rsync() - if push: - self.verbose('Pushing to %s' % (remote)) + if gitconfig_get('rsync.remote', file=cfgpath): + remote = gitconfig_get('rsync.remote', file=cfgpath) + ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) + ssh_user = gitconfig_get('rsync.sshuser', file=cfgpath) + options = gitconfig_get('rsync.options', file=cfgpath) + return RsyncBackend(remote,ssh_port,ssh_user,options,objdir) + elif gitconfig_get('s3.bucket', file=cfgpath): + bucket = gitconfig_get('s3.bucket', file=cfgpath) + key = gitconfig_get('s3.key', file=cfgpath) + if key is None: + raise RuntimeError('No s3.key in %s' % cfgpath) + secret = gitconfig_get('s3.secret', file=cfgpath) + if secret is None: + raise RuntimeError('No s3.secret in %s' % cfgpath) + return S3Backend(bucket,key,secret,objdir) else: - self.verbose('Pulling from %s' % (remote)) + raise RuntimeError('No supported backends specified in %s' % cfgpath) - cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-'] - rshopts = '' - if ssh_user: - rshopts += ' -l ' + ssh_user - if ssh_port: - rshopts += ' -p ' + ssh_port - if rshopts: - cmd.append('--rsh=ssh' + rshopts) - if options: - cmd += options.split(' ') - if push: - cmd += [self.objdir + '/', remote + '/'] - else: - cmd += [remote + '/', self.objdir + '/'] - return cmd def revparse(self, revname): return subprocess.check_output(['git', 'rev-parse', revname]).strip() def encode_v1(self, digest, bytes): @@ -356,10 +472,7 @@ class GitFat(object): # (includes history). Finer-grained pushing would be useful. pushall = '--all' in args files = self.referenced_objects(all=pushall) & self.catalog_objects() - cmd = self.get_rsync_command(push=True) - self.verbose('Executing: %s' % ' '.join(cmd)) - p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + self.backend.push(files) def checkout(self, show_orphans=False): 'Update any stale files in the present working tree' for digest, fname in self.orphan_files(): @@ -390,10 +503,7 @@ class GitFat(object): if rev: refargs['rev'] = rev files = self.filter_objects(refargs, self.parse_pull_patterns(args)) - cmd = self.get_rsync_command(push=False) - self.verbose('Executing: %s' % ' '.join(cmd)) - p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + self.backend.pull(files) self.checkout() def parse_pull_patterns(self, args): From 6986a3a1c91616c8b96ccda4199a4d7ab333b662 Mon Sep 17 00:00:00 2001 From: Jed Brown Date: Wed, 27 Mar 2013 11:19:54 -0500 Subject: [PATCH 3/5] encoding: use uninterpreted bytes whenever possible Git is encoding-agnostic in the sense that it interprets file contents, commit messages and paths as binary. In the case of paths, this means that the non-NUL bytes returned from readdir(2) are stored and later passed to lstat(2) and creat(2). See git-commit(1) for details. To be compatible with Git's mode of operation, we also use raw bytes whenever possible. hashlib's hexdigest returns Python 'str', which we immediately encode as ASCII so that it can be used with path component and cleaned bytes to be committed. Renamed variable 'bytes' to 'bytecount' due to conflict with type Includes contributions from: Stephen Miller --- git-fat | 107 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 54 insertions(+), 53 deletions(-) diff --git a/git-fat b/git-fat index 69d7314..42f44f4 100755 --- a/git-fat +++ b/git-fat @@ -76,10 +76,10 @@ def umask(): return old def readblocks(stream): - bytes = 0 + bytecount = 0 while True: data = stream.read(BLOCK_SIZE) - bytes += len(data) + bytecount += len(data) if not data: break yield data @@ -165,33 +165,31 @@ class RsyncBackend(object): else: self.verbose('Pulling from %s' % (remote)) - cmd = ['rsync', '--progress', '--ignore-existing', '--from0', '--files-from=-'] - rshopts = '' - if ssh_user: - rshopts += ' -l ' + self.ssh_user - if ssh_port: - rshopts += ' -p ' + self.ssh_port + cmd = [b'rsync', b'--progress', b'--ignore-existing', b'--from0', b'--files-from=-'] + rshopts = b'' + if self.ssh_user: + rshopts += b' -l ' + self.ssh_user + if self.ssh_port: + rshopts += b' -p ' + self.ssh_port if rshopts: - cmd.append('--rsh=ssh' + rshopts) - if options: - cmd += self.options.split(' ') + cmd.append(b'--rsh=ssh' + rshopts) if push: - cmd += [self.objdir + '/', self.remote + '/'] + cmd += [self.objdir + b'/', self.remote + b'/'] else: - cmd += [self.remote + '/', self.objdir + '/'] + cmd += [self.remote + b'/', self.objdir + b'/'] return cmd def pull(self,files): cmd = self.get_rsync_command(push=False) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + p.communicate(input=b'\x00'.join(files)) def push(self,files): cmd = self.get_rsync_command(push=True) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + p.communicate(input=b'\x00'.join(files)) class S3Backend(object): @@ -255,13 +253,13 @@ class GitFat(object): self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip() - self.objdir = os.path.join(self.gitdir, 'fat', 'objects') + self.objdir = os.path.join(self.gitdir, b'fat', b'objects') if os.environ.get('GIT_FAT_VERSION') == '1': self.encode = self.encode_v1 else: self.encode = self.encode_v2 def magiclen(enc): - return len(enc(hashlib.sha1('dummy').hexdigest(), 5)) + return len(enc(hashlib.sha1(b'dummy').hexdigest().encode('ASCII'), 5)) self.magiclen = magiclen(self.encode) # Current version self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions self.backend = self.get_backend(self.objdir) @@ -272,7 +270,7 @@ class GitFat(object): Parse the .gitfat config file and pick the first supported backend to use. Currently supports rsync and s3. """ - cfgpath = os.path.join(self.gitroot,'.gitfat') + cfgpath = os.path.join(self.gitroot,b'.gitfat') if gitconfig_get('rsync.remote', file=cfgpath): remote = gitconfig_get('rsync.remote', file=cfgpath) ssh_port = gitconfig_get('rsync.sshport', file=cfgpath) @@ -293,23 +291,23 @@ class GitFat(object): def revparse(self, revname): return subprocess.check_output(['git', 'rev-parse', revname]).strip() - def encode_v1(self, digest, bytes): + def encode_v1(self, digest, bytecount): 'Produce legacy representation of file to be stored in repository.' - return '#$# git-fat %s\n' % (digest,) - def encode_v2(self, digest, bytes): + return (b'#$# git-fat ' + digest + b'\n') + def encode_v2(self, digest, bytecount): 'Produce representation of file to be stored in repository. 20 characters can hold 64-bit integers.' - return '#$# git-fat %s %20d\n' % (digest, bytes) - def decode(self, string, noraise=False): - cookie = '#$# git-fat ' - if string.startswith(cookie): - parts = string[len(cookie):].split() + return (b'#$# git-fat ' + digest + (' %20d\n' % (bytecount,)).encode('ASCII')) + def decode(self, bstring, noraise=False): + cookie = b'#$# git-fat ' + if bstring.startswith(cookie): + parts = bstring[len(cookie):].split() digest = parts[0] - bytes = int(parts[1]) if len(parts) > 1 else None - return digest, bytes + bytecount = int(parts[1]) if len(parts) > 1 else None + return digest, int(bytecount) elif noraise: return None, None else: - raise GitFat.DecodeError('Could not decode %s' % (string)) + raise GitFat.DecodeError('Could not decode %s' % repr(bstring)) def decode_stream(self, stream): 'Return digest if git-fat cache, otherwise return iterator over entire file contents' preamble = stream.read(self.magiclen) @@ -325,13 +323,13 @@ class GitFat(object): return False, None # read file try: - digest, bytes = self.decode_stream(open(fname, 'rb')) + digest, bytecount = self.decode_stream(open(fname, 'rb')) except IOError: return False, None - if isinstance(digest, str): - return digest, bytes + if isinstance(digest, bytes): + return digest, bytecount else: - return None, bytes + return None, bytecount def decode_clean(self, body): ''' Attempt to decode version in working tree. The tree version could be changed to have a more @@ -339,16 +337,17 @@ class GitFat(object): version decodes successfully, it indicates that the fat data is not currently available in this repository. ''' - digest, bytes = self.decode(body, noraise=True) + digest, bytecount = self.decode(body, noraise=True) return digest def filter_clean(self, instream, outstreamclean): h = hashlib.new('sha1') - bytes = 0 - fd, tmpname = tempfile.mkstemp(dir=self.objdir) + bytecount = 0 + # mkstemp requires 'str' rather than native filesystem bytes + fd, tmpname = tempfile.mkstemp(dir=self.objdir.decode(sys.getfilesystemencoding())) try: ishanging = False cached = False # changes to True when file is cached - with os.fdopen(fd, 'w') as cache: + with os.fdopen(fd, 'wb') as cache: outstream = cache blockiter = readblocks(instream) firstblock = True @@ -359,10 +358,10 @@ class GitFat(object): outstream = outstreamclean firstblock = False h.update(block) - bytes += len(block) + bytecount += len(block) outstream.write(block) outstream.flush() - digest = h.hexdigest() + digest = h.hexdigest().encode('ASCII') objfile = os.path.join(self.objdir, digest) if not ishanging: if os.path.exists(objfile): @@ -374,7 +373,7 @@ class GitFat(object): os.rename(tmpname, objfile) self.verbose('git-fat filter-clean: caching to %s' % objfile) cached = True - outstreamclean.write(self.encode(digest, bytes)) + outstreamclean.write(self.encode(digest, bytecount)) finally: if not cached: os.remove(tmpname) @@ -395,16 +394,16 @@ class GitFat(object): # Ensure streams are treated as binary sys.stdin = ensure_binary_mode(sys.stdin) sys.stdout = ensure_binary_mode(sys.stdout) - result, bytes = self.decode_stream(sys.stdin) - if isinstance(result, str): # We got a digest + result, bytecount = self.decode_stream(sys.stdin) + if isinstance(result, bytes): # We got a digest objfile = os.path.join(self.objdir, result) try: - cat(open(objfile), sys.stdout) self.verbose('git-fat filter-smudge: restoring from %s' % objfile) - except IOError: # file not found + cat(open(objfile, 'rb'), sys.stdout) + except IOError: # file not found self.verbose('git-fat filter-smudge: fat object missing %s' % objfile) - sys.stdout.write(self.encode(result, bytes)) # could leave a better notice about how to recover this file - else: # We have an iterable over the original input. + sys.stdout.write(self.encode(result, bytecount)) # could leave a better notice about how to recover this file + else: # We have an iterable over the original input. self.verbose('git-fat filter-smudge: not a managed file') cat_iter(result, sys.stdout) def catalog_objects(self): @@ -419,13 +418,13 @@ class GitFat(object): p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) def cut_sha1hash(input, output): for line in input: - output.write(line.split()[0] + '\n') + output.write(line.split()[0] + b'\n') output.close() cut_thread = threading.Thread(target=cut_sha1hash, args=(p1.stdout, p2.stdin)) cut_thread.start() for line in p2.stdout: objhash, objtype, size = line.split() - if objtype == 'blob' and int(size) in self.magiclens: + if objtype == b'blob' and int(size) in self.magiclens: try: fathash = self.decode(subprocess.check_output(['git', 'cat-file', '-p', objhash]))[0] referenced.add(fathash) @@ -571,6 +570,7 @@ class GitFat(object): time1 = time.time() self.verbose('%d of %d blobs are >= %d bytes [elapsed %.3fs]' % (numlarge, numblobs, threshsize, time1-time0)) def cmd_find(self, args): + # FIXME: Need input validation here maxsize = int(args[0]) blobsizes = dict(self.gen_large_blobs('--all', maxsize)) time0 = time.time() @@ -591,14 +591,15 @@ class GitFat(object): revlist.wait() difftree.wait() def cmd_index_filter(self, args): + # FIXME: Need input validation here manage_gitattributes = '--manage-gitattributes' in args filelist = set(f.strip() for f in open(args[0]).readlines()) lsfiles = subprocess.Popen(['git', 'ls-files', '-s'], stdout=subprocess.PIPE) updateindex = subprocess.Popen(['git', 'update-index', '--index-info'], stdin=subprocess.PIPE) - for line in lsfiles.stdout: - mode, sep, tail = line.partition(' ') - blobhash, sep, tail = tail.partition(' ') - stageno, sep, tail = tail.partition('\t') + for line in lsfiles.stdout.read(): + mode, sep, tail = line.partition(b' ') + blobhash, sep, tail = tail.partition(b' ') + stageno, sep, tail = tail.partition(b'\t') filename = tail.strip() if filename not in filelist: continue @@ -606,7 +607,7 @@ class GitFat(object): # skip symbolic links continue # This file will contain the hash of the cleaned object - hashfile = os.path.join(self.gitdir, 'fat', 'index-filter', blobhash) + hashfile = os.path.join(self.gitdir, b'fat', b'index-filter', blobhash) try: cleanedobj = open(hashfile).read().rstrip() except IOError: From 3372983bffa3a1fb490c1791312b91b108e5f11d Mon Sep 17 00:00:00 2001 From: Will Kelleher Date: Tue, 8 Apr 2014 18:10:11 -0500 Subject: [PATCH 4/5] Continue to function if boto is not installed --- git-fat | 122 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 64 insertions(+), 58 deletions(-) diff --git a/git-fat b/git-fat index 42f44f4..0a20289 100755 --- a/git-fat +++ b/git-fat @@ -14,8 +14,6 @@ import itertools import threading import time import collections -from boto.s3.connection import S3Connection -from boto.s3.key import Key try: from subprocess import check_output @@ -42,6 +40,70 @@ except ImportError: return output subprocess.check_output = backport_check_output +try: + from boto.s3.connection import S3Connection + from boto.s3.key import Key + class S3Backend(object): + + def __init__(self,bucket,key,secret,objdir): + self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore + self.bucket = bucket + self.key = key + self.secret = secret + self.objdir = objdir + + def get_bucket(self): + conn = S3Connection(self.key, self.secret) + bkt = conn.get_bucket(self.bucket) + return bkt + + def pull(self,files): + bkt = self.get_bucket() + for file in files: + localfile = os.path.abspath(os.path.join(self.objdir,file)) + if os.path.isfile(localfile): + self.verbose('Object %s already exists, skipping.' % file) + else: + self.verbose('Getting object %s from s3 bucket %s' % (file,self.bucket)) + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + try: + k.get_contents_to_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during download, make sure the partial + # download is removed. + os.remove(localfile) + raise + + def push(self,files): + bkt = self.get_bucket() + for file in files: + k = bkt.get_key(file) + if bkt.get_key(file): + self.verbose('Object %s already exists in bucket %s, skipping.' % (file,self.bucket)) + else: + k = Key(bkt) + k.key = file + localfile = os.path.abspath(os.path.join(self.objdir,file)) + self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) + try: + k.set_contents_from_filename(localfile, + cb=S3Counter(), + num_cb=500) + except KeyboardInterrupt: + # If we cancel during upload, delete the partially uploaded + # remote object. Otherwise we'll have problems later. + k.delete() + raise +except ImportError: + class S3Backend(object): + + def __init__(self,bucket,key,secret,objdir): + raise RuntimeError("S3Backend requires boto.") + BLOCK_SIZE = 4096 def verbose_stderr(*args, **kwargs): @@ -191,62 +253,6 @@ class RsyncBackend(object): p = subprocess.Popen(cmd, stdin=subprocess.PIPE) p.communicate(input=b'\x00'.join(files)) -class S3Backend(object): - - def __init__(self,bucket,key,secret,objdir): - self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore - self.bucket = bucket - self.key = key - self.secret = secret - self.objdir = objdir - - def get_bucket(self): - conn = S3Connection(self.key, self.secret) - bkt = conn.get_bucket(self.bucket) - return bkt - - def pull(self,files): - bkt = self.get_bucket() - for file in files: - localfile = os.path.abspath(os.path.join(self.objdir,file)) - if os.path.isfile(localfile): - self.verbose('Object %s already exists, skipping.' % file) - else: - self.verbose('Getting object %s from s3 bucket %s' % (file,self.bucket)) - k = Key(bkt) - k.key = file - localfile = os.path.abspath(os.path.join(self.objdir,file)) - try: - k.get_contents_to_filename(localfile, - cb=S3Counter(), - num_cb=500) - except KeyboardInterrupt: - # If we cancel during download, make sure the partial - # download is removed. - os.remove(localfile) - raise - - def push(self,files): - bkt = self.get_bucket() - for file in files: - k = bkt.get_key(file) - if bkt.get_key(file): - self.verbose('Object %s already exists in bucket %s, skipping.' % (file,self.bucket)) - else: - k = Key(bkt) - k.key = file - localfile = os.path.abspath(os.path.join(self.objdir,file)) - self.verbose('Uploading object %s to s3 bucket %s' % (file,self.bucket)) - try: - k.set_contents_from_filename(localfile, - cb=S3Counter(), - num_cb=500) - except KeyboardInterrupt: - # If we cancel during upload, delete the partially uploaded - # remote object. Otherwise we'll have problems later. - k.delete() - raise - class GitFat(object): DecodeError = RuntimeError def __init__(self): From 8a9775f8bdce69d5776fd906c17d37e0fe8d6080 Mon Sep 17 00:00:00 2001 From: Dennis Lin Date: Wed, 9 Apr 2014 12:13:02 -0500 Subject: [PATCH 5/5] s3: Try to use environment variables if s3.{key,bucket} are undefined --- git-fat | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/git-fat b/git-fat index 0a20289..03df2ce 100755 --- a/git-fat +++ b/git-fat @@ -287,10 +287,16 @@ class GitFat(object): bucket = gitconfig_get('s3.bucket', file=cfgpath) key = gitconfig_get('s3.key', file=cfgpath) if key is None: - raise RuntimeError('No s3.key in %s' % cfgpath) + try: + key = os.environ['AWS_ACCESS_KEY_ID'] + except KeyError: + raise RuntimeError('No s3.key in %s' % cfgpath) secret = gitconfig_get('s3.secret', file=cfgpath) if secret is None: - raise RuntimeError('No s3.secret in %s' % cfgpath) + try: + secret = os.environ['AWS_SECRET_ACCESS_KEY'] + except KeyError: + raise RuntimeError('No s3.secret in %s' % cfgpath) return S3Backend(bucket,key,secret,objdir) else: raise RuntimeError('No supported backends specified in %s' % cfgpath)