From 17df5108fcffdaf2b591b6dd6e3f76d170036bb3 Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Thu, 12 Sep 2019 13:41:21 -0600 Subject: [PATCH 01/11] Python3 support. Passes tests but they need to be updated to call specific ones --- git-fat | 71 +++++++++++++++++++++++++-------------------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/git-fat b/git-fat index 135f4e2..664722b 100755 --- a/git-fat +++ b/git-fat @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- mode:python -*- -from __future__ import print_function, with_statement +from __future__ import print_function,unicode_literals import sys import hashlib @@ -15,37 +15,24 @@ import threading import time import collections -if not type(sys.version_info) is tuple and sys.version_info.major > 2: - sys.stderr.write('git-fat does not support Python-3 yet. Please use python2.\n') - sys.exit(1) +# if not type(sys.version_info) is tuple and sys.version_info.major > 2: +# sys.stderr.write('git-fat does not support Python-3 yet. Please use python2.\n') +# sys.exit(1) -try: - from subprocess import check_output - del check_output -except ImportError: - def backport_check_output(*popenargs, **kwargs): - r"""Run command with arguments and return its output as a byte string. - - Backported from Python 2.7 as it's implemented as pure python on stdlib. 
- - >>> check_output(['/usr/bin/python', '--version']) - Python 2.6.2 - """ - process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) - output, unused_err = process.communicate() - retcode = process.poll() - if retcode: - cmd = kwargs.get("args") - if cmd is None: - cmd = popenargs[0] - error = subprocess.CalledProcessError(retcode, cmd) - error.output = output - raise error - return output - subprocess.check_output = backport_check_output +if sys.version_info[0] > 2: + unicode = str +else: + from io import open BLOCK_SIZE = 4096 +def uni(s,encoding='utf8'): + """Automate unicode conversion""" + if isinstance(s,(str,unicode)): + return s + if hasattr(s,'decode'): + return s.decode(encoding) + def verbose_stderr(*args, **kwargs): return print(*args, file=sys.stderr, **kwargs) def verbose_ignore(*args, **kwargs): @@ -108,7 +95,7 @@ def gitconfig_get(name, file=None): args += ['--file', file] args.append(name) p = subprocess.Popen(args, stdout=subprocess.PIPE) - output = p.communicate()[0].strip() + output = uni(p.communicate()[0].strip()) if p.returncode and file is None: return None elif p.returncode: @@ -127,17 +114,17 @@ class GitFat(object): def __init__(self): self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore try: - self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() + self.gitroot = uni(subprocess.check_output('git rev-parse --show-toplevel'.split()).strip()) except subprocess.CalledProcessError: sys.exit(1) - self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip() + self.gitdir = uni(subprocess.check_output('git rev-parse --git-dir'.split()).strip()) self.objdir = os.path.join(self.gitdir, 'fat', 'objects') if os.environ.get('GIT_FAT_VERSION') == '1': self.encode = self.encode_v1 else: self.encode = self.encode_v2 def magiclen(enc): - return len(enc(hashlib.sha1('dummy').hexdigest(), 5)) + return len(enc(hashlib.sha1(b'dummy').hexdigest(), 5)) 
self.magiclen = magiclen(self.encode) # Current version self.magiclens = [magiclen(enc) for enc in [self.encode_v1, self.encode_v2]] # All prior versions def setup(self): @@ -189,6 +176,7 @@ class GitFat(object): 'Produce representation of file to be stored in repository. 20 characters can hold 64-bit integers.' return '#$# git-fat %s %20d\n' % (digest, bytes) def decode(self, string, noraise=False): + string = uni(string) cookie = '#$# git-fat ' if string.startswith(cookie): parts = string[len(cookie):].split() @@ -249,7 +237,7 @@ class GitFat(object): ishanging = True # Working tree version is verbatim from repository (not smudged) outstream = outstreamclean firstblock = False - h.update(block) + h.update(block.encode('utf8')) bytes += len(block) outstream.write(block) outstream.flush() @@ -304,15 +292,17 @@ class GitFat(object): p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE) def cut_sha1hash(input, output): for line in input: - output.write(line.split()[0] + '\n') + line = uni(line) + output.write((line.split()[0] + '\n').encode('utf8')) output.close() # ...`cat-file --batch-check` filters for git-fat object candidates in bulk... 
p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) def filter_gitfat_candidates(input, output): for line in input: + line = uni(line) objhash, objtype, size = line.split() if objtype == 'blob' and int(size) in self.magiclens: - output.write(objhash + '\n') + output.write((objhash + '\n').encode('utf8')) output.close() # ...`cat-file --batch` provides full contents of git-fat candidates in bulk p3 = subprocess.Popen(['git','cat-file','--batch'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) @@ -330,7 +320,7 @@ class GitFat(object): size, bytes_read = int(size_str), 0 # We know from filter that item is a candidate git-fat object and # is small enough to read into memory and process - content = '' + content = b'' while bytes_read < size: data = p3.stdout.read(size - bytes_read) if not data: @@ -361,7 +351,8 @@ class GitFat(object): 'generator for all orphan placeholders in the working tree' if not patterns or patterns == ['']: patterns = ['.'] - for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split('\x00')[:-1]: + for fname in subprocess.check_output(['git', 'ls-files', '-z'] + patterns).split(b'\x00')[:-1]: + fname = uni(fname) digest = self.decode_file(fname)[0] if digest: yield (digest, fname) @@ -398,7 +389,7 @@ class GitFat(object): cmd = self.get_rsync_command(push=True) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + p.communicate(input=b'\x00'.join(f.encode('utf8') for f in files)) if p.returncode: sys.exit(p.returncode) def checkout(self, show_orphans=False): @@ -442,7 +433,7 @@ class GitFat(object): cmd = self.get_rsync_command(push=False) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input='\x00'.join(files)) + p.communicate(input=b'\x00'.join(f.encode('utf8') for f in files)) if p.returncode: sys.exit(p.returncode) 
self.checkout() @@ -481,7 +472,7 @@ class GitFat(object): fname = os.path.join(self.objdir, obj) h = hashlib.new('sha1') for block in readblocks(open(fname)): - h.update(block) + h.update(block.encode('utf8')) data_hash = h.hexdigest() if obj != data_hash: corrupted_objects.append((obj, data_hash)) From 14974627f365434f22d0b034dfdbdbd222ff81f1 Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Thu, 12 Sep 2019 14:11:03 -0600 Subject: [PATCH 02/11] Added test runner but need to fix failed test --- .gitignore | 2 ++ run_test.py | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100755 run_test.py diff --git a/.gitignore b/.gitignore index b25c15b..f25b919 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *~ +TEST_py* +fat-test*/ diff --git a/run_test.py b/run_test.py new file mode 100755 index 0000000..c43450f --- /dev/null +++ b/run_test.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# -*- mode:python -*- +""" +Simple runner for test.sh but it modifies it to explicitly test python2 and 3 +""" +from __future__ import print_function,unicode_literals +import sys +import os +import shutil +import subprocess + +if sys.version_info[0] <= 2: + from io import open + + +# Build a dead-simple CLI. Not worth argparse, etc +help="""\ +Run tests with specific versions + + $ ./run_test.py # Both Python 2 and 3 + $ ./run_test.py 2 # Only python2 + $ ./run_test.py 3 # Only python3 + +Any argument specified will be appended to the git-fat shebang.
For example + + $ ./run_test.py 2.6 + +will change the shebang to + + #!/usr/bin/env python2.6 + +Or specify more than one: + + $ ./run_test.py 2 3 2.6 + +""" +vers = sys.argv[1:] +if len(vers) == 0: + vers = ['2','3'] + +if '-h' in vers or '--help' in vers: + print(help) + sys.exit() + +for ver in vers: + print('-='*20) + print('Testing %s' % ver) + print('-_'*20) + + testdir = 'TEST_py%s' % ver + testdir = os.path.abspath(testdir) + + # Delete the prior test dir and make a new one + if os.path.isdir(testdir): + shutil.rmtree(testdir) + os.makedirs(testdir) + + shebang = '#!/usr/bin/env python%s\n' % ver + pathline = 'export PATH=%s:$PATH\n' % testdir + + testfile = os.path.join(testdir,'test%s.sh' % ver) + fatfile = os.path.join(testdir,'git-fat') + + # Write the files. Do not use multiple with's to support 2.6 + with open('git-fat','rt') as infile: + with open(fatfile,'wt') as outfile: + infile.readline() # Skip shebang + outfile.write(shebang) + outfile.write(infile.read()) + + with open('test.sh','rt') as infile: + with open(testfile,'wt') as outfile: + outfile.write(infile.readline()) # copy shebang + outfile.write(pathline) + outfile.write(infile.read()) + + os.chmod(fatfile, 509) + os.chmod(testfile, 509) + + try: + subprocess.check_call(['./test%s.sh' % ver],cwd=testdir) + except subprocess.CalledProcessError as err: + print('F'*60) + print(err,file=sys.stderr) + print('FAILED python %s'%ver,file=sys.stderr) + sys.exit(1) + + + + + + From 89cc532b6e5e75b79ccd38d77e227d082c9fd44d Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Thu, 12 Sep 2019 15:06:07 -0600 Subject: [PATCH 03/11] Fixed issue with python2. 
Issue with instance check for unicode --- git-fat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-fat b/git-fat index 664722b..4cd93a9 100755 --- a/git-fat +++ b/git-fat @@ -208,7 +208,7 @@ class GitFat(object): digest, bytes = self.decode_stream(open(fname)) except IOError: return False, None - if isinstance(digest, str): + if isinstance(digest, (str,unicode)): return digest, bytes else: return None, bytes @@ -269,7 +269,7 @@ class GitFat(object): def cmd_filter_smudge(self): self.setup() result, bytes = self.decode_stream(sys.stdin) - if isinstance(result, str): # We got a digest + if isinstance(result, (str,unicode)): # We got a digest objfile = os.path.join(self.objdir, result) try: cat(open(objfile), sys.stdout) From 2f6c8c6e78c9f73355c6b4f2c2a1349bbe947e5f Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Thu, 12 Sep 2019 15:33:20 -0600 Subject: [PATCH 04/11] Readded the 2.6 backport but I can't get this OR the original to run in 2.6 --- git-fat | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/git-fat b/git-fat index 4cd93a9..1a36862 100755 --- a/git-fat +++ b/git-fat @@ -15,15 +15,39 @@ import threading import time import collections -# if not type(sys.version_info) is tuple and sys.version_info.major > 2: -# sys.stderr.write('git-fat does not support Python-3 yet. Please use python2.\n') -# sys.exit(1) if sys.version_info[0] > 2: unicode = str else: from io import open +try: + from subprocess import check_output + del check_output +except ImportError: + def backport_check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + Backported from Python 2.7 as it's implemented as pure python on stdlib. 
+ + >>> check_output(['/usr/bin/python', '--version']) + Python 2.6.2 + """ + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + error = subprocess.CalledProcessError(retcode, cmd) + error.output = output + raise error + return output + + subprocess.check_output = backport_check_output + + BLOCK_SIZE = 4096 def uni(s,encoding='utf8'): From 6bc3ef8aa7997a3969a8ceac48a13b51ca27c18b Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Thu, 12 Sep 2019 15:52:19 -0600 Subject: [PATCH 05/11] Added retro test and fixed other --- git-fat | 9 +++++---- run_test.py | 20 +++++++++++++++++++- test-retroactive.sh | 4 ++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/git-fat b/git-fat index 1a36862..85a30a8 100755 --- a/git-fat +++ b/git-fat @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- mode:python -*- from __future__ import print_function,unicode_literals @@ -522,7 +522,7 @@ class GitFat(object): This truncates to one hash per line. 
""" for line in input: - output.write(line[:40] + '\n') + output.write(line[:40] + b'\n') output.close() revlist = subprocess.Popen(['git', 'rev-list', '--all', '--objects'], stdout=subprocess.PIPE, bufsize=-1) objcheck = subprocess.Popen(['git', 'cat-file', '--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=-1) @@ -570,6 +570,7 @@ class GitFat(object): lsfiles = subprocess.Popen(['git', 'ls-files', '-s'], stdout=subprocess.PIPE) updateindex = subprocess.Popen(['git', 'update-index', '--index-info'], stdin=subprocess.PIPE) for line in lsfiles.stdout: + line = uni(line) mode, sep, tail = line.partition(' ') blobhash, sep, tail = tail.partition(' ') stageno, sep, tail = tail.partition('\t') @@ -607,8 +608,8 @@ class GitFat(object): gitattributes_lines = [] gitattributes_extra = ['%s filter=fat -text' % line.split()[0] for line in filelist] hashobject = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - stdout, stderr = hashobject.communicate('\n'.join(gitattributes_lines + gitattributes_extra) + '\n') - updateindex.stdin.write('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes')) + stdout, stderr = hashobject.communicate(b'\n'.join(gitattributes_lines + gitattributes_extra) + b'\n') + updateindex.stdin.write( ('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes')).encode('utf8')) updateindex.stdin.close() lsfiles.wait() updateindex.wait() diff --git a/run_test.py b/run_test.py index c43450f..d41fc9f 100755 --- a/run_test.py +++ b/run_test.py @@ -59,6 +59,7 @@ pathline = 'export PATH=%s:$PATH\n' % testdir testfile = os.path.join(testdir,'test%s.sh' % ver) + testfileR = os.path.join(testdir,'test-retroactive%s.sh' % ver) fatfile = os.path.join(testdir,'git-fat') # Write the files. 
Do not use multiple with's to support 2.6 @@ -73,9 +74,16 @@ outfile.write(infile.readline()) # copy shebang outfile.write(pathline) outfile.write(infile.read()) + + with open('test-retroactive.sh','rt') as infile: + with open(testfileR,'wt') as outfile: + outfile.write(infile.readline()) # copy shebang + outfile.write(pathline) + outfile.write(infile.read()) os.chmod(fatfile, 509) os.chmod(testfile, 509) + os.chmod(testfileR, 509) try: subprocess.check_call(['./test%s.sh' % ver],cwd=testdir) @@ -85,7 +93,17 @@ print('FAILED python %s'%ver,file=sys.stderr) sys.exit(1) - + print('###################') + print('###### RETRO ######') + print('###################') + + try: + subprocess.check_call(['./test-retroactive%s.sh' % ver],cwd=testdir) + except subprocess.CalledProcessError as err: + print('F'*60) + print(err,file=sys.stderr) + print('FAILED RETRO python %s'%ver,file=sys.stderr) + sys.exit(1) diff --git a/test-retroactive.sh b/test-retroactive.sh index 51a38ec..d1df53c 100755 --- a/test-retroactive.sh +++ b/test-retroactive.sh @@ -25,12 +25,12 @@ git add .gitattributes git checkout . git commit -am'Import big files into git-fat' -git log --stat +#git log --stat git fat find 10000 | awk '{print $1}' > fat-files git filter-branch --index-filter "git fat index-filter $(fullpath fat-files) --manage-gitattributes" --tag-name-filter cat -- --all -git log --stat +#git log --stat git checkout HEAD^ rm * git checkout . 
From 566e505b68c30026469dce1157f741eb88a2a82b Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Thu, 12 Sep 2019 16:24:36 -0600 Subject: [PATCH 06/11] minor cleanup and changed back shebang from debugging --- git-fat | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/git-fat b/git-fat index 85a30a8..04a4656 100755 --- a/git-fat +++ b/git-fat @@ -1,6 +1,5 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # -*- mode:python -*- - from __future__ import print_function,unicode_literals import sys @@ -15,12 +14,11 @@ import threading import time import collections - if sys.version_info[0] > 2: unicode = str else: from io import open - + try: from subprocess import check_output del check_output @@ -47,7 +45,6 @@ except ImportError: subprocess.check_output = backport_check_output - BLOCK_SIZE = 4096 def uni(s,encoding='utf8'): From 06477f333e85e41c5a35c7e23db20de1e9fc1aea Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Wed, 18 Sep 2019 09:59:41 -0600 Subject: [PATCH 07/11] Tests on an actual binary file --- test.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test.sh b/test.sh index 0ee63ea..7a9b657 100755 --- a/test.sh +++ b/test.sh @@ -19,7 +19,9 @@ git commit -m'Initial fat repository' ln -s /oe/dss-oe/dss-add-ons-testing-build/deploy/licenses/common-licenses/GPL-3 c git add c git commit -m'add broken symlink' -echo 'fat content a' > a.fat +#echo 'fat content a' > a.fat +# Encode random bytes so it can't be interpreted as a string +echo "AEi2RiChMJ+N0hR+AJs89QDmu4D3kaKDnmlgy0YJqm8A"|base64 --decode > a.fat git add a.fat git commit -m'add a.fat' echo 'fat content b' > b.fat From c756f1b8ce415f27235fbf166762915c6f003994 Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Wed, 18 Sep 2019 11:10:32 -0600 Subject: [PATCH 08/11] Changed one of the files to be random bytes to make sure we're encoding properly --- test.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test.sh b/test.sh index 7a9b657..500481a 
100755 --- a/test.sh +++ b/test.sh @@ -27,10 +27,10 @@ git commit -m'add a.fat' echo 'fat content b' > b.fat git add b.fat git commit -m'add b.fat' -echo 'revise fat content a' > a.fat +#echo 'revise fat content a' > a.fat +echo "AEmBARZ2es0t0mPeXOLAjQCd4QlgPiKFJz9rPqqBiGQA"|base64 --decode > a.fat git commit -am'revise a.fat' git fat push - cd .. git clone fat-test fat-test2 cd fat-test2 @@ -49,7 +49,11 @@ then fi git fat init git fat pull -- 'a.fa*' -cat a.fat +t=$(cat a.fat | base64 ) +if [ "$t" != "AEmBARZ2es0t0mPeXOLAjQCd4QlgPiKFJz9rPqqBiGQA" ]; then + echo 'ERROR: "git fat pull" did not return binary file' + exit 1 +fi echo 'file which is committed and removed afterwards' > d git add d git commit -m'add d with normal content' From 87ba62243a429fd8f5dde39e645c3d1c1889aa27 Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Wed, 18 Sep 2019 11:11:39 -0600 Subject: [PATCH 09/11] Take II at python3 conversion --- git-fat | 112 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/git-fat b/git-fat index 04a4656..7ac2fbe 100755 --- a/git-fat +++ b/git-fat @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- mode:python -*- -from __future__ import print_function,unicode_literals +from __future__ import print_function, with_statement,unicode_literals import sys import hashlib @@ -19,6 +19,22 @@ if sys.version_info[0] > 2: else: from io import open +def touni(s,encoding='utf8'): + """Automate unicode conversion""" + if isinstance(s,(str,unicode)): + return s + if hasattr(s,'decode'): + return s.decode(encoding) + raise ValueError('Cound not decode') + +def tobytes(s,encoding='utf8'): + """Automatic byte conversion""" + if isinstance(s,bytes): + return s + if hasattr(s,'encode'): + return s.encode(encoding) + raise ValueError('Could not encode') + try: from subprocess import check_output del check_output @@ -40,20 +56,12 @@ except ImportError: cmd = popenargs[0] error = subprocess.CalledProcessError(retcode, cmd) 
error.output = output - raise error + raise error return output - subprocess.check_output = backport_check_output BLOCK_SIZE = 4096 -def uni(s,encoding='utf8'): - """Automate unicode conversion""" - if isinstance(s,(str,unicode)): - return s - if hasattr(s,'decode'): - return s.decode(encoding) - def verbose_stderr(*args, **kwargs): return print(*args, file=sys.stderr, **kwargs) def verbose_ignore(*args, **kwargs): @@ -98,6 +106,7 @@ def difftreez_reader(input): newread = input.read(BLOCK_SIZE) if not newread: break + newread = touni(newread) partial += newread while True: head, sep, partial = partial.partition('\0') @@ -116,13 +125,13 @@ def gitconfig_get(name, file=None): args += ['--file', file] args.append(name) p = subprocess.Popen(args, stdout=subprocess.PIPE) - output = uni(p.communicate()[0].strip()) + output = p.communicate()[0].strip() if p.returncode and file is None: return None elif p.returncode: return gitconfig_get(name) else: - return output + return touni(output) def gitconfig_set(name, value, file=None): args = ['git', 'config'] if file is not None: @@ -135,10 +144,12 @@ class GitFat(object): def __init__(self): self.verbose = verbose_stderr if os.environ.get('GIT_FAT_VERBOSE') else verbose_ignore try: - self.gitroot = uni(subprocess.check_output('git rev-parse --show-toplevel'.split()).strip()) + self.gitroot = subprocess.check_output('git rev-parse --show-toplevel'.split()).strip() + self.gitroot = touni(self.gitroot) except subprocess.CalledProcessError: sys.exit(1) - self.gitdir = uni(subprocess.check_output('git rev-parse --git-dir'.split()).strip()) + self.gitdir = subprocess.check_output('git rev-parse --git-dir'.split()).strip() + self.gitdir = touni(self.gitdir) self.objdir = os.path.join(self.gitdir, 'fat', 'objects') if os.environ.get('GIT_FAT_VERSION') == '1': self.encode = self.encode_v1 @@ -172,7 +183,6 @@ class GitFat(object): self.verbose('Pushing to %s' % (remote)) else: self.verbose('Pulling from %s' % (remote)) - cmd = ['rsync', 
'--progress', '--ignore-existing', '--from0', '--files-from=-'] rshopts = '' if ssh_user: @@ -189,7 +199,7 @@ class GitFat(object): cmd += [remote + '/', self.objdir + '/'] return cmd def revparse(self, revname): - return subprocess.check_output(['git', 'rev-parse', revname]).strip() + return touni(subprocess.check_output(['git', 'rev-parse', revname]).strip()) def encode_v1(self, digest, bytes): 'Produce legacy representation of file to be stored in repository.' return '#$# git-fat %s\n' % (digest,) @@ -197,8 +207,8 @@ class GitFat(object): 'Produce representation of file to be stored in repository. 20 characters can hold 64-bit integers.' return '#$# git-fat %s %20d\n' % (digest, bytes) def decode(self, string, noraise=False): - string = uni(string) cookie = '#$# git-fat ' + string = touni(string) if string.startswith(cookie): parts = string[len(cookie):].split() digest = parts[0] @@ -226,10 +236,10 @@ class GitFat(object): return False, None # read file try: - digest, bytes = self.decode_stream(open(fname)) + digest, bytes = self.decode_stream(open(fname,'rb')) except IOError: return False, None - if isinstance(digest, (str,unicode)): + if isinstance(digest, str): return digest, bytes else: return None, bytes @@ -249,7 +259,7 @@ class GitFat(object): try: ishanging = False cached = False # changes to True when file is cached - with os.fdopen(fd, 'w') as cache: + with os.fdopen(fd, 'wb') as cache: outstream = cache firstblock = True for block in readblocks(instream): @@ -258,7 +268,7 @@ class GitFat(object): ishanging = True # Working tree version is verbatim from repository (not smudged) outstream = outstreamclean firstblock = False - h.update(block.encode('utf8')) + h.update(block) bytes += len(block) outstream.write(block) outstream.flush() @@ -274,7 +284,7 @@ class GitFat(object): os.rename(tmpname, objfile) self.verbose('git-fat filter-clean: caching to %s' % objfile) cached = True - outstreamclean.write(self.encode(digest, bytes)) + 
outstreamclean.write(tobytes(self.encode(digest, bytes))) finally: if not cached: os.remove(tmpname) @@ -285,22 +295,30 @@ class GitFat(object): version of the file on stdin and produces the "clean" (repository) version on stdout. ''' self.setup() - self.filter_clean(sys.stdin, sys.stdout) + if hasattr(sys.stdin,'buffer'): + stdin,stdout = sys.stdin.buffer,sys.stdout.buffer + else: + stdin,stdout = sys.stdin,sys.stdout + self.filter_clean(stdin, stdout) def cmd_filter_smudge(self): self.setup() - result, bytes = self.decode_stream(sys.stdin) - if isinstance(result, (str,unicode)): # We got a digest + if hasattr(sys.stdin,'buffer'): + stdin,stdout = sys.stdin.buffer,sys.stdout.buffer + else: + stdin,stdout = sys.stdin,sys.stdout + result, bytes = self.decode_stream(stdin) + if isinstance(result, str): # We got a digest objfile = os.path.join(self.objdir, result) try: - cat(open(objfile), sys.stdout) + cat(open(objfile,'rb'), stdout) self.verbose('git-fat filter-smudge: restoring from %s' % objfile) except IOError: # file not found self.verbose('git-fat filter-smudge: fat object missing %s' % objfile) - sys.stdout.write(self.encode(result, bytes)) # could leave a better notice about how to recover this file + stdout.write(self.encode(result, bytes)) # could leave a better notice about how to recover this file else: # We have an iterable over the original input. 
self.verbose('git-fat filter-smudge: not a managed file') - cat_iter(result, sys.stdout) + cat_iter(result, stdout) def catalog_objects(self): return set(os.listdir(self.objdir)) def referenced_objects(self, rev=None, all=False): @@ -313,17 +331,18 @@ class GitFat(object): p1 = subprocess.Popen(['git','rev-list','--objects',rev], stdout=subprocess.PIPE) def cut_sha1hash(input, output): for line in input: - line = uni(line) - output.write((line.split()[0] + '\n').encode('utf8')) + line = touni(line) + line = line.split()[0] + '\n' + output.write(tobytes(line)) output.close() # ...`cat-file --batch-check` filters for git-fat object candidates in bulk... p2 = subprocess.Popen(['git','cat-file','--batch-check'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) def filter_gitfat_candidates(input, output): for line in input: - line = uni(line) + line = touni(line) objhash, objtype, size = line.split() if objtype == 'blob' and int(size) in self.magiclens: - output.write((objhash + '\n').encode('utf8')) + output.write(tobytes(objhash + '\n')) output.close() # ...`cat-file --batch` provides full contents of git-fat candidates in bulk p3 = subprocess.Popen(['git','cat-file','--batch'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) @@ -337,7 +356,7 @@ class GitFat(object): metadata_line = p3.stdout.readline() if not metadata_line: break # EOF - objhash, objtype, size_str = metadata_line.split() + objhash, objtype, size_str = touni(metadata_line).split() size, bytes_read = int(size_str), 0 # We know from filter that item is a candidate git-fat object and # is small enough to read into memory and process @@ -349,7 +368,7 @@ class GitFat(object): content += data bytes_read += len(data) try: - fathash = self.decode(content)[0] + fathash = touni(self.decode(content)[0]) referenced.add(fathash) except GitFat.DecodeError: pass @@ -373,7 +392,7 @@ class GitFat(object): if not patterns or patterns == ['']: patterns = ['.'] for fname in subprocess.check_output(['git', 'ls-files', '-z'] 
+ patterns).split(b'\x00')[:-1]: - fname = uni(fname) + fname = touni(fname) digest = self.decode_file(fname)[0] if digest: yield (digest, fname) @@ -410,7 +429,7 @@ class GitFat(object): cmd = self.get_rsync_command(push=True) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input=b'\x00'.join(f.encode('utf8') for f in files)) + p.communicate(input=b'\x00'.join(tobytes(file) for file in files)) if p.returncode: sys.exit(p.returncode) def checkout(self, show_orphans=False): @@ -454,7 +473,7 @@ class GitFat(object): cmd = self.get_rsync_command(push=False) self.verbose('Executing: %s' % ' '.join(cmd)) p = subprocess.Popen(cmd, stdin=subprocess.PIPE) - p.communicate(input=b'\x00'.join(f.encode('utf8') for f in files)) + p.communicate(input=b'\x00'.join(tobytes(file) for file in files)) if p.returncode: sys.exit(p.returncode) self.checkout() @@ -492,8 +511,8 @@ class GitFat(object): for obj in self.catalog_objects(): fname = os.path.join(self.objdir, obj) h = hashlib.new('sha1') - for block in readblocks(open(fname)): - h.update(block.encode('utf8')) + for block in readblocks(open(fname,'rb')): + h.update(block) data_hash = h.hexdigest() if obj != data_hash: corrupted_objects.append((obj, data_hash)) @@ -528,6 +547,7 @@ class GitFat(object): numblobs = 0; numlarge = 1 # Build dict with the sizes of all large blobs for line in objcheck.stdout: + line = touni(line) objhash, blob, size = line.split() if blob != 'blob': continue @@ -567,7 +587,7 @@ class GitFat(object): lsfiles = subprocess.Popen(['git', 'ls-files', '-s'], stdout=subprocess.PIPE) updateindex = subprocess.Popen(['git', 'update-index', '--index-info'], stdin=subprocess.PIPE) for line in lsfiles.stdout: - line = uni(line) + line = touni(line) mode, sep, tail = line.partition(' ') blobhash, sep, tail = tail.partition(' ') stageno, sep, tail = tail.partition('\t') @@ -589,24 +609,24 @@ class GitFat(object): hashobject.stdin.close() filterclean = 
threading.Thread(target=dofilter) filterclean.start() - cleanedobj = hashobject.stdout.read().rstrip() + cleanedobj = touni(hashobject.stdout.read()).rstrip() catfile.wait() hashobject.wait() filterclean.join() mkdir_p(os.path.dirname(hashfile)) open(hashfile, 'w').write(cleanedobj + '\n') - updateindex.stdin.write('%s %s %s\t%s\n' % (mode, cleanedobj, stageno, filename)) + updateindex.stdin.write(tobytes('%s %s %s\t%s\n' % (mode, cleanedobj, stageno, filename))) if manage_gitattributes: try: - mode, blobsha1, stageno, filename = subprocess.check_output(['git', 'ls-files', '-s', '.gitattributes']).split() - gitattributes_lines = subprocess.check_output(['git', 'cat-file', 'blob', blobsha1]).splitlines() + mode, blobsha1, stageno, filename = touni(subprocess.check_output(['git', 'ls-files', '-s', '.gitattributes'])).split() + gitattributes_lines = touni(subprocess.check_output(['git', 'cat-file', 'blob', blobsha1])).splitlines() except ValueError: # Nothing to unpack, thus no file mode, stageno = '100644', '0' gitattributes_lines = [] gitattributes_extra = ['%s filter=fat -text' % line.split()[0] for line in filelist] hashobject = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - stdout, stderr = hashobject.communicate(b'\n'.join(gitattributes_lines + gitattributes_extra) + b'\n') - updateindex.stdin.write( ('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes')).encode('utf8')) + stdout, stderr = hashobject.communicate(b'\n'.join(tobytes(l) for l in gitattributes_lines + gitattributes_extra) + b'\n') + updateindex.stdin.write(tobytes('%s %s %s\t%s\n' % (mode, stdout.strip(), stageno, '.gitattributes'))) updateindex.stdin.close() lsfiles.wait() updateindex.wait() From 773879f7e3c087326c714bf31061cf234149ce2b Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Tue, 8 Oct 2019 11:27:47 -0600 Subject: [PATCH 10/11] one more fix --- git-fat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/git-fat b/git-fat index 7ac2fbe..412146a 100755 --- a/git-fat +++ b/git-fat @@ -315,7 +315,7 @@ class GitFat(object): self.verbose('git-fat filter-smudge: restoring from %s' % objfile) except IOError: # file not found self.verbose('git-fat filter-smudge: fat object missing %s' % objfile) - stdout.write(self.encode(result, bytes)) # could leave a better notice about how to recover this file + stdout.write(tobytes(self.encode(result, bytes))) # could leave a better notice about how to recover this file else: # We have an iterable over the original input. self.verbose('git-fat filter-smudge: not a managed file') cat_iter(result, stdout) From 96c95b0d96db23da89d8b38f3f4acb8d16e32fe7 Mon Sep 17 00:00:00 2001 From: Jwink3101 Date: Mon, 5 Oct 2020 15:30:57 -0600 Subject: [PATCH 11/11] uncommented out section --- test-retroactive.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test-retroactive.sh b/test-retroactive.sh index d1df53c..51a38ec 100755 --- a/test-retroactive.sh +++ b/test-retroactive.sh @@ -25,12 +25,12 @@ git add .gitattributes git checkout . git commit -am'Import big files into git-fat' -#git log --stat +git log --stat git fat find 10000 | awk '{print $1}' > fat-files git filter-branch --index-filter "git fat index-filter $(fullpath fat-files) --manage-gitattributes" --tag-name-filter cat -- --all -#git log --stat +git log --stat git checkout HEAD^ rm * git checkout .