Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Refactor the "git fat checkout" operation (and therefore also "git fat pull") to significantly improve its speed.

The main change here is that git checkout-index is now called only a single time for all files.
In some tests the performance improvement is six-fold.

With this change, it no longer makes sense to print each file as it is being processed,
so only the number of files that need to be restored is shown.
The actual filenames are printed only if git-fat is verbose.
  • Loading branch information
purdeaandrei committed Jul 13, 2015
commit be6cbbefaf6c204a6564154ceebd0942d6aaf46a
47 changes: 32 additions & 15 deletions git-fat
Original file line number Diff line number Diff line change
Expand Up @@ -403,21 +403,38 @@ class GitFat(object):
def checkout(self, show_orphans=False):
    """Update any stale files in the present working tree.

    Every orphan file whose object is readable under ``self.objdir`` is
    touched and then re-smudged by a single ``git checkout-index`` call
    covering all such files.

    Args:
        show_orphans: When true, print a 'Data unavailable' line for each
            orphan file whose object cannot be read.

    Raises:
        subprocess.CalledProcessError: If ``git checkout-index`` exits
            with a non-zero status.
    """
    self.assert_init_done()

    # Split the orphans into those we can restore and those whose object
    # is not readable locally.
    filenames_to_restore = []
    unavailable = []
    for digest, fname in self.orphan_files():
        if os.access(os.path.join(self.objdir, digest), os.R_OK):
            filenames_to_restore.append(fname)
        else:
            unavailable.append((digest, fname))

    for fname in filenames_to_restore:
        # The output of our smudge filter depends on the existence of
        # the file in .git/fat/objects, but git caches the file stat
        # from the previous time the file was smudged, therefore it
        # won't try to re-smudge. I don't know a git command that
        # specifically invalidates that cache, but touching the file
        # also does the trick.
        os.utime(fname, None)

    if show_orphans:
        for digest, fname in unavailable:
            print('Data unavailable: %s %s' % (digest, fname))

    print('Restoring %d Files' % (len(filenames_to_restore),))
    self.verbose("\n".join(filenames_to_restore) + "\n")

    if not filenames_to_restore:
        return

    # Feed the paths NUL-terminated (-z) rather than one per line, so that
    # names containing newlines or characters git would otherwise expect
    # to be C-quoted are passed through verbatim.
    payload = "".join(fname + "\0" for fname in filenames_to_restore)
    if not isinstance(payload, bytes):
        # The pipe is binary; a text string must be encoded before writing.
        # NOTE(review): assumes UTF-8 path names -- confirm against how
        # orphan_files() decodes git's output.
        payload = payload.encode('utf-8')

    # This re-smudge is essentially a copy that restores permissions.
    cmd = ['git', 'checkout-index', '--stdin', '-z', '--index', '--force']
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    # communicate() waits for the process to exit, so returncode is set.
    p.communicate(payload)
    if p.returncode != 0:
        raise subprocess.CalledProcessError(p.returncode, " ".join(cmd))

def cmd_pull(self, args):
'Pull anything that I have referenced, but not stored'
self.setup()
Expand Down