// Copyright (c) .NET Foundation and Contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

#nullable enable

using System.Diagnostics.CodeAnalysis;
using System.IO.MemoryMappedFiles;
using System.Text;

namespace Nerdbank.GitVersioning.ManagedGit;

/// <summary>
/// Supports retrieving objects from a Git pack file.
/// </summary>
public class GitPack : IDisposable
{
    private readonly Func<FileStream> packStream;
    private readonly Lazy<FileStream> indexStream;
    private readonly GitPackCache cache;

    // Only populated on 64-bit processes, where the pack file is memory mapped (see the constructor).
    private readonly MemoryMappedFile? packFile;
    private readonly MemoryMappedViewAccessor? accessor;

    // Maps GitObjectIds to offsets in the git pack.
    private readonly Dictionary<GitObjectId, long> offsets = new Dictionary<GitObjectId, long>();

    // A histogram which tracks the objects which have been retrieved from this GitPack. The key is the offset
    // of the object. Used to get some insights in usage patterns.
#if DEBUG
    private readonly Dictionary<long, int> histogram = new Dictionary<long, int>();
#endif

    private readonly Lazy<GitPackIndexReader> indexReader;

    /// <summary>
    /// Initializes a new instance of the <see cref="GitPack"/> class.
    /// </summary>
    /// <param name="getObjectFromRepositoryDelegate">
    /// A delegate which fetches objects from the Git object store.
    /// </param>
    /// <param name="indexPath">
    /// The full path to the index file.
    /// </param>
    /// <param name="packPath">
    /// The full path to the pack file.
    /// </param>
    /// <param name="cache">
    /// A <see cref="GitPackCache"/> which is used to cache objects which operate
    /// on the pack file.
    /// </param>
    public GitPack(GetObjectFromRepositoryDelegate getObjectFromRepositoryDelegate, string indexPath, string packPath, GitPackCache? cache = null)
        : this(getObjectFromRepositoryDelegate, new Lazy<FileStream>(() => File.OpenRead(indexPath)), () => File.OpenRead(packPath), cache)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="GitPack"/> class.
    /// </summary>
    /// <param name="getObjectFromRepositoryDelegate">
    /// A delegate which fetches objects from the Git object store.
    /// </param>
    /// <param name="indexStream">
    /// A function which creates a new <see cref="Stream"/> which provides read-only
    /// access to the index file.
    /// </param>
    /// <param name="packStream">
    /// A function which creates a new <see cref="Stream"/> which provides read-only
    /// access to the pack file.
    /// </param>
    /// <param name="cache">
    /// A <see cref="GitPackCache"/> which is used to cache objects which operate
    /// on the pack file. When <see langword="null"/>, a new <see cref="GitPackMemoryCache"/> is used.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="getObjectFromRepositoryDelegate"/>, <paramref name="packStream"/> or
    /// <paramref name="indexStream"/> is <see langword="null"/>.
    /// </exception>
    public GitPack(GetObjectFromRepositoryDelegate getObjectFromRepositoryDelegate, Lazy<FileStream> indexStream, Func<FileStream> packStream, GitPackCache? cache = null)
    {
        this.GetObjectFromRepository = getObjectFromRepositoryDelegate ?? throw new ArgumentNullException(nameof(getObjectFromRepositoryDelegate));
        this.packStream = packStream ?? throw new ArgumentNullException(nameof(packStream));
        this.indexStream = indexStream ?? throw new ArgumentNullException(nameof(indexStream));
        this.indexReader = new Lazy<GitPackIndexReader>(this.OpenIndex);
        this.cache = cache ?? new GitPackMemoryCache();

        // On 64-bit processes the address space is large enough to map the entire pack file.
        if (IntPtr.Size > 4)
        {
            FileStream stream = this.packStream();
            try
            {
                // leaveOpen: false hands ownership of the stream to the memory mapped file.
                this.packFile = MemoryMappedFile.CreateFromFile(stream, mapName: null, 0, MemoryMappedFileAccess.Read, HandleInheritability.None, leaveOpen: false);
                this.accessor = this.packFile.CreateViewAccessor(0, 0, MemoryMappedFileAccess.Read);
            }
            catch
            {
                // Dispose() will never run for a half-constructed instance, so release what was opened here.
                this.packFile?.Dispose();
                stream.Dispose();
                throw;
            }
        }
    }

    /// <summary>
    /// A delegate for methods which fetch objects from the Git object store.
    /// </summary>
    /// <param name="sha">
    /// The Git object ID of the object to fetch.
    /// </param>
    /// <param name="objectType">
    /// The object type of the object to fetch.
    /// </param>
    /// <returns>
    /// A <see cref="Stream"/> which represents the requested object.
    /// </returns>
    public delegate Stream? GetObjectFromRepositoryDelegate(GitObjectId sha, string objectType);

    /// <summary>
    /// Gets a delegate which fetches objects from the Git object store.
    /// </summary>
    public GetObjectFromRepositoryDelegate GetObjectFromRepository { get; private set; }

    /// <summary>
    /// Finds a git object using a partial object ID.
    /// </summary>
    /// <param name="objectId">
    /// A partial object ID.
    /// </param>
    /// <param name="endsWithHalfByte">
    /// Forwarded unchanged to the index reader's lookup.
    /// NOTE(review): presumably indicates that the partial ID ends in a single hex digit - confirm against the index reader.
    /// </param>
    /// <returns>
    /// If found, a full object ID which matches the partial object ID.
    /// Otherwise, <see langword="null"/>.
    /// </returns>
    public GitObjectId? Lookup(Span<byte> objectId, bool endsWithHalfByte = false)
    {
        (long? _, GitObjectId? actualObjectId) = this.indexReader.Value.GetOffset(objectId, endsWithHalfByte);
        return actualObjectId;
    }

    /// <summary>
    /// Attempts to retrieve a Git object from this Git pack.
    /// </summary>
    /// <param name="objectId">
    /// The Git object Id of the object to retrieve.
    /// </param>
    /// <param name="objectType">
    /// The object type of the object to retrieve.
    /// </param>
    /// <param name="value">
    /// If found, receives a <see cref="Stream"/> which represents the object.
    /// </param>
    /// <returns>
    /// <see langword="true"/> if the object was found; otherwise, <see langword="false"/>.
    /// </returns>
    public bool TryGetObject(GitObjectId objectId, string objectType, [NotNullWhen(true)] out Stream? value)
    {
        long? offset = this.GetOffset(objectId);
        if (offset is null)
        {
            value = null;
            return false;
        }

        // This try-catch should probably be replaced by a non-throwing GetObject implementation.
        // This is in turn dependent on a proper GitPackReader.TryGetObject implementation.
        try
        {
            value = this.GetObject(offset.Value, objectType);
            return true;
        }
        catch (GitException gexc) when (gexc.ErrorCode == GitException.ErrorCodes.ObjectNotFound)
        {
            value = null;
            return false;
        }
    }

    /// <summary>
    /// Gets a Git object at a specific offset.
    /// </summary>
    /// <param name="offset">
    /// The offset of the Git object, relative to the pack file.
    /// </param>
    /// <param name="objectType">
    /// The object type of the object to retrieve.
    /// </param>
    /// <returns>
    /// A <see cref="Stream"/> which represents the object.
    /// </returns>
    /// <exception cref="GitException">
    /// The cached object at <paramref name="offset"/> has a different type than <paramref name="objectType"/>,
    /// or <paramref name="objectType"/> is not one of <c>commit</c>, <c>tree</c>, <c>blob</c> or <c>tag</c>.
    /// </exception>
    public Stream GetObject(long offset, string objectType)
    {
#if DEBUG
        if (!this.histogram.TryAdd(offset, 1))
        {
            this.histogram[offset] += 1;
        }
#endif

        // Serve the object from the cache when possible; a type mismatch is reported as "not found".
        if (this.cache.TryOpen(offset, out (Stream ContentStream, string ObjectType)? hit))
        {
            if (hit.Value.ObjectType != objectType)
            {
                throw new GitException($"An object of type {objectType} could not be located at offset {offset}.") { ErrorCode = GitException.ErrorCodes.ObjectNotFound };
            }

            return hit.Value.ContentStream;
        }

        GitPackObjectType packObjectType = objectType switch
        {
            "commit" => GitPackObjectType.OBJ_COMMIT,
            "tree" => GitPackObjectType.OBJ_TREE,
            "blob" => GitPackObjectType.OBJ_BLOB,
            "tag" => GitPackObjectType.OBJ_TAG,
            _ => throw new GitException($"The object type '{objectType}' is not supported by the {nameof(GitPack)} class."),
        };

        Stream packStream = this.GetPackStream();
        Stream objectStream;

        try
        {
            objectStream = GitPackReader.GetObject(this, packStream, offset, objectType, packObjectType);
        }
        catch
        {
            // The reader did not take ownership of the pack stream, so release it before propagating.
            packStream.Dispose();
            throw;
        }

        return this.cache.Add(offset, objectStream, objectType);
    }

    /// <summary>
    /// Writes cache statistics to a <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="builder">
    /// A <see cref="StringBuilder"/> to which the cache statistics are written.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="builder"/> is <see langword="null"/>.
    /// </exception>
    public void GetCacheStatistics(StringBuilder builder)
    {
        if (builder is null)
        {
            throw new ArgumentNullException(nameof(builder));
        }

        builder.AppendLine("Git Pack:");

#if DEBUG
        // Single source of truth for both the heading and the number of entries listed.
        const int histogramCount = 25;

        builder.AppendLine($"Top {histogramCount} / {this.histogram.Count} items:");

        foreach (KeyValuePair<long, int> item in this.histogram.OrderByDescending(v => v.Value).Take(histogramCount))
        {
            builder.AppendLine($"  {item.Key}: {item.Value}");
        }

        builder.AppendLine();
#endif

        this.cache.GetCacheStatistics(builder);
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        // Only dispose the index reader if it was ever opened; touching Value would open it.
        if (this.indexReader.IsValueCreated)
        {
            this.indexReader.Value.Dispose();
        }

        // The view accessor is released before the mapped file it was created from.
        this.accessor?.Dispose();
        this.packFile?.Dispose();
        this.cache.Dispose();
    }

    /// <summary>
    /// Gets the offset of an object in the pack file, consulting the in-memory offset map before the index.
    /// </summary>
    /// <param name="objectId">
    /// The Git object Id of the object to locate.
    /// </param>
    /// <returns>
    /// The offset of the object, or <see langword="null"/> if the index does not contain the object.
    /// </returns>
    private long? GetOffset(GitObjectId objectId)
    {
        if (this.offsets.TryGetValue(objectId, out long cachedOffset))
        {
            return cachedOffset;
        }

        GitPackIndexReader indexReader = this.indexReader.Value;
        long? offset = indexReader.GetOffset(objectId);

        // Only successful lookups are remembered.
        if (offset is not null)
        {
            this.offsets.Add(objectId, offset.Value);
        }

        return offset;
    }

    /// <summary>
    /// Creates a stream over the pack file.
    /// </summary>
    /// <returns>
    /// A <see cref="Stream"/> which reads from the pack file.
    /// </returns>
    private Stream GetPackStream()
    {
        // On 64-bit processes, we can use Memory Mapped Streams (the address space
        // will be large enough to map the entire packfile). On 32-bit processes,
        // we directly access the underlying stream.
        // The accessor is created by the constructor exactly when IntPtr.Size > 4.
        if (this.accessor is not null)
        {
            return new MemoryMappedStream(this.accessor);
        }

        return this.packStream();
    }

    /// <summary>
    /// Opens the index file. Used as the factory for the lazily created index reader.
    /// </summary>
    /// <returns>
    /// A <see cref="GitPackIndexReader"/> over the index stream.
    /// </returns>
    private GitPackIndexReader OpenIndex()
    {
        return new GitPackIndexMappedReader(this.indexStream.Value);
    }
}