
Commit ebb6568

Added support for large tensors, save safetensors on CPU
1 parent 27e0a6b commit ebb6568

File tree: 6 files changed (+34, −35 lines)


TorchSharp.PyBridge.Tests/TorchSharp.PyBridge.Tests.csproj

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@
     <PackageReference Include="NUnit3TestAdapter" Version="4.4.2" />
     <PackageReference Include="NUnit.Analyzers" Version="3.6.1" />
     <PackageReference Include="coverlet.collector" Version="3.2.0" />
-    <PackageReference Include="TorchSharp-cpu" Version="0.101.3" />
+    <PackageReference Include="TorchSharp-cpu" Version="0.102.0" />
   </ItemGroup>

   <ItemGroup>

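If you're applying the same backend bump to your own test project, the NuGet CLI equivalent of the edit above is:

    dotnet add package TorchSharp-cpu --version 0.102.0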
TorchSharp.PyBridge/PyBridgeModuleExtensions.cs

Lines changed: 2 additions & 0 deletions
@@ -121,6 +121,7 @@ public static Module load_py(this Module module, string location, bool strict =
 public static Module load_py(this Module module, System.IO.Stream stream, bool strict = true, IList<string>? skip = null, Dictionary<string, bool>? loadedParameters = null, bool leaveOpen = false) {
     // Create a dispose scope so that we don't keep any of the loaded tensors past this function
     using var d = torch.NewDisposeScope();
+    using var d2 = torch.no_grad(); // To circumvent a bug introduced in 0.102.0

     // Unpickle the state dictionary into memory
     var stateHashtable = PyTorchUnpickler.UnpickleStateDict(stream, leaveOpen);
@@ -182,6 +183,7 @@ public static Module load_safetensors(this Module module, string location, bool
 public static Module load_safetensors(this Module module, System.IO.Stream stream, bool strict = true, IList<string>? skip = null, Dictionary<string, bool>? loadedParameters = null, bool leaveOpen = false) {
     // Create a dispose scope so that we don't keep any of the loaded tensors past this function
     using var d = torch.NewDisposeScope();
+    using var d2 = torch.no_grad(); // To circumvent a bug introduced in 0.102.0

     // Retrieve the current state dict of the module, so that we can make sure to only load the relevant
     // tensors from the file.

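For context, this is how the patched extensions are reached from user code; a minimal sketch, where `MyModel` and the file names are hypothetical stand-ins:

    using TorchSharp;
    using TorchSharp.PyBridge;

    // `MyModel` is any Module subclass; file names are placeholders.
    var model = new MyModel();
    // Tensors are unpickled inside a dispose scope (and under no_grad, per the
    // workaround above), copied into the module's parameters, then released.
    model.load_py("pytorch_model.bin");
    model.load_safetensors("model.safetensors");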
TorchSharp.PyBridge/PyTorchPickler.cs

Lines changed: 2 additions & 2 deletions
@@ -95,8 +95,8 @@ protected override bool persistentId(object pid, out object? newpid) {
     // Start by serializing the object to a file in the archive
     var entry = _archive.CreateEntry($"model/data/{_tensorCount}");
     using (var stream = entry.Open())
-        stream.Write(tensor.bytes.ToArray(), 0, tensor.bytes.Length);
-
+        tensor.WriteBytesToStream(stream);
+
     // Collect the items for our persistentId, as above.
     newpid = new object[] {
         "storage",

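The point of this change: `tensor.bytes` materializes the whole storage as a single .NET buffer, which cannot exceed int.MaxValue bytes, so tensors over 2GB could not be written. `WriteBytesToStream` (new in TorchSharp 0.102.0) streams the native storage directly. A minimal sketch of the same idea, with an illustrative chunk size and names that are not TorchSharp internals:

    using System;
    using System.IO;

    // Copy `totalBytes` from src to dest in 64 MB chunks, never holding a
    // single >2GB array - the pattern WriteBytesToStream applies to native
    // tensor storage.
    static void CopyInChunks(Stream src, Stream dest, long totalBytes)
    {
        var buffer = new byte[64 * 1024 * 1024];
        for (long remaining = totalBytes; remaining > 0; ) {
            int read = src.Read(buffer, 0, (int)Math.Min(buffer.Length, remaining));
            if (read <= 0) throw new EndOfStreamException("source ended early");
            dest.Write(buffer, 0, read);
            remaining -= read;
        }
    }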
TorchSharp.PyBridge/PyTorchUnpickler.cs

Lines changed: 7 additions & 19 deletions
@@ -81,30 +81,17 @@ protected override object persistentLoad(object pid) {
     string archiveKey = (string)opid[2];
     // Tuple Item3: location (cpu/gpu), but we always load onto CPU.
     // Tuple Item4: numElems (the number of elements in the tensor)
-    int numElem = (int)opid[4];
-
+
     // Convert the storage name into the relevant scalar type (e.g., LongStorage => torch.long)
     // and then check how many bytes each element is
     var dtype = GetScalarTypeFromStorageName(storageType);
-    var elemSize = (int)torch.empty(0, dtype).ElementSize;
-
-    int totalSize = numElem * elemSize;
-
-    //
-    // TODO: Fix this so that you can read large tensors. Right now, they are limited to 2GB
-    //
-    if (totalSize > int.MaxValue)
-        throw new NotImplementedException("Loading tensors larger than 2GB");
-
+
     // Retrieve the entry from the archive
     var entry = _archive.Entries.First(f => f.FullName.EndsWith($"data/{archiveKey}"));
-    // Read in the relevant bytes from the entry
-    var bytesBuffer = new byte[totalSize];
-    entry!.Open().Read(bytesBuffer, 0, totalSize);
-
+
     // Send this back, so our TensorObjectConstructor can create our torch.tensor from the object.
     return new TensorObject() {
-        data = bytesBuffer,
+        data = entry!.Open(),
         dtype = dtype
     };
 }
@@ -176,7 +163,8 @@ public object construct(object[] args) {
     torch.Tensor t = shape.Length == 0 ? torch.zeros(1, arg0.dtype)
         : torch.WrappedTensorDisposeScope(() =>
             torch.zeros(shape, arg0.dtype).as_strided(shape, stride, storageOffset));
-    t.bytes = arg0.data;
+    t.ReadBytesFromStream(arg0.data);
+    arg0.data.Close();
     return t;
 }
 }
@@ -201,7 +189,7 @@ public object construct(object[] args) {
 /// Therefore, this class is a simple wrapper for the bytes + dtype of the storage.
 /// </summary>
 class TensorObject {
-    public byte[]? data { get; set; }
+    public Stream data { get; set; }
     public torch.ScalarType dtype { get; set; }
 }
 }

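End to end, the unpickler now hands the raw zip entry stream to the tensor constructor instead of a byte[], and `ReadBytesFromStream` fills the storage in place. A small round-trip sketch of those two TorchSharp 0.102.0 calls (sizes illustrative):

    using System.IO;
    using TorchSharp;

    var src = torch.rand(1024, 1024);     // illustrative; works past 2GB too
    using var ms = new MemoryStream();
    src.WriteBytesToStream(ms);           // raw storage out, no byte[] detour

    ms.Position = 0;
    var dst = torch.empty(1024, 1024);
    dst.ReadBytesFromStream(ms);          // fills dst's storage in place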
TorchSharp.PyBridge/Safetensors.cs

Lines changed: 13 additions & 8 deletions
@@ -29,14 +29,14 @@ static class Safetensors {

     var tensor = torch.empty(kvp.Value.Shape, dtype: ConvertToTorchDType(kvp.Value.DataType));

-    // Make sure the length isn't > int.MaxValue, since .NET has the 2GB limit
+    // Make sure the length matches the number of bytes to load
     long length = kvp.Value.Offsets[1] - kvp.Value.Offsets[0];
-    if (length > int.MaxValue)
-        throw new NotImplementedException("Loading tensors larger than 2GB");
+    if (length != tensor.ElementSize * tensor.NumberOfElements)
+        throw new NotImplementedException($"Error when loading tensor {kvp.Key} - mismatched # of elements");

     stream.Position = offset + kvp.Value.Offsets[0];
-    tensor.bytes = stream.ReadBytes((int)length);
-
+    tensor.ReadBytesFromStream(stream);
+
     ret.Add(kvp.Key, tensor);
 }

@@ -75,9 +75,14 @@ public static void SaveStateDict(Stream stream, Dictionary<string, torch.Tensor>
     var br = new BinaryWriter(stream);
     br.Write((ulong)indexJson.Length);
     br.Write(indexJson);
-    foreach (var kvp in orderedState)
-        br.Write(kvp.Value.bytes);
-
+    foreach (var kvp in orderedState) {
+        if (kvp.Value.device.type == DeviceType.CPU)
+            kvp.Value.WriteBytesToStream(stream);
+        else {
+            using var tmp = kvp.Value.cpu();
+            tmp.WriteBytesToStream(stream);
+        }
+    }
     if (!leaveOpen)
         br.Close();
 }

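In practice this means `save_safetensors` no longer requires moving the whole model to the CPU first; a usage sketch, where `MyModel` is hypothetical and a CUDA-enabled TorchSharp backend is assumed:

    using TorchSharp;
    using TorchSharp.PyBridge;

    // `MyModel` is a hypothetical Module subclass living on the GPU.
    var model = new MyModel().cuda();
    // Each non-CPU tensor is copied to a temporary CPU tensor, streamed into
    // the safetensors payload, and disposed immediately afterwards.
    model.save_safetensors("model.safetensors");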
TorchSharp.PyBridge/TorchSharp.PyBridge.csproj

Lines changed: 9 additions & 5 deletions
@@ -11,7 +11,7 @@

   <ItemGroup>
     <PackageReference Include="Razorvine.Pickle" Version="1.5.0" />
-    <PackageReference Include="TorchSharp" Version="[0.101.3,)" PrivateAssets="All" />
+    <PackageReference Include="TorchSharp" Version="[0.102.0,)" PrivateAssets="All" />
     <PackageReference Include="TqdmSharp" Version="1.3.3" />
   </ItemGroup>

@@ -22,10 +22,14 @@
     <PackageProjectUrl>https://github.com/shaltielshmid/TorchSharp.PyBridge</PackageProjectUrl>
     <RepositoryUrl>https://github.com/shaltielshmid/TorchSharp.PyBridge.git</RepositoryUrl>
     <RepositoryType>git</RepositoryType>
-    <Version>1.2.0</Version>
-    <AssemblyVersion>1.2.0.0</AssemblyVersion>
-    <FileVersion>1.2.0.0</FileVersion>
-    <PackageReleaseNotes>1.2.0: Added `load_safetensors` and `save_safetensors` extensions for modules.</PackageReleaseNotes>
+    <Version>1.3.0</Version>
+    <AssemblyVersion>1.3.0.0</AssemblyVersion>
+    <FileVersion>1.3.0.0</FileVersion>
+    <PackageReleaseNotes>
+      1.3.0:
+      - Added support for loading tensors that are greater than 2GB (following the update in TorchSharp 0.102.0)
+      - Added support for loading and saving safetensors when the model isn't on CPU.
+    </PackageReleaseNotes>
   </PropertyGroup>

   <ItemGroup>

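Note that TorchSharp is referenced with PrivateAssets="All", so it does not flow to consumers as a transitive dependency; a consuming project is expected to reference the library together with a backend of its choice, e.g.:

    <ItemGroup>
      <!-- Versions shown match this release; adjust as needed. -->
      <PackageReference Include="TorchSharp.PyBridge" Version="1.3.0" />
      <PackageReference Include="TorchSharp-cpu" Version="0.102.0" />
    </ItemGroup>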