Skip to content

Commit a7a15ff

Browse files
authored
Port SymSGD trainer (dotnet#624)
* Port SymSGD trainer.
1 parent 4bd866e commit a7a15ff

35 files changed

+5154
-4
lines changed

src/Microsoft.ML.Console/Microsoft.ML.Console.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
1616
<ProjectReference Include="..\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj" />
1717
<ProjectReference Include="..\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
18+
<ProjectReference Include="..\Microsoft.ML.HalLearners\Microsoft.ML.HalLearners.csproj" />
1819
<ProjectReference Include="..\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
1920
<ProjectReference Include="..\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
2021
<ProjectReference Include="..\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj" />

src/Microsoft.ML.HalLearners/Microsoft.ML.HalLearners.csproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
44
<TargetFramework>netstandard2.0</TargetFramework>
55
<IncludeInPackage>Microsoft.ML.HalLearners</IncludeInPackage>
6+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
67
</PropertyGroup>
78

89
<ItemGroup>

src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs

Lines changed: 850 additions & 0 deletions
Large diffs are not rendered by default.

src/Microsoft.ML.HalLearners/doc.xml

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<?xml version="1.0" encoding="utf-8"?>
1+
<?xml version="1.0" encoding="utf-8"?>
22
<doc>
33
<members>
44

@@ -22,6 +22,24 @@
2222
</code>
2323
</example>
2424
</member>
25-
25+
<member name="SymSGD">
26+
<summary>
27+
Parallel Stochastic Gradient Descent trainer.
28+
</summary>
29+
<remarks>
30+
<a href='https://en.wikipedia.org/wiki/Stochastic_gradient_descent'>Stochastic gradient descent (SGD)</a> is an iterative algorithm
31+
that optimizes a differentiable objective function. <a href='https://arxiv.org/abs/1705.08030'>SYMSGD</a> parallelizes SGD using Sound Combiners.
32+
</remarks>
33+
<example>
34+
<code language="csharp">
35+
new SymSgdBinaryClassifier()
36+
{
37+
NumberOfIterations = 50,
38+
L2Regularization = 0,
39+
Shuffle = true
40+
}
41+
</code>
42+
</example>
43+
</member>
2644
</members>
2745
</doc>

src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
44
<TargetFramework>netstandard2.0</TargetFramework>

src/Microsoft.ML/CSharpApi.cs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,18 @@ public void Add(Microsoft.ML.Trainers.StochasticGradientDescentBinaryClassifier
838838
_jsonNodes.Add(Serialize("Trainers.StochasticGradientDescentBinaryClassifier", input, output));
839839
}
840840

/// <summary>
/// Adds a <see cref="Microsoft.ML.Trainers.SymSgdBinaryClassifier"/> node to the experiment
/// graph and returns the output bindings created for it.
/// </summary>
public Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output Add(Microsoft.ML.Trainers.SymSgdBinaryClassifier input)
{
    var result = new Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output();
    Add(input, result);
    return result;
}

/// <summary>
/// Serializes the <see cref="Microsoft.ML.Trainers.SymSgdBinaryClassifier"/> node together with
/// the supplied output bindings and appends it to the experiment's JSON graph.
/// </summary>
public void Add(Microsoft.ML.Trainers.SymSgdBinaryClassifier input, Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output output)
    => _jsonNodes.Add(Serialize("Trainers.SymSgdBinaryClassifier", input, output));
841853
public Microsoft.ML.Transforms.ApproximateBootstrapSampler.Output Add(Microsoft.ML.Transforms.ApproximateBootstrapSampler input)
842854
{
843855
var output = new Microsoft.ML.Transforms.ApproximateBootstrapSampler.Output();
@@ -9761,6 +9773,128 @@ public StochasticGradientDescentBinaryClassifierPipelineStep(Output output)
97619773
}
97629774
}
97639775

namespace Trainers
{

    /// <summary>
    /// Train a symbolic SGD.
    /// </summary>
    /// <remarks>
    /// Generated entry-point input class for the SymSGD binary classification trainer.
    /// NOTE(review): this file appears to be generated (CSharpApi.cs); the duplicate-sweep-value
    /// fix below should also be applied to the component's parameter manifest so a regeneration
    /// does not reintroduce it.
    /// </remarks>
    public sealed partial class SymSgdBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
    {

        /// <summary>
        /// Degree of lock-free parallelism. Determinism not guaranteed. Multi-threading is not supported currently.
        /// </summary>
        public int? NumberOfThreads { get; set; }

        /// <summary>
        /// Number of passes over the data.
        /// </summary>
        [TlcModule.SweepableDiscreteParamAttribute("NumberOfIterations", new object[]{1, 5, 10, 20, 30, 40, 50})]
        public int NumberOfIterations { get; set; } = 50;

        /// <summary>
        /// Tolerance for difference in average loss in consecutive passes.
        /// </summary>
        public float Tolerance { get; set; } = 0.0001f;

        /// <summary>
        /// Learning rate
        /// </summary>
        [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{"<Auto>", 10f, 1f, 0.1f, 0.01f, 0.001f})]
        public float? LearningRate { get; set; }

        /// <summary>
        /// L2 regularization
        /// </summary>
        // FIX(review): the sweep set previously contained 1E-05f twice
        // ({0f, 1E-05f, 1E-05f, 1E-06f, 1E-07f}); the duplicate only wasted a sweep
        // trial, so it has been removed.
        [TlcModule.SweepableDiscreteParamAttribute("L2Regularization", new object[]{0f, 1E-05f, 1E-06f, 1E-07f})]
        public float L2Regularization { get; set; }

        /// <summary>
        /// The number of iterations each thread learns a local model until combining it with the global model. Low value means more updated global model and high value means less cache traffic.
        /// </summary>
        [TlcModule.SweepableDiscreteParamAttribute("UpdateFrequency", new object[]{"<Auto>", 5, 20})]
        public int? UpdateFrequency { get; set; }

        /// <summary>
        /// The acceleration memory budget in MB
        /// </summary>
        public long MemorySize { get; set; } = 1024;

        /// <summary>
        /// Shuffle data?
        /// </summary>
        public bool Shuffle { get; set; } = true;

        /// <summary>
        /// Apply weight to the positive class, for imbalanced data
        /// </summary>
        public float PositiveInstanceWeight { get; set; } = 1f;

        /// <summary>
        /// Column to use for labels
        /// </summary>
        public string LabelColumn { get; set; } = "Label";

        /// <summary>
        /// The data to be used for training
        /// </summary>
        public Var<Microsoft.ML.Runtime.Data.IDataView> TrainingData { get; set; } = new Var<Microsoft.ML.Runtime.Data.IDataView>();

        /// <summary>
        /// Column to use for features
        /// </summary>
        public string FeatureColumn { get; set; } = "Features";

        /// <summary>
        /// Normalize option for the feature column
        /// </summary>
        public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto;

        /// <summary>
        /// Whether learner should cache input training data
        /// </summary>
        public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto;


        /// <summary>
        /// Output bindings produced when this node is added to an experiment.
        /// </summary>
        public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput
        {
            /// <summary>
            /// The trained model
            /// </summary>
            public Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel> PredictorModel { get; set; } = new Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel>();

        }

        /// <summary>
        /// Returns the variable holding this trainer's input data.
        /// </summary>
        public Var<IDataView> GetInputData() => TrainingData;

        /// <summary>
        /// Wires this trainer into a learning pipeline: binds the previous step's data output
        /// (when present) to <see cref="TrainingData"/>, adds the node to the experiment, and
        /// returns a predictor step exposing the trained model.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="previousStep"/> is not an <see cref="ILearningPipelineDataStep"/>.
        /// </exception>
        public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
        {
            if (previousStep != null)
            {
                if (!(previousStep is ILearningPipelineDataStep dataStep))
                {
                    throw new InvalidOperationException($"{ nameof(SymSgdBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
                }

                TrainingData = dataStep.Data;
            }
            Output output = experiment.Add(this);
            return new SymSgdBinaryClassifierPipelineStep(output);
        }

        // Thin adapter exposing the trainer's PredictorModel output as a pipeline predictor step.
        private class SymSgdBinaryClassifierPipelineStep : ILearningPipelinePredictorStep
        {
            public SymSgdBinaryClassifierPipelineStep(Output output)
            {
                Model = output.PredictorModel;
            }

            public Var<IPredictorModel> Model { get; }
        }
    }
}
97649898
namespace Transforms
97659899
{
97669900

src/Native/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,4 @@ add_subdirectory(CpuMathNative)
182182
add_subdirectory(FastTreeNative)
183183
add_subdirectory(LdaNative)
184184
add_subdirectory(FactorizationMachineNative)
185+
add_subdirectory(SymSgdNative)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
project (SymSgdNative)

set(SOURCES
    SymSgdNative.cpp
)

if(WIN32)
    find_library(MKL_LIBRARY MklImports HINTS ${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/win-x64/native)
else()
    list(APPEND SOURCES ${VERSION_FILE_PATH})
    if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
        message("Linking SymSgdNative with MKL on macOS.")
        find_library(MKL_LIBRARY libMklImports.dylib HINTS "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/osx-x64/native")
    else()
        message("Linking SymSgdNative with MKL on linux.")
        find_library(MKL_LIBRARY libMklImports.so HINTS ${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/linux-x64/native)
        SET(CMAKE_SKIP_BUILD_RPATH FALSE)
        SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
        # FIX(review): CMAKE_INSTALL_RPATH was SET twice with the identical value;
        # the redundant second assignment has been removed.
        # NOTE(review): the rpath points at .../runtimes while find_library looks in
        # .../runtimes/linux-x64/native — confirm the dynamic loader actually
        # resolves libMklImports.so from this rpath at run time.
        SET(CMAKE_INSTALL_RPATH "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes")
        SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
    endif()
endif()

add_library(SymSgdNative SHARED ${SOURCES} ${RESOURCES})
target_link_libraries(SymSgdNative PUBLIC ${MKL_LIBRARY})

if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
    set_target_properties(SymSgdNative PROPERTIES INSTALL_RPATH "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/osx-x64/native")
endif()

install_library_and_symbols (SymSgdNative)

src/Native/SymSgdNative/Macros.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#pragma once

// Minimum of two values.
// FIX(review): parameters renamed from __X__/__Y__ — identifiers containing a
// double underscore are reserved for the implementation in C++ ([lex.name]).
// NOTE: as a function-like macro this evaluates both arguments twice; do not
// pass expressions with side effects (e.g. MIN(i++, j)).
#define MIN(X, Y) (((X) > (Y)) ? (Y) : (X))

// This is a very large prime number used for permutation
#define VERYLARGEPRIME 961748941
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#pragma once
#include "../Stdafx.h"

// MKL CBLAS entry points this wrapper forwards to (resolved from MklImports at
// link time). Declarations kept byte-identical to preserve C linkage.
// NOTE(review): MKL documents the dense-vector argument of cblas_sdoti as
// read-only (const) — confirm against the MklImports headers before tightening.
extern "C" float cblas_sdot(const int vecSize, const float* denseVecX, const int incX, const float* denseVecY, const int incY);
extern "C" float cblas_sdoti(const int sparseVecSize, const float* sparseVecValues, const int* sparseVecIndices, float* denseVec);
extern "C" void cblas_saxpy(const int vecSize, const float coef, const float* denseVecX, const int incX, float* denseVecY, const int incY);
extern "C" void cblas_saxpyi(const int sparseVecSize, const float coef, const float* sparseVecValues, const int* sparseVecIndices, float* denseVec);

// FIX(review): these wrappers were non-inline function definitions in a header,
// which violates the one-definition rule if the header is ever included from
// more than one translation unit; marked inline.

// Dense dot product of denseVecX and denseVecY over vecSize elements (unit stride).
inline float SDOT(const int vecSize, const float* denseVecX, const float* denseVecY)
{
    return cblas_sdot(vecSize, denseVecX, 1, denseVecY, 1);
}

// Sparse-dense dot product: sum over i of sparseVecValues[i] * denseVec[sparseVecIndices[i]].
inline float SDOTI(const int sparseVecSize, const int* sparseVecIndices, const float* sparseVecValues, float* denseVec)
{
    return cblas_sdoti(sparseVecSize, sparseVecValues, sparseVecIndices, denseVec);
}

// denseVecY += coef * denseVecX over vecSize elements (unit stride).
// FIX(review): dropped the `return` keyword on a void call for consistency with SAXPYI.
inline void SAXPY(const int vecSize, const float* denseVecX, float* denseVecY, float coef)
{
    cblas_saxpy(vecSize, coef, denseVecX, 1, denseVecY, 1);
}

// Sparse update: denseVec[sparseVecIndices[i]] += coef * sparseVecValues[i] for each i.
inline void SAXPYI(const int sparseVecSize, const int* sparseVecIndices, const float* sparseVecValues, float* denseVec, float coef)
{
    cblas_saxpyi(sparseVecSize, coef, sparseVecValues, sparseVecIndices, denseVec);
}

0 commit comments

Comments
 (0)