Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added SsaSpikeDetectorPrediction sample.
  • Loading branch information
montebhoover committed Nov 30, 2018
commit 57313b16f5275142b66499fe6ede74284f5f27a9
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
using System;
using System.Linq;
using System.Collections.Generic;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.TimeSeriesProcessing;
using Microsoft.ML.TimeSeries;
using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML.Core.Data;
using System.Linq;

namespace Microsoft.ML.Samples.Dynamic
{
Expand Down Expand Up @@ -56,9 +55,9 @@ public static void SsaChangePointDetectorTransform()
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1, // An upper bound on the largest relevant seasonality in the input time - series."
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time - series."

};

Expand Down Expand Up @@ -99,15 +98,15 @@ public static void SsaChangePointDetectorTransform()
// 400 0 357.11 0.03 45298370.86
}

// This example shows change point detection as above, but demonstrates how to persist the trained model
// and then re-load it to predict change points in new data.
// This example shows change point detection as above, but demonstrates how to train a model
// that can run predictions on streaming data, and how to persist the trained model and then re-load it.
public static void SsaChangePointDetectorPrediction()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var ml = new MLContext();

// Generate sample series data with a recurring pattern and then a change in trend
// Generate sample series data with a recurring pattern
const int SeasonalitySize = 5;
const int TrainingSeasons = 3;
const int TrainingSize = SeasonalitySize * TrainingSeasons;
Expand All @@ -126,20 +125,20 @@ public static void SsaChangePointDetectorPrediction()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1, // An upper bound on the largest relevant seasonality in the input time - series."
Confidence = 95, // The confidence for spike detection in the range [0, 100]
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series."

};

// Train the change point detector.
ITransformer model = new SsaChangePointEstimator(ml, args).Fit(dataView);

// Create a prediction engine from the model for feeding new data
// Create a prediction engine from the model for feeding new data.
var engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);

// Start streaming new data points with no change point to the prediction engine
// Start streaming new data points with no change point to the prediction engine.
Console.WriteLine($"Output from ChangePoint predictions on new data:");
Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value");
ChangePointPrediction prediction = null;
Expand All @@ -150,15 +149,15 @@ public static void SsaChangePointDetectorPrediction()
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
}

// Now stream data points that reflect a change in trend
// Now stream data points that reflect a change in trend.
for (int i = 0; i < 5; i++)
{
var value = (i + 1) * 100;
prediction = engine.Predict(new SsaChangePointData(value));
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
}

// Now we demonstrate saving and loading the model to disk.
// Now we demonstrate saving and loading the model.

// Save the model that exists within the prediction engine.
// The engine has been updating this model with every new data point.
Expand All @@ -169,10 +168,10 @@ public static void SsaChangePointDetectorPrediction()
using (var file = File.OpenRead(modelPath))
model = TransformerChain.LoadFrom(ml, file);

// We must create a new prediction engine from the persisted model
// We must create a new prediction engine from the persisted model.
engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);

// Run predictions on more data.
// Run predictions on the loaded model.
for (int i = 0; i < 5; i++)
{
var value = (i + 1) * 100;
Expand Down
183 changes: 149 additions & 34 deletions docs/samples/Microsoft.ML.Samples/Dynamic/SsaSpikeDetectorTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.TimeSeriesProcessing;
using Microsoft.ML.Core.Data;

using Microsoft.ML.TimeSeries;
using System.IO;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
{
Expand All @@ -27,36 +32,41 @@ class SsaSpikePrediction

// This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot).
// SsaSpikeDetector is applied then to identify spiking points in the series.
// SsaSpikeDetector differs from IidSpikeDetector in that it can account for temporal seasonality
// in the data.
public static void SsaSpikeDetectorTransform()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var ml = new MLContext();

// Generate sample series data with a spike
const int size = 16;
var data = new List<SsaSpikeData>(size);
for (int i = 0; i < size / 2; i++)
data.Add(new SsaSpikeData(5));
// Generate sample series data with a recurring pattern and a spike within the pattern
const int SeasonalitySize = 5;
const int TrainingSeasons = 3;
const int TrainingSize = SeasonalitySize * TrainingSeasons;
var data = new List<SsaSpikeData>();
for (int i = 0; i < TrainingSeasons; i++)
for (int j = 0; j < SeasonalitySize; j++)
data.Add(new SsaSpikeData(j));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Data generation is used across samples; consider moving it into Microsoft.ML.SamplesUtils.DatasetUtils.

// This is a spike
data.Add(new SsaSpikeData(10));
for (int i = 0; i < size / 2; i++)
data.Add(new SsaSpikeData(5));
data.Add(new SsaSpikeData(100));
for (int i = 0; i < SeasonalitySize; i++)
data.Add(new SsaSpikeData(i));

// Convert data to IDataView.
var dataView = ml.CreateStreamingDataView(data);

// Set up the SsaSpikeDetector arguments
string outputColumnName = "Prediction";
string inputColumnName = "Value";
var outputColumnName = nameof(SsaSpikePrediction.Prediction);
var inputColumnName = nameof(SsaSpikeData.Value);
var args = new SsaSpikeDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
PvalueHistoryLength = size / 4, // The size of the sliding window for computing the p-value
TrainingWindowSize = size / 2, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = size / 8, // An upper bound on the largest relevant seasonality in the input time - series."
Confidence = 95, // The confidence for spike detection in the range [0, 100]
PvalueHistoryLength = 8, // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series."
};

// The transformed data.
Expand All @@ -66,30 +76,135 @@ public static void SsaSpikeDetectorTransform()
var predictionColumn = transformedData.AsEnumerable<SsaSpikePrediction>(ml, reuseRowObject: false);

Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
Console.WriteLine("Alert\tScore\tP-Value");
Console.WriteLine("Data\tAlert\tScore\tP-Value");
int k = 0;
foreach (var prediction in predictionColumn)
Console.WriteLine("{0}\t{1:0.00}\t{2:0.00}", prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"{0}\t{1}\t{2:0.00}\t{3:0.00}" [](start = 34, length = 30)

I think we prefer string interpolation.

Console.WriteLine("");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Console.WriteLine(""); [](start = 11, length = 23)

remove, and include the newline in the above.


// Prediction column obtained post-transformation.
// Alert Score P-Value
// 0 0.00 0.50
// 0 0.00 0.50
// 0 0.00 0.50
// 0 0.00 0.50
// 0 0.00 0.50
// 0 0.00 0.50
// 0 0.00 0.50
// 0 0.00 0.50
// 1 5.00 0.00 <-- alert is on, predicted spike
// 0 -2.50 0.09
// 0 -2.50 0.22
// 0 0.00 0.47
// 0 0.00 0.47
// 0 0.00 0.26
// 0 0.00 0.38
// 0 0.00 0.50
// 0 0.00 0.50
// Data Alert Score P-Value
// 0 0 - 2.53 0.50
// 1 0 - 0.01 0.01
// 2 0 0.76 0.14
// 3 0 0.69 0.28
// 4 0 1.44 0.18
// 0 0 - 1.84 0.17
// 1 0 0.22 0.44
// 2 0 0.20 0.45
// 3 0 0.16 0.47
// 4 0 1.33 0.18
// 0 0 - 1.79 0.07
// 1 0 0.16 0.50
// 2 0 0.09 0.50
// 3 0 0.08 0.45
// 4 0 1.31 0.12
// 100 1 98.21 0.00 <-- alert is on, predicted spike
// 0 0 - 13.83 0.29
// 1 0 - 1.74 0.44
// 2 0 - 0.47 0.46
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to limit the preview/data to something smaller, maybe 7 rows?

// 3 0 - 16.50 0.29
// 4 0 - 29.82 0.21
}

// This example shows spike detection as above, but demonstrates how to train a model
// that can run predictions on streaming data, and how to persist the trained model
// (checkpointed from the prediction engine to "model.zip") and then re-load it.
public static void SsaSpikeDetectorPrediction()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as in the other PR; maybe the end-to-end should go on tutorials. The API call usage should go here.

{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var ml = new MLContext();

// Generate training data with a recurring pattern: the values 0..4 repeated once per training season.
const int SeasonalitySize = 5;
const int TrainingSeasons = 3;
const int TrainingSize = SeasonalitySize * TrainingSeasons;
var data = new List<SsaSpikeData>();
for (int i = 0; i < TrainingSeasons; i++)
for (int j = 0; j < SeasonalitySize; j++)
data.Add(new SsaSpikeData(j));

// Convert data to IDataView.
var dataView = ml.CreateStreamingDataView(data);

// Set up the SsaSpikeDetector arguments.
// The column names are taken from the data classes via nameof so they stay in sync with the members.
var outputColumnName = nameof(SsaSpikePrediction.Prediction);
var inputColumnName = nameof(SsaSpikeData.Value);
var args = new SsaSpikeDetector.Arguments()
{
Source = inputColumnName,
Name = outputColumnName,
Confidence = 95, // The confidence for spike detection in the range [0, 100]
PvalueHistoryLength = 8, // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series.
};

// Train the spike detector.
ITransformer model = new SsaSpikeEstimator(ml, args).Fit(dataView);

// Create a prediction engine from the model for feeding new data.
var engine = model.CreateTimeSeriesPredictionFunction<SsaSpikeData, SsaSpikePrediction>(ml);

// Start streaming new data points with no spike to the prediction engine:
// two more repetitions of the same 0..4 pattern that was used for training.
Console.WriteLine($"Output from spike predictions on new data:");
Console.WriteLine("Data\tAlert\tScore\tP-Value");
SsaSpikePrediction prediction = null;
for (int j = 0; j < 2; j++)
{
for (int i = 0; i < 5; i++)
{
var value = i;
prediction = engine.Predict(new SsaSpikeData(value));
// Prediction[0..2] are printed under the Alert, Score and P-Value headers respectively.
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);
}
}

// Now send a data point that reflects a spike.
var newValue = 100;
prediction = engine.Predict(new SsaSpikeData(newValue));
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", newValue, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);

// Now we demonstrate saving and loading the model.

// Save the model that exists within the prediction engine.
// The engine has been updating this model with every new data point.
var modelPath = "model.zip";
engine.CheckPoint(ml, modelPath);

// Load the model.
using (var file = File.OpenRead(modelPath))
model = TransformerChain.LoadFrom(ml, file);

// We must create a new prediction engine from the persisted model.
engine = model.CreateTimeSeriesPredictionFunction<SsaSpikeData, SsaSpikePrediction>(ml);

// Run predictions on the loaded model, streaming one more repetition of the 0..4 pattern.
for (int i = 0; i < 5; i++)
{
var value = i;
prediction = engine.Predict(new SsaSpikeData(value));
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]);
}

// Output from spike predictions on new data:
// Data Alert Score P-Value
// 0 0 - 1.01 0.50
// 1 0 - 0.24 0.22
// 2 0 - 0.31 0.30
// 3 0 0.44 0.01
// 4 0 2.16 0.00
// 0 0 - 0.78 0.27
// 1 0 - 0.80 0.30
// 2 0 - 0.84 0.31
// 3 0 0.33 0.31
// 4 0 2.21 0.07
// 100 1 86.17 0.00 <-- alert is on, predicted spike
// 0 0 - 2.74 0.40
// 1 0 - 1.47 0.42
// 2 0 - 17.50 0.24
// 3 0 - 30.82 0.16
// 4 0 - 23.24 0.28
}
}
}