-
Notifications
You must be signed in to change notification settings - Fork 1.9k
SSA time series samples #1788
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SSA time series samples #1788
Changes from 1 commit
5e311c2
1c93fe1
57313b1
daf5dfb
694d7a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,11 @@ | |
| using Microsoft.ML.Runtime.Data; | ||
| using Microsoft.ML.Runtime.Api; | ||
| using Microsoft.ML.Runtime.TimeSeriesProcessing; | ||
| using Microsoft.ML.Core.Data; | ||
|
|
||
| using Microsoft.ML.TimeSeries; | ||
| using System.IO; | ||
| using Microsoft.ML.Data; | ||
|
|
||
| namespace Microsoft.ML.Samples.Dynamic | ||
| { | ||
|
|
@@ -27,36 +32,41 @@ class SsaSpikePrediction | |
|
|
||
| // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). | ||
| // SsaSpikeDetector is then applied to identify spiking points in the series. | ||
| // SsaSpikeDetector differs from IidSpikeDetector in that it can account for temporal seasonality | ||
| // in the data. | ||
| public static void SsaSpikeDetectorTransform() | ||
| { | ||
| // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
| // as well as the source of randomness. | ||
| var ml = new MLContext(); | ||
|
|
||
| // Generate sample series data with a spike | ||
| const int size = 16; | ||
| var data = new List<SsaSpikeData>(size); | ||
| for (int i = 0; i < size / 2; i++) | ||
| data.Add(new SsaSpikeData(5)); | ||
| // Generate sample series data with a recurring pattern and a spike within the pattern | ||
| const int SeasonalitySize = 5; | ||
| const int TrainingSeasons = 3; | ||
| const int TrainingSize = SeasonalitySize * TrainingSeasons; | ||
| var data = new List<SsaSpikeData>(); | ||
| for (int i = 0; i < TrainingSeasons; i++) | ||
| for (int j = 0; j < SeasonalitySize; j++) | ||
| data.Add(new SsaSpikeData(j)); | ||
| // This is a spike | ||
| data.Add(new SsaSpikeData(10)); | ||
| for (int i = 0; i < size / 2; i++) | ||
| data.Add(new SsaSpikeData(5)); | ||
| data.Add(new SsaSpikeData(100)); | ||
| for (int i = 0; i < SeasonalitySize; i++) | ||
| data.Add(new SsaSpikeData(i)); | ||
|
|
||
| // Convert data to IDataView. | ||
| var dataView = ml.CreateStreamingDataView(data); | ||
|
|
||
| // Set up SsaSpikeDetector arguments | ||
| string outputColumnName = "Prediction"; | ||
| string inputColumnName = "Value"; | ||
| var outputColumnName = nameof(SsaSpikePrediction.Prediction); | ||
| var inputColumnName = nameof(SsaSpikeData.Value); | ||
| var args = new SsaSpikeDetector.Arguments() | ||
| { | ||
| Source = inputColumnName, | ||
| Name = outputColumnName, | ||
| Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
| PvalueHistoryLength = size / 4, // The size of the sliding window for computing the p-value | ||
| TrainingWindowSize = size / 2, // The number of points from the beginning of the sequence used for training. | ||
| SeasonalWindowSize = size / 8, // An upper bound on the largest relevant seasonality in the input time - series." | ||
| Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
| PvalueHistoryLength = 8, // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes. | ||
| TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training. | ||
| SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series. | ||
| }; | ||
|
|
||
| // The transformed data. | ||
|
|
@@ -66,30 +76,135 @@ public static void SsaSpikeDetectorTransform() | |
| var predictionColumn = transformedData.AsEnumerable<SsaSpikePrediction>(ml, reuseRowObject: false); | ||
|
|
||
| Console.WriteLine($"{outputColumnName} column obtained post-transformation."); | ||
| Console.WriteLine("Alert\tScore\tP-Value"); | ||
| Console.WriteLine("Data\tAlert\tScore\tP-Value"); | ||
| int k = 0; | ||
| foreach (var prediction in predictionColumn) | ||
| Console.WriteLine("{0}\t{1:0.00}\t{2:0.00}", prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); | ||
| Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think we prefer string interpolation here. |
||
| Console.WriteLine(""); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Remove this, and include the newline in the statement above. |
||
|
|
||
| // Prediction column obtained post-transformation. | ||
| // Alert Score P-Value | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // 1 5.00 0.00 <-- alert is on, predicted spike | ||
| // 0 -2.50 0.09 | ||
| // 0 -2.50 0.22 | ||
| // 0 0.00 0.47 | ||
| // 0 0.00 0.47 | ||
| // 0 0.00 0.26 | ||
| // 0 0.00 0.38 | ||
| // 0 0.00 0.50 | ||
| // 0 0.00 0.50 | ||
| // Data Alert Score P-Value | ||
| // 0 0 -2.53 0.50 | ||
| // 1 0 -0.01 0.01 | ||
| // 2 0 0.76 0.14 | ||
| // 3 0 0.69 0.28 | ||
| // 4 0 1.44 0.18 | ||
| // 0 0 -1.84 0.17 | ||
| // 1 0 0.22 0.44 | ||
| // 2 0 0.20 0.45 | ||
| // 3 0 0.16 0.47 | ||
| // 4 0 1.33 0.18 | ||
| // 0 0 -1.79 0.07 | ||
| // 1 0 0.16 0.50 | ||
| // 2 0 0.09 0.50 | ||
| // 3 0 0.08 0.45 | ||
| // 4 0 1.31 0.12 | ||
| // 100 1 98.21 0.00 <-- alert is on, predicted spike | ||
| // 0 0 -13.83 0.29 | ||
| // 1 0 -1.74 0.44 | ||
| // 2 0 -0.47 0.46 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it make sense to limit the preview/data to something smaller, maybe around 7 rows? |
||
| // 3 0 -16.50 0.29 | ||
| // 4 0 -29.82 0.21 | ||
| } | ||
|
|
||
| // This example shows spike detection as above, but demonstrates how to train a model | ||
| // that can run predictions on streaming data, and how to persist the trained model and then re-load it. | ||
| public static void SsaSpikeDetectorPrediction() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as in the other PR: maybe the end-to-end sample should go in the tutorials. The API call usage should go here. |
||
| { | ||
| // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
| // as well as the source of randomness. | ||
| var ml = new MLContext(); | ||
|
|
||
| // Generate sample series data with a recurring pattern | ||
| const int SeasonalitySize = 5; | ||
| const int TrainingSeasons = 3; | ||
| const int TrainingSize = SeasonalitySize * TrainingSeasons; | ||
| var data = new List<SsaSpikeData>(); | ||
| for (int i = 0; i < TrainingSeasons; i++) | ||
| for (int j = 0; j < SeasonalitySize; j++) | ||
| data.Add(new SsaSpikeData(j)); | ||
|
|
||
| // Convert data to IDataView. | ||
| var dataView = ml.CreateStreamingDataView(data); | ||
|
|
||
| // Set up SsaSpikeDetector arguments | ||
| var outputColumnName = nameof(SsaSpikePrediction.Prediction); | ||
| var inputColumnName = nameof(SsaSpikeData.Value); | ||
| var args = new SsaSpikeDetector.Arguments() | ||
| { | ||
| Source = inputColumnName, | ||
| Name = outputColumnName, | ||
| Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
| PvalueHistoryLength = 8, // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes. | ||
| TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training. | ||
| SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series. | ||
| }; | ||
|
|
||
| // Train the spike detector. | ||
| ITransformer model = new SsaSpikeEstimator(ml, args).Fit(dataView); | ||
|
|
||
| // Create a prediction engine from the model for feeding new data. | ||
| var engine = model.CreateTimeSeriesPredictionFunction<SsaSpikeData, SsaSpikePrediction>(ml); | ||
|
|
||
| // Start streaming new data points with no spike to the prediction engine. | ||
| Console.WriteLine($"Output from spike predictions on new data:"); | ||
| Console.WriteLine("Data\tAlert\tScore\tP-Value"); | ||
| SsaSpikePrediction prediction = null; | ||
| for (int j = 0; j < 2; j++) | ||
| { | ||
| for (int i = 0; i < 5; i++) | ||
| { | ||
| var value = i; | ||
| prediction = engine.Predict(new SsaSpikeData(value)); | ||
| Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); | ||
| } | ||
| } | ||
|
|
||
| // Now send a data point that reflects a spike. | ||
| var newValue = 100; | ||
| prediction = engine.Predict(new SsaSpikeData(newValue)); | ||
| Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", newValue, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); | ||
|
|
||
| // Now we demonstrate saving and loading the model. | ||
|
|
||
| // Save the model that exists within the prediction engine. | ||
| // The engine has been updating this model with every new data point. | ||
| var modelPath = "model.zip"; | ||
| engine.CheckPoint(ml, modelPath); | ||
|
|
||
| // Load the model. | ||
| using (var file = File.OpenRead(modelPath)) | ||
| model = TransformerChain.LoadFrom(ml, file); | ||
|
|
||
| // We must create a new prediction engine from the persisted model. | ||
| engine = model.CreateTimeSeriesPredictionFunction<SsaSpikeData, SsaSpikePrediction>(ml); | ||
|
|
||
| // Run predictions on the loaded model. | ||
| for (int i = 0; i < 5; i++) | ||
| { | ||
| var value = i; | ||
| prediction = engine.Predict(new SsaSpikeData(value)); | ||
| Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); | ||
| } | ||
|
|
||
| // Output from spike predictions on new data: | ||
| // Data Alert Score P-Value | ||
| // 0 0 -1.01 0.50 | ||
| // 1 0 -0.24 0.22 | ||
| // 2 0 -0.31 0.30 | ||
| // 3 0 0.44 0.01 | ||
| // 4 0 2.16 0.00 | ||
| // 0 0 -0.78 0.27 | ||
| // 1 0 -0.80 0.30 | ||
| // 2 0 -0.84 0.31 | ||
| // 3 0 0.33 0.31 | ||
| // 4 0 2.21 0.07 | ||
| // 100 1 86.17 0.00 <-- alert is on, predicted spike | ||
| // 0 0 -2.74 0.40 | ||
| // 1 0 -1.47 0.42 | ||
| // 2 0 -17.50 0.24 | ||
| // 3 0 -30.82 0.16 | ||
| // 4 0 -23.24 0.28 | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The data generation is used across samples; consider moving it into Microsoft.ML.SamplesUtils.DatasetUtils.