1 |
4977ce53
|
Roman Kalivoda
|
//
|
2 |
|
|
// Author: Roman Kalivoda
|
3 |
|
|
//
|
4 |
|
|
|
5 |
d358b79e
|
Roman Kalivoda
|
using System;
|
6 |
abfd9c7c
|
Roman Kalivoda
|
using System.Collections.Generic;
|
7 |
|
|
using System.Linq;
|
8 |
0d31f7e0
|
Roman Kalivoda
|
using log4net;
|
9 |
abfd9c7c
|
Roman Kalivoda
|
using Microsoft.ML;
|
10 |
|
|
|
11 |
|
|
namespace ServerApp.Predictor
|
12 |
|
|
{
|
13 |
4977ce53
|
Roman Kalivoda
|
/// <summary>
/// Implementation of the naive Bayes classifier in ML.NET.
/// </summary>
/// <remarks>
/// An instance holds a usable model after it has either been loaded from a file
/// (see the constructor taking a file name) or trained with <c>Fit</c>.
/// <c>Predict</c>, <c>Evaluate</c> and <c>Save</c> require such a model.
/// </remarks>
class NaiveBayesClassifier : IPredictor
{
    private static readonly ILog _log = LogManager.GetLogger(typeof(NaiveBayesClassifier));

    /// <summary>
    /// Name of the column that holds the concatenated feature vector inside the training pipeline.
    /// </summary>
    private const string FeaturesColumn = "Features";

    /// <summary>
    /// Context of the ML.NET framework.
    /// </summary>
    private readonly MLContext _mlContext;

    /// <summary>
    /// Model instance; <c>null</c> until a model has been loaded or trained.
    /// </summary>
    private ITransformer _trainedModel;

    /// <summary>
    /// Prediction engine created from <see cref="_trainedModel"/>; <c>null</c> until a model exists.
    /// </summary>
    private PredictionEngine<ModelInput, ModelOutput> _predictionEngine;

    /// <summary>
    /// Input schema that is written next to the model by <see cref="Save"/>.
    /// It is taken from the training data in <c>Fit</c> or from the model file in the loading constructor,
    /// so a loaded model can be saved again without being re-trained.
    /// </summary>
    private DataViewSchema _inputSchema;

    /// <summary>
    /// Instantiates new <c>MLContext</c>.
    /// </summary>
    public NaiveBayesClassifier()
    {
        _mlContext = new MLContext();
    }

    /// <summary>
    /// Creates a classifier from a model previously stored in a file.
    /// </summary>
    /// <param name="filename">Path of the model file to load.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filename"/> is <c>null</c>.</exception>
    public NaiveBayesClassifier(string filename) : this()
    {
        if (filename is null)
        {
            throw new ArgumentNullException(nameof(filename));
        }

        _trainedModel = _mlContext.Model.Load(filename, out DataViewSchema modelSchema);
        // Keep the schema that came with the file: Save needs it and there is no training data here.
        _inputSchema = modelSchema;
        // TODO check if the loaded model has valid input and output schema
        _predictionEngine = _mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(_trainedModel);
    }

    /// <summary>
    /// Stores the current model together with its input schema into a file.
    /// </summary>
    /// <param name="filename">Path of the file to write.</param>
    /// <exception cref="NullReferenceException">
    /// No input schema or no model is available, i.e. the predictor has been neither trained nor loaded.
    /// </exception>
    /// <exception cref="ArgumentNullException"><paramref name="filename"/> is <c>null</c>.</exception>
    public void Save(string filename)
    {
        // NOTE: the exception types and messages of the two state checks are kept exactly as they
        // have always been, so that existing callers handling them keep working.
        if (_inputSchema is null)
        {
            throw new NullReferenceException("DataView is not set.");
        }

        if (_trainedModel is null)
        {
            throw new NullReferenceException("Trained model instance does not exist. This predictor has not been trained yet.");
        }

        if (filename is null)
        {
            throw new ArgumentNullException(nameof(filename));
        }

        _mlContext.Model.Save(_trainedModel, _inputSchema, filename);
    }

    /// <summary>
    /// Trains the classifier on the given samples, cross-validates it and keeps the model of the
    /// fold with the highest micro accuracy.
    /// </summary>
    /// <param name="trainInput">Training samples.</param>
    /// <exception cref="ArgumentNullException"><paramref name="trainInput"/> is <c>null</c>.</exception>
    public void Fit(IEnumerable<ModelInput> trainInput)
    {
        if (trainInput is null)
        {
            throw new ArgumentNullException(nameof(trainInput));
        }

        IDataView trainingData = _mlContext.Data.LoadFromEnumerable(trainInput);

        var pipeline = _mlContext.Transforms.Conversion.MapValueToKey(nameof(ModelInput.Label))
            .Append(_mlContext.Transforms.Conversion.ConvertType(nameof(ModelInput.Hour)))
            .Append(_mlContext.Transforms.Concatenate(FeaturesColumn,
                new[] { nameof(ModelInput.Temp), nameof(ModelInput.Rain), nameof(ModelInput.Wind), nameof(ModelInput.Hour) }))
            .Append(_mlContext.Transforms.NormalizeMeanVariance(FeaturesColumn, useCdf: false))
            .AppendCacheCheckpoint(_mlContext)
            .Append(_mlContext.MulticlassClassification.Trainers.NaiveBayes())
            .Append(_mlContext.Transforms.Conversion.MapKeyToValue(nameof(ModelOutput.PredictedLabel)));

        var cvResults = _mlContext.MulticlassClassification.CrossValidate(trainingData, pipeline);
        _log.Debug("Cross-validated the trained model");

        // Rank the folds once; the same winner supplies both the model and the fold number for the log.
        var bestFold = cvResults.OrderByDescending(fold => fold.Metrics.MicroAccuracy).First();
        _log.Info($"Selected the model #{bestFold.Fold} as the best.");

        // Build the engine before touching the fields so that model, schema and engine are replaced together.
        var engine = _mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(bestFold.Model);
        _trainedModel = bestFold.Model;
        _inputSchema = trainingData.Schema;
        _predictionEngine = engine;
    }

    /// <summary>
    /// Predicts the label for a single sample.
    /// </summary>
    /// <param name="input">Sample to classify.</param>
    /// <returns>The predicted label.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">No model has been trained or loaded yet.</exception>
    public string Predict(ModelInput input)
    {
        if (input is null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (_predictionEngine is null)
        {
            throw new InvalidOperationException("Prediction engine does not exist. This predictor has not been trained yet.");
        }

        _log.Debug($"Predicting for input: {input}");
        return _predictionEngine.Predict(input).PredictedLabel;
    }

    /// <summary>
    /// Evaluates the current model on the given samples and prints the resulting
    /// multiclass classification metrics to the console.
    /// </summary>
    /// <param name="modelInputs">Labelled samples used for the evaluation.</param>
    /// <exception cref="ArgumentNullException"><paramref name="modelInputs"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">No model has been trained or loaded yet.</exception>
    public void Evaluate(IEnumerable<ModelInput> modelInputs)
    {
        if (modelInputs is null)
        {
            throw new ArgumentNullException(nameof(modelInputs));
        }

        if (_trainedModel is null)
        {
            throw new InvalidOperationException("Trained model instance does not exist. This predictor has not been trained yet.");
        }

        var testDataView = _mlContext.Data.LoadFromEnumerable(modelInputs);
        var data = _trainedModel.Transform(testDataView);
        var testMetrics = _mlContext.MulticlassClassification.Evaluate(data);

        Console.WriteLine($"*************************************************************************************************************");
        Console.WriteLine($"* Metrics for Multi-class Classification model - Test Data ");
        Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
        Console.WriteLine($"* MicroAccuracy: {testMetrics.MicroAccuracy:0.###}");
        Console.WriteLine($"* MacroAccuracy: {testMetrics.MacroAccuracy:0.###}");
        Console.WriteLine($"* LogLoss: {testMetrics.LogLoss:#.###}");
        Console.WriteLine($"* LogLossReduction: {testMetrics.LogLossReduction:#.###}");
        Console.WriteLine($"* Confusion Matrix: {testMetrics.ConfusionMatrix.GetFormattedConfusionTable()}");
        Console.WriteLine($"*************************************************************************************************************");
    }
}
|
110 |
|
|
}
|