Projekt

Obecné

Profil

Stáhnout (6.45 KB) Statistiky
| Větev: | Tag: | Revize:
1
//
2
// Author: Roman Kalivoda
3
//
4

    
5
using System.Collections.Generic;
6
using System;
7
using ServerApp.Parser.Parsers;
8
using ServerApp.Parser.OutputInfo;
9
using System.Linq;
10
using Microsoft.ML;
11
using log4net;
12

    
13
namespace ServerApp.Predictor
14
{
15
    /// <summary>
    /// A class responsible for preparation of features for classifiers.
    /// </summary>
    class FeatureExtractor
    {
        private static readonly ILog _log = LogManager.GetLogger(typeof(FeatureExtractor));

        /// <summary>
        /// Class labels in ascending order. The label at index <c>i</c> stands for <c>(i + 1) * 10</c> percent.
        /// Shared by <see cref="RatioToLabel"/> and <see cref="LabelToRatio"/> so both directions stay in sync.
        /// </summary>
        private static readonly string[] BucketLabels =
        {
            "10%", "20%", "30%", "40%", "50%", "60%", "70%", "80%", "90%", "100%"
        };

        /// <summary>
        /// A DataParser instance used to access info objects.
        /// </summary>
        private readonly IDataParser DataParser;

        /// <summary>
        /// A configuration object of the <c>Predictor</c> package
        /// </summary>
        private readonly PredictorConfiguration Configuration;

        /// <summary>
        /// Instantiates new FeatureExtractor class.
        /// </summary>
        /// <param name="dataParser">Data parser used to access training data.</param>
        /// <param name="configuration">Predictor configuration; its <c>BuildingsToAreas</c> mapping is read when preparing training input.</param>
        public FeatureExtractor(IDataParser dataParser, PredictorConfiguration configuration)
        {
            this.DataParser = dataParser;
            this.Configuration = configuration;
        }

        /// <summary>
        /// Builds the labelled training inputs for a single area.
        /// Attendance records of all buildings mapped to the area are summed per start time, outliers are
        /// rejected, and the remaining records are joined with the weather records on the start time.
        /// Each resulting input is labelled with the bucket of its amount relative to the largest amount.
        /// </summary>
        /// <param name="area">Identifier of the area, as used in the <c>BuildingsToAreas</c> configuration mapping.</param>
        /// <returns>
        /// A list of labelled model inputs; an empty list when no attendance record of the area
        /// can be matched with a weather record.
        /// </returns>
        public List<ModelInput> PrepareTrainingInput(int area)
        {
            // find all buildings in area; a set keeps the membership test in the query below cheap
            HashSet<string> buildings = new HashSet<string>();
            foreach (KeyValuePair<string, int> kvp in Configuration.BuildingsToAreas)
            {
                if (kvp.Value == area)
                {
                    buildings.Add(kvp.Key);
                }
            }

            // sum the amounts of all records sharing a start time and order the result chronologically
            List<ActivityInfo> activities = DataParser.AttendanceList
                .Where(e => buildings.Contains(e.building))
                .GroupBy(e => e.startTime)
                .Select(g => g.Aggregate((a, b) => new ActivityInfo(null, a.amount + b.amount, a.startTime, -1)))
                .OrderBy(e => e.startTime)
                .ToList();
            activities = RejectOutliers(activities);

            var inputs = activities.Join(
                DataParser.WeatherList,
                activity => activity.startTime,
                weatherInfo => weatherInfo.startTime,
                (activity, weatherInfo) => new
                {
                    amount = activity.amount,
                    modelInput = new ModelInput
                    {
                        Hour = activity.startTime.Hour,
                        Temp = (float)weatherInfo.temp,
                        Rain = (float)weatherInfo.rain,
                        Time = activity.startTime,
                        Wind = (float)weatherInfo.wind
                    }
                }).ToList();

            // Max() throws on an empty sequence, so there is nothing to label in that case
            if (inputs.Count == 0)
            {
                return new List<ModelInput>();
            }

            int max = inputs.Select(e => e.amount).Max();
            foreach (var input in inputs)
            {
                // a zero maximum would turn the ratio into NaN (labelled "100%"); use the lowest bucket instead
                double ratio = max != 0 ? input.amount / (double)max : 0.0;
                input.modelInput.Label = RatioToLabel(ratio);
            }

            return inputs.Select(e => e.modelInput).ToList();
        }

        /// <summary>
        /// Removes records whose amount is flagged as a spike by the ML.NET IID spike detector
        /// (confidence 99, p-value history of a quarter of the data). Every rejected record is logged
        /// on the debug level.
        /// </summary>
        /// <param name="data">Records to filter; the list itself is not modified.</param>
        /// <returns>A new list with the records that were not flagged, in their original order.</returns>
        private static List<ActivityInfo> RejectOutliers(List<ActivityInfo> data)
        {
            List<ActivityInfo> result = new List<ActivityInfo>();
            if (data.Count == 0)
            {
                // nothing to analyse; also avoids configuring the detector with a zero-length history
                return result;
            }

            MLContext mlContext = new MLContext();
            IDataView input = mlContext.Data.LoadFromEnumerable(data);
            // NOTE(review): for fewer than 4 records the history length is 0 - confirm the detector accepts that
            var pipeline = mlContext.Transforms.Conversion.ConvertType(nameof(ActivityInfo.amount))
                .Append(mlContext.Transforms.DetectIidSpike(nameof(AnomalyDetectionResult.Prediction), nameof(ActivityInfo.amount), 99.0, data.Count / 4));
            // the pipeline is fitted on an empty data set and only then applied to the real data
            ITransformer transformer = pipeline.Fit(mlContext.Data.LoadFromEnumerable(new List<ActivityInfo>()));
            IDataView transformedData = transformer.Transform(input);
            List<AnomalyDetectionResult> predictions = mlContext.Data.CreateEnumerable<AnomalyDetectionResult>(transformedData, false).ToList();

            for (int i = 0; i < predictions.Count; i++)
            {
                // Prediction[0] == 1 marks the record as an outlier
                if (predictions[i].Prediction[0] == 1)
                {
                    _log.Debug($"Rejecting an outlier activity: {predictions[i].Prediction[1]}, p-value: {predictions[i].Prediction[2]}, from: {data[i].startTime}");
                }
                else
                {
                    result.Add(data[i]);
                }
            }
            return result;
        }

        /// <summary>
        /// Maps a ratio to its class label: ratios below 0.1 give "10%", ratios below 0.2 give "20%"
        /// and so on; everything from 0.9 upwards (and NaN) gives "100%".
        /// </summary>
        /// <param name="ratio">Amount relative to the maximum amount.</param>
        /// <returns>One of the labels "10%" to "100%".</returns>
        private static string RatioToLabel(double ratio)
        {
            // Thresholds are computed in double precision. Single-precision literals widen to values
            // slightly above or below the intended bound, so exact boundary ratios (0.1, 0.5, 0.7, ...)
            // would not consistently fall into the upper bucket.
            for (int i = 0; i < BucketLabels.Length - 1; i++)
            {
                if (ratio < (i + 1) / 10.0)
                {
                    return BucketLabels[i];
                }
            }
            return BucketLabels[BucketLabels.Length - 1];
        }

        /// <summary>
        /// Converts a class label back to a number. Despite the method name the result is expressed
        /// in percent (10 to 100), not as a fraction.
        /// </summary>
        /// <param name="label">A label such as "30%"; may be <c>null</c>.</param>
        /// <returns>
        /// -1 for a <c>null</c> label, the percentage of a known label, and 100 for any other string.
        /// </returns>
        internal double LabelToRatio(string label)
        {
            if (label is null)
            {
                return -1;
            }

            int index = Array.IndexOf(BucketLabels, label);
            // unknown labels are treated like the top bucket
            return index >= 0 ? (index + 1) * 10 : 100;
        }
    }
204
}
(2-2/9)