Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 0a9f9349

Přidáno uživatelem Vojtěch Bartička asi před 2 lety

Backend document rendering

Zobrazit rozdíly:

Backend/Backend/Program.cs
9 9
using RestAPI.Middleware;
10 10
using RestAPI.Utils;
11 11
using Serilog;
12
using AutoMapper;
13 12
using Core.MapperProfiles;
14 13

  
15 14
var builder = WebApplication.CreateBuilder(args);
Backend/Core/Services/AnnotationService/AnnotationServiceEF.cs
23 23
        private readonly ILogger logger;
24 24
        private readonly IMapper mapper;
25 25

  
26
        private const string TAG_ID_ATTRIBUTE_NAME = "aswi-tag-id";
27
        private const string TAG_INSTANCE_ATTRIBUTE_NAME = "aswi-iag-instance";
28

  
26 29
        public AnnotationServiceEF(DatabaseContext context, ILogger logger, IMapper mapper)
27 30
        {
28 31
            this.context = context;
......
111 114
            }
112 115

  
113 116
            var documentContent = context.Documents.Where(d => d.Id == annotation.Document.Id).Select(d => d.Content).First();
114
            var preprocessingResult = PreprocessHTML(documentContent.Content);
115

  
116
            // We probably cannot use AutoMapper since we are dealing with too many different entities
117
            AnnotationInfo annotationInfo = new()
118
            {
119
                SourceDocumentContent = documentContent.Content,
120
                DocumentToRender = preprocessingResult.docToRender,
121
                TagStartPositions = preprocessingResult.openingTagPositions,
122
                TagLengths = preprocessingResult.openingTagLengths,
123
                Note = annotation.Note,
124
                State = annotation.State,
125
                Type = IsHtml(documentContent.Content) ? EDocumentType.HTML : EDocumentType.TEXT
126
            };
127 117

  
128 118
            var tags = context.AnnotationTags.Where(at => at.Annotation.Id == annotationId)
129 119
                .Include(at => at.Tag).ThenInclude(t => t.Category)
130 120
                .Include(at => at.SubTag)
131 121
                .ToList();
132 122

  
123
            List<TagInstanceInfo> tagInstanceInfos = new();
133 124
            foreach (var tag in tags)
134 125
            {
135 126
                var tagInstance = mapper.Map<TagInstanceInfo>(tag);
136
                annotationInfo.TagInstances.Add(tagInstance);
127
                tagInstanceInfos.Add(tagInstance);
137 128
            }
138 129

  
130
            var docToRender = PreprocessHTML(documentContent.Content, tags);
131

  
132
            // We probably cannot use AutoMapper since we are dealing with too many different entities
133
            AnnotationInfo annotationInfo = new()
134
            {
135
                SourceDocumentContent = documentContent.Content,
136
                DocumentToRender = docToRender,
137
                TagStartPositions = TagStartPositions.ToArray(),
138
                TagLengths = TagStartLengths.ToArray(),
139
                Note = annotation.Note,
140
                State = annotation.State,
141
                Type = IsHtml(documentContent.Content) ? EDocumentType.HTML : EDocumentType.TEXT,
142
                TagInstances = tagInstanceInfos
143
            };
144

  
139 145
            return annotationInfo;
140 146
        }
141 147

  
142
        private (string docToRender, int[] openingTagPositions, int[] openingTagLengths, int[] closingTagPositions, int[] closingTagLengths) PreprocessHTML(string htmlSource)
148
        private List<int> TagStartPositions = new();
149
        private List<int> TagStartLengths = new();
150
        private List<int> TagClosingPositions = new();
151
        private List<int> TagClosingLengths = new();
152
        private Dictionary<HtmlNode, HtmlNode> NodeDict = new();
153

  
154
        private string PreprocessHTML(string htmlSource, List<AnnotationTag> tags)
143 155
        {
144 156
            var docOriginal = new HtmlDocument();
145 157
            docOriginal.LoadHtml(htmlSource);
146 158
            var docToEdit = new HtmlDocument();
147 159
            docToEdit.LoadHtml(htmlSource);
148 160

  
149
            string idAttributeName = "aswi-tag-id";
150

  
151 161
            var descendantsOriginal = docOriginal.DocumentNode.DescendantsAndSelf();
152 162
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf();
153 163

  
154
            List<int> tagStartPositions = new();
155
            List<int> tagStartLengths = new();
164
            int currentId = 0;
156 165

  
166
            FillNodeDict(descendantsOriginal, descendantsToEdit);
167
            AssignIdsToOriginalDocument(descendantsOriginal, ref currentId);
157 168

  
158
            List<int> tagClosingPositions = new();
159
            List<int> tagClosingLengths = new();
169
            var cssNode = GenerateCSS(docToEdit, tags, new());
170
            docToEdit.DocumentNode.ChildNodes.Add(cssNode);
160 171

  
161
            int currentId = 0;
172
            WrapTextInSpan(descendantsOriginal, docToEdit);
162 173

  
163
            var zipped = descendantsOriginal.Zip(descendantsToEdit, (orig, toEdit) => new { Original = orig, ToEdit = toEdit });
164
            foreach (var node in zipped)
174
            int descendantsCount = descendantsToEdit.Count();
175
            foreach (var tag in tags)
165 176
            {
166
                var originalNode = node.Original;
167
                var toEditNode = node.ToEdit;
168

  
169
                if (originalNode.Name.Contains("#"))
177
                int i = 0;
178
                List<HtmlNode> addedForSelection = new();
179
                while (i < descendantsCount)
170 180
                {
171
                    continue;
181
                    for (; i < descendantsCount; i++)
182
                    {
183
                        var node = descendantsToEdit.ElementAt(i);
184
                        if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
185
                            node.ParentNode.Name == "style")
186
                        {
187
                            continue;
188
                        }
189

  
190
                        var start = TagStartPositions[node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1)];
191
                        var end = TagClosingPositions[node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1)];
192

  
193
                        int selectionStart = tag.Position;
194
                        int selectionEnd = tag.Position + tag.Length;
195

  
196
                        if (selectionStart < end && selectionEnd > start)
197
                        {
198
                            if (selectionStart <= start && selectionEnd >= end)
199
                            {
200
                                addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
201
                            }
202
                            else if (selectionStart <= start)
203
                            {
204
                                addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
205
                            }
206
                            else if (selectionEnd >= end)
207
                            {
208
                                addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
209
                            }
210
                            else
211
                            {
212
                                addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
213
                            }
214
                            break;
215
                        }
216
                    }
172 217
                }
173 218

  
174
                tagStartPositions.Add(originalNode.OuterStartIndex);
175
                tagStartLengths.Add(originalNode.InnerStartIndex - originalNode.OuterStartIndex);
176
                currentId = tagStartPositions.Count - 1;
177
                toEditNode.Attributes.Add(idAttributeName, currentId.ToString());
178

  
179
                tagClosingPositions.Add(originalNode.InnerStartIndex + originalNode.InnerLength);
180
                tagClosingLengths.Add((originalNode.OuterStartIndex + originalNode.OuterLength) - (originalNode.InnerStartIndex + originalNode.InnerLength));
181 219
            }
182 220

  
183 221
            string docToRender = docToEdit.DocumentNode.OuterHtml;
184 222
            HtmlSanitizer sanitizer = new HtmlSanitizer();
185 223
            sanitizer.AllowedAttributes.Clear();
186
            sanitizer.AllowedAttributes.Add(idAttributeName);
224
            sanitizer.AllowedAttributes.Add(TAG_ID_ATTRIBUTE_NAME);
225
            sanitizer.AllowedAttributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME);
187 226
            if (sanitizer.AllowedTags.Contains("script"))
188 227
            {
189 228
                sanitizer.AllowedTags.Remove("script");
......
191 230

  
192 231
            docToRender = sanitizer.Sanitize(docToRender);
193 232

  
194
            return (docToRender, tagStartPositions.ToArray(), tagStartLengths.ToArray(), tagClosingPositions.ToArray(), tagClosingLengths.ToArray());
233
            return docToRender;
234
        }
235

  
236
        /// <summary>
        /// Handles a text node that is completely covered by the selection: the node is removed
        /// from its parent and replaced, at the same child index, by a single annotated span
        /// holding the node's whole text.
        /// </summary>
        /// <param name="node">Text node to replace.</param>
        /// <param name="selectionStart">Start position of the selection.</param>
        /// <param name="selectionEnd">End position of the selection.</param>
        /// <param name="start">Start position recorded for the node's parent.</param>
        /// <param name="end">End position recorded for the node's parent.</param>
        /// <param name="docToEdit">Document used to create the new span element.</param>
        /// <param name="tag">Annotation tag whose instance id is written onto the span.</param>
        /// <returns>The span that replaced <paramref name="node"/>.</returns>
        private HtmlNode SolveFullFill(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit, AnnotationTag tag)
        {
            string textSelected = node.InnerText;

            var parentNode = node.ParentNode;
            int nodeIndex = parentNode.ChildNodes.IndexOf(node);
            parentNode.ChildNodes.RemoveAt(nodeIndex);

            // Mark each edge of the span independently. Previously a marker was only emitted
            // when BOTH edges coincided with the node, so a selection that spans several nodes
            // and begins (or ends) exactly on a node boundary lost its start (or end) marker.
            EPosition markerPosition = (selectionStart == start, selectionEnd == end) switch
            {
                (true, true) => EPosition.MARK_LEFT_RIGHT,
                (true, false) => EPosition.MARK_LEFT,
                (false, true) => EPosition.MARK_RIGHT,
                _ => EPosition.MARK_NONE,
            };

            // CreateSpan appends one entry to TagStartPositions, so the current Count is the
            // index (and therefore the id) the new span will receive.
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, start, markerPosition);
            parentNode.ChildNodes.Insert(nodeIndex, spanSelected);

            return spanSelected;
        }
257

  
258
        /// <summary>
        /// Handles a text node whose beginning is covered by the selection while its tail is not:
        /// the node is replaced by an annotated span with the selected text followed by a plain
        /// span with the remaining text.
        /// </summary>
        /// <param name="node">Text node to split.</param>
        /// <param name="selectionStart">Start position of the selection.</param>
        /// <param name="selectionEnd">End position of the selection.</param>
        /// <param name="start">Start position recorded for the node's parent.</param>
        /// <param name="end">End position recorded for the node's parent.</param>
        /// <param name="docToEdit">Document used to create the new span elements.</param>
        /// <param name="tag">Annotation tag whose instance id is written onto the selected span.</param>
        /// <returns>The created spans, in the order: selected span, trailing span.</returns>
        private List<HtmlNode> SolveRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTag tag)
        {
            string text = node.InnerText;

            // Offset inside the node text at which the selection ends. It is clamped once and
            // used for both halves; previously only the "after" half was clamped, so the
            // "selected" Substring could throw ArgumentOutOfRangeException.
            // NOTE(review): the original computed the "after" offset as
            // selectionStart - start + tag.Length; this assumes selectionEnd equals
            // selectionStart + tag.Length, as the visible caller computes it.
            int splitIndex = Math.Clamp(selectionEnd - start, 0, text.Length);
            string textSelected = text.Substring(0, splitIndex);
            string textAfter = text.Substring(splitIndex);

            var parentNode = node.ParentNode;
            int nodeIndex = parentNode.ChildNodes.IndexOf(node);
            parentNode.ChildNodes.RemoveAt(nodeIndex);

            int spanSelectedStart = start;
            int spanAfterStart = start + textSelected.Length;

            // The selection always ends inside this node; it also starts here when its start
            // coincides with the node start, in which case both edges are marked.
            EPosition markerPosition = selectionStart == start ? EPosition.MARK_LEFT_RIGHT : EPosition.MARK_RIGHT;

            // CreateSpan appends to TagStartPositions, so Count is re-read before each call to
            // obtain the id of the span about to be created.
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, spanSelectedStart, markerPosition);
            parentNode.ChildNodes.Insert(nodeIndex, spanSelected);

            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanAfterStart);
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanAfter);

            return new() { spanSelected, spanAfter };
        }
283

  
284
        /// <summary>
        /// Handles a text node whose tail is covered by the selection while its beginning is not:
        /// the node is replaced by a plain span with the leading text followed by an annotated
        /// span with the selected text.
        /// </summary>
        /// <param name="node">Text node to split.</param>
        /// <param name="selectionStart">Start position of the selection.</param>
        /// <param name="selectionEnd">End position of the selection.</param>
        /// <param name="start">Start position recorded for the node's parent.</param>
        /// <param name="end">End position recorded for the node's parent.</param>
        /// <param name="docToEdit">Document used to create the new span elements.</param>
        /// <param name="tag">Annotation tag whose instance id is written onto the selected span.</param>
        /// <returns>The created spans, in the order: selected span, leading span.</returns>
        private List<HtmlNode> SolveLeftGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                             AnnotationTag tag)
        {
            string text = node.InnerText;

            // Clamp the split offset and the selected length to the node text. Previously the
            // leading Substring was unclamped and could throw ArgumentOutOfRangeException.
            int selectedOffset = Math.Clamp(selectionStart - start, 0, text.Length);
            int selectedLength = Math.Clamp(tag.Length, 0, text.Length - selectedOffset);
            string textBefore = text.Substring(0, selectedOffset);
            string textSelected = text.Substring(selectedOffset, selectedLength);

            var parentNode = node.ParentNode;
            int nodeIndex = parentNode.ChildNodes.IndexOf(node);
            parentNode.ChildNodes.RemoveAt(nodeIndex);

            int spanBeforeStart = start;
            int spanSelectedStart = start + textBefore.Length;

            // The selection always starts inside this node; it also ends here when its end
            // coincides with the node end, in which case both edges are marked.
            EPosition markerPosition = selectionEnd == end ? EPosition.MARK_LEFT_RIGHT : EPosition.MARK_LEFT;

            // CreateSpan appends to TagStartPositions, so Count is re-read before each call to
            // obtain the id of the span about to be created.
            HtmlNode spanBefore = CreateSpan(docToEdit, textBefore, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanBeforeStart);
            parentNode.ChildNodes.Insert(nodeIndex, spanBefore);

            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, spanSelectedStart, markerPosition);
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanSelected);

            return new() { spanSelected, spanBefore };
        }
309

  
310
        /// <summary>
        /// Handles a text node that contains the whole selection with unselected text on both
        /// sides: the node is replaced by three spans (leading text, annotated selection,
        /// trailing text) inserted at the original child index.
        /// </summary>
        /// <param name="node">Text node to split.</param>
        /// <param name="selectionStart">Start position of the selection.</param>
        /// <param name="selectionEnd">End position of the selection.</param>
        /// <param name="start">Start position recorded for the node's parent.</param>
        /// <param name="end">End position recorded for the node's parent.</param>
        /// <param name="docToEdit">Document used to create the new span elements.</param>
        /// <param name="tag">Annotation tag whose length and instance id are used for the selected span.</param>
        /// <returns>The created spans, in the order: selected span, leading span, trailing span.</returns>
        private List<HtmlNode> SolveLeftRightGap(HtmlNode node, int selectionStart, int selectionEnd, int start, int end, HtmlDocument docToEdit,
                                                 AnnotationTag tag)
        {
            string text = node.InnerText;

            // Clamp both cut points to the node text so that none of the Substring calls can
            // throw ArgumentOutOfRangeException; the sibling gap helpers clamp as well.
            int selectedOffset = Math.Clamp(selectionStart - start, 0, text.Length);
            int selectedLength = Math.Clamp(tag.Length, 0, text.Length - selectedOffset);
            string textBefore = text.Substring(0, selectedOffset);
            string textSelected = text.Substring(selectedOffset, selectedLength);
            string textAfter = text.Substring(selectedOffset + selectedLength);

            var parentNode = node.ParentNode;
            int nodeIndex = parentNode.ChildNodes.IndexOf(node);
            parentNode.ChildNodes.RemoveAt(nodeIndex);

            int spanBeforeStart = start;
            int spanSelectedStart = start + textBefore.Length;
            int spanAfterStart = start + textBefore.Length + textSelected.Length;

            // CreateSpan appends to TagStartPositions, so Count is re-read before each call to
            // obtain the id of the span about to be created.
            HtmlNode spanBefore = CreateSpan(docToEdit, textBefore, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanBeforeStart);
            parentNode.ChildNodes.Insert(nodeIndex, spanBefore);

            // The selection both starts and ends inside this node, so both edges are marked.
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, spanSelectedStart, EPosition.MARK_LEFT_RIGHT);
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanSelected);

            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanAfterStart);
            parentNode.ChildNodes.Insert(nodeIndex + 2, spanAfter);

            return new() { spanSelected, spanBefore, spanAfter };
        }
341

  
342
        /// <summary>
        /// Creates a <c>span</c> element holding <paramref name="text"/>, records its start and
        /// end positions in the position lists, and stamps it with the given tag id. When an
        /// instance id is supplied, the span additionally gets the <c>annotation</c> class, the
        /// instance attribute and the <c>start</c>/<c>end</c> marker attributes requested by
        /// <paramref name="position"/>.
        /// </summary>
        /// <param name="doc">Document used to create the element.</param>
        /// <param name="text">Content assigned to the span's inner HTML.</param>
        /// <param name="tagIdAttributeName">Name of the attribute that stores the tag id.</param>
        /// <param name="tagId">Value written into the tag id attribute.</param>
        /// <param name="tagInstanceAttributeName">Name of the attribute that stores the instance id.</param>
        /// <param name="instanceId">Annotation instance id, or <c>null</c> for a plain span.</param>
        /// <param name="startPosition">Start position recorded for the span.</param>
        /// <param name="position">Which edges of the span receive marker attributes.</param>
        /// <returns>The newly created span element.</returns>
        private HtmlNode CreateSpan(HtmlDocument doc, string text, string tagIdAttributeName, int tagId, string tagInstanceAttributeName,
                                            Guid? instanceId, int startPosition, EPosition position = EPosition.MARK_NONE)
        {
            HtmlNode element = doc.CreateElement("span");
            element.InnerHtml = text;

            // Spans have no opening/closing tag in the source document, hence zero lengths.
            TagStartPositions.Add(startPosition);
            TagStartLengths.Add(0);
            TagClosingPositions.Add(startPosition + text.Length);
            TagClosingLengths.Add(0);
            element.Attributes.Add(tagIdAttributeName, tagId.ToString());

            // Plain (non-annotated) spans are finished at this point.
            if (instanceId is not Guid instance)
            {
                return element;
            }

            element.AddClass("annotation");
            element.Attributes.Add(tagInstanceAttributeName, instance.ToString());

            bool markLeft = position is EPosition.MARK_LEFT or EPosition.MARK_LEFT_RIGHT;
            bool markRight = position is EPosition.MARK_RIGHT or EPosition.MARK_LEFT_RIGHT;

            if (markLeft)
            {
                element.Attributes.Add("start", "1");
            }
            if (markRight)
            {
                element.Attributes.Add("end", "1");
            }

            return element;
        }
370

  
371
        /// <summary>
        /// Selects which edges of a generated annotation span receive a marker attribute.
        /// </summary>
        private enum EPosition
        {
            /// <summary>No edge is marked.</summary>
            MARK_NONE = 0,

            /// <summary>Both the left and the right edge are marked.</summary>
            MARK_LEFT_RIGHT = 2,

            /// <summary>Only the right edge is marked.</summary>
            MARK_RIGHT = 3,

            /// <summary>Only the left edge is marked.</summary>
            MARK_LEFT = 5
        }
378

  
379
        /// <summary>
        /// For every element of the original document that has both element children and
        /// "#"-named children (text, comments), replaces each of its direct text children in
        /// the editable document with a covering <c>span</c>. The span receives a fresh tag id
        /// and its source positions are appended to the position lists.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes of the unmodified document to inspect.</param>
        /// <param name="docToEdit">Editable document in which the spans are created.</param>
        private void WrapTextInSpan(IEnumerable<HtmlNode> descendantsOriginal, HtmlDocument docToEdit)
        {
            foreach (var node in descendantsOriginal)
            {
                // "#"-named nodes (#text, #comment, #document) are not elements.
                if (node.Name.Contains("#"))
                {
                    continue;
                }

                bool hasSpecialChild = false;
                bool hasElementChild = false;
                foreach (var child in node.ChildNodes)
                {
                    if (child.Name.Contains("#"))
                    {
                        hasSpecialChild = true;
                    }
                    else
                    {
                        hasElementChild = true;
                    }
                }

                // Only mixed content is wrapped; elements with only "#"-named children, only
                // element children, or no children at all are left untouched.
                if (!hasSpecialChild || !hasElementChild)
                {
                    continue;
                }

                // Counterpart of the element in the editable document; resolved once per
                // element instead of once per text child.
                var parent = NodeDict[node];

                foreach (var child in node.ChildNodes)
                {
                    if (!child.Name.Contains("#text"))
                    {
                        continue;
                    }

                    HtmlNode coveringSpan = docToEdit.CreateElement("span");
                    coveringSpan.InnerHtml = child.InnerHtml;

                    // A covering span has no tags in the source document, hence zero lengths.
                    TagStartPositions.Add(child.InnerStartIndex);
                    TagStartLengths.Add(0);
                    TagClosingPositions.Add(child.InnerStartIndex + child.InnerLength);
                    TagClosingLengths.Add(0);
                    coveringSpan.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, (TagStartPositions.Count - 1).ToString());

                    // Swap the editable twin of the text node for the span, keeping its index.
                    var index = parent.ChildNodes.IndexOf(NodeDict[child]);
                    parent.ChildNodes.RemoveAt(index);
                    parent.ChildNodes.Insert(index, coveringSpan);
                }
            }
        }
437

  
438
        /// <summary>
        /// Builds a <c>style</c> element with the CSS used to render annotations: base rules
        /// for spans, one rule per tag instance (border colour taken from the tag, bottom
        /// padding growing by 2px per tag starting at 1px), and rules for the
        /// <c>start</c>/<c>end</c> edge markers.
        /// </summary>
        /// <param name="docToEdit">Document used to create the style element.</param>
        /// <param name="tags">Annotation tags for which per-instance rules are generated.</param>
        /// <param name="paddings">Not read by this method.</param>
        /// <returns>The populated style element; it is not attached to the document here.</returns>
        private HtmlNode GenerateCSS(HtmlDocument docToEdit, List<AnnotationTag> tags, List<int> paddings)
        {
            HtmlNode styleElement = docToEdit.CreateElement("style");

            List<string> rules = new()
            {
                "span.annotation {border-bottom: 2px solid;}",
                "span {line-height: 30px}\n"
            };

            // TODO temporary
            for (int i = 0; i < tags.Count; i++)
            {
                var tag = tags[i];
                int padding = 1 + (2 * i);
                rules.Add($"span[{TAG_INSTANCE_ATTRIBUTE_NAME}=\"{tag.Instance}\"] {{ border-color:{tag.Tag.Color}; padding-bottom: {padding}px }}");
            }

            rules.Add("span[end=\"1\"] {border-end-end-radius: 0px; border-right: 1px solid darkgray}\n");
            rules.Add("span[start=\"1\"] {border-end-start-radius: 0px; border-left: 1px solid darkgray}\n");

            styleElement.InnerHtml = string.Concat(rules);
            return styleElement;
        }
459

  
460
        /// <summary>
        /// Walks the original document and, for every node whose name does not contain "#",
        /// records the position and length of its opening and closing tags and writes the
        /// resulting id onto the matching node of the editable document.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes of the unmodified document.</param>
        /// <param name="currentId">Receives the id assigned to the most recently processed element.</param>
        private void AssignIdsToOriginalDocument(IEnumerable<HtmlNode> descendantsOriginal, ref int currentId)
        {
            foreach (var source in descendantsOriginal)
            {
                var editableTwin = NodeDict[source];

                // "#"-named nodes (#text, #comment, #document) get no id of their own.
                if (source.Name.Contains("#"))
                {
                    continue;
                }

                int outerStart = source.OuterStartIndex;
                int innerStart = source.InnerStartIndex;
                int innerEnd = innerStart + source.InnerLength;
                int outerEnd = outerStart + source.OuterLength;

                // Opening tag: spans from the outer start to the inner start.
                TagStartPositions.Add(outerStart);
                TagStartLengths.Add(innerStart - outerStart);

                // The id is the index of the entry that was just appended.
                currentId = TagStartPositions.Count - 1;
                editableTwin.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, currentId.ToString());

                // Closing tag: spans from the inner end to the outer end.
                TagClosingPositions.Add(innerEnd);
                TagClosingLengths.Add(outerEnd - innerEnd);
            }
        }
483

  
484
        /// <summary>
        /// Pairs the nodes of the original document with the nodes of the editable document
        /// position by position and stores each pair in <c>NodeDict</c>, keyed by the original
        /// node.
        /// </summary>
        /// <param name="descendantsOriginal">Nodes of the unmodified document.</param>
        /// <param name="descendantsToEdit">Nodes of the editable document, in the same order.</param>
        private void FillNodeDict(IEnumerable<HtmlNode> descendantsOriginal, IEnumerable<HtmlNode> descendantsToEdit)
        {
            // Zip stops at the shorter sequence, so only nodes present in both are paired.
            foreach (var (original, editable) in descendantsOriginal.Zip(descendantsToEdit))
            {
                NodeDict.Add(original, editable);
            }
        }
196 498

  
197 499
        // TODO temporary
198 500
        /// <summary>
        /// Heuristically decides whether the given document content is HTML by looking for an
        /// HTML doctype declaration or an opening <c>html</c> tag.
        /// </summary>
        /// <param name="text">Document content to inspect.</param>
        /// <returns>
        /// <c>true</c> when the text contains <c>&lt;!DOCTYPE html</c> or <c>&lt;html</c>
        /// (compared case-insensitively); <c>false</c> otherwise, including for null or empty
        /// input.
        /// </returns>
        private bool IsHtml(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            // Match on the prefix only and ignore case, so that variants such as
            // <html lang="en">, <HTML> or <!doctype html> are recognised as well.
            return text.Contains("<!DOCTYPE html", StringComparison.OrdinalIgnoreCase)
                || text.Contains("<html", StringComparison.OrdinalIgnoreCase);
        }
202 504

  
203 505
        public void AddAnnotationInstance(Guid annotationId, Guid userId, ERole userRole, AnnotationInstanceAddRequest request)
......
279 581
            context.AnnotationTags
280 582
                .Where(at => at.Id == tagInstanceId).ToList()
281 583
                .ForEach(a => context.AnnotationTags.Remove(a));
282
            
584

  
283 585
            context.SaveChanges();
284 586
        }
285 587
    }

Také k dispozici: Unified diff