Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 553486ad

Přidáno uživatelem Vojtěch Bartička před asi 2 roky(ů)

Added caching and incremental updates to annotations

Zobrazit rozdíly:

Backend/Core/Services/AnnotationService/AnnotationServiceEF.cs
14 14
using Ganss.XSS;
15 15
using HtmlAgilityPack;
16 16
using System.Text.RegularExpressions;
17
using Newtonsoft.Json;
17 18

  
18 19
namespace Core.Services.AnnotationService
19 20
{
......
25 26

  
26 27
        private const string TAG_ID_ATTRIBUTE_NAME = "aswi-tag-id";
27 28
        private const string TAG_INSTANCE_ATTRIBUTE_NAME = "aswi-tag-instance";
29
        private const string TAG_EF_ID_ATTRIBUTE_NAME = "aswi-tag-ef-id";
28 30

  
29 31
        public AnnotationServiceEF(DatabaseContext context, ILogger logger, IMapper mapper)
30 32
        {
......
116 118
            var documentContent = context.Documents.Where(d => d.Id == annotation.Document.Id).Select(d => d.Content).First();
117 119

  
118 120
            var tags = context.AnnotationTags.Where(at => at.Annotation.Id == annotationId)
119
                .Include(at => at.Tag).ThenInclude(t => t.Category)
121
                .Include(at => at.Tag)
122
                .ThenInclude(t => t.Category)
120 123
                .Include(at => at.SubTag)
121 124
                .ToList();
122 125

  
......
127 130
                tagInstanceInfos.Add(tagInstance);
128 131
            }
129 132

  
130
            var docToRender = PreprocessHTML(documentContent.Content, tags);
133
            var docToRender = "";
134
            if (annotation.CachedDocumentHTML == "")
135
            {
136
                docToRender = FullPreprocessHTML(documentContent.Content, tags);
137
                annotation.CachedStartPositions = JsonConvert.SerializeObject(TagStartPositions);
138
                annotation.CachedLengths = JsonConvert.SerializeObject(TagStartLengths);
139
                annotation.CachedClosingPositions = JsonConvert.SerializeObject(TagClosingPositions);
140
                annotation.CachedClosingLengths = JsonConvert.SerializeObject(TagClosingLengths);
141
                //annotation.CachedNodeDict = JsonConvert.SerializeObject(NodeDict);
142
                annotation.ModifiedType = EModified.NONE;
143
                annotation.CachedDocumentHTML = docToRender;
144
                context.SaveChanges();
145
            }
146
            else
147
            {
148
                docToRender = annotation.CachedDocumentHTML;
149
                TagStartPositions = JsonConvert.DeserializeObject<List<int>>(annotation.CachedStartPositions);
150
                TagStartLengths = JsonConvert.DeserializeObject<List<int>>(annotation.CachedLengths);
151
                TagClosingPositions = JsonConvert.DeserializeObject<List<int>>(annotation.CachedClosingPositions);
152
                TagClosingLengths = JsonConvert.DeserializeObject<List<int>>(annotation.CachedClosingLengths);
153

  
154
                // The annotation has been modified and we need to either add the new tag or remove the tag
155
                if (annotation.ModifiedType != EModified.NONE)
156
                {
157
                    if (annotation.ModifiedType == EModified.ADDED)
158
                    {
159
                        var lastModifiedTag = context.AnnotationTags.Where(at => at.Id == annotation.LastModifiedTagId).First();
160
                        docToRender = PartialPreprocessHTMLAddTag(docToRender, documentContent.Content, lastModifiedTag, tags);
161
                    }
162
                    else if (annotation.ModifiedType == EModified.REMOVED)
163
                    {
164
                        docToRender = PartialPreprocessHTMLRemoveTag(docToRender, documentContent.Content, new AnnotationTag() { Id = annotation.LastModifiedTagId.Value }, tags);
165
                    }
166

  
167
                    annotation.ModifiedType = EModified.NONE;
168
                    annotation.CachedStartPositions = JsonConvert.SerializeObject(TagStartPositions);
169
                    annotation.CachedLengths = JsonConvert.SerializeObject(TagStartLengths);
170
                    annotation.CachedClosingPositions = JsonConvert.SerializeObject(TagClosingPositions);
171
                    annotation.CachedClosingLengths = JsonConvert.SerializeObject(TagClosingLengths);
172
                    annotation.CachedDocumentHTML = docToRender;
173
                    //annotation.CachedNodeDict = JsonConvert.SerializeObject(NodeDict);
174
                    context.SaveChanges();
175
                }
176
            }
131 177

  
132 178
            // We probably cannot use AutoMapper since we are dealing with too many different entities
133 179
            AnnotationInfo annotationInfo = new()
......
142 188
                TagInstances = tagInstanceInfos
143 189
            };
144 190

  
191
            NodeDict.Clear();
192

  
145 193
            return annotationInfo;
146 194
        }
147 195

  
......
151 199
        private List<int> TagClosingLengths = new();
152 200
        private Dictionary<HtmlNode, HtmlNode> NodeDict = new();
153 201

  
154
        private string PreprocessHTML(string htmlSource, List<AnnotationTag> tags)
202
        /*
203
         *      Full HTML Preprocessing -------------------------------------------------------------------------------
204
         */
205

  
206

  
207
        private string FullPreprocessHTML(string htmlSource, List<AnnotationTag> tags)
155 208
        {
156 209
            var docOriginal = new HtmlDocument();
157 210
            docOriginal.LoadHtml(htmlSource);
......
159 212
            docToEdit.LoadHtml(htmlSource);
160 213

  
161 214
            var descendantsOriginal = docOriginal.DocumentNode.DescendantsAndSelf();
162
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf();
215
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
163 216

  
164 217
            int currentId = 0;
165 218

  
......
168 221

  
169 222
            WrapTextInSpan(descendantsOriginal, docToEdit);
170 223

  
224
            int descCount = descendantsToEdit.Count;
225

  
171 226
            foreach (var tag in tags)
172 227
            {
173 228
                int i = 0;
174 229
                List<HtmlNode> addedForSelection = new();
175
                while (i < descendantsToEdit.Count())
230
                while (i < descCount)
176 231
                {
177
                    for (; i < descendantsToEdit.Count(); i++)
232
                    for (; i < descCount; i++)
178 233
                    {
179 234
                        var node = descendantsToEdit.ElementAt(i);
180 235
                        if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
......
209 264
                            {
210 265
                                addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tag));
211 266
                            }
267
                            descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
268
                            descCount = descendantsToEdit.Count;
212 269
                            break;
213 270
                        }
214 271
                    }
......
227 284
            if (sanitizer.AllowedTags.Contains("script"))
228 285
            {
229 286
                sanitizer.AllowedTags.Remove("script");
230
            }   
287
            }
231 288
            if (!sanitizer.AllowedTags.Contains("style"))
232 289
            {
233 290
                sanitizer.AllowedTags.Add("style");
......
257 314
                markerPosition = EPosition.MARK_LEFT_RIGHT;
258 315
            }
259 316

  
260
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
261
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, start, markerPosition);
317
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TagStartPositions.Count, tag.Instance, tag.Id, start, markerPosition);
262 318
            parentNode.ChildNodes.Insert(nodeIndex, spanSelected);
263 319

  
264 320
            return spanSelected;
......
279 335
            int spanSelectedStart = start;
280 336
            int spanAfterStart = start + textSelected.Length;
281 337

  
282
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
283
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, spanSelectedStart, EPosition.MARK_RIGHT);
338
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TagStartPositions.Count, tag.Instance, tag.Id, spanSelectedStart, EPosition.MARK_RIGHT);
284 339
            parentNode.ChildNodes.Insert(nodeIndex, spanSelected);
285 340

  
286
            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
287
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanAfterStart);
341
            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TagStartPositions.Count, null, null, spanAfterStart);
288 342
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanAfter);
289 343

  
290 344
            return new() { spanSelected, spanAfter };
......
305 359
            int spanBeforeStart = start;
306 360
            int spanSelectedStart = start + textBefore.Length;
307 361

  
308
            HtmlNode spanBefore = CreateSpan(docToEdit, textBefore, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
309
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanBeforeStart);
362
            HtmlNode spanBefore = CreateSpan(docToEdit, textBefore, TagStartPositions.Count, null, null, spanBeforeStart);
310 363
            parentNode.ChildNodes.Insert(nodeIndex, spanBefore);
311 364

  
312
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
313
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, spanSelectedStart, EPosition.MARK_LEFT);
365
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TagStartPositions.Count, tag.Instance, tag.Id, spanSelectedStart, EPosition.MARK_LEFT);
314 366
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanSelected);
315 367

  
316 368
            return new() { spanSelected, spanBefore };
......
333 385
            int spanSelectedStart = start + textBefore.Length;
334 386
            int spanAfterStart = start + textBefore.Length + textSelected.Length;
335 387

  
336
            HtmlNode spanBefore = CreateSpan(docToEdit, textBefore, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
337
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanBeforeStart);
388
            HtmlNode spanBefore = CreateSpan(docToEdit, textBefore, TagStartPositions.Count, null, null, spanBeforeStart);
338 389
            parentNode.ChildNodes.Insert(nodeIndex, spanBefore);
339 390

  
340
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
341
                TAG_INSTANCE_ATTRIBUTE_NAME, tag.Instance, spanSelectedStart, EPosition.MARK_LEFT_RIGHT);
391
            HtmlNode spanSelected = CreateSpan(docToEdit, textSelected, TagStartPositions.Count, tag.Instance, tag.Id, spanSelectedStart, EPosition.MARK_LEFT_RIGHT);
342 392
            parentNode.ChildNodes.Insert(nodeIndex + 1, spanSelected);
343 393

  
344
            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TAG_ID_ATTRIBUTE_NAME, TagStartPositions.Count,
345
                TAG_INSTANCE_ATTRIBUTE_NAME, null, spanAfterStart);
394
            HtmlNode spanAfter = CreateSpan(docToEdit, textAfter, TagStartPositions.Count, null, null, spanAfterStart);
346 395
            parentNode.ChildNodes.Insert(nodeIndex + 2, spanAfter);
347 396

  
348 397
            return new() { spanSelected, spanBefore, spanAfter };
349 398
        }
350 399

  
351
        private HtmlNode CreateSpan(HtmlDocument doc, string text, string tagIdAttributeName, int tagId, string tagInstanceAttributeName,
352
                                            Guid? instanceId, int startPosition, EPosition position = EPosition.MARK_NONE)
400
        private HtmlNode CreateSpan(HtmlDocument doc, string text, int tagId, Guid? instanceId, Guid? entityId, int startPosition, EPosition position = EPosition.MARK_NONE)
353 401
        {
354 402
            HtmlNode span = doc.CreateElement("span");
355 403
            span.InnerHtml = text;
......
357 405
            TagStartLengths.Add(0);
358 406
            TagClosingPositions.Add(startPosition + text.Length);
359 407
            TagClosingLengths.Add(0);
360
            span.Attributes.Add(tagIdAttributeName, tagId.ToString());
408
            span.Attributes.Add(TAG_ID_ATTRIBUTE_NAME, tagId.ToString());
361 409

  
362 410
            if (instanceId != null)
363 411
            {
364 412
                span.AddClass("annotation");
365
                span.Attributes.Add(tagInstanceAttributeName, instanceId.Value.ToString());
413
                span.Attributes.Add(TAG_INSTANCE_ATTRIBUTE_NAME, instanceId.Value.ToString());
414
                span.Attributes.Add(TAG_EF_ID_ATTRIBUTE_NAME, entityId.ToString());
366 415

  
367 416
                if (position == EPosition.MARK_LEFT || position == EPosition.MARK_LEFT_RIGHT)
368 417
                {
......
505 554
            }
506 555
        }
507 556

  
557

  
558
        /*
559
         *      End of Full HTML Preprocessing ------------------------------------------------------------------------
560
         */
561

  
562
        /*
563
         *      Partial HTML Preprocessing ----------------------------------------------------------------------------
564
         */
565

  
566
        /// <summary>
        /// Incrementally updates already preprocessed (cached) annotation HTML by marking the text covered by
        /// <paramref name="tagToAdd"/>, instead of rebuilding the whole document from the original source.
        /// </summary>
        /// <param name="htmlToEdit">
        /// Cached HTML from an earlier preprocessing pass. Its first top-level node is dropped through
        /// <c>RemoveCSS</c> and replaced by freshly generated CSS.
        /// </param>
        /// <param name="htmlOriginal">
        /// Original document HTML. The incremental update does not read it; the parameter is kept so existing
        /// callers keep compiling.
        /// </param>
        /// <param name="tagToAdd">Tag whose <c>Position</c> and <c>Length</c> define the selection to mark.</param>
        /// <param name="tags">All tags of the annotation; forwarded to <c>GenerateCSS</c>.</param>
        /// <returns>Outer HTML of the updated document.</returns>
        private string PartialPreprocessHTMLAddTag(string htmlToEdit, string htmlOriginal, AnnotationTag tagToAdd, List<AnnotationTag> tags)
        {
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            // Drop the old CSS node BEFORE taking the first snapshot, so that the index we resume from after
            // each modification refers to the same tree shape as every later snapshot.
            RemoveCSS(docToEdit);
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();

            // The selection does not change while walking the document.
            int selectionStart = tagToAdd.Position;
            int selectionEnd = tagToAdd.Position + tagToAdd.Length;

            List<HtmlNode> addedForSelection = new();
            int i = 0;

            while (i < descendantsToEdit.Count)
            {
                for (; i < descendantsToEdit.Count; i++)
                {
                    var node = descendantsToEdit[i];
                    if (!node.Name.Contains("#text") || addedForSelection.Contains(node) || addedForSelection.Contains(node.ParentNode) ||
                        node.ParentNode.Name == "style")
                    {
                        continue;
                    }

                    int nodeId = node.ParentNode.GetAttributeValue(TAG_ID_ATTRIBUTE_NAME, -1);

                    // A text node whose parent carries no (or an unknown) tag id has no cached position data;
                    // indexing the position lists with it would throw, so it is skipped instead.
                    if (nodeId < 0 || nodeId >= TagStartPositions.Count ||
                        nodeId >= TagStartLengths.Count || nodeId >= TagClosingPositions.Count)
                    {
                        continue;
                    }

                    var start = TagStartPositions[nodeId] + TagStartLengths[nodeId];
                    var end = TagClosingPositions[nodeId];

                    // No overlap between the selection and this text node.
                    if (selectionStart >= end || selectionEnd <= start)
                    {
                        continue;
                    }

                    if (selectionStart <= start && selectionEnd >= end)
                    {
                        addedForSelection.Add(SolveFullFill(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                    }
                    else if (selectionStart <= start)
                    {
                        addedForSelection.AddRange(SolveRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                    }
                    else if (selectionEnd >= end)
                    {
                        addedForSelection.AddRange(SolveLeftGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                    }
                    else
                    {
                        addedForSelection.AddRange(SolveLeftRightGap(node, selectionStart, selectionEnd, start, end, docToEdit, tagToAdd));
                    }

                    // The tree was modified: take a fresh snapshot and resume from the same index.
                    descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf().ToList();
                    break;
                }
            }

            var cssNode = GenerateCSS(docToEdit, tags, new());
            docToEdit.DocumentNode.ChildNodes.Insert(0, cssNode);

            return docToEdit.DocumentNode.OuterHtml;
        }
629

  
630

  
631
        /// <summary>
        /// Incrementally updates already preprocessed (cached) annotation HTML by stripping the annotation
        /// markers from every node that belongs to <paramref name="tagToRemove"/>.
        /// </summary>
        /// <remarks>
        /// Only attributes are removed; the nodes themselves stay in the document, so the tree structure is
        /// unchanged and a single pass over one snapshot is sufficient.
        /// </remarks>
        /// <param name="htmlToEdit">
        /// Cached HTML from an earlier preprocessing pass. Its first top-level node is dropped through
        /// <c>RemoveCSS</c> and replaced by freshly generated CSS.
        /// </param>
        /// <param name="htmlOriginal">
        /// Original document HTML. The incremental update does not read it; the parameter is kept so existing
        /// callers keep compiling.
        /// </param>
        /// <param name="tagToRemove">Tag whose <c>Id</c> is matched against the entity-id attribute of each node.</param>
        /// <param name="tags">All tags of the annotation; forwarded to <c>GenerateCSS</c>.</param>
        /// <returns>Outer HTML of the updated document.</returns>
        private string PartialPreprocessHTMLRemoveTag(string htmlToEdit, string htmlOriginal, AnnotationTag tagToRemove, List<AnnotationTag> tags)
        {
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlToEdit);

            RemoveCSS(docToEdit);

            // The id being searched for is the same for every node.
            string removedTagId = tagToRemove.Id.ToString();

            foreach (var node in docToEdit.DocumentNode.DescendantsAndSelf().ToList())
            {
                if (!node.Attributes.Contains(TAG_EF_ID_ATTRIBUTE_NAME) ||
                    node.Attributes[TAG_EF_ID_ATTRIBUTE_NAME].Value != removedTagId)
                {
                    continue;
                }

                // Strip everything that marks this node as part of the removed annotation.
                node.Attributes.Remove(TAG_EF_ID_ATTRIBUTE_NAME);
                node.Attributes.Remove(TAG_INSTANCE_ATTRIBUTE_NAME);
                node.Attributes.Remove("class");
                node.Attributes.Remove("start");
                node.Attributes.Remove("end");
            }

            var cssNode = GenerateCSS(docToEdit, tags, new());
            docToEdit.DocumentNode.ChildNodes.Insert(0, cssNode);

            return docToEdit.DocumentNode.OuterHtml;
        }
699

  
700
        /// <summary>
        /// Removes the first top-level node of <paramref name="doc"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): callers insert the generated CSS node at index 0, so the first node is presumably that
        /// CSS node - confirm against the full preprocessing path. A document without any top-level node is
        /// left untouched instead of throwing.
        /// </remarks>
        /// <param name="doc">Document to modify in place.</param>
        private void RemoveCSS(HtmlDocument doc)
        {
            var topLevelNodes = doc.DocumentNode.ChildNodes;
            if (topLevelNodes.Count == 0)
            {
                return;
            }

            topLevelNodes.RemoveAt(0);
        }
704

  
705
        /*
706
         *      End of Partial HTML Preprocessing ---------------------------------------------------------------------
707
         */
708

  
709

  
508 710
        // TODO temporary
509 711
        private bool IsHtml(string text)
510 712
        {
......
529 731

  
530 732
            AnnotationTag annotationTag = new()
531 733
            {
734
                Id = Guid.NewGuid(),
532 735
                Annotation = annotation,
533 736
                Instance = request.InstanceId == null ? Guid.NewGuid() : request.InstanceId.Value,
534 737
                Length = request.Length,
......
552 755
                throw new ArgumentException($"Unknown tag type {request.Type}");
553 756
            }
554 757

  
758
            annotation.LastModifiedTagId = annotationTag.Id;
759
            annotation.ModifiedType = EModified.ADDED;
760

  
555 761
            context.AnnotationTags.Add(annotationTag);
556 762
            context.SaveChanges();
557 763
        }
......
587 793
                throw new InvalidOperationException("Could not find tag instance");
588 794
            }
589 795

  
590
            context.AnnotationTags
591
                .Where(at => at.Id == tagInstanceId).ToList()
592
                .ForEach(a => context.AnnotationTags.Remove(a));
796

  
797
            var annotationTag = context.AnnotationTags.First(at => at.Id == tagInstanceId);
798
            annotation.LastModifiedTagId = annotationTag.Id;
799
            annotation.ModifiedType = EModified.REMOVED;
800

  
801
            context.AnnotationTags.Remove(annotationTag);
593 802

  
594 803
            context.SaveChanges();
595 804
        }

Také k dispozici: Unified diff