Revize 3c185841
Přidáno uživatelem Vojtěch Bartička před asi 2 lety
Backend/Core/Services/AnnotationService/AnnotationServiceEF.cs | ||
---|---|---|
11 | 11 |
using Microsoft.EntityFrameworkCore; |
12 | 12 |
using AutoMapper; |
13 | 13 |
using Models.Tags; |
14 |
using Ganss.XSS; |
|
15 |
using HtmlAgilityPack; |
|
16 |
using System.Text.RegularExpressions; |
|
14 | 17 |
|
15 | 18 |
namespace Core.Services.AnnotationService |
16 | 19 |
{ |
... | ... | |
98 | 101 |
.Include(a => a.User) |
99 | 102 |
.Include(a => a.Document).ThenInclude(d => d.Content) |
100 | 103 |
.First(); |
101 |
|
|
104 |
|
|
102 | 105 |
if (userRole < ERole.ADMINISTRATOR) |
103 | 106 |
{ |
104 | 107 |
if (annotation.User.Id != userId) |
... | ... | |
108 | 111 |
} |
109 | 112 |
|
110 | 113 |
var documentContent = context.Documents.Where(d => d.Id == annotation.Document.Id).Select(d => d.Content).First(); |
114 |
var preprocessingResult = PreprocessHTML(documentContent.Content); |
|
111 | 115 |
|
112 | 116 |
// We probably cannot use AutoMapper since we are dealing with too many different entities |
113 | 117 |
AnnotationInfo annotationInfo = new() |
114 | 118 |
{ |
115 |
DocumentText = documentContent.Content, |
|
119 |
SourceDocumentContent = documentContent.Content, |
|
120 |
DocumentToRender = preprocessingResult.docToRender, |
|
121 |
TagStartPositions = preprocessingResult.openingTagPositions, |
|
122 |
TagLengths = preprocessingResult.openingTagLengths, |
|
116 | 123 |
Note = annotation.Note, |
117 | 124 |
State = annotation.State, |
118 | 125 |
Type = IsHtml(documentContent.Content) ? EDocumentType.HTML : EDocumentType.TEXT |
... | ... | |
132 | 139 |
return annotationInfo; |
133 | 140 |
} |
134 | 141 |
|
142 |
/// <summary>
/// Parses <paramref name="htmlSource"/>, records where the opening and closing tag of every
/// element sits in the original text, and builds a sanitized copy of the document in which
/// each element carries an "aswi-tag-id" attribute whose value is the element's index into
/// the returned arrays.
/// </summary>
/// <param name="htmlSource">HTML source of the document; passed unchanged to the parser.</param>
/// <returns>
/// A tuple of:
/// <c>docToRender</c> - the sanitized HTML with the id attribute added to every element;
/// <c>openingTagPositions</c> - offset of each element's opening tag in <paramref name="htmlSource"/>;
/// <c>openingTagLengths</c> - length of each opening tag (inner start minus outer start);
/// <c>closingTagPositions</c> - offset at which each element's inner content ends;
/// <c>closingTagLengths</c> - number of characters between the end of the inner content and
/// the end of the element.
/// All four arrays have the same length and share the same indexing.
/// </returns>
private (string docToRender, int[] openingTagPositions, int[] openingTagLengths, int[] closingTagPositions, int[] closingTagLengths) PreprocessHTML(string htmlSource)
{
    const string idAttributeName = "aswi-tag-id";

    // The source is parsed twice: positions are read from the untouched parse while the
    // id attributes are written into the second one, so the edits cannot influence the
    // measured offsets and lengths.
    var docOriginal = new HtmlDocument();
    docOriginal.LoadHtml(htmlSource);
    var docToEdit = new HtmlDocument();
    docToEdit.LoadHtml(htmlSource);

    List<int> tagStartPositions = new();
    List<int> tagStartLengths = new();
    List<int> tagClosingPositions = new();
    List<int> tagClosingLengths = new();

    // Both documents come from the same source, so walking them in lockstep pairs every
    // node with its counterpart in the other parse.
    var zipped = docOriginal.DocumentNode.DescendantsAndSelf()
        .Zip(docToEdit.DocumentNode.DescendantsAndSelf(), (orig, toEdit) => new { Original = orig, ToEdit = toEdit });

    foreach (var node in zipped)
    {
        var originalNode = node.Original;
        var toEditNode = node.ToEdit;

        // Nodes whose name contains '#' (e.g. #document, #text, #comment) are not tags.
        if (originalNode.Name.Contains("#"))
        {
            continue;
        }

        int outerStart = originalNode.OuterStartIndex;
        int innerStart = originalNode.InnerStartIndex;
        int innerEnd = innerStart + originalNode.InnerLength;
        int outerEnd = outerStart + originalNode.OuterLength;

        // The id is the index this element is about to occupy in the position arrays.
        int currentId = tagStartPositions.Count;
        toEditNode.Attributes.Add(idAttributeName, currentId.ToString());

        tagStartPositions.Add(outerStart);
        // Opening tag spans [outerStart, innerStart); the original subtraction was reversed
        // and therefore produced zero or negative lengths.
        tagStartLengths.Add(innerStart - outerStart);

        tagClosingPositions.Add(innerEnd);
        tagClosingLengths.Add(outerEnd - innerEnd);
    }

    // Only the id attribute is allowed through the sanitizer; every other attribute is dropped.
    // NOTE(review): the sanitizer may also remove whole elements, in which case some ids
    // present in the arrays will not appear in the rendered document - confirm the consumer
    // tolerates gaps.
    HtmlSanitizer sanitizer = new HtmlSanitizer();
    sanitizer.AllowedAttributes.Clear();
    sanitizer.AllowedAttributes.Add(idAttributeName);

    string docToRender = sanitizer.Sanitize(docToEdit.DocumentNode.OuterHtml);

    return (docToRender, tagStartPositions.ToArray(), tagStartLengths.ToArray(), tagClosingPositions.ToArray(), tagClosingLengths.ToArray());
}
|
192 |
|
|
135 | 193 |
// TODO temporary |
136 | 194 |
private bool IsHtml(string text) |
137 | 195 |
{ |
Také k dispozici: Unified diff
HTML preprocessing and sanitization