Projekt

Obecné

Profil

« Předchozí | Další » 

Revize 3c185841

Přidáno uživatelem Vojtěch Bartička asi před 2 lety

HTML preprocessing and sanitization

Zobrazit rozdíly:

Backend/Core/Core.csproj
10 10
    <PackageReference Include="AutoMapper" Version="11.0.1" />
11 11
    <PackageReference Include="AutoMapper.Extensions.Microsoft.DependencyInjection" Version="11.0.0" />
12 12
    <PackageReference Include="BCrypt.Net-Next" Version="4.0.3" />
13
    <PackageReference Include="HtmlAgilityPack" Version="1.11.42" />
14
    <PackageReference Include="HtmlSanitizer" Version="7.1.488" />
13 15
    <PackageReference Include="Microsoft.EntityFrameworkCore" Version="6.0.3" />
14 16
    <PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="6.0.3" />
15 17
    <PackageReference Include="Microsoft.EntityFrameworkCore.Tools" Version="6.0.3">
Backend/Core/Services/AnnotationService/AnnotationServiceEF.cs
11 11
using Microsoft.EntityFrameworkCore;
12 12
using AutoMapper;
13 13
using Models.Tags;
14
using Ganss.XSS;
15
using HtmlAgilityPack;
16
using System.Text.RegularExpressions;
14 17

  
15 18
namespace Core.Services.AnnotationService
16 19
{
......
98 101
                .Include(a => a.User)
99 102
                .Include(a => a.Document).ThenInclude(d => d.Content)
100 103
                .First();
101
            
104

  
102 105
            if (userRole < ERole.ADMINISTRATOR)
103 106
            {
104 107
                if (annotation.User.Id != userId)
......
108 111
            }
109 112

  
110 113
            var documentContent = context.Documents.Where(d => d.Id == annotation.Document.Id).Select(d => d.Content).First();
114
            var preprocessingResult = PreprocessHTML(documentContent.Content);
111 115

  
112 116
            // We probably cannot use AutoMapper since we are dealing with too many different entities
113 117
            AnnotationInfo annotationInfo = new()
114 118
            {
115
                DocumentText = documentContent.Content,
119
                SourceDocumentContent = documentContent.Content,
120
                DocumentToRender = preprocessingResult.docToRender,
121
                TagStartPositions = preprocessingResult.openingTagPositions,
122
                TagLengths = preprocessingResult.openingTagLengths,
116 123
                Note = annotation.Note,
117 124
                State = annotation.State,
118 125
                Type = IsHtml(documentContent.Content) ? EDocumentType.HTML : EDocumentType.TEXT
......
132 139
            return annotationInfo;
133 140
        }
134 141

  
142
        /// <summary>
        /// Parses <paramref name="htmlSource"/> twice with HtmlAgilityPack: one copy is only read
        /// (to obtain tag offsets in the original text), the other copy gets an
        /// <c>aswi-tag-id</c> attribute on every element and is then sanitized for rendering.
        /// </summary>
        /// <param name="htmlSource">Raw HTML of the document.</param>
        /// <returns>
        /// <c>docToRender</c>: sanitized HTML in which the only allowed attribute is <c>aswi-tag-id</c>;
        /// its value is the index of the element in the returned arrays.
        /// <c>openingTagPositions</c> / <c>openingTagLengths</c>: start offset and length of each
        /// element's opening tag as reported by HtmlAgilityPack for the original source.
        /// <c>closingTagPositions</c> / <c>closingTagLengths</c>: start offset and length of the span
        /// between the end of the element's inner content and the end of the element.
        /// </returns>
        private (string docToRender, int[] openingTagPositions, int[] openingTagLengths, int[] closingTagPositions, int[] closingTagLengths) PreprocessHTML(string htmlSource)
        {
            const string idAttributeName = "aswi-tag-id";

            // Two independent parses of the same source: offsets are read from the untouched
            // document while the attributes are written to the second one.
            var docOriginal = new HtmlDocument();
            docOriginal.LoadHtml(htmlSource);
            var docToEdit = new HtmlDocument();
            docToEdit.LoadHtml(htmlSource);

            var descendantsOriginal = docOriginal.DocumentNode.DescendantsAndSelf();
            var descendantsToEdit = docToEdit.DocumentNode.DescendantsAndSelf();

            List<int> tagStartPositions = new();
            List<int> tagStartLengths = new();
            List<int> tagClosingPositions = new();
            List<int> tagClosingLengths = new();

            // Both documents come from the same input, so the two traversals are walked in lockstep.
            var zipped = descendantsOriginal.Zip(descendantsToEdit, (orig, toEdit) => (Original: orig, ToEdit: toEdit));
            foreach (var (originalNode, toEditNode) in zipped)
            {
                // Names containing '#' (e.g. #document, #text, #comment) are not real elements.
                if (originalNode.Name.Contains("#"))
                {
                    continue;
                }

                int outerStart = originalNode.OuterStartIndex;
                int innerStart = originalNode.InnerStartIndex;
                int innerEnd = innerStart + originalNode.InnerLength;
                int outerEnd = outerStart + originalNode.OuterLength;

                // The id is the index this element will occupy in the position/length arrays.
                int currentId = tagStartPositions.Count;
                toEditNode.Attributes.Add(idAttributeName, currentId.ToString());

                tagStartPositions.Add(outerStart);
                // Opening tag spans from the outer start up to the inner start. The previous
                // operand order (outer - inner) produced zero or negative lengths.
                tagStartLengths.Add(innerStart - outerStart);

                tagClosingPositions.Add(innerEnd);
                tagClosingLengths.Add(outerEnd - innerEnd);
            }

            // Sanitize the annotated copy; every attribute except the injected id is disallowed.
            HtmlSanitizer sanitizer = new HtmlSanitizer();
            sanitizer.AllowedAttributes.Clear();
            sanitizer.AllowedAttributes.Add(idAttributeName);

            string docToRender = sanitizer.Sanitize(docToEdit.DocumentNode.OuterHtml);

            return (docToRender, tagStartPositions.ToArray(), tagStartLengths.ToArray(), tagClosingPositions.ToArray(), tagClosingLengths.ToArray());
        }
192

  
135 193
        // TODO temporary
136 194
        private bool IsHtml(string text)
137 195
        {
Backend/Core/Services/DocumentService/DocumentServiceEF.cs
11 11
using System.Web;
12 12
using AutoMapper;
13 13
using Models.Users;
14
using Ganss.XSS;
14 15

  
15 16
namespace Core.Services.DocumentService
16 17
{
Backend/Models/Annotations/AnnotationInfo.cs
10 10
{
11 11
    public class AnnotationInfo
12 12
    {
13
        public string DocumentText { get; set; }
13
        public string SourceDocumentContent { get; set; }
14
        public string DocumentToRender { get; set; }
15
        public int[] TagStartPositions { get; set; }
16
        public int[] TagLengths { get; set; }
14 17
        public EState State { get; set; }
15 18
        public EDocumentType Type { get; set; }
16 19
        public string Note { get; set; }

Také k dispozici: Unified diff