Skip to content

Commit

Permalink
Add SanitizeDocument overload that takes a Stream
Browse files Browse the repository at this point in the history
Fixes #158
  • Loading branch information
mganss committed Jan 27, 2019
1 parent dfd5e9d commit 11b2716
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 14 deletions.
22 changes: 22 additions & 0 deletions src/HtmlSanitizer/HtmlSanitizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

Expand Down Expand Up @@ -490,6 +491,27 @@ public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatte
}
}

/// <summary>
/// Sanitizes an HTML document read from a stream. The result is always a complete document, even if the input contains only a fragment.
/// </summary>
/// <param name="html">A stream containing the HTML document to sanitize.</param>
/// <param name="baseUrl">The base URL used to resolve relative URLs. If empty, no resolution takes place.</param>
/// <param name="outputFormatter">The formatter used to render the DOM. If null, <see cref="OutputFormatter"/> is used.</param>
/// <returns>The sanitized HTML document.</returns>
public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter outputFormatter = null)
{
    // The parsed document is disposed as soon as the markup has been rendered.
    using (var document = HtmlParserFactory().Parse(html))
    {
        DoSanitize(document, document.DocumentElement, baseUrl);

        // Fall back to the sanitizer-wide formatter only when the caller supplied none.
        return document.ToHtml(outputFormatter ?? OutputFormatter);
    }
}

/// <summary>
/// Creates an instance of <see cref="HtmlParser"/>.
/// </summary>
Expand Down
32 changes: 18 additions & 14 deletions test/HtmlSanitizer.Tests/HtmlSanitizer.Tests.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>netcoreapp2.1;netcoreapp2.0;net452</TargetFrameworks>
<TargetFrameworks>netcoreapp2.1;netcoreapp2.0;net46</TargetFrameworks>
<AssemblyName>HtmlSanitizer.Tests</AssemblyName>
<PackageId>HtmlSanitizer.Tests</PackageId>
<GenerateRuntimeConfigurationFiles>true</GenerateRuntimeConfigurationFiles>
Expand All @@ -23,19 +23,23 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="coverlet.msbuild" Version="2.1.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.8.0" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.0" />
<PackageReference Include="xunit.runner.console" Version="2.4.0" />
<PackageReference Include="xunit" Version="2.4.0" />
</ItemGroup>

<ItemGroup Condition=" '$(TargetFramework)' == 'netcoreapp2.0' ">
</ItemGroup>

<ItemGroup Condition=" '$(TargetFramework)' == 'net452' ">
<Reference Include="System" />
<Reference Include="Microsoft.CSharp" />
<PackageReference Include="coverlet.msbuild" Version="2.5.1">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.9.0" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.1">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
</PackageReference>
<PackageReference Include="xunit.runner.console" Version="2.4.1">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
</PackageReference>
<PackageReference Include="xunit" Version="2.4.1" />
<PackageReference Include="System.Text.Encoding.CodePages">
<Version>4.5.1</Version>
</PackageReference>
</ItemGroup>

<ItemGroup>
Expand Down
24 changes: 24 additions & 0 deletions test/HtmlSanitizer.Tests/Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
using AngleSharp.Dom.Css;
using System.Threading;
using System.Reflection;
using System.IO;
using System.Text;

// Tests based on tests from http://roadkill.codeplex.com/

Expand All @@ -36,6 +38,7 @@ public class HtmlSanitizerTests: IClassFixture<HtmlSanitizerFixture>

// Sets up a test class instance using the sanitizer exposed by the shared class fixture.
public HtmlSanitizerTests(HtmlSanitizerFixture fixture)
{
// Registers the code pages encoding provider so additional code-page encodings can be resolved.
// NOTE(review): presumably required by EncodingTest on .NET Core targets — confirm.
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
// Use the sanitizer instance provided by the fixture.
Sanitizer = fixture.Sanitizer;
}

Expand Down Expand Up @@ -3097,6 +3100,27 @@ public void FilterUrlTest()

Assert.Equal(@"<img src=""https://www.example.com/test.png"">", actual);
}


[Fact]
public void EncodingTest()
{
    // Regression test for https://github.com/mganss/HtmlSanitizer/issues/158:
    // a document supplied as an ISO-8859-1 encoded stream, with a matching charset
    // declaration in its meta tag, is expected to come back unchanged.

    var expected = @"<html><head><meta http-equiv=""Content-Type"" content=""text/html; charset=iso-8859-1""></head><body>kopieën</body></html>";
    var encodedBytes = Encoding.GetEncoding("iso-8859-1").GetBytes(expected);

    // Allow the meta tag and its attributes so the charset declaration survives sanitization.
    var metaSanitizer = new HtmlSanitizer();
    metaSanitizer.AllowedTags.Add("meta");
    foreach (var attributeName in new[] { "http-equiv", "content" })
    {
        metaSanitizer.AllowedAttributes.Add(attributeName);
    }

    using (var input = new MemoryStream(encodedBytes))
    {
        Assert.Equal(expected, metaSanitizer.SanitizeDocument(input));
    }
}
}
}

Expand Down

0 comments on commit 11b2716

Please sign in to comment.