From 992d38eb9b0695f3aacf8a62d505cb0746244a77 Mon Sep 17 00:00:00 2001
From: WebFreak001 <gh@webfreak.org>
Date: Sun, 21 Jun 2020 20:42:46 +0200
Subject: [PATCH 1/6] add dparse.strings utility to unescape strings

Deprecates the StringBehavior values and fixes #417 by providing an alternative API
---
 src/dparse/lexer.d   |  12 +-
 src/dparse/strings.d | 686 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 694 insertions(+), 4 deletions(-)
 create mode 100644 src/dparse/strings.d

diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d
index be26daa6..bbc54fda 100644
--- a/src/dparse/lexer.d
+++ b/src/dparse/lexer.d
@@ -190,20 +190,24 @@ public enum WhitespaceBehavior : ubyte
     skip = 0b0000_0001,
 }
 
+private enum stringBehaviorNotWorking = "Automatic string parsing is not "
+    ~ "supported and was previously not working. To unescape strings use the "
+    ~ "`dparse.strings : unescapeString` function on the token texts instead.";
+
 /**
  * Configure string lexing behavior
  */
 public enum StringBehavior : ubyte
 {
     /// Do not include quote characters, process escape sequences
-    compiler = 0b0000_0000,
+    deprecated(stringBehaviorNotWorking) compiler = 0b0000_0000,
     /// Opening quotes, closing quotes, and string suffixes are included in the
     /// string token
-    includeQuoteChars = 0b0000_0001,
+    deprecated(stringBehaviorNotWorking) includeQuoteChars = 0b0000_0001,
     /// String escape sequences are not replaced
-    notEscaped = 0b0000_0010,
+    deprecated(stringBehaviorNotWorking) notEscaped = 0b0000_0010,
     /// Not modified at all. Useful for formatters or highlighters
-    source = includeQuoteChars | notEscaped
+    source = 0b0000_0011,
 }
 
 public enum CommentBehavior : bool
diff --git a/src/dparse/strings.d b/src/dparse/strings.d
new file mode 100644
index 00000000..6e79198f
--- /dev/null
+++ b/src/dparse/strings.d
@@ -0,0 +1,686 @@
+/// Utility for unescaping D string literals of any kind
+module dparse.strings;
+
+import std.algorithm;
+import std.array;
+import std.ascii : isAlphaNum, isHexDigit, isWhite;
+import std.conv;
+import std.range;
+import std.string;
+import std.utf;
+
+/**
+ * Checks if a string literal input has correct start/end sequences (quotes) to
+ * be any kind of D string literal. On success the `out` parameters hold the
+ * closing character, whether a c/w/d postfix exists, whether backslash escapes
+ * apply (only `"..."` strings) and the length of the opening prefix/quote.
+ * Bugs: doesn't check for validity of token strings.
+ * Standards: $(LINK https://dlang.org/spec/lex.html#string_literals)
+ */
+bool isStringLiteral(const(char)[] literal, out char stringCloseChar,
+		out bool hasPostfix, out bool parseEscapes, out int prefixLength)
+{
+	// there are no 1 character strings
+	if (literal.length < 2)
+		return false;
+
+	// check for valid start
+	bool allowPostfix;
+	switch (literal[0])
+	{
+	case 'r': // WysiwygString
+	case 'x': // HexString
+		if (literal[1] != '"')
+			return false;
+		stringCloseChar = '"';
+		allowPostfix = true;
+		prefixLength = 2;
+		break;
+	case 'q': // DelimitedString or TokenString
+		if (literal[1] == '{')
+			stringCloseChar = '}'; // TokenString: q{...}
+		else if (literal[1] == '"')
+			stringCloseChar = '"'; // DelimitedString: q"..."
+		else
+			return false;
+
+		allowPostfix = false;
+		prefixLength = 2;
+		break;
+	case '`':
+	case '"':
+		stringCloseChar = literal[0];
+		allowPostfix = true;
+		parseEscapes = stringCloseChar == '"';
+		prefixLength = 1;
+		break;
+	default:
+		return false;
+	}
+
+	if (allowPostfix && literal[$ - 1].among!('c', 'w', 'd'))
+	{
+		hasPostfix = true;
+		literal = literal[0 .. $ - 1]; // validate the rest without the postfix
+	}
+
+	if (literal.length <= prefixLength || literal[$ - 1] != stringCloseChar)
+		return false;
+
+	if (parseEscapes)
+	{
+		// check if end escapes the quote, making this an invalid string
+		auto end = literal[0 .. $ - 1].lastIndexOfNeither("\\");
+		if (end != -1)
+		{
+			// over-counts by exactly 2 (index offset + closing quote): same parity
+			size_t countBackslashes = literal.length - end;
+
+			if ((countBackslashes % 2) != 0)
+				return false; // uneven backslash count -> invalid end
+		}
+	}
+
+	return true;
+}
+
+/// ditto
+bool isStringLiteral(const(char)[] literal)
+{
+	char stringCloseChar; // parse details are computed but discarded by this overload
+	bool hasPostfix, parseEscapes;
+	int prefixLength;
+	return isStringLiteral(literal, stringCloseChar, hasPostfix, parseEscapes,
+		prefixLength);
+}
+
+///
+unittest
+{
+	assert(isStringLiteral(`"hello"`));
+	assert(isStringLiteral(`"hello world!"`));
+	assert(isStringLiteral(`r"hello world!"c`));
+	assert(isStringLiteral(`r"hello world!"d`));
+	assert(isStringLiteral(`q{cool}`));
+	assert(isStringLiteral(`q{cool\}`)); // backslashes are not checked in token strings
+	assert(isStringLiteral(`"\\"`));
+	assert(!isStringLiteral(`"\\\"`)); // odd backslash count escapes the closing quote
+	assert(isStringLiteral(`"\\\\"`));
+	assert(isStringLiteral(`"a\\\\"`));
+	assert(isStringLiteral(`""`));
+	assert(isStringLiteral(`q""`));
+	assert(isStringLiteral(`x""`));
+	assert(!isStringLiteral(``));
+	assert(!isStringLiteral(`"`));
+	assert(!isStringLiteral(`w""`)); // w is only accepted as a postfix
+	assert(!isStringLiteral(`hello"`));
+	assert(!isStringLiteral(`"hello`));
+	assert(!isStringLiteral(`"hello world`));
+	assert(!isStringLiteral(`hello world`));
+	assert(!isStringLiteral(`r"`));
+	assert(!isStringLiteral(`rr"ok"`)); // the prefix must be directly followed by the quote
+	assert(!isStringLiteral(`x"`));
+	assert(!isStringLiteral(`x" `)); // last character is not the closing quote
+	assert(!isStringLiteral(`qqqq`));
+}
+
+/// Defines what $(LREF unescapeString) should do when it encounters an invalid
+/// escape sequence in its input.
+enum InvalidEscapeAction
+{
+	/// keep the backslash character as well as the escape characters in the
+	/// string like in the input string.
+	keep = 0,
+	/// Ignore and skip offending characters, drop them from the output. Named
+	/// character entities are still being included like keep as they are not
+	/// currently implemented.
+	skip,
+	/// Throw a ConvException on invalid escape sequences. Does not throw
+	/// anything on unknown named character entities as they are not currently
+	/// implemented but instead treats them like $(LREF keep).
+	error
+}
+
+/**
+ * Unescapes a D string, effectively being the same as mixing in the string into
+ * some function call, but only for single string literals.
+ *
+ * Strips quotes, prefixes and suffixes, interprets escape sequences in normal
+ * double quoted strings and interprets hex strings. Returns simple slices for
+ * non-escaped strings. `input` must satisfy $(LREF isStringLiteral).
+ *
+ * It's undefined how invalid/malformed strings are evaluated.
+ * Throws: `ConvException` for invalid escapes with `InvalidEscapeAction.error`.
+ * Bugs: doesn't check for validity of token strings, doesn't interpret named
+ * character entity escape sequences (HTML-kind escape sequences), doesn't check
+ * nesting level of delimited strings.
+ *
+ * Standards: $(LINK https://dlang.org/spec/lex.html#string_literals)
+ */
+string unescapeString(
+	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
+)(
+	string input
+)
+in (isStringLiteral(input))
+{
+	char stringCloseChar;
+	bool hasPostfix, parseEscapes;
+	int prefixLength;
+	isStringLiteral(input, stringCloseChar, hasPostfix, parseEscapes,
+		prefixLength);
+
+	if (hasPostfix)
+		input = input[0 .. $ - 1];
+	// drop the opening prefix/quote and the closing character
+	auto content = input[prefixLength .. $ - 1];
+
+	if (!content.length)
+		return content;
+
+	if (input[0] == 'x')
+	{
+		// hex string, obsolete but still implemented
+		return parseHexStringContent!invalidEscapeAction(content);
+	}
+	else if (input[0] == 'q' && input[1] == '"')
+	{
+		content = content.normalizeNewLines;
+		if (isIdentifierChar(content[0])) // heredoc: identifier delimiter on the first line
+		{
+			auto ln = content.indexOf('\n');
+			if (ln == -1)
+			{
+				final switch (invalidEscapeAction)
+				{
+				case InvalidEscapeAction.keep:
+					return content;
+				case InvalidEscapeAction.skip:
+					return null;
+				case InvalidEscapeAction.error:
+					throw new ConvException("Invalid delimited escape string");
+				}
+			}
+			auto delimiter = content[0 .. ln];
+			content = content[ln + 1 .. $];
+			if (!content.endsWith(chain("\n", delimiter)))
+			{
+				final switch (invalidEscapeAction)
+				{
+				case InvalidEscapeAction.keep:
+					return content;
+				case InvalidEscapeAction.skip:
+					auto lastNl = content.lastIndexOf('\n');
+					if (lastNl == -1)
+						return content;
+					else
+						return content[0 .. lastNl];
+				case InvalidEscapeAction.error:
+					throw new ConvException("Delimited escape string not ending correctly");
+				}
+			}
+			return content[0 .. $ - delimiter.length]; // keeps the '\n' before the delimiter
+		}
+		else
+		{
+			char delimiterChar = content[0];
+			char endChar; // bracket-like delimiters close with their counterpart
+			switch (delimiterChar)
+			{
+			case '[': endChar = ']'; break;
+			case '(': endChar = ')'; break;
+			case '<': endChar = '>'; break;
+			case '{': endChar = '}'; break;
+			default: endChar = delimiterChar; break;
+			}
+
+			if (content[1 .. $].endsWith(endChar))
+				return content[1 .. $ - 1];
+			else
+			{
+				final switch (invalidEscapeAction)
+				{
+				case InvalidEscapeAction.keep:
+					return content;
+				case InvalidEscapeAction.skip:
+					return content[1 .. $];
+				case InvalidEscapeAction.error:
+					throw new ConvException("Invalid delimited escape string");
+				}
+			}
+		}
+	}
+	else
+	{
+		if (!parseEscapes)
+			return content.normalizeNewLines;
+		else
+			return unescapeDoubleQuotedContent!invalidEscapeAction(
+					content.normalizeNewLines);
+	}
+}
+
+///
+unittest
+{
+	assert(unescapeString(q{r"I am Oz"}) == r"I am Oz");
+	assert(unescapeString(q{r"c:\games\Sudoku.exe"}) == r"c:\games\Sudoku.exe");
+	assert(unescapeString(q{r"ab\n"}) == r"ab\n");
+	// alternate wysiwyg (backtick) strings
+	assert(unescapeString(q{`the Great and Powerful.`}) == `the Great and Powerful.`);
+	assert(unescapeString(q{`c:\games\Empire.exe`}) == `c:\games\Empire.exe`);
+	assert(unescapeString(q{`The "lazy" dog`}) == `The "lazy" dog`);
+	assert(unescapeString(q{`a"b\n`}) == `a"b\n`);
+	// double quoted strings
+	assert(unescapeString(q{"Who are you?"}) == "Who are you?");
+	assert(unescapeString(q{"c:\\games\\Doom.exe"}) == "c:\\games\\Doom.exe");
+	assert(unescapeString(q{"ab\n"}) == "ab\n");
+	// hex strings
+	assert(unescapeString(`x"0A"`) == hexString!"0A");
+	assert(unescapeString(`x"00 FBCD 32FD 0A"`) == hexString!"00 FBCD 32FD 0A");
+	// delimited strings, including a heredoc
+	assert(unescapeString(`q"(foo(xxx))"`) == q"(foo(xxx))");
+	assert(unescapeString(`q"[foo{]"`) == q"[foo{]");
+	assert(unescapeString(`q"<foo{>"`) == q"<foo{>");
+	assert(unescapeString(`q"{foo(}"`) == q"{foo(}");
+	assert(unescapeString(`q"EOS
+This
+is a multi-line
+heredoc string
+EOS"`) == q"EOS
+This
+is a multi-line
+heredoc string
+EOS");
+	assert(unescapeString(`q"/foo]/"`) == `foo]`);
+	// token strings
+	assert(unescapeString(`q{this is the voice of}`) == q{this is the voice of});
+	assert(unescapeString(`q{/*}*/ }`) == q{/*}*/ });
+	assert(unescapeString(`q{ world(q{control}); }`) == q{ world(q{control}); });
+	assert(unescapeString(`q{ __TIME__ }`) == q{ __TIME__ });
+	// string postfixes are stripped
+	assert(unescapeString(q{"hello"c}) == "hello");
+	assert(unescapeString(q{"hello"w}) == "hello");
+	assert(unescapeString(q{"hello"d}) == "hello");
+	// escape sequences and remaining edge cases
+	assert(unescapeString(`""`) == "");
+	assert(unescapeString(`"hello\'world\"cool\""`) == "hello\'world\"cool\"");
+	assert(unescapeString(`"\x0A"`) == "\x0A");
+	assert(unescapeString(`"\u200b"`) == "\u200b");
+	assert(unescapeString(`"\U0001F4A9"`) == "\U0001F4A9");
+	assert(unescapeString(`"\0"`) == "\0");
+	assert(unescapeString(`"\1"`) == "\1");
+	assert(unescapeString(`"\12"`) == "\12");
+	assert(unescapeString(`"\127"`) == "\127");
+	assert(unescapeString(`"\1278"`) == "\1278"); // octal escapes take at most 3 digits
+	assert(unescapeString(`"\12a8"`) == "\12a8");
+	assert(unescapeString(`"\1a28"`) == "\1a28");
+	assert(unescapeString(`x"afDE"`) == "\xaf\xDE");
+	assert(unescapeString("\"hello\nworld\rfoo\r\nbar\u2028ok\u2029\"")
+			== "hello\nworld\nfoo\nbar\nok\n"); // all line breaks become \n
+}
+
+unittest
+{
+	import std.exception : assertThrown;
+
+	// unimplemented named characters
+	assert(unescapeString(`"\&foo;"`) == "\\&foo;"); // kept verbatim, backslash included
+	// named character entity without the terminating ';'
+	assertThrown!ConvException(unescapeString(`"\&foo"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\&foo"`) == "\\&foo");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\&foo"`) == "");
+}
+
+unittest
+{
+	import std.exception : assertThrown;
+
+	assertThrown!ConvException(unescapeString(`q"EOS"`)); // heredoc identifier without a line break
+	assert(unescapeString!(InvalidEscapeAction.keep)(`q"EOS"`) == "EOS");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`q"EOS"`) == "");
+	// heredoc whose closing delimiter is missing
+	assertThrown!ConvException(unescapeString(`q"EOS
+hello"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`q"EOS
+hello"`) == "hello");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`q"EOS
+hello"`) == "hello");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`q"EOS
+hello
+world"`) == "hello");
+	// single-character delimiter that is never closed
+	assertThrown!ConvException(unescapeString(`q"/xd"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`q"/xd"`) == "/xd");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`q"/xd"`) == "xd");
+	// \x, \u and \U escapes with too few digits
+	assertThrown!ConvException(unescapeString(`"\x"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\x"`) == "\\x");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\x"`) == "");
+
+	assertThrown!ConvException(unescapeString(`"\u0"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\u0"`) == "\\u0");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\u0"`) == "");
+
+	assertThrown!ConvException(unescapeString(`"\U0000000"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\U0000000"`) == "\\U0000000");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\U0000000"`) == "");
+	// escapes containing a non-hex digit
+	assertThrown!ConvException(unescapeString(`"\xAG"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\xAG"`) == "\\xAG");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\xAG"`) == "");
+
+	assertThrown!ConvException(unescapeString(`"\u00AG"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\u00AG"`) == "\\u00AG");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\u00AG"`) == "");
+	// lone backslash at the very end of the content
+	assertThrown!ConvException(unescapeDoubleQuotedContent(`a\`));
+	assert(unescapeDoubleQuotedContent!(InvalidEscapeAction.keep)(`a\`) == "a\\");
+	assert(unescapeDoubleQuotedContent!(InvalidEscapeAction.skip)(`a\`) == "a");
+	// unknown escape character
+	assertThrown!ConvException(unescapeString(`"\z"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`"\z"`) == "\\z");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`"\z"`) == "z");
+
+	assert(parseHexStringContent("") == "");
+	// hex strings with a non-hex digit or an odd digit count
+	assertThrown!ConvException(unescapeString(`x"AG"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`x"AG"`) == "AG");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`x"AG"`) == "");
+
+	assertThrown!ConvException(unescapeString(`x"A"`));
+	assert(unescapeString!(InvalidEscapeAction.keep)(`x"A"`) == "A");
+	assert(unescapeString!(InvalidEscapeAction.skip)(`x"A"`) == "");
+}
+
+private string unescapeDoubleQuotedContent(
+	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error // policy for malformed escapes
+)(
+	string input // content of a "..." literal without the surrounding quotes
+)
+{
+	auto escape = input.indexOf('\\'); // index of the backslash currently being handled
+	if (escape == -1)
+		return input; // no escapes at all: return the original slice
+	// Otherwise rebuild the string in `ret`, resolving one escape per loop iteration.
+	auto ret = appender!string;
+	ret.reserve(input.length);
+	size_t start = 0; // first index of `input` not yet copied to (or dropped from) `ret`
+	// Returns true if input[escape + length] exists; otherwise applies the policy.
+	bool requireMinLength(size_t length)
+	{
+		if (escape + length >= input.length)
+		{
+			final switch (invalidEscapeAction)
+			{
+			case InvalidEscapeAction.keep:
+				ret ~= input[start .. $];
+				start = input.length;
+				return false;
+			case InvalidEscapeAction.skip:
+				start = input.length;
+				return false;
+			case InvalidEscapeAction.error:
+				throw new ConvException("Unfinished escape at end of string");
+			}
+		}
+		else
+		{
+			return true;
+		}
+	}
+	// Applies the policy to a malformed escape; parsing resumes at continueAt.
+	void errorInvalidCharacter(size_t continueAt)
+	{
+		final switch (invalidEscapeAction)
+		{
+		case InvalidEscapeAction.keep:
+			ret ~= input[start .. start = continueAt];
+			break;
+		case InvalidEscapeAction.skip:
+			start = continueAt;
+			break;
+		case InvalidEscapeAction.error:
+			throw new ConvException("Invalid escape character before index "
+					~ continueAt.to!string);
+		}
+	}
+	// Decodes the `length` hex digits after \u or \U and appends them as UTF-8.
+	bool parseUnicode(size_t length)
+	{
+		auto c = input[escape + 2 .. escape + 2 + length];
+		if (!c.all!isHexDigit || !isValidDchar(cast(dchar) c.to!uint(16)))
+		{ // surrogates / values above U+10FFFF would make encode() throw UTFException
+			errorInvalidCharacter(escape + 2 + length);
+			return false;
+		}
+		dchar ch = cast(dchar) c.to!uint(16);
+		char[4] buf;
+		auto size = encode(buf, ch);
+		ret ~= buf[0 .. size];
+		start = escape + 2 + length;
+		return true;
+	}
+
+	Loop: while (escape != -1)
+	{
+		ret ~= input[start .. escape]; // copy the plain text in front of the backslash
+		start = escape;
+
+		if (!requireMinLength(1))
+			break;
+
+	Switch:
+		switch (input[escape + 1])
+		{
+		case '\'':
+		case '"':
+		case '?':
+		case '\\':
+			ret ~= input[escape + 1];
+			start = escape + 2;
+			break;
+
+			static foreach (escapeChar; "abfnrtv")
+			{
+		case escapeChar:
+				mixin("ret ~= '\\" ~ escapeChar ~ "';");
+				start = escape + 2;
+				break Switch;
+			}
+
+		case 'x':
+			if (!requireMinLength(3))
+				break Loop;
+			char a = input[escape + 2];
+			char b = input[escape + 3];
+			if (!a.isHexDigit || !b.isHexDigit)
+			{
+				errorInvalidCharacter(escape + 4);
+				break;
+			}
+			ret ~= cast(char)(a.parseHexChar << 4 | b.parseHexChar);
+			start = escape + 4;
+			break;
+		case 'u':
+			if (!requireMinLength(1 + 4))
+				break Loop;
+			parseUnicode(4);
+			break;
+		case 'U':
+			if (!requireMinLength(1 + 8))
+				break Loop;
+			parseUnicode(8);
+			break;
+		case '0': .. case '7':
+			int length = 1;
+			foreach (n; 2 .. 4) // look for an optional 2nd and 3rd octal digit
+			{
+				if (escape + 1 + n > input.length)
+					break;
+				char c = input[escape + n];
+				if (c >= '0' && c <= '7')
+					length = n;
+				else
+					break;
+			}
+			int c = input[escape + 1 .. escape + 1 + length].to!int(8);
+			ret ~= cast(char) c; // NOTE(review): octal values above \377 are truncated by this cast
+			start = escape + 1 + length;
+			break;
+		case '&':
+			auto end = input.indexOf(';', escape + 2);
+			if (end == -1)
+				errorInvalidCharacter(input.length);
+			else
+				ret ~= input[escape .. start = end + 1];
+			break;
+		default:
+			errorInvalidCharacter(escape + 1);
+			break;
+		}
+
+		escape = input.indexOf('\\', start); // continue with the next backslash, if any
+	}
+	ret ~= input[start .. $];
+	return ret.data;
+}
+
+unittest
+{
+	assert(unescapeDoubleQuotedContent(`hello world`) == "hello world"); // no backslash at all
+	assert(unescapeDoubleQuotedContent(`hello\nworld`) == "hello\nworld");
+	assert(unescapeDoubleQuotedContent(`hello\tworld`) == "hello\tworld");
+	assert(unescapeDoubleQuotedContent(`hello\u200bworld`) == "hello\u200bworld");
+	assert(unescapeDoubleQuotedContent(`hello \"\\ok`) == "hello \"\\ok"); // escaped quote and backslash
+}
+
+private string parseHexStringContent(
+	InvalidEscapeAction invalidEscapeAction = InvalidEscapeAction.error
+)(
+	string input
+)
+{
+	if (!input.length)
+		return input;
+	// Turns each pair of hex digits into one output char, ignoring whitespace.
+	auto ret = appender!string;
+	ret.reserve(input.length / 3); // presumably sized for "XX " groups - TODO confirm
+	char buf; // pending first digit of a pair; char.init means "no digit pending"
+	foreach (i, char c; input)
+	{
+		if (c.isWhite)
+			continue;
+
+		if (!c.isHexDigit)
+		{
+			final switch (invalidEscapeAction)
+			{
+			case InvalidEscapeAction.keep:
+				if (buf != char.init)
+				{
+					ret ~= buf; // emit the pending digit as-is before the invalid char
+					buf = char.init;
+				}
+				ret ~= c;
+				break;
+			case InvalidEscapeAction.skip:
+				break;
+			case InvalidEscapeAction.error:
+				throw new ConvException("Invalid hex character at index "
+						~ i.to!string);
+			}
+		}
+		else
+		{
+			if (buf == char.init)
+			{
+				buf = c; // first digit of a pair: remember it
+			}
+			else
+			{
+				ret ~= cast(char)(buf.parseHexChar << 4 | c.parseHexChar);
+				buf = char.init;
+			}
+		}
+	}
+	// a digit is still pending: the input held an odd number of hex digits
+	if (buf != char.init)
+	{
+		final switch (invalidEscapeAction)
+		{
+		case InvalidEscapeAction.keep:
+			ret ~= buf;
+			break;
+		case InvalidEscapeAction.skip:
+			break;
+		case InvalidEscapeAction.error:
+			throw new ConvException("Unterminated hex character at end of string");
+		}
+	}
+
+	return ret.data;
+}
+
+private int parseHexChar(char c)
+in (c.isHexDigit)
+in ('a' > 'A' && 'A' > '0') // just checking that ASCII doesn't suddenly change
+{
+	// can omit range ends and digit check because of function preconditions
+	if (c >= 'a')
+		return (c - 'a') + 10; // 'a'..'f' -> 10..15
+	else if (c >= 'A')
+		return (c - 'A') + 10; // 'A'..'F' -> 10..15
+	else
+		return c - '0'; // '0'..'9' -> 0..9
+}
+
+private bool isIdentifierChar(char c)
+{
+	return isAlphaNum(c) || c == '_'; // ASCII letters, digits and underscore only
+}
+
+/// Replaces \r\n, \r, U+2028 and U+2029 with \n, as parsed in D strings
+private string normalizeNewLines(string text)
+{
+	import std.utf : codeLength;
+
+	enum exoticLineBreakLength = codeLength!char('\u2028');
+	static immutable nlCharacters = ['\r', '\u2028', '\u2029'];
+
+	auto end = text.indexOfAny(nlCharacters);
+	if (end == -1)
+		return text; // nothing to replace: return the original slice
+	auto ret = appender!string;
+	ret.reserve(text.length);
+	size_t start = 0; // first index of `text` not yet copied into `ret`
+	while (end != -1)
+	{
+		ret ~= text[start .. end];
+		ret ~= '\n';
+		if (end + 1 < text.length && text[end] == '\r' && text[end + 1] == '\n')
+			end++; // also consume the '\n' of a "\r\n" pair
+		else if (text[end] != '\r')
+			end += exoticLineBreakLength - 1;
+		start = end + 1;
+		end = text[start .. $].indexOfAny(nlCharacters);
+		if (end != -1)
+			end += start;
+	}
+	ret ~= text[start .. $];
+	return ret.data;
+}
+
+///
+unittest
+{
+	string testNoChange = "hello\nworld!";
+	assert(normalizeNewLines(testNoChange).ptr is testNoChange.ptr);
+
+	assert(normalizeNewLines("hello\rworld") == "hello\nworld");
+	assert(normalizeNewLines("hello\r\nworld") == "hello\nworld");
+	assert(normalizeNewLines("hello\r\n\nworld") == "hello\n\nworld");
+	assert(normalizeNewLines("hello\u2028\nworld") == "hello\n\nworld");
+	assert(normalizeNewLines("hello\u2029\nworld") == "hello\n\nworld");
+	assert(normalizeNewLines("hello\r") == "hello\n");
+}

From aab6f61cde3acc1dea3b31816a4bb735fc817689 Mon Sep 17 00:00:00 2001
From: WebFreak001 <gh@webfreak.org>
Date: Sun, 21 Jun 2020 20:58:46 +0200
Subject: [PATCH 2/6] fix StringBehavior enum for old compilers

---
 src/dparse/lexer.d | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/dparse/lexer.d b/src/dparse/lexer.d
index bbc54fda..0c9477c5 100644
--- a/src/dparse/lexer.d
+++ b/src/dparse/lexer.d
@@ -197,17 +197,21 @@ private enum stringBehaviorNotWorking = "Automatic string parsing is not "
 /**
  * Configure string lexing behavior
  */
-public enum StringBehavior : ubyte
+// was enum, but struct now for deprecations and support with old compilers
+public struct StringBehavior
 {
     /// Do not include quote characters, process escape sequences
-    deprecated(stringBehaviorNotWorking) compiler = 0b0000_0000,
-    /// Opening quotes, closing quotes, and string suffixes are included in the
-    /// string token
-    deprecated(stringBehaviorNotWorking) includeQuoteChars = 0b0000_0001,
+    deprecated(stringBehaviorNotWorking) static immutable StringBehavior compiler = StringBehavior(0b0000_0000);
+    /// Opening quotes, closing quotes, and string suffixes are included in
+    /// the string token
+    deprecated(stringBehaviorNotWorking) static immutable StringBehavior includeQuoteChars = StringBehavior(0b0000_0001);
     /// String escape sequences are not replaced
-    deprecated(stringBehaviorNotWorking) notEscaped = 0b0000_0010,
+    deprecated(stringBehaviorNotWorking) static immutable StringBehavior notEscaped = StringBehavior(0b0000_0010);
     /// Not modified at all. Useful for formatters or highlighters
-    source = 0b0000_0011,
+    static immutable StringBehavior source = StringBehavior(0b0000_0011);
+
+    ubyte behavior;
+    alias behavior this;
 }
 
 public enum CommentBehavior : bool

From cde5a26a1ae5b1860c2f3c9fdfbd91895cd70d03 Mon Sep 17 00:00:00 2001
From: WebFreak001 <gh@webfreak.org>
Date: Sun, 21 Jun 2020 22:21:35 +0200
Subject: [PATCH 3/6] fix dparse.strings on old dmd

---
 src/dparse/strings.d | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/dparse/strings.d b/src/dparse/strings.d
index 6e79198f..df26de76 100644
--- a/src/dparse/strings.d
+++ b/src/dparse/strings.d
@@ -162,7 +162,11 @@ string unescapeString(
 )(
 	string input
 )
-in (isStringLiteral(input))
+in
+{
+	assert(isStringLiteral(input));
+}
+do
 {
 	char stringCloseChar;
 	bool hasPostfix, parseEscapes;
@@ -481,13 +485,13 @@ private string unescapeDoubleQuotedContent(
 			start = escape + 2;
 			break;
 
-			static foreach (escapeChar; "abfnrtv")
-			{
-		case escapeChar:
-				mixin("ret ~= '\\" ~ escapeChar ~ "';");
-				start = escape + 2;
-				break Switch;
-			}
+		case 'a': ret ~= '\a'; start = escape + 2; break;
+		case 'b': ret ~= '\b'; start = escape + 2; break;
+		case 'f': ret ~= '\f'; start = escape + 2; break;
+		case 'n': ret ~= '\n'; start = escape + 2; break;
+		case 'r': ret ~= '\r'; start = escape + 2; break;
+		case 't': ret ~= '\t'; start = escape + 2; break;
+		case 'v': ret ~= '\v'; start = escape + 2; break;
 
 		case 'x':
 			if (!requireMinLength(3))
@@ -623,8 +627,12 @@ private string parseHexStringContent(
 }
 
 private int parseHexChar(char c)
-in (c.isHexDigit)
-in ('a' > 'A' && 'A' > '0') // just checking that ASCII doesn't suddenly change
+in
+{
+	assert(c.isHexDigit);
+	assert('a' > 'A' && 'A' > '0'); // just checking that ASCII doesn't suddenly change
+}
+do
 {
 	// can omit range ends and digit check because of function preconditions
 	if (c >= 'a')
@@ -646,7 +654,7 @@ private string normalizeNewLines(string text)
 	import std.utf : codeLength;
 
 	enum exoticLineBreakLength = codeLength!char('\u2028');
-	static immutable nlCharacters = ['\r', '\u2028', '\u2029'];
+	static immutable dchar[] nlCharacters = ['\r', '\u2028', '\u2029'];
 
 	auto end = text.indexOfAny(nlCharacters);
 	if (end == -1)

From 2aaaa2b82c135e3e000fcff06f93671ef0766b42 Mon Sep 17 00:00:00 2001
From: Jan Jurzitza <gh@webfreak.org>
Date: Mon, 22 Jun 2020 12:21:20 +0200
Subject: [PATCH 4/6] clarify documentation of InvalidEscapeAction

---
 src/dparse/strings.d | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dparse/strings.d b/src/dparse/strings.d
index df26de76..6c42477c 100644
--- a/src/dparse/strings.d
+++ b/src/dparse/strings.d
@@ -132,8 +132,8 @@ enum InvalidEscapeAction
 	/// string like in the input string.
 	keep = 0,
 	/// Ignore and skip offending characters, drop them from the output. Named
-	/// character entities are still being included like keep as they are not
-	/// currently implemented.
+	/// character entities are still being included like $(LREF keep) as they
+    /// are not currently implemented.
 	skip,
 	/// Throw a ConvException on invalid escape sequences. Does not throw
 	/// anything on unknown named character entities as they are not currently

From 915d6dc371c7bd1b8e4972008ca49df65b786f12 Mon Sep 17 00:00:00 2001
From: Jan Jurzitza <gh@webfreak.org>
Date: Mon, 22 Jun 2020 14:32:25 +0200
Subject: [PATCH 5/6] fix tabs

---
 src/dparse/strings.d | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dparse/strings.d b/src/dparse/strings.d
index 6c42477c..7ffe2258 100644
--- a/src/dparse/strings.d
+++ b/src/dparse/strings.d
@@ -133,7 +133,7 @@ enum InvalidEscapeAction
 	keep = 0,
 	/// Ignore and skip offending characters, drop them from the output. Named
 	/// character entities are still being included like $(LREF keep) as they
-    /// are not currently implemented.
+	/// are not currently implemented.
 	skip,
 	/// Throw a ConvException on invalid escape sequences. Does not throw
 	/// anything on unknown named character entities as they are not currently

From 5ac08e9c3605ad0b50876ddee75abb985506ac44 Mon Sep 17 00:00:00 2001
From: Jan Jurzitza <gh@webfreak.org>
Date: Tue, 23 Jun 2020 09:40:22 +0200
Subject: [PATCH 6/6] clearer code & small cleanups

---
 src/dparse/strings.d | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/dparse/strings.d b/src/dparse/strings.d
index 7ffe2258..310e3987 100644
--- a/src/dparse/strings.d
+++ b/src/dparse/strings.d
@@ -439,7 +439,8 @@ private string unescapeDoubleQuotedContent(
 		final switch (invalidEscapeAction)
 		{
 		case InvalidEscapeAction.keep:
-			ret ~= input[start .. start = continueAt];
+			ret ~= input[start .. continueAt];
+			start = continueAt;
 			break;
 		case InvalidEscapeAction.skip:
 			start = continueAt;
@@ -535,9 +536,14 @@ private string unescapeDoubleQuotedContent(
 		case '&':
 			auto end = input.indexOf(';', escape + 2);
 			if (end == -1)
+			{
 				errorInvalidCharacter(input.length);
+			}
 			else
-				ret ~= input[escape .. start = end + 1];
+			{
+				ret ~= input[escape .. end + 1];
+				start = end + 1;
+			}
 			break;
 		default:
 			errorInvalidCharacter(escape + 1);
@@ -671,9 +677,7 @@ private string normalizeNewLines(string text)
 		else if (text[end] != '\r')
 			end += exoticLineBreakLength - 1;
 		start = end + 1;
-		end = text[start .. $].indexOfAny(nlCharacters);
-		if (end != -1)
-			end += start;
+		end = text.indexOfAny(nlCharacters, start);
 	}
 	ret ~= text[start .. $];
 	return ret.data;