Revert "[lexical-markdown] Fix: normalize markdown in $convertFromMarkdownString to comply with CommonMark spec (#6608)" (#6627)
facebook · Sep 12, 2024 · b0c9809 · b0c9809
1 parent f50f168
commit b0c9809
Show file tree

Hide file tree

Showing 4 changed files with 22 additions and 138 deletions.
diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts
@@ -153,14 +153,6 @@ export type TextMatchTransformer = Readonly<{
   type: 'text-match';
 }>;
 
-const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
-const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
-const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
-const HEADING_REGEX = /^(#{1,6})\s/;
-const QUOTE_REGEX = /^>\s/;
-const CODE_START_REGEX = /^[ \t]*```(\w+)?/;
-const CODE_END_REGEX = /[ \t]*```$/;
-
 const createBlockNode = (
   createNode: (match: Array<string>) => ElementNode,
 ): ElementTransformer['replace'] => {
@@ -274,7 +266,7 @@ export const HEADING: ElementTransformer = {
     const level = Number(node.getTag().slice(1));
     return '#'.repeat(level) + ' ' + exportChildren(node);
   },
-  regExp: HEADING_REGEX,
+  regExp: /^(#{1,6})\s/,
   replace: createBlockNode((match) => {
     const tag = ('h' + match[1].length) as HeadingTagType;
     return $createHeadingNode(tag);
@@ -296,7 +288,7 @@ export const QUOTE: ElementTransformer = {
     }
     return output.join('\n');
   },
-  regExp: QUOTE_REGEX,
+  regExp: /^>\s/,
   replace: (parentNode, children, _match, isImport) => {
     if (isImport) {
       const previousNode = parentNode.getPreviousSibling();
@@ -336,9 +328,9 @@ export const CODE: MultilineElementTransformer = {
   },
   regExpEnd: {
     optional: true,
-    regExp: CODE_END_REGEX,
+    regExp: /[ \t]*```$/,
   },
-  regExpStart: CODE_START_REGEX,
+  regExpStart: /^[ \t]*```(\w+)?/,
   replace: (
     rootNode,
     children,
@@ -407,7 +399,7 @@ export const UNORDERED_LIST: ElementTransformer = {
   export: (node, exportChildren) => {
     return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
   },
-  regExp: UNORDERED_LIST_REGEX,
+  regExp: /^(\s*)[-*+]\s/,
   replace: listReplace('bullet'),
   type: 'element',
 };
@@ -417,7 +409,7 @@ export const CHECK_LIST: ElementTransformer = {
   export: (node, exportChildren) => {
     return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
   },
-  regExp: CHECK_LIST_REGEX,
+  regExp: /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i,
   replace: listReplace('check'),
   type: 'element',
 };
@@ -427,7 +419,7 @@ export const ORDERED_LIST: ElementTransformer = {
   export: (node, exportChildren) => {
     return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
   },
-  regExp: ORDERED_LIST_REGEX,
+  regExp: /^(\s*)(\d{1,})\.\s/,
   replace: listReplace('number'),
   type: 'element',
 };
@@ -527,47 +519,3 @@ export const LINK: TextMatchTransformer = {
   trigger: ')',
   type: 'text-match',
 };
-
-export function normalizeMarkdown(input: string): string {
-  const lines = input.split('\n');
-  let inCodeBlock = false;
-  const sanitizedLines: string[] = [];
-
-  for (let i = 0; i < lines.length; i++) {
-    const line = lines[i];
-    const lastLine = sanitizedLines[sanitizedLines.length - 1];
-
-    // Detect the start or end of a code block
-    if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
-      inCodeBlock = !inCodeBlock;
-      sanitizedLines.push(line);
-      continue;
-    }
-
-    // If we are inside a code block, keep the line unchanged
-    if (inCodeBlock) {
-      sanitizedLines.push(line);
-      continue;
-    }
-
-    // In markdown the concept of "empty paragraphs" does not exist.
-    // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
-    if (
-      line === '' ||
-      lastLine === '' ||
-      !lastLine ||
-      HEADING_REGEX.test(lastLine) ||
-      HEADING_REGEX.test(line) ||
-      QUOTE_REGEX.test(line) ||
-      ORDERED_LIST_REGEX.test(line) ||
-      UNORDERED_LIST_REGEX.test(line) ||
-      CHECK_LIST_REGEX.test(line)
-    ) {
-      sanitizedLines.push(line);
-    } else {
-      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
-    }
-  }
-
-  return sanitizedLines.join('\n');
-}
diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
@@ -22,10 +22,7 @@ import {
   Transformer,
   TRANSFORMERS,
 } from '../..';
-import {
-  MultilineElementTransformer,
-  normalizeMarkdown,
-} from '../../MarkdownTransformers';
+import {MultilineElementTransformer} from '../../MarkdownTransformers';
 
 // Matches html within a mdx file
 const MDX_HTML_TRANSFORMER: MultilineElementTransformer = {
@@ -95,36 +92,19 @@ describe('Markdown', () => {
       html: '<h6><span style="white-space: pre-wrap;">Hello world</span></h6>',
       md: '###### Hello world',
     },
-    {
-      // Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A!
-      html: '<p><span style="white-space: pre-wrap;">Helloworld!</span></p>',
-      md: ['Hello', 'world', '!'].join('\n'),
-      skipExport: true,
-    },
     {
       // Multiline paragraphs
-      // TO-DO: It would be nice to support also hard line breaks (<br>) as \ or double spaces
-      // See https://spec.commonmark.org/0.31.2/#hard-line-breaks.
-      // Example: '<p><span style="white-space: pre-wrap;">Hello\\\nworld\\\n!</span></p>',
-      html: '<p><span style="white-space: pre-wrap;">Hello<br>world<br>!</span></p>',
+      html: '<p><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span></p>',
       md: ['Hello', 'world', '!'].join('\n'),
-      skipImport: true,
     },
     {
       html: '<blockquote><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world!</span></blockquote>',
       md: '> Hello\n> world!',
     },
-    // TO-DO: <br> should be preserved
-    // {
-    //   html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world<br>!<br>!</span></li></ul>',
-    //   md: '- Hello\n- world<br>!<br>!',
-    //   skipImport: true,
-    // },
     {
-      // Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A!
-      html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world!!</span></li></ul>',
+      // Multiline list items
+      html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span><br><span style="white-space: pre-wrap;">!</span></li></ul>',
       md: '- Hello\n- world\n!\n!',
-      skipExport: true,
     },
     {
       html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world</span></li></ul>',
@@ -294,8 +274,8 @@ describe('Markdown', () => {
       skipExport: true,
     },
     {
-      // https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A!
-      html: '<blockquote><span style="white-space: pre-wrap;">Helloworld!</span></blockquote>',
+      // Import only: multiline quote will be prefixed with ">" on each line during export
+      html: '<blockquote><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span></blockquote>',
       md: '> Hello\nworld\n!',
       skipExport: true,
     },
@@ -318,9 +298,8 @@ describe('Markdown', () => {
     },
     {
       customTransformers: [MDX_HTML_TRANSFORMER],
-      html: '<p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1Some Text</span></pre>',
+      html: '<p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1\nSome Text</span></pre>',
       md: 'Some HTML in mdx:\n\n<MyComponent>Line 1\nSome Text</MyComponent>',
-      skipExport: true,
     },
   ];
 
@@ -428,47 +407,3 @@ describe('Markdown', () => {
     });
   }
 });
-
-describe('normalizeMarkdown', () => {
-  it('should combine lines separated by a single \n unless they are in a codeblock', () => {
-    const markdown = `
-1
-2
-
-3
-
-\`\`\`md
-1
-2
-
-3
-\`\`\`
-
-\`\`\`js
-1
-2
-
-3
-\`\`\`
-`;
-    expect(normalizeMarkdown(markdown)).toBe(`
-12
-
-3
-
-\`\`\`md
-1
-2
-
-3
-\`\`\`
-
-\`\`\`js
-1
-2
-
-3
-\`\`\`
-`);
-  });
-});
diff --git a/packages/lexical-markdown/src/index.ts b/packages/lexical-markdown/src/index.ts
@@ -31,7 +31,6 @@ import {
   ITALIC_STAR,
   ITALIC_UNDERSCORE,
   LINK,
-  normalizeMarkdown,
   ORDERED_LIST,
   QUOTE,
   STRIKETHROUGH,
@@ -83,12 +82,11 @@ function $convertFromMarkdownString(
   node?: ElementNode,
   shouldPreserveNewLines = false,
 ): void {
-  const sanitizedMarkdown = normalizeMarkdown(markdown);
   const importMarkdown = createMarkdownImport(
     transformers,
     shouldPreserveNewLines,
   );
-  return importMarkdown(sanitizedMarkdown, node);
+  return importMarkdown(markdown, node);
 }
 
 /**

diff --git a/packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs b/packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs
@@ -1310,6 +1310,7 @@ const IMPORTED_MARKDOWN_HTML = html`
       bold italic strikethrough
     </strong>
     <span data-lexical-text="true">text,</span>
+    <br />
     <strong
       class="PlaygroundEditorTheme__textBold PlaygroundEditorTheme__textItalic PlaygroundEditorTheme__textStrikethrough"
       data-lexical-text="true">
@@ -1407,7 +1408,9 @@ const IMPORTED_MARKDOWN_HTML = html`
     dir="ltr">
     <span data-lexical-text="true">Blockquotes text goes here</span>
     <br />
-    <span data-lexical-text="true">And secondline after</span>
+    <span data-lexical-text="true">And second</span>
+    <br />
+    <span data-lexical-text="true">line after</span>
   </blockquote>
   <blockquote
     class="PlaygroundEditorTheme__quote PlaygroundEditorTheme__ltr"
@@ -1485,9 +1488,9 @@ const IMPORTED_MARKDOWN_HTML = html`
           class="PlaygroundEditorTheme__listItem PlaygroundEditorTheme__ltr"
           dir="ltr"
           value="1">
-          <span data-lexical-text="true">
-            And can be nested and multiline as well
-          </span>
+          <span data-lexical-text="true">And can be nested</span>
+          <br />
+          <span data-lexical-text="true">and multiline as well</span>
         </li>
       </ol>
     </li>