// Block-level markdown patterns. They are hoisted to module scope so the
// element transformers and normalizeMarkdown test lines with the same rules.
const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
const HEADING_REGEX = /^(#{1,6})\s/;
const QUOTE_REGEX = /^>\s/;
const CODE_START_REGEX = /^[ \t]*```(\w+)?/;
const CODE_END_REGEX = /[ \t]*```$/;
// A line made only of whitespace (including a lone "\r" left over from CRLF
// input) separates blocks exactly like an empty line does.
const BLANK_LINE_REGEX = /^\s*$/;

/**
 * Joins adjacent non-blank lines of a markdown string so that each paragraph
 * ends up on a single line, leaving fenced code blocks untouched.
 *
 * A line is kept as its own line (never appended to the previous one) when:
 * - it is part of a fenced code block, or directly follows one;
 * - it, or the previous line, is blank (empty or whitespace-only);
 * - it, or the previous line, is a heading;
 * - it starts a quote, an ordered/unordered list item or a check-list item.
 *
 * Every other line is appended to the previous output line by plain string
 * concatenation; no separator is inserted ("Hello\nworld" -> "Helloworld").
 *
 * @param input - Raw markdown text, lines separated by "\n".
 * @returns The normalized markdown, lines separated by "\n".
 */
export function normalizeMarkdown(input: string): string {
  const sanitizedLines: string[] = [];
  let inCodeBlock = false;
  // True while the most recently emitted line is a fence or code content.
  // Prose must never be appended to such a line: "```" + "text" would turn
  // a closing fence into what looks like a new opening fence.
  let lastLineIsCode = false;

  for (const line of input.split('\n')) {
    if (inCodeBlock) {
      // Inside a block only a closing fence changes state. An opening-style
      // line such as "```js" is ordinary code content here.
      if (CODE_END_REGEX.test(line)) {
        inCodeBlock = false;
      }
      sanitizedLines.push(line);
      lastLineIsCode = true;
      continue;
    }

    const fenceStart = CODE_START_REGEX.exec(line);
    if (fenceStart !== null) {
      // When the closing pattern matches at a different position than the
      // opening one, the block opens and closes on this same line
      // (```code```), so we do not enter code-block mode.
      // NOTE(review): intended to mirror how the multiline importer tells a
      // single-line block from an opening fence - confirm against the importer.
      const fenceEnd = CODE_END_REGEX.exec(line);
      inCodeBlock = fenceEnd === null || fenceEnd.index === fenceStart.index;
      sanitizedLines.push(line);
      lastLineIsCode = true;
      continue;
    }

    const lastLine = sanitizedLines[sanitizedLines.length - 1];

    // In markdown the concept of "empty paragraphs" does not exist.
    // Blocks must be separated by a blank line; non-blank adjacent lines of
    // the same block are merged.
    const startsOwnLine =
      lastLine === undefined ||
      lastLineIsCode ||
      BLANK_LINE_REGEX.test(line) ||
      BLANK_LINE_REGEX.test(lastLine) ||
      HEADING_REGEX.test(lastLine) ||
      HEADING_REGEX.test(line) ||
      QUOTE_REGEX.test(line) ||
      ORDERED_LIST_REGEX.test(line) ||
      UNORDERED_LIST_REGEX.test(line) ||
      CHECK_LIST_REGEX.test(line);

    if (startsOwnLine) {
      sanitizedLines.push(line);
    } else {
      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
    }
    lastLineIsCode = false;
  }

  return sanitizedLines.join('\n');
}
Hello world
', md: '###### Hello world', }, + { + // Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A! + html: '

Helloworld!

', + md: ['Hello', 'world', '!'].join('\n'), + skipExport: true, + }, { // Multiline paragraphs - html: '

Hello
world
!

', + // TO-DO: It would be nice to support also hard line breaks (
) as \ or double spaces + // See https://spec.commonmark.org/0.31.2/#hard-line-breaks. + // Example: '

Hello\\\nworld\\\n!

', + html: '

Hello
world
!

', md: ['Hello', 'world', '!'].join('\n'), + skipImport: true, }, { html: '
Hello
world!
', md: '> Hello\n> world!', }, + // TO-DO:
should be preserved + // { + // html: '', + // md: '- Hello\n- world
!
!', + // skipImport: true, + // }, { - // Multiline list items - html: '', + // Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A! + html: '', md: '- Hello\n- world\n!\n!', + skipExport: true, }, { html: '', @@ -274,8 +294,8 @@ describe('Markdown', () => { skipExport: true, }, { - // Import only: multiline quote will be prefixed with ">" on each line during export - html: '
Hello
world
!
', + // https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A! + html: '
Helloworld!
', md: '> Hello\nworld\n!', skipExport: true, }, @@ -298,8 +318,9 @@ describe('Markdown', () => { }, { customTransformers: [MDX_HTML_TRANSFORMER], - html: '

Some HTML in mdx:

From HTML: Line 1\nSome Text
', + html: '

Some HTML in mdx:

From HTML: Line 1Some Text
', md: 'Some HTML in mdx:\n\nLine 1\nSome Text', + skipExport: true, }, ]; @@ -407,3 +428,47 @@ describe('Markdown', () => { }); } }); + +describe('normalizeMarkdown', () => { + it('should combine lines separated by a single \n unless they are in a codeblock', () => { + const markdown = ` +1 +2 + +3 + +\`\`\`md +1 +2 + +3 +\`\`\` + +\`\`\`js +1 +2 + +3 +\`\`\` +`; + expect(normalizeMarkdown(markdown)).toBe(` +12 + +3 + +\`\`\`md +1 +2 + +3 +\`\`\` + +\`\`\`js +1 +2 + +3 +\`\`\` +`); + }); +}); diff --git a/packages/lexical-markdown/src/index.ts b/packages/lexical-markdown/src/index.ts index dac5b260478..3fc2f21da17 100644 --- a/packages/lexical-markdown/src/index.ts +++ b/packages/lexical-markdown/src/index.ts @@ -31,6 +31,7 @@ import { ITALIC_STAR, ITALIC_UNDERSCORE, LINK, + normalizeMarkdown, ORDERED_LIST, QUOTE, STRIKETHROUGH, @@ -82,11 +83,12 @@ function $convertFromMarkdownString( node?: ElementNode, shouldPreserveNewLines = false, ): void { + const sanitizedMarkdown = normalizeMarkdown(markdown); const importMarkdown = createMarkdownImport( transformers, shouldPreserveNewLines, ); - return importMarkdown(markdown, node); + return importMarkdown(sanitizedMarkdown, node); } /** diff --git a/packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs b/packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs index b434d62be55..505b038ce28 100644 --- a/packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs +++ b/packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs @@ -1310,7 +1310,6 @@ const IMPORTED_MARKDOWN_HTML = html` bold italic strikethrough text, -
@@ -1408,9 +1407,7 @@ const IMPORTED_MARKDOWN_HTML = html` dir="ltr"> Blockquotes text goes here
- And second -
- line after + And secondline after
- And can be nested -
- and multiline as well + + And can be nested and multiline as well +