Skip to content

Commit

Permalink
Revert "[lexical-markdown] Fix: normalize markdown in $convertFromMar…
Browse files Browse the repository at this point in the history
…kdownString to comply with CommonMark spec (#6608)" (#6627)
  • Loading branch information
potatowagon authored Sep 12, 2024
1 parent f50f168 commit b0c9809
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 138 deletions.
66 changes: 7 additions & 59 deletions packages/lexical-markdown/src/MarkdownTransformers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,6 @@ export type TextMatchTransformer = Readonly<{
type: 'text-match';
}>;

// Line-level patterns used both by the block transformers and by normalizeMarkdown.

// Optional leading whitespace (group 1), one or more digits (group 2), a literal
// '.', then one whitespace character — e.g. "1. ".
const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
// Optional leading whitespace (group 1), a '-', '*' or '+' marker, then one
// whitespace character.
const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
// Optional leading whitespace (group 1), an optional "- " prefix plus at most one
// more whitespace character, a bracket pair holding nothing, one whitespace
// character or 'x' (group 2), then one whitespace character. Case-insensitive,
// so "[X]" matches as well.
const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i;
// One to six '#' at the start of the line (group 1) followed by a whitespace
// character.
const HEADING_REGEX = /^(#{1,6})\s/;
// '>' at the start of the line followed by a whitespace character.
const QUOTE_REGEX = /^>\s/;
// Optional spaces/tabs, three backticks, then optional word characters
// (group 1 — presumably the language tag; confirm against the CODE transformer).
const CODE_START_REGEX = /^[ \t]*```(\w+)?/;
// Three backticks at the very end of the line, optionally preceded by
// spaces/tabs. Not anchored to the line start, so it also matches a line that
// has other text before the closing backticks.
const CODE_END_REGEX = /[ \t]*```$/;

const createBlockNode = (
createNode: (match: Array<string>) => ElementNode,
): ElementTransformer['replace'] => {
Expand Down Expand Up @@ -274,7 +266,7 @@ export const HEADING: ElementTransformer = {
const level = Number(node.getTag().slice(1));
return '#'.repeat(level) + ' ' + exportChildren(node);
},
regExp: HEADING_REGEX,
regExp: /^(#{1,6})\s/,
replace: createBlockNode((match) => {
const tag = ('h' + match[1].length) as HeadingTagType;
return $createHeadingNode(tag);
Expand All @@ -296,7 +288,7 @@ export const QUOTE: ElementTransformer = {
}
return output.join('\n');
},
regExp: QUOTE_REGEX,
regExp: /^>\s/,
replace: (parentNode, children, _match, isImport) => {
if (isImport) {
const previousNode = parentNode.getPreviousSibling();
Expand Down Expand Up @@ -336,9 +328,9 @@ export const CODE: MultilineElementTransformer = {
},
regExpEnd: {
optional: true,
regExp: CODE_END_REGEX,
regExp: /[ \t]*```$/,
},
regExpStart: CODE_START_REGEX,
regExpStart: /^[ \t]*```(\w+)?/,
replace: (
rootNode,
children,
Expand Down Expand Up @@ -407,7 +399,7 @@ export const UNORDERED_LIST: ElementTransformer = {
export: (node, exportChildren) => {
return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
},
regExp: UNORDERED_LIST_REGEX,
regExp: /^(\s*)[-*+]\s/,
replace: listReplace('bullet'),
type: 'element',
};
Expand All @@ -417,7 +409,7 @@ export const CHECK_LIST: ElementTransformer = {
export: (node, exportChildren) => {
return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
},
regExp: CHECK_LIST_REGEX,
regExp: /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i,
replace: listReplace('check'),
type: 'element',
};
Expand All @@ -427,7 +419,7 @@ export const ORDERED_LIST: ElementTransformer = {
export: (node, exportChildren) => {
return $isListNode(node) ? listExport(node, exportChildren, 0) : null;
},
regExp: ORDERED_LIST_REGEX,
regExp: /^(\s*)(\d{1,})\.\s/,
replace: listReplace('number'),
type: 'element',
};
Expand Down Expand Up @@ -527,47 +519,3 @@ export const LINK: TextMatchTransformer = {
trigger: ')',
type: 'text-match',
};

/**
 * Pre-processes a markdown string so that adjacent non-empty lines belonging to
 * the same paragraph are merged into a single line, while fenced code blocks
 * are passed through untouched.
 *
 * Markdown has no concept of an "empty paragraph": blocks are separated by an
 * empty line, so consecutive non-empty lines are folded together unless the
 * current line starts a heading, quote or list item, or the previous line is a
 * heading.
 *
 * Fence tracking is state-aware: outside a code block only a line matching
 * CODE_START_REGEX opens one, and inside a code block only a line matching
 * CODE_END_REGEX closes it. A fence that is opened and closed on the same line
 * (e.g. "```code```") does not change the state.
 *
 * @param input - Raw markdown text; it is split on '\n'.
 * @returns The normalized markdown, with the resulting lines joined by '\n'.
 */
export function normalizeMarkdown(input: string): string {
  const sanitizedLines: string[] = [];
  // Hoisted so the pattern is not rebuilt per line: true when a line contains
  // anything other than backticks and whitespace.
  const nonFenceContent = /[^`\s]/;
  let inCodeBlock = false;

  for (const line of input.split('\n')) {
    if (inCodeBlock) {
      // Inside a fenced block every line is kept verbatim. Only a line that
      // ends with a fence closes the block; a line such as "```js" is content.
      sanitizedLines.push(line);
      if (CODE_END_REGEX.test(line)) {
        inCodeBlock = false;
      }
      continue;
    }

    const fenceStart = CODE_START_REGEX.exec(line);
    if (fenceStart !== null) {
      sanitizedLines.push(line);
      // "```code```" opens and closes on one line, so the following lines are
      // NOT inside a code block. A line made only of backticks/whitespace is
      // always treated as an opening fence.
      const remainder = line.slice(fenceStart[0].length);
      const closedOnSameLine =
        nonFenceContent.test(line) && CODE_END_REGEX.test(remainder);
      inCodeBlock = !closedOnSameLine;
      continue;
    }

    const lastLine = sanitizedLines[sanitizedLines.length - 1];

    // `!lastLine` covers both "no previous line yet" and "previous line is empty".
    if (
      line === '' ||
      !lastLine ||
      HEADING_REGEX.test(lastLine) ||
      HEADING_REGEX.test(line) ||
      QUOTE_REGEX.test(line) ||
      ORDERED_LIST_REGEX.test(line) ||
      UNORDERED_LIST_REGEX.test(line) ||
      CHECK_LIST_REGEX.test(line)
    ) {
      sanitizedLines.push(line);
    } else {
      // Adjacent paragraph lines are concatenated with no separator, so
      // "Hello\nworld" becomes "Helloworld".
      // NOTE(review): CommonMark renders a soft line break as a space or line
      // ending; confirm that dropping the word boundary is intended.
      sanitizedLines[sanitizedLines.length - 1] = lastLine + line;
    }
  }

  return sanitizedLines.join('\n');
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ import {
Transformer,
TRANSFORMERS,
} from '../..';
import {
MultilineElementTransformer,
normalizeMarkdown,
} from '../../MarkdownTransformers';
import {MultilineElementTransformer} from '../../MarkdownTransformers';

// Matches html within a mdx file
const MDX_HTML_TRANSFORMER: MultilineElementTransformer = {
Expand Down Expand Up @@ -95,36 +92,19 @@ describe('Markdown', () => {
html: '<h6><span style="white-space: pre-wrap;">Hello world</span></h6>',
md: '###### Hello world',
},
{
// Multiline paragraphs: https://spec.commonmark.org/dingus/?text=Hello%0Aworld%0A!
html: '<p><span style="white-space: pre-wrap;">Helloworld!</span></p>',
md: ['Hello', 'world', '!'].join('\n'),
skipExport: true,
},
{
// Multiline paragraphs
// TO-DO: It would be nice to support also hard line breaks (<br>) as \ or double spaces
// See https://spec.commonmark.org/0.31.2/#hard-line-breaks.
// Example: '<p><span style="white-space: pre-wrap;">Hello\\\nworld\\\n!</span></p>',
html: '<p><span style="white-space: pre-wrap;">Hello<br>world<br>!</span></p>',
html: '<p><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span></p>',
md: ['Hello', 'world', '!'].join('\n'),
skipImport: true,
},
{
html: '<blockquote><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world!</span></blockquote>',
md: '> Hello\n> world!',
},
// TO-DO: <br> should be preserved
// {
// html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world<br>!<br>!</span></li></ul>',
// md: '- Hello\n- world<br>!<br>!',
// skipImport: true,
// },
{
// Multiline list items: https://spec.commonmark.org/dingus/?text=-%20Hello%0A-%20world%0A!%0A!
html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world!!</span></li></ul>',
// Multiline list items
html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span><br><span style="white-space: pre-wrap;">!</span></li></ul>',
md: '- Hello\n- world\n!\n!',
skipExport: true,
},
{
html: '<ul><li value="1"><span style="white-space: pre-wrap;">Hello</span></li><li value="2"><span style="white-space: pre-wrap;">world</span></li></ul>',
Expand Down Expand Up @@ -294,8 +274,8 @@ describe('Markdown', () => {
skipExport: true,
},
{
// https://spec.commonmark.org/dingus/?text=%3E%20Hello%0Aworld%0A!
html: '<blockquote><span style="white-space: pre-wrap;">Helloworld!</span></blockquote>',
// Import only: multiline quote will be prefixed with ">" on each line during export
html: '<blockquote><span style="white-space: pre-wrap;">Hello</span><br><span style="white-space: pre-wrap;">world</span><br><span style="white-space: pre-wrap;">!</span></blockquote>',
md: '> Hello\nworld\n!',
skipExport: true,
},
Expand All @@ -318,9 +298,8 @@ describe('Markdown', () => {
},
{
customTransformers: [MDX_HTML_TRANSFORMER],
html: '<p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1Some Text</span></pre>',
html: '<p><span style="white-space: pre-wrap;">Some HTML in mdx:</span></p><pre spellcheck="false" data-language="MyComponent"><span style="white-space: pre-wrap;">From HTML: Line 1\nSome Text</span></pre>',
md: 'Some HTML in mdx:\n\n<MyComponent>Line 1\nSome Text</MyComponent>',
skipExport: true,
},
];

Expand Down Expand Up @@ -428,47 +407,3 @@ describe('Markdown', () => {
});
}
});

// Unit test for normalizeMarkdown: plain lines separated by a single newline
// are expected to be merged, while lines between ``` fences are expected to
// come back unchanged.
describe('normalizeMarkdown', () => {
it('should combine lines separated by a single \n unless they are in a codeblock', () => {
// NOTE(review): as shown here there is no blank line between `2` and `3` in
// the plain-text part, yet the expectation below keeps `3` on its own line.
// Blank lines may have been dropped from this view — confirm against the
// actual test file before relying on this fixture.
const markdown = `
1
2
3
\`\`\`md
1
2
3
\`\`\`
\`\`\`js
1
2
3
\`\`\`
`;
expect(normalizeMarkdown(markdown)).toBe(`
12
3
\`\`\`md
1
2
3
\`\`\`
\`\`\`js
1
2
3
\`\`\`
`);
});
});
4 changes: 1 addition & 3 deletions packages/lexical-markdown/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import {
ITALIC_STAR,
ITALIC_UNDERSCORE,
LINK,
normalizeMarkdown,
ORDERED_LIST,
QUOTE,
STRIKETHROUGH,
Expand Down Expand Up @@ -83,12 +82,11 @@ function $convertFromMarkdownString(
node?: ElementNode,
shouldPreserveNewLines = false,
): void {
const sanitizedMarkdown = normalizeMarkdown(markdown);
const importMarkdown = createMarkdownImport(
transformers,
shouldPreserveNewLines,
);
return importMarkdown(sanitizedMarkdown, node);
return importMarkdown(markdown, node);
}

/**
Expand Down
11 changes: 7 additions & 4 deletions packages/lexical-playground/__tests__/e2e/Markdown.spec.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,7 @@ const IMPORTED_MARKDOWN_HTML = html`
bold italic strikethrough
</strong>
<span data-lexical-text="true">text,</span>
<br />
<strong
class="PlaygroundEditorTheme__textBold PlaygroundEditorTheme__textItalic PlaygroundEditorTheme__textStrikethrough"
data-lexical-text="true">
Expand Down Expand Up @@ -1407,7 +1408,9 @@ const IMPORTED_MARKDOWN_HTML = html`
dir="ltr">
<span data-lexical-text="true">Blockquotes text goes here</span>
<br />
<span data-lexical-text="true">And secondline after</span>
<span data-lexical-text="true">And second</span>
<br />
<span data-lexical-text="true">line after</span>
</blockquote>
<blockquote
class="PlaygroundEditorTheme__quote PlaygroundEditorTheme__ltr"
Expand Down Expand Up @@ -1485,9 +1488,9 @@ const IMPORTED_MARKDOWN_HTML = html`
class="PlaygroundEditorTheme__listItem PlaygroundEditorTheme__ltr"
dir="ltr"
value="1">
<span data-lexical-text="true">
And can be nested and multiline as well
</span>
<span data-lexical-text="true">And can be nested</span>
<br />
<span data-lexical-text="true">and multiline as well</span>
</li>
</ol>
</li>
Expand Down

0 comments on commit b0c9809

Please sign in to comment.