Promptless · promptless · Nov 25, 2024 · Nov 25, 2024 · Nov 25, 2024
diff --git a/docs/docs/how_to/markdown_header_metadata_splitter.ipynb b/docs/docs/how_to/markdown_header_metadata_splitter.ipynb
@@ -261,6 +261,50 @@
     "splits = text_splitter.split_documents(md_header_splits)\n",
     "splits"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b7b557f7",
+   "metadata": {},
+   "source": [
+    "### How to preserve whitespace from the original document:\n",
+    "\n",
+    "By default, `MarkdownHeaderTextSplitter` strips whitespace and newlines from the resulting documents, which can sometimes cause issues with markdown sections like code blocks or nested lists. Use the `ExperimentalMarkdownSyntaxTextSplitter` to preserve whitespace in these instances."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ba48193b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Document(metadata={'Header 1': 'Foo'}, page_content='# Foo  \\nThis is Jim'),\n",
+       " Document(metadata={'Header 1': 'Foo', 'Header 2': 'Bar'}, page_content='## Bar  \\n* Bullet 1\\n* Sub-bullet a')]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain_text_splitters import ExperimentalMarkdownSyntaxTextSplitter\n",
+    "\n",
+    "markdown_document = \"# Foo\\n\\n This is Jim \\n\\n## Bar\\n\\n* Bullet 1\\n  * Sub-bullet a\"\n",
+    "\n",
+    "headers_to_split_on = [\n",
+    "    (\"#\", \"Header 1\"),\n",
+    "    (\"##\", \"Header 2\"),\n",
+    "    (\"###\", \"Header 3\"),\n",
+    "]\n",
+    "\n",
+    "markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(headers_to_split_on, strip_headers=False)\n",
+    "md_header_splits = markdown_splitter.split_text(markdown_document)\n",
+    "md_header_splits"
+   ]
   }
  ],
  "metadata": {
@@ -279,7 +323,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.4"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,