Skip to content

Commit

Permalink
implement block_html #68 and #70
Browse files Browse the repository at this point in the history
  • Loading branch information
fqrious committed Nov 19, 2024
1 parent 814226b commit d74ed71
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions txt2stix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path
from typing import Dict

import bs4
import mistune
from mistune.renderers.markdown import MarkdownRenderer
from mistune.util import unescape
Expand All @@ -25,10 +26,26 @@ def link(self, token: dict[str, dict], state: mistune.BlockState) -> str:
def codespan(self, token: dict[str, dict], state: mistune.BlockState) -> str:
    """Render an inline code span from its unescaped raw text.

    The token's ``raw`` entry is replaced in place with the result of
    ``unescape`` and the token is then handed to the parent renderer.
    """
    decoded = unescape(token['raw'])
    token['raw'] = decoded
    return super().codespan(token, state)

def block_html(self, token: Dict[str, dict], state: mistune.BlockState) -> str:
    """Render a raw HTML block, stripping link targets and/or images.

    When ``self.remove_links`` is set, the ``href`` attribute is deleted
    from every ``<a>`` element found in the block (the element itself and
    its contents are kept). When ``self.remove_images`` is set, every
    ``<img>`` element is removed. The resulting markup is written back to
    ``token['raw']`` before rendering is delegated to the parent renderer.

    When neither option is set the token is passed through untouched, so
    the markup is not parsed and re-serialised for no reason.
    """
    # Nothing to strip: skip the parse/serialise round trip entirely.
    # NOTE(review): re-serialising through html.parser is not guaranteed to
    # reproduce the input byte-for-byte - confirm no caller relied on that
    # normalisation when both options are off.
    if not (self.remove_links or self.remove_images):
        return super().block_html(token, state)

    soup = bs4.BeautifulSoup(token['raw'], 'html.parser')
    if self.remove_links:
        for anchor in soup.find_all('a'):
            del anchor['href']
    if self.remove_images:
        for image in soup.find_all('img'):
            image.decompose()

    # The token is mutated in place so the parent renderer emits the
    # sanitised markup.
    token['raw'] = soup.decode()
    return super().block_html(token, state)

def inline_html(self, token: Dict[str, dict], state: mistune.BlockState) -> str:
    """Render inline raw HTML with the same stripping applied to HTML blocks.

    The token is sanitised by ``block_html`` (which rewrites
    ``token['raw']`` in place), but the final string is produced by the
    parent's *inline* renderer so that inline markup is not emitted with
    block-level formatting.
    """
    # Called for its side effect on token['raw'] only; the return value is
    # discarded because it is formatted as a standalone block.
    # NOTE(review): mistune's MarkdownRenderer.block_html is understood to
    # append a blank-line terminator to the raw HTML - confirm against the
    # installed mistune version.
    self.block_html(token, state)
    # NOTE(review): an inline token may hold a single opening or closing tag;
    # html.parser may auto-close or drop such lone tags - verify with inline
    # markup such as ``<a href="x">text</a>`` inside a paragraph.
    return super().inline_html(token, state)


def remove_links(input_text: str, remove_images: bool, remove_anchors: bool):
    """Re-render Markdown text through ``ImageLinkRemover``.

    Args:
        input_text: Markdown source to process.
        remove_images: Forwarded to ``ImageLinkRemover`` as ``remove_images``.
        remove_anchors: Forwarded to ``ImageLinkRemover`` as ``remove_links``.

    Returns:
        The output of the mistune pipeline built with ``escape=False`` and
        the configured ``ImageLinkRemover`` renderer.
    """
    renderer = ImageLinkRemover(
        remove_links=remove_anchors,
        remove_images=remove_images,
    )
    modify_links = mistune.create_markdown(escape=False, renderer=renderer)
    # Render exactly once; the result is returned rather than echoed to
    # stdout.
    return modify_links(input_text)

def read_included_file(path):
Expand Down

0 comments on commit d74ed71

Please sign in to comment.