Skip to content

Commit

Permalink
feat: add parameters for JinaReaderTool (#11613)
Browse files Browse the repository at this point in the history
  • Loading branch information
DDDDD12138 authored Dec 18, 2024
1 parent 99f40a9 commit 79d11ea
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 0 deletions.
13 changes: 13 additions & 0 deletions api/core/tools/provider/builtin/jina/tools/jina_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ def _invoke(
if wait_for_selector is not None and wait_for_selector != "":
headers["X-Wait-For-Selector"] = wait_for_selector

remove_selector = tool_parameters.get("remove_selector")
if remove_selector is not None and remove_selector != "":
headers["X-Remove-Selector"] = remove_selector

if tool_parameters.get("retain_images", False):
headers["X-Retain-Images"] = "true"

if tool_parameters.get("image_caption", False):
headers["X-With-Generated-Alt"] = "true"

Expand All @@ -59,6 +66,12 @@ def _invoke(
if tool_parameters.get("no_cache", False):
headers["X-No-Cache"] = "true"

if tool_parameters.get("with_iframe", False):
headers["X-With-Iframe"] = "true"

if tool_parameters.get("with_shadow_dom", False):
headers["X-With-Shadow-Dom"] = "true"

max_retries = tool_parameters.get("max_retries", 3)
response = ssrf_proxy.get(
str(URL(self._jina_reader_endpoint + url)),
Expand Down
55 changes: 55 additions & 0 deletions api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,33 @@ parameters:
pt_BR: css selector para aguardar elementos específicos
llm_description: css selector of the target element to wait for
form: form
- name: remove_selector
type: string
required: false
label:
en_US: Excluded Selector
zh_Hans: 排除选择器
pt_BR: Seletor Excluído
human_description:
en_US: css selector for removing specific elements
zh_Hans: css 选择器用于排除特定元素
pt_BR: seletor CSS para remover elementos específicos
llm_description: css selector of the target elements to remove
form: form
- name: retain_images
type: boolean
required: false
default: false
label:
en_US: Remove All Images
zh_Hans: 删除所有图片
pt_BR: Remover todas as imagens
human_description:
en_US: Removes all images from the response.
zh_Hans: 从响应中删除所有图片。
pt_BR: Remove todas as imagens da resposta.
llm_description: Remove all images
form: form
- name: image_caption
type: boolean
required: false
Expand Down Expand Up @@ -136,6 +163,34 @@ parameters:
pt_BR: Ignorar o cache
llm_description: bypass the cache
form: form
- name: with_iframe
type: boolean
required: false
default: false
label:
en_US: Enable iframe extraction
zh_Hans: 启用 iframe 提取
pt_BR: Habilitar extração de iframe
human_description:
en_US: Extract and process content of all embedded iframes in the DOM tree.
zh_Hans: 提取并处理 DOM 树中所有嵌入 iframe 的内容。
pt_BR: Extrair e processar o conteúdo de todos os iframes incorporados na árvore DOM.
llm_description: Extract content from embedded iframes
form: form
- name: with_shadow_dom
type: boolean
required: false
default: false
label:
en_US: Enable Shadow DOM extraction
zh_Hans: 启用 Shadow DOM 提取
pt_BR: Habilitar extração de Shadow DOM
human_description:
en_US: Traverse all Shadow DOM roots in the document and extract content.
zh_Hans: 遍历文档中所有 Shadow DOM 根并提取内容。
pt_BR: Percorra todas as raízes do Shadow DOM no documento e extraia o conteúdo.
llm_description: Extract content from Shadow DOM roots
form: form
- name: summary
type: boolean
required: false
Expand Down

0 comments on commit 79d11ea

Please sign in to comment.