-
-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
⚡️ perf: improve knowledge base RAG prompts (#4544)
* ⚡️ perf: improve knowledge base qa prompts * ⚡️ perf: improve knowledge base qa performance * ⚡️ perf: improve knowledge base qa performance * ✅ test: add tests * ✅ test: add tests for rag actions * ✅ test: fix tests
- Loading branch information
Showing
12 changed files
with
537 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
src/prompts/knowledgeBaseQA/__snapshots__/index.test.ts.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html | ||
|
||
exports[`knowledgeBaseQAPrompts > should generate prompt with all parameters 1`] = ` | ||
"<knowledge_base_qa_info> | ||
You are also a helpful assistant good answering questions related to Test Knowledge. And you'll be provided with a question and several passages that might be relevant. And currently your task is to provide answer based on the question and passages. | ||
<knowledge_base_anwser_instruction> | ||
- Note that passages might not be relevant to the question, please only use the passages that are relevant. | ||
- if there is no relevant passage, please answer using your knowledge. | ||
- Answer should use the same original language as the question and follow markdown syntax. | ||
</knowledge_base_anwser_instruction> | ||
<knowledge_bases> | ||
<knowledge_bases_docstring>here are the knowledge base scope we retrieve chunks from:</knowledge_bases_docstring> | ||
<knowledge id="kb1" name="Test Knowledge" type="file" fileType="txt" >Test description</knowledge> | ||
</knowledge_bases> | ||
<retrieved_chunks> | ||
<retrieved_chunks_docstring>here are retrived chunks you can refer to:</retrieved_chunks_docstring> | ||
<chunk fileId="file1" fileName="test.txt" similarity="0.8" pageNumber="1" >This is a test chunk</chunk> | ||
</retrieved_chunks> | ||
<user_query> | ||
<user_query_docstring>to make result better, we may rewrite user's question.If there is a rewrite query, it will be wrapper with \`rewrite_query\` tag.</user_query_docstring> | ||
<raw_query>What is the test about?</raw_query> | ||
<rewrite_query>Could you explain the content of the test?</rewrite_query> | ||
<user_query> | ||
</knowledge_base_qa_info>" | ||
`; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import { ChatSemanticSearchChunk } from '@/types/chunk'; | ||
|
||
/**
 * Render a single retrieved chunk as a `<chunk>` element.
 *
 * The `pageNumber` attribute is emitted only when `item.pageNumber` is truthy.
 * Values are interpolated verbatim; no escaping is applied.
 */
const chunkPrompt = (item: ChatSemanticSearchChunk) => {
  const { fileId, fileName, pageNumber, similarity, text } = item;
  const pageAttr = pageNumber ? ` pageNumber="${pageNumber}" ` : '';

  return `<chunk fileId="${fileId}" fileName="${fileName}" similarity="${similarity}" ${pageAttr}>${text}</chunk>`;
};

/**
 * Build the `<retrieved_chunks>` section of the knowledge-base QA prompt.
 *
 * @param fileList - Chunks returned by semantic search.
 * @returns An empty string when `fileList` is empty; otherwise the section with
 *   one `<chunk>` line per entry, in input order.
 */
export const chunkPrompts = (fileList: ChatSemanticSearchChunk[]) => {
  if (!fileList.length) return '';

  const lines = [
    '<retrieved_chunks>',
    '<retrieved_chunks_docstring>here are retrived chunks you can refer to:</retrieved_chunks_docstring>',
    ...fileList.map((entry) => chunkPrompt(entry)),
    '</retrieved_chunks>',
  ];

  return lines.join('\n').trim();
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import { describe, expect, it } from 'vitest'; | ||
|
||
import { ChatSemanticSearchChunk } from '@/types/chunk'; | ||
import { KnowledgeItem, KnowledgeType } from '@/types/knowledgeBase'; | ||
|
||
import { knowledgeBaseQAPrompts } from './index'; | ||
|
||
/**
 * Unit tests for `knowledgeBaseQAPrompts`.
 *
 * Covers the empty-result guard, the full prompt (via snapshot), optional
 * `rewriteQuery` / `knowledge` inputs, verbatim handling of special characters,
 * and rendering of several knowledge items.
 */
describe('knowledgeBaseQAPrompts', () => {
  // ---- shared fixtures ----
  const userQuery = 'What is the test about?';
  const rewriteQuery = 'Could you explain the content of the test?';

  const mockChunks: ChatSemanticSearchChunk[] = [
    {
      fileId: 'file1',
      fileName: 'test.txt',
      id: '1',
      pageNumber: 1,
      similarity: 0.8,
      text: 'This is a test chunk',
    },
  ];

  const mockKnowledge: KnowledgeItem[] = [
    {
      description: 'Test description',
      fileType: 'txt',
      id: 'kb1',
      name: 'Test Knowledge',
      type: KnowledgeType.File,
    },
  ];

  // Opening sentence that must be present whenever a prompt is produced.
  const introSentence = 'You are also a helpful assistant good answering questions related to';

  it('should return empty string if chunks is empty', () => {
    const prompt = knowledgeBaseQAPrompts({ chunks: [], knowledge: mockKnowledge, userQuery });

    expect(prompt).toBe('');
  });

  it('should return empty string if chunks is undefined', () => {
    const prompt = knowledgeBaseQAPrompts({ knowledge: mockKnowledge, userQuery });

    expect(prompt).toBe('');
  });

  it('should generate prompt with all parameters', () => {
    const prompt = knowledgeBaseQAPrompts({
      chunks: mockChunks,
      knowledge: mockKnowledge,
      rewriteQuery,
      userQuery,
    });

    // The whole prompt layout is pinned by the stored snapshot.
    expect(prompt).toMatchSnapshot();
  });

  it('should generate prompt without rewriteQuery', () => {
    const prompt = knowledgeBaseQAPrompts({
      chunks: mockChunks,
      knowledge: mockKnowledge,
      userQuery,
    });

    expect(prompt).toContain('<raw_query>What is the test about?</raw_query>');
    expect(prompt).not.toContain('<rewrite_query>');
  });

  it('should generate prompt without knowledge', () => {
    const prompt = knowledgeBaseQAPrompts({ chunks: mockChunks, userQuery });

    expect(prompt).toContain(introSentence);
    expect(prompt).not.toContain('<knowledge_bases>');
  });

  it('should handle empty knowledge array', () => {
    const prompt = knowledgeBaseQAPrompts({ chunks: mockChunks, knowledge: [], userQuery });

    expect(prompt).toContain(introSentence);
    expect(prompt).not.toContain('<knowledge_bases>');
  });

  // Despite the title, the assertions below check that special characters
  // appear in the prompt verbatim, i.e. unchanged from the input.
  it('should properly escape special characters in input', () => {
    const specialChunks: ChatSemanticSearchChunk[] = [
      {
        fileId: 'file1',
        fileName: 'test&.txt',
        id: '1',
        similarity: 0.8,
        text: 'This is a test with & < > "quotes"',
      },
    ];

    const prompt = knowledgeBaseQAPrompts({
      chunks: specialChunks,
      userQuery: 'Test with & < > "quotes"',
    });

    for (const fragment of [
      'test&.txt',
      'This is a test with & < > "quotes"',
      'Test with & < > "quotes"',
    ]) {
      expect(prompt).toContain(fragment);
    }
  });

  it('should handle multiple knowledge items', () => {
    const multipleKnowledge: KnowledgeItem[] = [
      { id: 'kb1', name: 'Knowledge 1', type: KnowledgeType.File },
      { id: 'kb2', name: 'Knowledge 2', type: KnowledgeType.KnowledgeBase },
    ];

    const prompt = knowledgeBaseQAPrompts({
      chunks: mockChunks,
      knowledge: multipleKnowledge,
      userQuery,
    });

    // Names are joined with "/" in the intro sentence; each item gets its own tag.
    expect(prompt).toContain('Knowledge 1/Knowledge 2');
    expect(prompt).toContain('<knowledge id="kb1"');
    expect(prompt).toContain('<knowledge id="kb2"');
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import { chunkPrompts } from '@/prompts/knowledgeBaseQA/chunk'; | ||
import { knowledgePrompts } from '@/prompts/knowledgeBaseQA/knowledge'; | ||
import { userQueryPrompt } from '@/prompts/knowledgeBaseQA/userQuery'; | ||
import { ChatSemanticSearchChunk } from '@/types/chunk'; | ||
import { KnowledgeItem } from '@/types/knowledgeBase'; | ||
|
||
/**
 * Assemble the full knowledge-base QA prompt wrapped in `<knowledge_base_qa_info>`.
 *
 * @param chunks - Retrieved chunks; when missing or empty no prompt is produced.
 * @param knowledge - Knowledge items in scope; their names are joined with `/`
 *   into the intro sentence and they are rendered via `knowledgePrompts`.
 * @param userQuery - The user's original question.
 * @param rewriteQuery - Optional rewritten question, forwarded to `userQueryPrompt`.
 * @returns An empty string when there are no chunks, otherwise the prompt text.
 */
export const knowledgeBaseQAPrompts = ({
  chunks,
  knowledge,
  userQuery,
  rewriteQuery,
}: {
  chunks?: ChatSemanticSearchChunk[];
  knowledge?: KnowledgeItem[];
  rewriteQuery?: string;
  userQuery: string;
}) => {
  // Without retrieved chunks there is nothing to ground an answer on.
  if (!chunks || chunks.length === 0) return '';

  const domains = (knowledge ?? []).map(({ name }) => name).join('/');

  // Each entry is one line-separated section; an empty knowledge section
  // still occupies its (blank) line.
  const sections = [
    '<knowledge_base_qa_info>',
    `You are also a helpful assistant good answering questions related to ${domains}. And you'll be provided with a question and several passages that might be relevant. And currently your task is to provide answer based on the question and passages.`,
    '<knowledge_base_anwser_instruction>',
    '- Note that passages might not be relevant to the question, please only use the passages that are relevant.',
    '- if there is no relevant passage, please answer using your knowledge.',
    '- Answer should use the same original language as the question and follow markdown syntax.',
    '</knowledge_base_anwser_instruction>',
    knowledgePrompts(knowledge),
    chunkPrompts(chunks),
    userQueryPrompt(userQuery, rewriteQuery),
    '</knowledge_base_qa_info>',
  ];

  return sections.join('\n');
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import { KnowledgeItem } from '@/types/knowledgeBase'; | ||
|
||
/**
 * Render a single knowledge item as a `<knowledge>` element.
 *
 * The `fileType` attribute is emitted only when `item.fileType` is truthy, and a
 * missing description renders as an empty element body.
 */
const knowledgePrompt = (item: KnowledgeItem) => {
  const fileTypeAttr = item.fileType ? ` fileType="${item.fileType}" ` : '';
  const body = item.description || '';

  return `<knowledge id="${item.id}" name="${item.name}" type="${item.type}"${fileTypeAttr}>${body}</knowledge>`;
};

/**
 * Build the `<knowledge_bases>` section of the knowledge-base QA prompt.
 *
 * @param list - Knowledge items the chunks were retrieved from.
 * @returns An empty string when `list` is missing or empty; otherwise the
 *   section with one `<knowledge>` line per item, in input order.
 */
export const knowledgePrompts = (list?: KnowledgeItem[]) => {
  const items = list ?? [];
  if (items.length === 0) return '';

  const lines = [
    '<knowledge_bases>',
    '<knowledge_bases_docstring>here are the knowledge base scope we retrieve chunks from:</knowledge_bases_docstring>',
    ...items.map((entry) => knowledgePrompt(entry)),
    '</knowledge_bases>',
  ];

  return lines.join('\n').trim();
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/**
 * Build the `<user_query>` section of the knowledge-base QA prompt.
 *
 * @param userQuery - The question as entered by the user; surrounding whitespace is trimmed.
 * @param rewriteQuery - Optional rewritten form of the question. When non-empty it is
 *   trimmed and emitted inside a `<rewrite_query>` tag; otherwise that line is left blank.
 * @returns The section text, opened with `<user_query>` and closed with `</user_query>`.
 */
export const userQueryPrompt = (userQuery: string, rewriteQuery?: string): string => {
  // The section must end with a closing tag so the XML-like block is well formed.
  return `<user_query>
<user_query_docstring>to make result better, we may rewrite user's question.If there is a rewrite query, it will be wrapper with \`rewrite_query\` tag.</user_query_docstring>
<raw_query>${userQuery.trim()}</raw_query>
${rewriteQuery ? `<rewrite_query>${rewriteQuery.trim()}</rewrite_query>` : ''}
</user_query>`;
};
File renamed without changes.
Oops, something went wrong.