From 90422b10780d5f786dee3a0200d1d322a05ecb93 Mon Sep 17 00:00:00 2001 From: evgenius1424 Date: Mon, 28 Oct 2024 19:05:28 +0100 Subject: [PATCH] Rework prompt --- apps/learnbefore-bff/src/get-words.ts | 77 ++++++++++++++++----------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/apps/learnbefore-bff/src/get-words.ts b/apps/learnbefore-bff/src/get-words.ts index caa6117..84b683b 100644 --- a/apps/learnbefore-bff/src/get-words.ts +++ b/apps/learnbefore-bff/src/get-words.ts @@ -47,47 +47,60 @@ export async function* getWords( } function getPrompt(translationLanguage: string) { - return `Please process the input text as follows: + return `Please analyze the input text to extract valuable vocabulary, prioritizing words in three tiers: -1. First detect the source language of the text and remove: - - Most frequently used words in that language (approximately top 5000) - - Basic vocabulary (A1/A2 level) including: - * Common verbs (equivalents of be, do, go, etc.) - * Basic adjectives (equivalents of good, bad, big, small) - * Time expressions - * Basic numbers and quantities - * Family terms - * Elementary nouns - * Question words - * Pronouns - * Articles (if language has them) - * Prepositions - * Conjunctions - * Basic adverbs - * Auxiliary/modal verbs - * Common greetings - * Basic location words - * Everyday action words +1. High-complexity words: + - Academic vocabulary (B1-C2 level) + - Technical and specialized terms + - Domain-specific terminology + - Scientific and professional jargon + - Abstract concepts + - Literary or archaic terms -2. For each remaining word: - - Convert to dictionary form - - Remove duplicates - - Keep order - - Create entry with: - * Original word - * Definition - * Russian translation - * Detected language code (ISO 639-1) +2. Medium-complexity words (B1-B2 level): + - Less common everyday verbs (e.g., blaze, scatter, dodge) + - Descriptive vocabulary (e.g., graceful, peculiar, vivid) + - Phrasal verbs beyond basics + - Nature and environment terms + - Emotion and behavior words + - Specific actions and processes + - Words with multiple meanings + - Common metaphorical usage + - Words that native speakers use but learners often don't know -3. Format each entry as JSON: +3. Contextually valuable words: + - Words crucial for understanding the text's meaning + - Topic-specific vocabulary + - Words with cultural significance + - Terms that often appear in media/news + - Words with tricky usage patterns + - Terms that often cause confusion for learners + +Processing rules: +- Maintain original order of appearance +- Convert to dictionary form +- Remove duplicates while preserving first occurrence +- Consider word frequency in general usage (roughly 3000-15000 range for medium complexity) +- Include words that might seem simple to native speakers but are often unknown to learners + +Format output as JSON: { "words": [ { "word": "[Original word]", - "meaning": "[Definition in source language]", + "meaning": "[Definition in text language]", "translation": "[${translationLanguage} translation]", "languageCode": "[ISO 639-1 code]" } ] -}` +} + +Additional guidelines: +- Include words that appear in upper-intermediate textbooks +- Keep terms that might not be extremely complex but are still challenging for learners +- Consider including words that: + * Have subtle usage differences from their synonyms + * Are common in native speech but rare in learner vocabulary + * Represent concepts that might be familiar but whose specific term might not be + * Are frequently used in certain contexts but not necessarily in basic conversation` }