fix: changed api contract, tokenization logic and faq entries

willianantunes · Apr 12, 2021 · e2e6aca · e2e6aca
1 parent d5f2121
commit e2e6aca
Show file tree

Hide file tree

Showing 11 changed files with 533 additions and 228 deletions.
diff --git a/backend/rave_of_phonetics/apps/core/api/v2/api_views.py b/backend/rave_of_phonetics/apps/core/api/v2/api_views.py
@@ -30,9 +30,9 @@ def transcribe(request: Request) -> Response:
     transcriptions = check_and_retrieve_transcriptions(words, language)
     logger.debug(f"Transcriptions: {transcriptions}")
 
-    result = []
+    result = {}
     for transcription in transcriptions:
         transcription_as_dict = asdict(transcription)
-        result.append(transcription_as_dict)
+        result[transcription.word] = transcription_as_dict["entries"]
 
     return Response(result)
diff --git a/backend/rave_of_phonetics/apps/core/api/v2/serializers.py b/backend/rave_of_phonetics/apps/core/api/v2/serializers.py
@@ -25,4 +25,8 @@ def validate(self, data):
         if language == self.supported_languages[1]:
             data["language"] = "en-gb-x-rp"
 
+        # Drop repeated words, keeping the first occurrence of each in its original order
+        words = data["words"]
+        data["words"] = list(dict.fromkeys(words))
+
         return data
diff --git a/backend/tests/int/apps/core/api/v2/test_api_views.py b/backend/tests/int/apps/core/api/v2/test_api_views.py
@@ -53,11 +53,7 @@ def test_should_receive_empty_entries_as_the_words_does_not_exist_in_database(cl
 
     assert ResearchedWord.objects.count() == 3
     assert response.status_code == 200
-    assert result == [
-        {"word": "rave", "entries": None},
-        {"word": "of", "entries": None},
-        {"word": "phonetics", "entries": None},
-    ]
+    assert result == {"of": None, "phonetics": None, "rave": None}
 
 
 @pytest.mark.django_db
@@ -79,44 +75,35 @@ def test_should_receive_transcriptions(client, mock_recaptcha_verify):
 
     assert ResearchedWord.objects.count() == 3
     assert response.status_code == 200
-    assert result == [
-        {
-            "entries": [
-                {
-                    "classification": "Undefined",
-                    "phonemic": "ɹ eɪ v",
-                    "phonemic_syllables": "ɹ eɪ v",
-                    "phonetic": None,
-                    "phonetic_syllables": None,
-                    "version": "Version 1",
-                }
-            ],
-            "word": "rave",
-        },
-        {
-            "entries": [
-                {
-                    "classification": "Undefined",
-                    "phonemic": "ə v",
-                    "phonemic_syllables": "ə v",
-                    "phonetic": None,
-                    "phonetic_syllables": None,
-                    "version": "Version 1",
-                },
-            ],
-            "word": "of",
-        },
-        {
-            "entries": [
-                {
-                    "classification": "Undefined",
-                    "phonemic": "f ə ˈn ɛ t ɪ k s",
-                    "phonemic_syllables": "f ə • ˈn ɛ • t ɪ k s",
-                    "phonetic": None,
-                    "phonetic_syllables": None,
-                    "version": "Version 1",
-                }
-            ],
-            "word": "phonetics",
-        },
-    ]
+    assert result == {
+        "of": [
+            {
+                "classification": "Undefined",
+                "phonemic": "ə v",
+                "phonemic_syllables": "ə v",
+                "phonetic": None,
+                "phonetic_syllables": None,
+                "version": "Version 1",
+            }
+        ],
+        "phonetics": [
+            {
+                "classification": "Undefined",
+                "phonemic": "f ə ˈn ɛ t ɪ k s",
+                "phonemic_syllables": "f ə • ˈn ɛ • t ɪ k s",
+                "phonetic": None,
+                "phonetic_syllables": None,
+                "version": "Version 1",
+            }
+        ],
+        "rave": [
+            {
+                "classification": "Undefined",
+                "phonemic": "ɹ eɪ v",
+                "phonemic_syllables": "ɹ eɪ v",
+                "phonetic": None,
+                "phonetic_syllables": None,
+                "version": "Version 1",
+            }
+        ],
+    }
diff --git a/backend/tests/int/apps/core/api/v2/test_serializers.py b/backend/tests/int/apps/core/api/v2/test_serializers.py
@@ -37,3 +37,14 @@ def test_should_inform_that_is_valid_and_change_language_to_correct_one(self):
         words, language = serializer.validated_data["words"], serializer.validated_data["language"]
         assert words == fake_data["words"]
         assert language == "en-gb-x-rp"
+
+    def test_should_inform_that_is_valid_and_words_must_not_be_repeatable(self):
+        fake_data = {"words": ["you", "if", "you", "won't", "won't"], "language": "en-gb"}
+        serializer = TranscriberSerializer(data=fake_data)
+
+        assert serializer.is_valid()
+
+        words, language = serializer.validated_data["words"], serializer.validated_data["language"]
+        assert len(words) == 3
+        assert words == ["you", "if", "won't"]
+        assert language == "en-gb-x-rp"
diff --git a/frontend/src/components/FrequentlyAskedQuestions/index.js b/frontend/src/components/FrequentlyAskedQuestions/index.js
@@ -5,10 +5,10 @@ import { slugify } from "../../utils/general"
 const entries = [
   {
     question: "How do I use Rave of Phonetics?",
-    text: `The main function of Rave of Phonetics is to provide you a phonemic transcription of a word or text in 
-    order to help you pronounce it. You can also see its syllables, stress marks and the phonetic version as well, if 
-    they are available. Simply type a word in the space provided and read the transcription as well as listen to 
-    the audio to improve your listening skills.`,
+    text: `The main function of Rave of Phonetics is to provide you a phonemic transcription of a word or text in order 
+    to help you pronounce it. You can also see its syllables, stress marks and the phonetic version as well, if they are 
+    available. Simply type a word in the space provided and read the transcription as well as listen to the audio 
+    to improve your listening skills.`,
   },
   {
     question: "Can I improve my accent with this page?",
@@ -27,15 +27,15 @@ const entries = [
   },
   {
     question: "How do I share my transcriptions?",
-    text: `Sharing is caring. At the bottom of the <strong>IPA Transcription Tool</strong> panel you have a bottom named 
-    <strong>copy link</strong>. Just set the tool as you'd like, let's say, you choose the word THING, using AMERICAN ENGLISH,
-    with SHOW STRESS and SHOW SYLLABLES activated, after that, you can simply click on <strong>copy link</strong> and then 
-    it will be available in your transfer area! Just press CTRL+V on your social media and you'll see it!`,
+    text: `Sharing is caring. At the bottom of the <strong>IPA Transcription Tool</strong> panel, there is an option named 
+    <strong>copy link</strong>. Just type in the desired word you would like to transcribe, apply your options of stress, 
+    syllables, etc. and after that you can simply click on <strong>copy link</strong>. Then 
+    it will be available on your clipboard! Just press CTRL+V on your social media and you'll see it!`,
   },
   {
     question: "Is there a blog for this page?",
     text: `Of course, there is. If you click <a href="/blog">here</a>, you will find a blog section that has interesting 
-    topics related to phonetic and languages. Please share with all your friends 
+    topics related to Phonetics and Languages. Please share with all your friends 
     <span role="img" aria-label="slightly smiling face">😊</span>`,
   },
   {
@@ -53,8 +53,8 @@ const entries = [
   },
   {
     question: "What does ‘show stress’ mean?",
-    text: `Glad you asked, no need to stress. This option is used to see where the syllables of the words are and which 
-    one is pronounce, or stressed, with standard pronunciation.`,
+    text: `Glad you asked, no need to stress. This option is used to see which syllable of the word has primary and 
+    secondary stress. This option shows standard pronunciation.`,
   },
   {
     question: "Why do I need to loop the speech?",
@@ -64,9 +64,9 @@ const entries = [
   },
   {
     question: "How do I leave a comment?",
-    text: `Ah, yes. Please let us know what you think. If you want to leave a comment you can go to the bottom of the page. 
-    They are available in our home, changelog, FAQ and blog pages. Also you can get in touch with us through our social 
-    medias (see the bottom bar).`,
+    text: `Ah, yes. Please let us know what you think. If you want to leave a comment you can go to the bottom of the 
+    page and find our comment section. They are available in our home, changelog, FAQ and blog sections. Also, you can 
+    get in touch with us through our social medias (see the bottom bar).`,
   },
   {
     question: "How can I ask questions?",
@@ -82,18 +82,19 @@ const entries = [
     options but for now you will see mainly phonemic transcriptions.`,
   },
   {
-    question: "Is there an option for allophone variations?",
-    text: `I knew we would have some experts ask this question. For the moment, we mainly provide phonemic transcriptions, 
-    as phonetic, syllables and allophones are being filled by the community through suggestions. If you'd like to check 
-    all sort of variations, you should check if the transcription is underlined, if so, just click on it to see its 
-    variations. The details can be seen if you click on the word, which will be underlined as well.`,
+    question: "Is there an option for phonetic variations of the word?",
+    text: `I knew we would have some experts ask this question. For the moment, we mainly provide phonemic transcriptions. 
+    Phonetic transcription, syllables and allophone variations are still being developed as well as receiving 
+    contributions and suggestions by our great community of learners and experts in the area. If you'd like to check 
+    alternate variations of the word, you should check if the transcription is underlined, if so, just click on it to 
+    see its variations. The details can be seen if you click on the word, which will be underlined as well.`,
   },
   {
     question: "I would like to add or fix a transcription. Is it possible? How do I do that?",
-    text: `Sure thing! First you try to transcribe the desired word or phrase, after you receive the transcription, you 
-    click on the underlined word. You should see the option <strong>apply suggestion</strong>. If you click on it, a 
-    window will be opened describing what you can do. If you'd like to provide only the phonemic, just fill the field 
-    related to it, give us some reasons and click on <strong>send suggestion</strong>. The same applies to phonetic. 
+    text: `Sure thing! First you try to transcribe the desired word or phrase and then click on the underlined word. 
+    You should see the option <strong>apply suggestion</strong>. If you click on it, a window will open describing what 
+    you can do. If you'd like to provide only the phonemic, just fill the field related to that and give us some reasons 
+    why you made the suggestion before you click on <strong>send suggestion</strong>. The same applies to phonetic. 
     Syllables will be handled by us, so you don't have to worry.`,
   },
 ]

diff --git a/frontend/src/domains/TranscriptionDetails.js b/frontend/src/domains/TranscriptionDetails.js
@@ -1,4 +1,4 @@
-import { extractRawWordsFromText } from "../utils/tokenization"
+import { extractRawWordsAndTheirTokensFromText } from "../utils/tokenization"
 
 export class TranscriptionDetails {
   constructor(
@@ -149,16 +149,20 @@ export class TranscriptionDetails {
     // REGEX to deal with stress marks and punctuations
     const regexToExtractStressMarks = /[ˈˌ]+/g
     // Words that may have punctuations
-    const wordsFromText = extractRawWordsFromText(this._text)
+    const rawWordsAndTheirTokens = extractRawWordsAndTheirTokensFromText(this._text)
     // What will be returned
     const changedTranscription = []
     // Filling changedTranscription array with data
-    for (const [index, word] of wordsFromText.entries()) {
-      const wordDetails = this._transcriptionSetup[index]
+    for (const tokenDetails of rawWordsAndTheirTokens) {
+      // Extracting the raw word and its token
+      const word = tokenDetails.raw
+      const token = tokenDetails.token
+      // Creating a new entry to insert into changedTranscription array
+      const entries = this._transcriptionSetup[token]
       const changedWord = { word }
       const changedEntries = []
-      if (wordDetails.entries) {
-        wordDetails.entries.forEach(transcription => {
+      if (entries) {
+        entries.forEach(transcription => {
           const changedTranscription = {}
           Object.assign(changedTranscription, transcription)
           if (!this._showStress) {

diff --git a/frontend/src/redux/slices/transcription-slice.js b/frontend/src/redux/slices/transcription-slice.js
@@ -1,7 +1,7 @@
 import { createSlice } from "@reduxjs/toolkit"
 import { transcribe } from "../../services/rop-api"
 import { findById } from "../../domains/transcription-details-dao"
-import { extractWordsFromText } from "../../utils/tokenization"
+import { extractTokensFromText } from "../../utils/tokenization"
 
 const initialState = {
   text: "",
@@ -102,7 +102,7 @@ export const transcriptionFromText = (text, chosenLanguage, token, hookWhenError
   dispatch(analysingText())
 
   try {
-    const words = extractWordsFromText(text)
+    const words = extractTokensFromText(text)
     const result = await transcribe(words, chosenLanguage, token)
     dispatch(textWasTranscribed(result))
     dispatch(transcriptionToBeSaved())

diff --git a/frontend/src/utils/tokenization.js b/frontend/src/utils/tokenization.js
@@ -1,9 +1,18 @@
-export function extractWordsFromText(text) {
-  const regexToExtractWordsAndEmojis = /([\w’'\-\u00a9\u00ae\u2000-\u3300\ud83c\ud000-\udfff\ud83d\ud000-\udfff\ud83e\ud000-\udfff])+/g
-  return text.match(regexToExtractWordsAndEmojis).map(value => value.toLowerCase())
-}
+const regexNegationToExtractWordsAndEmojis = /([^\w’'\-\u00a9\u00ae\u2000-\u3300\ud83c\ud000-\udfff\ud83d\ud000-\udfff\ud83e\ud000-\udfff])+/g
 
-export function extractRawWordsFromText(text) {
+export function extractRawWordsAndTheirTokensFromText(text) {
   const splitText = text.split(" ")
-  return splitText.filter(entry => entry).map(dirtyWord => dirtyWord.trim())
+
+  return splitText
+    .filter(entry => entry)
+    .map(dirtyWord => dirtyWord.trim())
+    .map(cleanedWord => {
+      const token = cleanedWord.toLowerCase().replace(regexNegationToExtractWordsAndEmojis, "")
+      return { raw: cleanedWord, token: token ? token : null }
+    })
+}
+
+export function extractTokensFromText(text) {
+  const tokens = extractRawWordsAndTheirTokensFromText(text)
+  return tokens.map(({ raw, token }) => (token ? token : raw.toLowerCase()))
 }