From 6feefc76f21b2c7c2ac91e7c94c9b9dc8de4346d Mon Sep 17 00:00:00 2001
From: "Daniel W. Hieber" <dwhieb@gmail.com>
Date: Fri, 23 Feb 2024 17:09:46 -0600
Subject: [PATCH] NEW: option: glosses = `true`

---
 README.md                  | 15 +++----
 src/index.js               |  1 +
 src/words/glosses.js       | 33 +++++++++++----
 src/words/literal.js       |  4 +-
 src/words/morphemes.js     |  2 +-
 src/words/transcription.js |  2 +-
 test/words.test.js         | 83 +++++++++++++++++++++++++++++++++++---
 7 files changed, 115 insertions(+), 25 deletions(-)
diff --git a/README.md b/README.md
index 6d4b379..9b645cb 100644
--- a/README.md
+++ b/README.md
@@ -114,13 +114,14 @@ If the input is a string containing only whitespace, an empty string is returned
 
 ## Options
 
-| Option         | type          | Default   | Description                                                                                                                                                                                                                                                                                                                                                  |
-| -------------- | ------------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `analysisLang` | String        | undefined | An [IETF language tag][lang-tags] to use as the default value of the `lang` attribute for any data in the analysis language (metadata, literal translation, free translation, glosses, literal word translation). If `undefined`, no `lang` tag is added, which means that browsers will assume that the analysis language is the same as the HTML document. |
-| `classes`      | Array<String> | `['igl']` | An array of classes to apply to the wrapper element.                                                                                                                                                                                                                                                                                                         |
-| `scription`    | Object        | `{}`      | Options to pass to the `scription2dlx` library. See [scription2dlx][scription2dlx] for more details.                                                                                                                                                                                                                                                         |
-| `tag`          | String        | `'div'`   | The HTML tag to wrap each interlinear gloss in. Can also be a custom tag (useful for HTML custom elements).                                                                                                                                                                                                                                                  |
-| `targetLang`   | String        | undefined | An [IETF language tag][lang-tags] to use as the default value of the `lang` attribute for any data in the target language.                                                                                                                                                                                                                                   |
+| Option         | type                       | Default   | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| -------------- | -------------------------- | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `analysisLang` | String                     | undefined | An [IETF language tag][lang-tags] to use as the default value of the `lang` attribute for any data in the analysis language (metadata, literal translation, free translation, glosses, literal word translation). If `undefined`, no `lang` tag is added, which means that browsers will assume that the analysis language is the same as the HTML document.                                                                                                                                                                                                                                                                                                                                  |
+| `classes`      | Array<String>              | `['igl']` | An array of classes to apply to the wrapper element.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| `glosses`      | Boolean \| Array \| Object | `false`   | Options for wrapping glosses in `<abbr>` tags.<br><br>If set to `false` (default), no `<abbr>` tags are added to the glosses.<br><br>If set to `true`, an `<abbr>` tag is wrapped around any glosses in CAPS, and any numbers.<br><br>If set to an array, any glosses listed in the array will be wrapped in `<abbr>` tags. (This is useful if you'd like certain glosses to be lowercase but still wrapped in an `<abbr>` tag, such as `sg` and `pl`, which are commonly lowercased.)<br><br>If set to an object hash, the keys of the object are treated as glosses, and the values of the object are treated as definitions for those glosses. Each gloss will be wrapped in an `<abbr>` tag with a `title` attribute set to the definition. |
+| `scription`    | Object                     | `{}`      | Options to pass to the `scription2dlx` library. See [scription2dlx][scription2dlx] for more details.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| `tag`          | String                     | `'div'`   | The HTML tag to wrap each interlinear gloss in. Can also be a custom tag (useful for HTML custom elements).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| `targetLang`   | String                     | undefined | An [IETF language tag][lang-tags] to use as the default value of the `lang` attribute for any data in the target language.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 
 ## HTML Structure
 
diff --git a/src/index.js b/src/index.js
index eeaaf29..4035179 100644
--- a/src/index.js
+++ b/src/index.js
@@ -4,6 +4,7 @@ import validateOptions  from './options.js'
 
 const defaultOptions = {
   classes:   [`igl`],
+  glosses:   false,
   scription: {},
   tag:       `div`,
 }
diff --git a/src/words/glosses.js b/src/words/glosses.js
index 530f49d..3ff3535 100644
--- a/src/words/glosses.js
+++ b/src/words/glosses.js
@@ -1,24 +1,41 @@
 import addEmphasis    from '../utilities/addEmphasis.js'
 import replaceHyphens from '../utilities/replaceHyphens.js'
 
-export default function createGlosses(data, { analysisLang }) {
+const glossRegExp = /(?<gloss>[1-4]|[A-Z]+)/gv
 
-  if (!data) return ``
+function createGlossLine(glosses, language, option) {
 
-  if (typeof data === `string`) {
+  const lang = language ? `lang='${ language }'` : ``
+
+  if (option === true) glosses = wrapGlosses(glosses)
+  glosses = replaceHyphens(glosses)
+  glosses = addEmphasis(glosses)
+
+  return `<span class=glosses ${ lang }>${ glosses }</span>`
+
+}
 
-    const lang    = analysisLang ? `lang='${ analysisLang }'` : ``
-    const glosses = addEmphasis(replaceHyphens(data))
+/**
+ * Finds all numbers and capitalized glosses in a string and wraps them in `<abbr>` tags.
+ * @param {string} glosses
+ * @returns {string}
+ */
+function wrapGlosses(glosses) {
+  return glosses.replaceAll(glossRegExp, `<abbr>$1</abbr>`)
+}
 
-    return `<span class=w-gl ${ lang }>${ glosses }</span>`
+export default function createGlosses(data, { analysisLang, glosses: glossesOption }) {
 
+  if (!data) return ``
+
+  if (typeof data === `string`) {
+    return createGlossLine(data, analysisLang, glossesOption)
   }
 
   let html = ``
 
   for (const lang in data) {
-    const glosses = addEmphasis(replaceHyphens(data[lang]))
-    html += `<span class=w-gl lang='${ lang }'>${ glosses }</span>`
+    html += createGlossLine(data[lang], lang, glossesOption)
   }
 
   return html
diff --git a/src/words/literal.js b/src/words/literal.js
index b77da09..17dc876 100644
--- a/src/words/literal.js
+++ b/src/words/literal.js
@@ -6,14 +6,14 @@ export default function createLiteral(data, { analysisLang }) {
 
   if (typeof data === `string`) {
     const lang = analysisLang ? `lang='${ analysisLang }'` : ``
-    return `<span class=w-lit ${ lang }>${ addEmphasis(data) }</span>`
+    return `<span class=lit ${ lang }>${ addEmphasis(data) }</span>`
   }
 
   let html = ``
 
   for (const lang in data) {
     const tln = data[lang]
-    html += `<span class=w-lit lang='${ lang }'>${ addEmphasis(tln) }</span>`
+    html += `<span class=lit lang='${ lang }'>${ addEmphasis(tln) }</span>`
   }
 
   return html
diff --git a/src/words/morphemes.js b/src/words/morphemes.js
index f5dca4a..e3defab 100644
--- a/src/words/morphemes.js
+++ b/src/words/morphemes.js
@@ -8,7 +8,7 @@ export default function createMorphemes(data, { targetLang }) {
 
   for (const ortho in data) {
     const morphemes = addEmphasis(replaceHyphens(data[ortho]))
-    html += `<span class=w-m data-ortho='${ ortho }' ${ lang }>${ morphemes }</span>`
+    html += `<span class=morphemes data-ortho='${ ortho }' ${ lang }>${ morphemes }</span>`
   }
 
   return html
diff --git a/src/words/transcription.js b/src/words/transcription.js
index bce147f..ff95c74 100644
--- a/src/words/transcription.js
+++ b/src/words/transcription.js
@@ -7,7 +7,7 @@ export default function createTranscription(data, { targetLang }) {
 
   for (const ortho in data) {
     const txn = data[ortho]
-    html += `<span class=w-txn data-ortho='${ ortho }' ${ lang }>${ addEmphasis(txn) }</span>`
+    html += `<span class=w data-ortho='${ ortho }' ${ lang }>${ addEmphasis(txn) }</span>`
   }
 
   return html
diff --git a/test/words.test.js b/test/words.test.js
index c521046..9b96bb6 100644
--- a/test/words.test.js
+++ b/test/words.test.js
@@ -6,6 +6,7 @@ import parse               from './utilities/convertAndParse.js'
 
 import {
   findElement,
+  findElements,
   getTagName,
 } from '@web/parse5-utils'
 
@@ -149,7 +150,7 @@ describe(`words`, function() {
       `
 
       const { dom }   = await parse(scription)
-      const morphemes = findElementByClass(dom, `w-m`)
+      const morphemes = findElementByClass(dom, `morphemes`)
 
       expect(getTextContent(morphemes)).to.equal(`ni‑na‑ku‑pend‑a`) // non-breaking hyphens
 
@@ -164,7 +165,7 @@ describe(`words`, function() {
       `
 
       const { dom, html } = await parse(scription)
-      const [mod, swad]   = findElementsByClass(dom, `w-m`)
+      const [mod, swad]   = findElementsByClass(dom, `morphemes`)
 
       expect(getTextContent(mod)).to.equal(`waxt‑qungu`) // non-breaking hypens
       expect(getTextContent(swad)).to.equal(`wašt‑ʔungu`) // non-breaking hypens
@@ -200,16 +201,86 @@ describe(`words`, function() {
       I love you
       `
 
-      const { dom, html } = await parse(scription)
-      const morphemes     = findElementByClass(dom, `w-gl`)
+      const { dom }   = await parse(scription)
+      const morphemes = findElementByClass(dom, `glosses`)
 
       expect(getTextContent(morphemes)).to.equal(`1SG.SUBJ‑PRES‑2SG.OBJ‑love‑IND`) // non-breaking hyphens
 
     })
 
-    it(`supports multiple analysis languages`)
+    it(`supports multiple analysis languages`, async function() {
+
+      const scription = `
+      \\txn   ninakupenda
+      \\m     ni-na-ku-pend-a
+      \\gl-en 1SG.SUBJ-PRES-2SG.OBJ-love-IND
+      \\gl-sp 1SG.SJ-PRES-2SG.OJ-amar-IND
+      \\tln   I love you
+      `
+
+      const { dom }                   = await parse(scription)
+      const [firstGloss, secondGloss] = findElementsByClass(dom, `glosses`)
+
+      expect(getTextContent(firstGloss)).to.include(`SUBJ`)
+      expect(getTextContent(secondGloss)).to.include(`SJ`)
+
+    })
+
+    it(`supports emphasis`, async function() {
+
+      const scription = `
+      ninakupenda
+      ni-na-ku-pend-a
+      1SG.SUBJ-PRES-*2SG.OBJ*-love-IND
+      I love you
+      `
+
+      const { dom } = await parse(scription)
+      const b       = findElement(dom, el => getTagName(el) === `b`)
+
+      expect(getTextContent(b)).to.equal(`2SG.OBJ`)
+
+    })
+
+    it(`option: glosses = false (default)`, async function() {
+
+      const scription = `
+      ninakupenda
+      ni-na-ku-pend-a
+      1SG.SUBJ-PRES-2SG.OBJ-love-IND
+      I love you
+      `
+
+      const { dom } = await parse(scription)
+      const abbr    = findElement(dom, el => getTagName(el) === `abbr`)
+
+      expect(abbr).not.to.exist
+
+    })
+
+    it(`option: glosses = true`, async function() {
+
+      const scription = `
+      ninakupenda
+      ni-na-ku-pend-a
+      1SG.SUBJ-PRES-2SG.OBJ-love-IND
+      I love you
+      `
+
+      const { dom }     = await parse(scription, { glosses: true })
+      const glosses     = findElements(dom, el => getTagName(el) === `abbr`)
+      const [num, caps] = glosses
+
+      expect(getTextContent(num)).to.equal(`1`)
+      expect(getTextContent(caps)).to.equal(`SG`)
+
+      expect(glosses).to.have.length(8)
+
+    })
+
+    it(`option: glosses = array`)
 
-    it(`supports emphasis`)
+    it(`option: glosses = object`)
 
   })