Skip to content

Commit

Permalink
NEW: Diacritic-(In)Sensitive Searching (#159)
Browse files Browse the repository at this point in the history
closes #159
  • Loading branch information
dwhieb committed Jul 18, 2024
1 parent 8dd1231 commit 6cd8231
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 44 deletions.
27 changes: 18 additions & 9 deletions data/Database.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import Components from './Components.js'
import Languages from './Languages.js'
import Normalizer from '../scripts/Normalizer.js'

export default class Database {

Expand All @@ -17,7 +18,14 @@ export default class Database {
this.components = Array.from(this.index.values())
}

search(query, langQuery) {
search(query, {
diacritics,
language: langQuery,
} = {}) {

const normalize = new Normalizer({ diacritics })
const q = normalize(query)

// NB: Be careful not to alter the original array here.
return Array.from(this.components).filter(function({
definition,
Expand All @@ -30,23 +38,24 @@ export default class Database {

if (langQuery && langQuery !== `all` && langQuery !== language) return false

return definition?.toLowerCase().includes(query)
|| form?.toLowerCase().includes(query)
|| PA?.toLowerCase().includes(query)
|| UR?.toLowerCase().includes(query)
return normalize(definition)?.includes(q)
|| normalize(form)?.includes(q)
|| normalize(PA)?.includes(q)
|| normalize(UR)?.includes(q)
|| tokens.some(function({
form,
gloss,
PA,
UR,
}) {
return form?.toLowerCase().includes(query)
|| gloss?.toLowerCase().includes(query)
|| PA?.toLowerCase().includes(query)
|| UR?.toLowerCase().includes(query)
return normalize(form)?.includes(q)
|| normalize(gloss)?.includes(q)
|| normalize(PA)?.includes(q)
|| normalize(UR)?.includes(q)
})

})

}

}
16 changes: 16 additions & 0 deletions pages/Search/Search.css
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ body {

}

/* The "Match Diacritics" checkbox in the search form. */
#diacritics-box {
all: revert; /* Use default browser styles. Declared first so that it does not reset the declarations that follow. */
cursor: pointer;
/* use accent color */
}

/* Wrapper around the "Match Diacritics" label + checkbox: lays them out in a row, centered on the cross axis. */
/* NOTE(review): these properties only apply to a flex container; presumably `display: flex` comes from a rule not shown here. Confirm. */
.diacritics-field {
align-items: center;
flex-direction: row;
}

/* NB: The .sticky class is applied conditionally in the footer Handlebars template. */
.footer {

Expand All @@ -88,10 +99,15 @@ h2 {
position: sticky;
}

/* Show the pointer cursor when hovering over any label. */
label {
cursor: pointer;
}

#language-select {

all: revert; /* Use default browser styles. */
border-radius: var(--border-radius);
cursor: pointer;
font-family: 'Times New Roman', serif; /* This is necessary because the adjustments to ligatures don't apply within the <select> element. */
font-size: 100%;
line-height: 1.5;
Expand Down
13 changes: 11 additions & 2 deletions pages/Search/Search.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

<search class=sticky>
<form class=search-form method=get>

<label for=search-box>Quick Search</label>
<input
autocomplete=on
Expand All @@ -15,18 +16,26 @@
spellcheck=false
type=search
>
<span class=help-text>Searches forms (in any orthography) and definitions for a match anywhere within the string. Search is not case sensitive. Leave blank to display all components.</span>
<span class=help-text>Searches forms (in any orthography) and definitions for a match anywhere within the string. Search is case insensitive. Leave blank to display all components.</span>

<div class=diacritics-field>
<label for=diacritics-box>Match Diacritics</label>
<input id=diacritics-box name=diacritics type=checkbox>
</div>

<label for=language-select>Language</label>
<select class=text-box name=language id=language-select>
<option selected value=all>All languages</option>
{{#each languages as |lang| }}
<option value='{{ key }}'>{{ name }} {{#if autonyms }}({{#each autonyms }}{{ this }}{{#unless @last }}, {{/unless}}{{/each}}){{/if}}</option>
{{/each}}
</select>

<div class=search-controls>
<button class='button green' id=search-button type=submit>Search</button>
<button class='button blue' id=reset-button type=reset>Reset</button>
</div>

</form>
</search>

Expand Down Expand Up @@ -137,7 +146,7 @@
</thead>
<tbody>
{{#each results as |result| }}
<tr>
<tr id='{{ ID }}'>
<td class=language>{{ displayLanguage }}</td>
<td class=form>{{#if form }}<a class=link href='/components/{{ ID }}'>{{ form }}</a>{{else}}{{/if}}</td>
<td class=UR>{{#if UR }}<a class=link href='/components/{{ ID }}'>/{{ UR }}/</a>{{else}}{{/if}}</td>
Expand Down
13 changes: 8 additions & 5 deletions pages/Search/Search.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,18 @@ export function Search(req, res) {
let {
limit = 100,
offset = 0,
q,
sort = ``,
} = req.query

// Search
const {
diacritics,
language,
q,
} = req.query

q = q.trim().toLowerCase()
// Search

let results = req.app.db.search(q, req.query.language)
let results = req.app.db.search(q.trim(), { diacritics, language })
const numTotalResults = results.length

// Sort
Expand All @@ -59,7 +62,7 @@ export function Search(req, res) {
return direction === `ascending` ? comparison : comparison * -1
})

return comparisons.reduce((state, comparison) => state ? state : comparison, 0)
return comparisons.reduce((state, comparison) => (state ? state : comparison), 0)

})
}
Expand Down
62 changes: 44 additions & 18 deletions pages/Search/Search.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,36 @@ describe(`Search`, function() {
cy.location(`search`).should(`eq`, ``)
})

it(`case insensitive`, function() {
it(`case insensitive (default)`, function() {
cy.visit(`/search`)
cy.get(`#search-box`).type(`ATIMW`)
cy.get(`form`).submit()
cy.get(`.num-results`).should(`include.text`, 2)
cy.get(`#results tbody tr`).should(`have.length`, 2)
})

// Might not implement this
// NB: Skipped placeholder. The body only submits an upper-case query and makes no assertions yet.
it.skip(`case sensitive`, function() {
cy.visit(`/search`)
cy.get(`#search-box`).type(`ATIMW`)
cy.get(`form`).submit()
})

// Submits a query containing a diacritic without touching the "Match Diacritics" checkbox,
// then expects the result count text to include `of 9`.
it(`diacritic-insensitive (default)`, function() {
cy.visit(`/search`)
cy.get(`#search-box`).type(`aštimw`)
cy.get(`form`).submit()
cy.get(`.num-results`).should(`include.text`, `of 9`)
})

// Checks the "Match Diacritics" checkbox before submitting the same query,
// then expects the result count text to include `of 1`.
it(`diacritic-sensitive`, function() {
cy.visit(`/search`)
cy.get(`#diacritics-box`).check()
cy.get(`#search-box`).type(`aštimw`)
cy.get(`form`).submit()
cy.get(`.num-results`).should(`include.text`, `of 1`)
})

it(`Form (Project)`, function() {
cy.visit(`/search`)
cy.get(`#search-box`).type(`aamæhk`)
Expand Down Expand Up @@ -121,13 +143,6 @@ describe(`Search`, function() {
cy.get(`#results tbody tr`).should(`have.length`, 1)
})

it(`saves the user's selection across visits`, function() {
cy.visit(`/search`)
cy.get(`#language-select`).select(`Cree_East`)
cy.reload()
cy.get(`#language-select`).should(`have.value`, `Cree_East`)
})

})

describe(`Pagination`, function() {
Expand Down Expand Up @@ -177,20 +192,31 @@ describe(`Search`, function() {

})

describe(`Sorting`, function() {

// NB: Currently testing using querystring.
// TODO: Once column sorting UI is implemented, test with that instead.
it(`single-column sort`, function() {
describe(`Settings`, function() {

cy.visit(`/search?sort=-form&q=`)
it(`saves the user's selections across visits`, function() {
cy.visit(`/search`)
cy.get(`#diacritics-box`).check()
cy.get(`#language-select`).select(`Cree_East`)
cy.reload()
cy.get(`#diacritics-box`).should(`be.checked`)
cy.get(`#language-select`).should(`have.value`, `Cree_East`)
})

cy.contains(`th`, `Form`).first().should(`have.attr`, `aria-sort`)
})

cy.get(`#results td`).first().should(`have.text`, `Arapaho`)
.next()
.should(`have.text`, `θooxoneeʔ-`)
describe(`Sorting`, function() {

it(`single-column sort`, function() {
cy.visit(`/search`)
cy.get(`#search-box`).type(`dog`)
cy.get(`form`).submit()
cy.contains(`button`, `Form`).click()
cy.get(`#results tbody tr`).first().should(`have.attr`, `id`, `Arapaho-607`)
cy.get(`#results tbody tr`).last().should(`have.attr`, `id`, `Cree_Innu-70`)
cy.contains(`button`, `Form`).click()
cy.get(`#results tbody tr`).first().should(`have.attr`, `id`, `Cree_Innu-70`)
cy.get(`#results tbody tr`).last().should(`have.attr`, `id`, `Meskwaki-383`)
})

// Wait to test this until Advanced Search is implemented.
Expand Down
26 changes: 16 additions & 10 deletions pages/Search/scripts/SearchForm.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,30 @@ export default class SearchForm {

initialize() {

this.reset = document.getElementById(`reset-button`)
this.language = document.getElementById(`language-select`)
this.search = document.getElementById(`search-box`)
this.diacritics = document.getElementById(`diacritics-box`)
this.reset = document.getElementById(`reset-button`)
this.language = document.getElementById(`language-select`)
this.search = document.getElementById(`search-box`)

// Populate search form from querystring / local storage.
// NOTE: Query parameters take precedence over local storage.

const url = new URL(location.href)
const query = url.searchParams.get(`q`)
const language = url.searchParams.get(`language`) || localStorage.getItem(`language`)
const url = new URL(location.href)
const diacritics = Boolean(url.searchParams.get(`diacritics`)) || localStorage.getItem(`diacritics`) === `true`
const language = url.searchParams.get(`language`) ?? localStorage.getItem(`language`)
const query = url.searchParams.get(`q`)

this.diacritics.checked = diacritics

if (query) this.search.value = query
if (language) this.language.value = language
if (query) this.search.value = query

this.search.focus()

// Add event listeners

this.language.addEventListener(`input`, this.saveLanguage.bind(this))
this.diacritics.addEventListener(`input`, this.saveSettings.bind(this))
this.language.addEventListener(`input`, this.saveSettings.bind(this))
this.reset.addEventListener(`click`, this.resetForm.bind(this))

}
Expand All @@ -46,8 +51,9 @@ export default class SearchForm {

}

saveLanguage(ev) {
localStorage.setItem(`language`, ev.target.value)
saveSettings() {
localStorage.setItem(`diacritics`, this.diacritics.checked)
localStorage.setItem(`language`, this.language.value)
}

}
18 changes: 18 additions & 0 deletions scripts/Normalizer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/* eslint no-constructor-return: "off" */

import removeDiacritics from '../utilities/removeDiacritics.js'

/**
 * Factory for the string-normalizing function used to compare search queries against data.
 *
 * NB: The constructor deliberately returns a function rather than an instance,
 * so `new Normalizer(options)` yields a callable `normalize(str)`.
 */
export default class Normalizer {

  /**
   * @param {Object}  [options]
   * @param {Boolean} [options.diacritics=false] When truthy, diacritics are kept (diacritic-sensitive matching).
   *                                             Otherwise they are stripped before comparison.
   * @returns {Function} A function `str => String|undefined`. Strings are lowercased (and either
   *                     canonically composed or stripped of diacritics); any non-string input yields `undefined`.
   */
  constructor({ diacritics = false } = {}) {

    if (diacritics) {
      return str => {
        // Mirror the diacritic-insensitive branch: non-strings yield `undefined` rather than throwing.
        if (typeof str !== `string`) return
        // Compose to NFC so canonically equivalent spellings (precomposed `š` vs. `s` + U+030C)
        // compare equal. Without this, diacritic-sensitive search depends on how the text was typed.
        return str.toLowerCase().normalize(`NFC`)
      }
    }

    return str => {
      if (typeof str !== `string`) return
      return removeDiacritics(str).toLowerCase()
    }

  }

}
3 changes: 3 additions & 0 deletions utilities/removeDiacritics.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
export default function removeDiacritics(str) {
return str.normalize(`NFD`).replaceAll(/\p{Diacritic}/gv, ``)
}

0 comments on commit 6cd8231

Please sign in to comment.