Skip to content

Commit

Permalink
DATA: Improve processing of tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
dwhieb committed Jul 21, 2024
1 parent b10ddef commit 96629ed
Show file tree
Hide file tree
Showing 43 changed files with 11,938 additions and 11,958 deletions.
66 changes: 23 additions & 43 deletions data/Components.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,24 +114,31 @@ class Token {
notes,
orthography,
PA,
pages,
source,
speaker,
UR,
}) {

const isProto = language.includes(`Proto`)

this.bibliography = source

if (pages) {
this.bibliography += `: `
this.bibliography += parsePages(pages)
}

if (form) {
this.form = form.normalize()
if (isProto) this.form = `*${ cleanProto(form) }`
}

this.bibliography = bibliography
if (gloss) this.gloss = gloss
if (notes) this.notes = notes
this.orthography = orthography
if (PA) this.PA = cleanProto(PA)
if (gloss) this.gloss = cleanGloss(gloss)
if (notes) this.notes = notes.normalize()
if (PA) this.PA = cleanProto(PA)
if (speaker) this.speaker = speaker.normalize()
if (UR) this.UR = UR.normalize()
if (UR) this.UR = cleanUR(UR)

}
}
Expand Down Expand Up @@ -446,45 +453,18 @@ export default class Components extends Map {

const cols = Components.columns

// UR
const UR = cleanUR(record[cols.UR])

// Proto-Algonquian
const PA = record[cols.proto]?.normalize()

// Gloss
const gloss = cleanGloss(record[cols.gloss])

// Bibliography
const source = record[cols.sourceCode]

let bibliography = source
const pages = record[cols.pages]

if (pages) {
bibliography += `: `
bibliography += parsePages(record[cols.pages])
}

// Speaker
const speaker = record[cols.speaker]?.normalize()

// Notes
const notes = record[cols.notes]?.normalize()

// Orthography Key
const orthography = record[cols.orthography]

return new Token({
bibliography,
form: record[cols.originalOrthography],
gloss,
bibliography: record[cols.bibliography],
form: record[cols.originalOrthography],
gloss: record[cols.gloss],
language,
notes,
orthography,
PA,
speaker,
UR,
notes: record[cols.notes],
orthography: record[cols.orthography],
PA: record[cols.proto],
pages: record[cols.pages],
source: record[cols.sourceCode],
speaker: record[cols.speaker],
UR: record[cols.UR],
})

}
Expand Down
10 changes: 5 additions & 5 deletions data/json/components/Abenaki.ndjson
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"componentID":"1","ID":"Abenaki-1","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"ɔ̃ben-","PA":"*a·p-","displayForm":"ɔ̃ben-","definition":"","type":"initial","subcategory":"","reduplicated":false,"deverbal":false,"matches":{},"notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","tokens":[{"form":"ôben-","bibliography":"IG1965: 213","gloss":"untie","notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","orthography":"AB:2","PA":"a·p-"}]}
{"componentID":"2","ID":"Abenaki-2","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"ɔ̃jemi-","PA":"*a·t-","displayForm":"ɔ̃jemi-","definition":"","type":"initial","subcategory":"","reduplicated":false,"deverbal":false,"matches":{},"tokens":[{"form":"ôjemi-","bibliography":"IG1965: 214","gloss":"relate, declare","orthography":"AB:2","PA":"a·t-"}]}
{"componentID":"3","ID":"Abenaki-3","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"ɔ̃tl-","PA":"*a·nt-","displayForm":"ɔ̃tl-","definition":"","type":"initial","subcategory":"","reduplicated":false,"deverbal":false,"matches":{},"notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\". MAM: this is a PA initial, so I don't know why it would say that.","tokens":[{"form":"ôtl-","bibliography":"IG1965: 214","gloss":"move, change, afresh, anew","notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","orthography":"AB:2","PA":"a·nt-"},{"form":"ôtto-","bibliography":"IG1965: 214","gloss":"move, change, afresh, anew","notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","orthography":"AB:2","PA":"a·nt-"}]}
{"componentID":"5","ID":"Abenaki-5","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"-gɔ̃bawi","PA":"*-ka·pawi-","displayForm":"-gɔ̃bawi","definition":"","type":"final","subcategory":"AI","specificity":"","primary":true,"secondary":false,"deverbal":false,"matches":{},"tokens":[{"form":"-gan̈ba8i","bibliography":"IG1965: 215","gloss":"stand, be standing","orthography":"AB:2","PA":"-ka·pawi-"}]}
{"componentID":"6","ID":"Abenaki-6","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"-igɔ̃n","displayForm":"-igɔ̃n","definition":"","type":"final","subcategory":"N","specificity":"","primary":true,"secondary":false,"deverbal":false,"matches":{},"notes":"GVM: From context, probably N final? Says \"for -igan read -igan̈n\".","tokens":[{"form":"-igan̈n","bibliography":"IG1965: 219","notes":"GVM: From context, probably N final? Says \"for -igan read -igan̈n\".","orthography":"AB:2"}]}
{"componentID":"1","ID":"Abenaki-1","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"ɔ̃ben-","PA":"*a·p-","displayForm":"ɔ̃ben-","definition":"","type":"initial","subcategory":"","reduplicated":false,"deverbal":false,"matches":{},"notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","tokens":[{"bibliography":"IG1965: 213","form":"ôben-","gloss":"untie","notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","PA":"a·p-"}]}
{"componentID":"2","ID":"Abenaki-2","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"ɔ̃jemi-","PA":"*a·t-","displayForm":"ɔ̃jemi-","definition":"","type":"initial","subcategory":"","reduplicated":false,"deverbal":false,"matches":{},"tokens":[{"bibliography":"IG1965: 214","form":"ôjemi-","gloss":"relate, declare","PA":"a·t-"}]}
{"componentID":"3","ID":"Abenaki-3","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"ɔ̃tl-","PA":"*a·nt-","displayForm":"ɔ̃tl-","definition":"","type":"initial","subcategory":"","reduplicated":false,"deverbal":false,"matches":{},"notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\". MAM: this is a PA initial, so I don't know why it would say that.","tokens":[{"bibliography":"IG1965: 214","form":"ôtl-","gloss":"move, change, afresh, anew","notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","PA":"a·nt-"},{"bibliography":"IG1965: 214","form":"ôtto-","gloss":"move, change, afresh, anew","notes":"GVM: Looks like an initial, but is listed as \"(TA, TI)\"","PA":"a·nt-"}]}
{"componentID":"5","ID":"Abenaki-5","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"-gɔ̃bawi","PA":"*-ka·pawi-","displayForm":"-gɔ̃bawi","definition":"","type":"final","subcategory":"AI","specificity":"","primary":true,"secondary":false,"deverbal":false,"matches":{},"tokens":[{"bibliography":"IG1965: 215","form":"-gan̈ba8i","gloss":"stand, be standing","PA":"-ka·pawi-"}]}
{"componentID":"6","ID":"Abenaki-6","language":"Abenaki","displayLanguage":"Abenaki","dialect":"","Glottocode":"aben1250","ISO":"","form":"-igɔ̃n","displayForm":"-igɔ̃n","definition":"","type":"final","subcategory":"N","specificity":"","primary":true,"secondary":false,"deverbal":false,"matches":{},"notes":"GVM: From context, probably N final? Says \"for -igan read -igan̈n\".","tokens":[{"bibliography":"IG1965: 219","form":"-igan̈n","gloss":"","notes":"GVM: From context, probably N final? Says \"for -igan read -igan̈n\"."}]}
Loading

0 comments on commit 96629ed

Please sign in to comment.