diff --git a/bin/ingest_aclpub2.py b/bin/ingest_aclpub2.py
index c31776668b..d192c34808 100755
--- a/bin/ingest_aclpub2.py
+++ b/bin/ingest_aclpub2.py
@@ -46,6 +46,10 @@
 #
 # Check things over, then commit and push the changes and synchronize the files.

+# TODO:
+# - check for venue YAML, create/complain if non-existent
+# - add verification model to ensure format is correct
+
 import click
 import yaml
 import re
@@ -142,12 +146,19 @@ def parse_conf_yaml(ingestion_dir: str) -> Dict[str, Any]:
         cover_subtitle == shortbooktitle
     '''
     ingestion_dir = Path(ingestion_dir)
-    if (ingestion_dir / 'conference_details.yml').exists():
-        meta = yaml.safe_load((ingestion_dir / 'conference_details.yml').read_text())
+
+    paths_to_check = [
+        ingestion_dir / 'conference_details.yml',
+        ingestion_dir / 'inputs' / 'conference_details.yml',
+    ]
+    meta = None
+    for path in paths_to_check:
+        if path.exists():
+            meta = yaml.safe_load(path.read_text())
+            break
     else:
-        meta = yaml.safe_load(
-            (ingestion_dir / 'inputs/conference_details.yml').read_text()
-        )
+        raise Exception(f"Can't find conference_details.yml (looked in {paths_to_check})")
+
     meta['month'] = meta['start_date'].strftime('%B')
     meta['year'] = str(meta['start_date'].year)

@@ -175,12 +186,26 @@ def parse_conf_yaml(ingestion_dir: str) -> Dict[str, Any]:


 def parse_paper_yaml(ingestion_dir: str) -> List[Dict[str, str]]:
+    """
+    Reads papers.yml to get metadata. Skips non-archival papers.
+    """
     ingestion_dir = Path(ingestion_dir)
-    if (ingestion_dir / 'conference_details.yml').exists():
-        papers = yaml.safe_load((ingestion_dir / 'papers.yml').read_text())
+
+    paths_to_check = [
+        ingestion_dir / 'papers.yml',
+        ingestion_dir / 'inputs' / 'papers.yml',
+    ]
+    papers = None
+    for path in paths_to_check:
+        if path.exists():
+            papers = yaml.safe_load(path.read_text())
+            break
     else:
-        papers = yaml.safe_load((ingestion_dir / 'input/papers.yml').read_text())
+        raise Exception("Can't find papers.yml (looked in root dir and under inputs/)")
+
+    # remove non-archival papers
+    papers = [p for p in papers if p.get('archival', True)]
+
     return papers

@@ -194,42 +219,42 @@ def add_paper_nums_in_paper_yaml(
     start, end = 1, 0
     for paper in papers:
-        if 'archival' not in paper.keys():
-            paper.update({'archival': '1'})
-        assert 'archival' in paper.keys(), f'{paper["id"]} is missing key archival'
         assert 'file' in paper.keys(), f'{paper["id"]} is missing key file'
-        if (
-            paper['archival'] == 1
-            or paper['archival'] is True
-            or paper['archival'] == '1'
-        ):
-            paper_id = str(paper['id'])
-            # if 'file' not in paper.keys():
-            #     print(f'{paper_id} does not have file key but archive is {paper["archival"]}')
-            #     paper_name = paper['title']
-            # else:
-            paper_path = paper['file']
-            paper_need_read_path = None
-            # TODO: we should just be able to read paper_path directly, and throw an
-            # error if it doesn't exist
-            if (path := ingestion_dir / "watermarked_pdfs" / paper_path).exists():
-                paper_need_read_path = str(path)
-            elif (
-                path := ingestion_dir / "watermarked_pdfs" / f"{paper_id}.pdf"
-            ).exists():
+        paper_id = str(paper['id'])
+        # if 'file' not in paper.keys():
+        #     print(f'{paper_id} does not have file key but archive is {paper["archival"]}')
+        #     paper_name = paper['title']
+        # else:
+
+        paper_path = paper['file']
+
+        # TODO: we should just be able to read paper_path directly, and throw an
+        # error if it doesn't exist
+        paper_need_read_path = None
+        paths_to_check = [
+            ingestion_dir / "watermarked_pdfs" / paper_path,
+            ingestion_dir / "watermarked_pdfs" / f"{paper_id}.pdf",
+            ingestion_dir / "build" / "watermarked_pdfs" / paper_path,
+            ingestion_dir / "build" / "watermarked_pdfs" / f"{paper_id}.pdf",
+        ]
+        paper_need_read_path = None
+        for path in paths_to_check:
+            if path.exists():
                 paper_need_read_path = str(path)
+                break
+        else:
+            raise Exception(
+                f"* Fatal: could not find paper ID {paper_id} ({paths_to_check})"
+            )

-            assert (
-                paper_need_read_path is not None
-            ), f"* Fatal: could not find {paper_id} (path was {paper_path}, {path})"
+        pdf = open(paper_need_read_path, 'rb')
+        pdf_reader = PyPDF2.PdfReader(pdf)
+        num_of_pages = len(pdf_reader.pages)
+        start = end + 1
+        end = start + num_of_pages - 1
+        paper['pages'] = f'{start}-{end}'

-            pdf = open(paper_need_read_path, 'rb')
-            pdf_reader = PyPDF2.PdfReader(pdf)
-            num_of_pages = len(pdf_reader.pages)
-            start = end + 1
-            end = start + num_of_pages - 1
-            paper['pages'] = f'{start}-{end}'

     return papers

@@ -342,6 +367,7 @@ def paper2xml(
             'semantic_scholar_id',
             'username']
     '''
+
     fields = [
         'title',
         'author',
@@ -351,7 +377,7 @@ def paper2xml(
         'doi',
         'language',
     ]
-    paper = make_simple_element('paper', attrib={'id': str(paper_num)})
+    paper = make_simple_element('paper', attrib={"id": str(paper_num)})
     for field in fields:
         if field == 'author':
             authors = paper_item['authors']
@@ -372,15 +398,19 @@ def paper2xml(
         if field == 'url':
             value = f'{anthology_id}'
         elif field == 'abstract':
-            value = paper_item['abstract'].replace('\n', '')
+            value = None
+            if "abstract" in paper_item:
+                value = paper_item["abstract"].replace('\n', '')
         elif field == 'title':
             value = paper_item[field]
         elif field == 'pages':
             value = paper_item[field]
         else:
             continue
+
         try:
-            make_simple_element(field, text=value, parent=paper)
+            if value is not None:
+                make_simple_element(field, text=value, parent=paper)
         except Exception:
             print(
                 f"Couldn't process {paper} for {anthology_id}, please check the abstract in the papers.yaml file for this paper",
@@ -450,16 +480,39 @@ def copy_pdf_and_attachment(
     venue_name = meta['anthology_venue_id'].lower()
     volume_name = meta['volume_name'].lower()

-    pdfs_dest_dir = create_dest_path(pdfs_dir, venue_name)
-    pdfs_src_dir = os.path.join(meta['path'], 'watermarked_pdfs')
+    pdfs_src_dir = None
+    paths_to_check = [
+        Path(meta['path']) / 'watermarked_pdfs',
+        Path(meta['path']) / 'build' / 'watermarked_pdfs',
+    ]
+    for path in paths_to_check:
+        if path.exists() and path.is_dir():
+            pdfs_src_dir = path
+            break
+    else:
+        raise FileNotFoundError(f"Could not find watermarked PDFs in {paths_to_check}")
+
+    pdfs_dest_dir = Path(create_dest_path(pdfs_dir, venue_name))
     # copy proceedings.pdf
-    proceedings_pdf_src_path = os.path.join(meta['path'], 'proceedings.pdf')
-    proceedings_pdf_dest_path = None
-    if os.path.exists(proceedings_pdf_src_path):
-        proceedings_pdf_dest_path = (
-            os.path.join(pdfs_dest_dir, f"{collection_id}-{volume_name}") + ".pdf"
+    proceedings_pdf_src_path = None
+    paths_to_check = [
+        Path('proceedings.pdf'),
+        Path("build") / 'proceedings.pdf',
+    ]
+    for path in paths_to_check:
+        if path.exists():
+            proceedings_pdf_src_path = str(path)
+            break
+    else:
+        print(
+            f"Warning: could not find proceedings.pdf in {paths_to_check}",
+            file=sys.stderr,
         )
+
+    proceedings_pdf_dest_path = None
+    if proceedings_pdf_src_path is not None:
+        proceedings_pdf_dest_path = pdfs_dest_dir / f"{collection_id}-{volume_name}.pdf"
         if dry_run:
             print(
                 f'would\'ve moved {proceedings_pdf_src_path} to {proceedings_pdf_dest_path}'
             )
@@ -476,11 +529,24 @@ def copy_pdf_and_attachment(
         "attachments": [],
     }

-    frontmatter_src_path = 'front_matter.pdf'
-    if os.path.exists(frontmatter_src_path):
-        frontmatter_dest_path = (
-            os.path.join(pdfs_dest_dir, f"{collection_id}-{volume_name}") + '.0.pdf'
+    frontmatter_src_path = None
+    paths_to_check = [
+        Path('front_matter.pdf'),
+        Path('0.pdf'),
+        Path("build") / 'front_matter.pdf',
+        Path("build") / '0.pdf',
+    ]
+    for path in paths_to_check:
+        if path.exists():
+            frontmatter_src_path = str(path)
+            break
+    else:
+        print(
+            f"Warning: could not find front matter in {paths_to_check}", file=sys.stderr
         )
+
+    if frontmatter_src_path is not None:
+        frontmatter_dest_path = pdfs_dest_dir / f"{collection_id}-{volume_name}.0.pdf"
         if dry_run:
             print(f'would\'ve moved {frontmatter_src_path} to {frontmatter_dest_path}')
         if not dry_run:
@@ -489,6 +555,7 @@
         # create the PDF entry so that we'll get
         volume[0]['pdf'] = frontmatter_dest_path

+    paper_num = 0
     for i, paper in enumerate(papers):
         # archival papers only
         if 'archival' not in paper.keys():
@@ -509,23 +576,21 @@
             # paper_name = paper['file']
         if paper_name != '' or paper_name is not None:
             paper_id = str(paper['id'])
-            paper_num = i + 1
+            paper_num += 1
             paper_id_full = f'{collection_id}-{volume_name}.{paper_num}'

             pdf_src_path = None
-            if os.path.exists(os.path.join(pdfs_src_dir, paper_name)):
-                pdf_src_path = os.path.join(pdfs_src_dir, paper_name)
-            elif os.path.exists(os.path.join(pdfs_src_dir, f'{paper_id}.pdf')):
-                pdf_src_path = os.path.join(pdfs_src_dir, f'{paper_id}.pdf')
+            if (pdfs_src_dir / paper_name).exists():
+                pdf_src_path = pdfs_src_dir / paper_name
+            elif (pdfs_src_dir / f'{paper_id}.pdf').exists():
+                pdf_src_path = pdfs_src_dir / f'{paper_id}.pdf'
             assert (
                 pdf_src_path
-            ), f"Couldn't find {paper_name}/{paper_id} in {pdfs_src_dir}"
-            pdf_dest_path = os.path.join(
-                pdfs_dest_dir, f"{collection_id}-{volume_name}.{paper_num}.pdf"
-            )
+            ), f"Couldn't find {paper_name} or {paper_id} in {pdfs_src_dir}"
+            pdf_dest_path = pdfs_dest_dir / f"{paper_id_full}.pdf"
             if dry_run:
-                print(f'would\'ve moved {pdf_src_path} to {pdf_dest_path}')
+                print(f"would've moved {pdf_src_path} to {pdf_dest_path}")
             if not dry_run:
                 maybe_copy(pdf_src_path, pdf_dest_path)

@@ -536,22 +601,35 @@
             }

             # copy attachments
-            # TODO: skipping attachments because full of non-publishable stuff
-            if False and 'attachments' in paper:
+            if 'attachments' in paper:
                 attachs_dest_dir = create_dest_path(attachments_dir, venue_name)
-                attachs_src_dir = os.path.join(meta['path'], 'attachments')
-                assert os.path.exists(
-                    attachs_src_dir
-                ), f'paper {i, paper_name} contains attachments but attachments folder was not found'
+                attachs_src_dir = Path(meta['path']) / 'attachments'
+                # assert (
+                #     attachs_src_dir.exists()
+                # ), f'paper {i, paper_name} contains attachments but attachments folder was not found'

                 for attachment in paper['attachments']:
-                    print("ATTACH", paper_id_full, attachment)
-                    file_path = attachment.get('file', None)
+                    file_path = Path(attachment.get('file', None))
                     if file_path is None:
                         continue
-                    attach_src_path = attachs_src_dir + '/' + file_path
-                    attach_src_extension = attach_src_path.split(".")[-1]
+                    attach_src_path = None
+                    paths_to_check = [
+                        attachs_src_dir / file_path,
+                        attachs_src_dir / file_path.name,
+                    ]
+                    for path in paths_to_check:
+                        if path.exists():
+                            attach_src_path = str(path)
+                            break
+                    else:
+                        print(
+                            f"Warning: paper {paper_id} attachment {file_path} not found, skipping",
+                            file=sys.stderr,
+                        )
+                        continue
+
+                    attach_src_extension = attach_src_path.split(".")[-1]
type_ = attachment['type'].replace(" ", "") file_name = f'{collection_id}-{volume_name}.{paper_num}.{type_}.{attach_src_extension}' @@ -567,6 +645,7 @@ def copy_pdf_and_attachment( ) else: maybe_copy(attach_src_path, attach_dest_path) + print(f"Attaching {attach_dest_path}/{type_} to {paper_num}") volume[paper_num]['attachments'].append( (attach_dest_path, type_) ) @@ -767,10 +846,13 @@ def main(ingestion_dir, pdfs_dir, attachments_dir, dry_run, anthology_dir, inges volume_full_id, meta = process_proceeding( ingestion_dir, anthology_datadir, venue_index, venue_keys ) + + # Load the papers.yaml file, skipping non-archival papers papers = parse_paper_yaml(ingestion_dir) # print(f'original paper {papers[0]}') + + # add page numbering by parsing the PDFs papers = add_paper_nums_in_paper_yaml(papers, ingestion_dir) - # print(f'updated paper {papers[0]}') ( volume, diff --git a/bin/ingest_mitpress.py b/bin/ingest_mitpress.py index 70449675e2..d0bed2254f 100755 --- a/bin/ingest_mitpress.py +++ b/bin/ingest_mitpress.py @@ -424,7 +424,6 @@ def sort_papers_by_page(paper_tuple): paper_id = max(paper_id, int(paper.attrib["id"])) paper_id += 1 - print(f"Setting paper_id to {paper_id}") anth_id = f"{collection_id}-{issue}.{paper_id}" diff --git a/data/xml/2020.aacl.xml b/data/xml/2020.aacl.xml index 395dc1fc60..e1d3800ffe 100644 --- a/data/xml/2020.aacl.xml +++ b/data/xml/2020.aacl.xml @@ -1549,7 +1549,7 @@ We introduce fairseq S2T, a fairseq extension for speech-to-text (S2T) modeling tasks such as end-to-end speech recognition and speech-to-text translation. It follows fairseq’s careful design for scalability and extensibility. We provide end-to-end workflows from data pre-processing, model training to offline (online) inference. We implement state-of-the-art RNN-based as well as Transformer-based models and open-source detailed training recipes. Fairseq’s machine translation models and language models can be seamlessly integrated into S2T workflows for multi-task learning or transfer learning. Fairseq S2T is available at https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text. 2020.aacl-demo.6 wang-etal-2020-fairseq - pytorch/fairseq + pytorch/fairseq LibriSpeech MuST-C diff --git a/data/xml/2020.emnlp.xml b/data/xml/2020.emnlp.xml index cdffea717b..8f1296d816 100644 --- a/data/xml/2020.emnlp.xml +++ b/data/xml/2020.emnlp.xml @@ -5911,6 +5911,7 @@ 10.18653/v1/2020.emnlp-main.392 <fixed-case>FINDINGS</fixed-case> <fixed-case>OF</fixed-case> <fixed-case>THE</fixed-case> <fixed-case>IWSLT</fixed-case> 2023 <fixed-case>EVALUATION</fixed-case> <fixed-case>CAMPAIGN</fixed-case> - SwetaAgrawalUmd - AntoniosAnastasopoulosGmu - LuisaBentivogliFbk + SwetaAgrawalUMD + AntoniosAnastasopoulosGMU + LuisaBentivogliFBK OndřejBojarCharles U. ClaudiaBorgU. Malta - MarineCarpuatUmd - RoldanoCattoniFbk - MauroCettoloFbk + MarineCarpuatUMD + RoldanoCattoniFBK + MauroCettoloFBK MingdaChenMeta - WilliamChenCmu - KhalidChoukriElda - AlexandraChronopoulouLmu - AnnaCurreyAws - ThierryDeclerckDfki + WilliamChenCMU + KhalidChoukriELDA + AlexandraChronopoulouLMU + AnnaCurreyAWS + ThierryDeclerckDFKI QianqianDongBytedance - KevinDuhJhu + KevinDuhJHU YannickEstèveAvignon U. - MarcelloFedericoAws + MarcelloFedericoAWS SouhirGahbicheAirbus BarryHaddowU. Edinburgh - BenjaminHsuAws - PhuMon HtutAws + BenjaminHsuAWS + PhuMon HtutAWS HirofumiInagumaMeta DávidJavorskýCharles U. - JohnJudgeDcu - YasumasaKanoNaist + JohnJudgeDCU + YasumasaKanoNAIST TomKoBytedance RishuKumarCharles U. 
PengweiLiMeta XutaiMaMeta - PrashantMathurAws + PrashantMathurAWS EvgenyMatusovAppTek - PaulMcNameeJhu + PaulMcNameeJHU JohnP. McCraeU. Galway - KentonMurrayJhu - MariaNadejdeAws - SatoshiNakamuraNaist - MatteoNegriFbk + KentonMurrayJHU + MariaNadejdeAWS + SatoshiNakamuraNAIST + MatteoNegriFBK HaNguyenAvignon U. - JanNiehuesKit - XingNiuAws + JanNiehuesKIT + XingNiuAWS AtulKr. OjhaU. Galway JohnE. OrtegaNortheastern U. ProyagPalU. Edinburgh JuanPinoMeta - Lonnekevan der PlasIdiap + Lonnekevan der PlasIDIAP PeterPolákCharles U. - ElijahRippethUmd - ElizabethSaleskyJhu - JiatongShiCmu + ElijahRippethUMD + ElizabethSaleskyJHU + JiatongShiCMU MatthiasSperberApple SebastianStükerZoom - KatsuhitoSudohNaist + KatsuhitoSudohNAIST YunTangMeta - BrianThompsonAws + BrianThompsonAWS KevinTranMeta MarcoTurchiZoom - AlexWaibelCmu + AlexWaibelCMU MingxuanWangBytedance - ShinjiWatanabeCmu + ShinjiWatanabeCMU RodolfoZevallosU. Pompeu Fabra 1-61 This paper reports on the shared tasks organized by the 20th IWSLT Conference. The shared tasks address 9 scientific challenges in spoken language translation: simultaneous and offline translation, automatic subtitling and dubbing, speech-to-speech translation, multilingual, dialect and low-resource speech translation, and formality control. The shared tasks attracted a total of 38 submissions by 31 teams. The growing interest towards spoken language translation is also witnessed by the constantly increasing number of shared task organizers and contributors to the overview paper, almost evenly distributed across industry and academia. @@ -95,12 +95,13 @@ 62-78 We present the ACL 60/60 evaluation sets for multilingual translation of ACL 2022 technical presentations into 10 target languages. This dataset enables further research into multilingual speech translation under realistic recording conditions with unsegmented audio and domain-specific terminology, applying NLP tools to text and speech in the technical domain, and evaluating and improving model robustness to diverse speaker demographics. 
2023.iwslt-1.2 + 2023.iwslt-1.2.dataset.zip salesky-etal-2023-evaluating The <fixed-case>M</fixed-case>ine<fixed-case>T</fixed-case>rans Systems for <fixed-case>IWSLT</fixed-case> 2023 Offline Speech Translation and Speech-to-Speech Translation Tasks YichaoDuUniversity of Science and Technology of China - GuoZhengshengTencent + GuoZhengshengtencent JinchuanTianPeking University ZhiruiZhangTencent AI Lab XingWangTencent @@ -127,7 +128,7 @@ The <fixed-case>USTC</fixed-case>’s Dialect Speech Translation System for <fixed-case>IWSLT</fixed-case> 2023 PanDengUniversity of Science and Technology of China ShihaoChenUniversity of Science and Technology of China - WeitaiZhangUstc + WeitaiZhangUSTC JieZhangUniversity of Science &Technology of China LirongDaiUniversity of Science &Technology of China 102-112 @@ -164,7 +165,7 @@ Enhancing Video Translation Context with Object Labels JeremyGwinnupAir Force Research Laboratory TimAndersonAir Force Research Laboratory - BrianOreAfrl + BrianOreAFRL EricHansenAir Force Research Laboratory KevinDuhJohns Hopkins University 130-137 @@ -218,7 +219,7 @@ <fixed-case>MT</fixed-case> Metrics Correlate with Human Ratings of Simultaneous Speech Translation DominikMacháčekCharles University, MFF UFAL OndřejBojarCharles University, MFF UFAL - RajDabreNict + RajDabreNICT 169-179 There have been several meta-evaluation studies on the correlation between human ratings and offline machine translation (MT) evaluation metrics such as BLEU, chrF2, BertScore and COMET. These metrics have been used to evaluate simultaneous speech translation (SST) but their correlations with human ratings of SST, which has been recently collected as Continuous Ratings (CR), are unclear. In this paper, we leverage the evaluations of candidate systems submitted to the English-German SST task at IWSLT 2022 and conduct an extensive correlation analysis of CR and the aforementioned metrics. Our study reveals that the offline metrics are well correlated with CR and can be reliably used for evaluating machine translation in simultaneous mode, with some limitations on the test set size. We conclude that given the current quality levels of SST, these metrics can be used as proxies for CR, alleviating the need for large scale human evaluation. Additionally, we observe that correlations of the metrics with translation as a reference is significantly higher than with simultaneous interpreting, and thus we recommend the former for reliable evaluation. 2023.iwslt-1.12 @@ -270,13 +271,13 @@ Submission of <fixed-case>USTC</fixed-case>’s System for the <fixed-case>IWSLT</fixed-case> 2023 - Offline Speech Translation Track - XinyuanZhouIflytek + XinyuanZhouiflytek JianweiCuiUniversity of Science and Technology of China - ZhongyiYeIflytek + ZhongyiYeiflytek YichiWangUniversity of Science and Technology of China LuzhenXuUniversity of Science and Technology of China - HanyiZhangIflytek - WeitaiZhangUstc + HanyiZhangiflytek + WeitaiZhangUSTC LirongDaiUniversity of Science and Technology of China 194-201 This paper describes the submissions of the research group USTC-NELSLIP to the 2023 IWSLT Offline Speech Translation competition, which involves translating spoken English into written Chinese. We utilize both cascaded models and end-to-end models for this task. To improve the performance of the cascaded models, we introduce Whisper to reduce errors in the intermediate source language text, achieving a significant improvement in ASR recognition performance. 
For end-to-end models, we propose Stacked Acoustic-and-Textual En- coding extension (SATE-ex), which feeds the output of the acoustic decoder into the textual decoder for information fusion and to prevent error propagation. Additionally, we improve the performance of the end-to-end system in translating speech by combining the SATE-ex model with the encoder-decoder model through ensembling. @@ -287,7 +288,7 @@ <fixed-case>I</fixed-case>2<fixed-case>R</fixed-case>’s End-to-End Speech Translation System for <fixed-case>IWSLT</fixed-case> 2023 Offline Shared Task MuhammadHuzaifahAgency for Science, Technology and Research KyeMin TanInstitute for Infocomm Research, A*STAR - RichengDuanAstar + RichengDuanASTAR 202-210 This paper describes I2R’s submission to the offline speech translation track for IWSLT 2023. We focus on an end-to-end approach for translation from English audio to German text, one of the three available language directions in this year’s edition. The I2R system leverages on pretrained models that have been exposed to large-scale audio and text data for our base model. We introduce several stages of additional pretraining followed by fine-tuning to adapt the system for the downstream speech translation task. The strategy is supplemented by other techniques such as data augmentation, domain tagging, knowledge distillation, and model ensemble, among others. We evaluate the system on several publicly available test sets for comparison. 2023.iwslt-1.16 @@ -319,7 +320,7 @@ SalimaMdhaffarLIA - University of Avignon GaëlleLaperrièreAvignon University LIA LucasMaisonLIA - Avignon University - SameerKhuranaMit + SameerKhuranaMIT YannickEstèveLIA - Avignon University 219-226 This paper describes the ON-TRAC consortium speech translation systems developed for IWSLT 2023 evaluation campaign. Overall, we participated in three speech translation tracks featured in the low-resource and dialect speech translation shared tasks, namely; i) spoken Tamasheq to written French, ii) spoken Pashto to written French, and iii) spoken Tunisian to written English. All our primary submissions are based on the end-to-end speech-to-text neural architecture using a pretrained SAMU-XLSR model as a speech encoder and a mbart model as a decoder. The SAMU-XLSR model is built from the XLS-R 128 in order to generate language agnostic sentence-level embeddings. This building is driven by the LaBSE model trained on multilingual text dataset. This architecture allows us to improve the input speech representations and achieve significant improvements compared to conventional end-to-end speech translation systems. @@ -405,7 +406,7 @@ HengchaoShangHuawei Technologies Co., Ltd. DaimengWeiHuawei Technologies Co., Ltd. MinZhangHuawei - ShiminTaoHuawei + ShiminTaohuawei HaoYangHuawei Co. Ltd 277-282 This paper describes our work on the IWSLT2023 Speech-to-Speech task. Our proposed cascaded system consists of an ensemble of Conformer and S2T-Transformer-based ASR models, a Transformer-based MT model, and a Diffusion-based TTS model. Our primary focus in this competition was to investigate the modeling ability of the Diffusion model for TTS tasks in high-resource scenarios and the role of TTS in the overall S2S task. To this end, we proposed DTS, an end-to-end diffusion-based TTS model that takes raw text as input and generates waveform by iteratively denoising on pure Gaussian noise. Compared to previous TTS models, the speech generated by DTS is more natural and performs better in code-switching scenarios. 
As the training process is end-to-end, it is relatively straightforward. Our experiments demonstrate that DTS outperforms other TTS models on the GigaS2S benchmark, and also brings positive gains for the entire S2S system. @@ -476,15 +477,15 @@ <fixed-case>NAIST</fixed-case> Simultaneous Speech-to-speech Translation System for <fixed-case>IWSLT</fixed-case> 2023 - RyoFukudaNaist - YutaNishikawaNaist + RyoFukudaNAIST + YutaNishikawaNAIST YasumasaKanoNara Institute of Science and Technology - YukaKoNaist + YukaKoNAIST TomoyaYanagitaNara Institute of Science and Technology KosukeDoiNara Institute of Science and Technology - ManaMakinaeNaist - SakrianiSaktiJaist/naist - KatsuhitoSudohNaist + ManaMakinaeNAIST + SakrianiSaktiJAIST/NAIST + KatsuhitoSudohNAIST SatoshiNakamuraNara Institute of Science and Technology 330-340 This paper describes NAIST’s submission to the IWSLT 2023 Simultaneous Speech Translation task: English-to-German, Japanese, Chinese speech-to-text translation and English-to-Japanese speech-to-speech translation. Our speech-to-text system uses an end-to-end multilingual speech translation model based on large-scale pre-trained speech and text models. We add Inter-connections into the model to incorporate the outputs from intermediate layers of the pre-trained speech model and augment prefix-to-prefix text data using Bilingual Prefix Alignment to enhance the simultaneity of the offline speech translation model. Our speech-to-speech system employs an incremental text-to-speech module that consists of a Japanese pronunciation estimation model, an acoustic model, and a neural vocoder. @@ -515,11 +516,11 @@ Tagged End-to-End Simultaneous Speech Translation Training Using Simultaneous Interpretation Data - YukaKoNaist - RyoFukudaNaist - YutaNishikawaNaist + YukaKoNAIST + RyoFukudaNAIST + YutaNishikawaNAIST YasumasaKanoNara Institute of Science and Technology - KatsuhitoSudohNaist + KatsuhitoSudohNAIST SatoshiNakamuraNara Institute of Science and Technology 363-375 Simultaneous speech translation (SimulST) translates partial speech inputs incrementally. Although the monotonic correspondence between input and output is preferable for smaller latency, it is not the case for distant language pairs such as English and Japanese. A prospective approach to this problem is to mimic simultaneous interpretation (SI) using SI data to train a SimulST model. However, the size of such SI data is limited, so the SI data should be used together with ordinary bilingual data whose translations are given in offline. In this paper, we propose an effective way to train a SimulST model using mixed data of SI and offline. The proposed method trains a single model using the mixed data with style tags that tell the model to generate SI- or offline-style outputs. Experiment results show improvements of BLEURT in different latency ranges, and our analyses revealed the proposed model generates SI-style outputs more than the baseline. @@ -580,7 +581,7 @@ Speech Translation with Foundation Models and Optimal Transport: <fixed-case>UPC</fixed-case> at <fixed-case>IWSLT</fixed-case>23 - IoannisTsiamasUpc + IoannisTsiamasUPC GerardI. GállegoUniversitat Politcnica de Catalunya JoseFonollosaUniversitat Politecnica de Catalunya MartaR. 
Costa-jussáMeta AI @@ -627,7 +628,7 @@ KurtMicallefUniversity of Malta AhnafMozib SaminUniversity of Malta AndreaDeMarcoUniversity of Malta - Lonnekevan der PlasIdiap + Lonnekevan der PlasIDIAP ClaudiaBorgUniversity of Malta 433-441 For the 2023 IWSLT Maltese Speech Translation Task, UM-DFKI jointly presents a cascade solution which achieves 0.6 BLEU. While this is the first time that a Maltese speech translation task has been released by IWSLT, this paper explores previous solutions for other speech translation tasks, focusing primarily on low-resource scenarios. Moreover, we present our method of fine-tuning XLS-R models for Maltese ASR using a collection of multi-lingual speech corpora as well as the fine-tuning of the mBART model for Maltese to English machine translation. @@ -636,10 +637,10 @@ <fixed-case>NVIDIA</fixed-case> <fixed-case>N</fixed-case>e<fixed-case>M</fixed-case>o Offline Speech Translation Systems for <fixed-case>IWSLT</fixed-case> 2023 - OleksiiHrinchukNvidia + OleksiiHrinchukNVIDIA VladimirBataevSTC-innovations Ltd EvelinaBakhturinaNvidia - BorisGinsburgNvidia + BorisGinsburgNVIDIA 442-448 This paper provides an overview of NVIDIA NeMo’s speech translation systems for the IWSLT 2023 Offline Speech Translation Task. This year, we focused on end-to-end system which capitalizes on pre-trained models and synthetic data to mitigate the problem of direct speech translation data scarcity. When trained on IWSLT 2022 constrained data, our best En->De end-to-end model achieves the average score of 31 BLEU on 7 test sets from IWSLT 2010-2020 which improves over our last year cascade (28.4) and end-to-end (25.7) submissions. When trained on IWSLT 2023 constrained data, the average score drops to 29.5 BLEU. 2023.iwslt-1.42 diff --git a/data/xml/2023.nlrse.xml b/data/xml/2023.nlrse.xml new file mode 100644 index 0000000000..9fcd1cc7c4 --- /dev/null +++ b/data/xml/2023.nlrse.xml @@ -0,0 +1,149 @@ + + + + + Proceedings of the 1st Workshop on Natural Language Reasoning and Structured Explanations (NLRSE) + BhavanaDalvi Mishra + GregDurrett + PeterJansen + DaniloNeves Ribeiro + JasonWei + Association for Computational Linguistics +
Toronto, Canada
+ June + 2023 + 2023.nlrse-1 + nlrse + + + 2023.nlrse-1.0 + nlrse-2023-natural + + + Knowledge Graph-augmented Language Models for Complex Question Answering + PriyankaSenAmazon + SandeepMavadiaAmazon Alexa + AmirSaffariAmazon + 1-8 + Large language models have shown impressive abilities to reason over input text, however, they are prone to hallucinations. On the other hand, end-to-end knowledge graph question answering (KGQA) models output responses grounded in facts, but they still struggle with complex reasoning, such as comparison or ordinal questions. In this paper, we propose a new method for complex question answering where we combine a knowledge graph retriever based on an end-to-end KGQA model with a language model that reasons over the retrieved facts to return an answer. We observe that augmenting language model prompts with retrieved KG facts improves performance over using a language model alone by an average of 83%. In particular, we see improvements on complex questions requiring count, intersection, or multi-hop reasoning operations. + 2023.nlrse-1.1 + sen-etal-2023-knowledge + + + Exploring the Curious Case of Code Prompts + LiZhangUniversity of Pennsylvania + LiamDuganUniversity of Pennsylvania + HainiuXuUniversity of Pennsylvania + ChrisCallison-burchUniversity of Pennsylvania + 9-17 + Recent work has shown that prompting language models with code-like representations of natural language leads to performance improvements on structured reasoning tasks. However, such tasks comprise only a small subset of all natural language tasks. In our work, we seek to answer whether or not code-prompting is the preferred way of interacting with language models in general. We compare code and text prompts across three popular GPT models (davinci, code-davinci-002, and text-davinci-002) on a broader selection of tasks (e.g., QA, sentiment, summarization) and find that with few exceptions, code prompts do not consistently outperform text prompts. Furthermore, we show that the style of code prompt has a large effect on performance for some (but not all) tasks and that fine-tuning on text instructions leads to better relative performance of code prompts. + 2023.nlrse-1.2 + zhang-etal-2023-exploring + + + A smashed glass cannot be full: Generation of Commonsense Explanations through Prompt-based Few-shot Learning + AndreaZaninelloFondazione Bruno Kessler + BernardoMagniniFBK + 18-29 + We assume that providing explanations is a process to elicit implicit knowledge in human communication, and propose a general methodology to generate commonsense explanations from pairs of semantically related sentences. We take advantage of both prompting applied to large, encoder-decoder pre-trained language models, and few-shot learning techniques, such as pattern-exploiting training. Experiments run on the e-SNLI dataset show that the proposed method achieves state-of-the-art results on the explanation generation task, with a substantial reduction of labelled data. The obtained results open new perspective on a number of tasks involving the elicitation of implicit knowledge. 
+ 2023.nlrse-1.3 + zaninello-magnini-2023-smashed + + + Saliency Map Verbalization: Comparing Feature Importance Representations from Model-free and Instruction-based Methods + NilsFeldhusGerman Research Center for Artificial Intelligence (DFKI) + LeonhardHennigGerman Research Center for Artificial Intelligence (DFKI) + MaximilianNasertGerman Research Center for Artificial Intelligence (DFKI) + ChristopherEbertGerman Research Center for Artificial Intelligence (DFKI) + RobertSchwarzenbergGerman Research Center For Artificial Intelligence (DFKI) + SebastianMllerQuality and Usability Lab, TU Berlin + 30-46 + Saliency maps can explain a neural model’s predictions by identifying important input features. They are difficult to interpret for laypeople, especially for instances with many features. In order to make them more accessible, we formalize the underexplored task of translating saliency maps into natural language and compare methods that address two key challenges of this approach – what and how to verbalize. In both automatic and human evaluation setups, using token-level attributions from text classification tasks, we compare two novel methods (search-based and instruction-based verbalizations) against conventional feature importance representations (heatmap visualizations and extractive rationales), measuring simulatability, faithfulness, helpfulness and ease of understanding. Instructing GPT-3.5 to generate saliency map verbalizations yields plausible explanations which include associations, abstractive summarization and commonsense reasoning, achieving by far the highest human ratings, but they are not faithfully capturing numeric information and are inconsistent in their interpretation of the task. In comparison, our search-based, model-free verbalization approach efficiently completes templated verbalizations, is faithful by design, but falls short in helpfulness and simulatability. Our results suggest that saliency map verbalization makes feature attribution explanations more comprehensible and less cognitively challenging to humans than conventional representations. + 2023.nlrse-1.4 + feldhus-etal-2023-saliency + + + Using Planning to Improve Semantic Parsing of Instructional Texts + VanyaCohenThe University of Texas at Austin + RaymondMooneyUniversity of Texas at Austin + 47-58 + We develop a symbolic planning-based decoder to improve the few-shot semantic parsing of instructional texts. The system takes long-form instructional texts as input and produces sequences of actions in a formal language that enable execution of the instructions. This task poses unique challenges since input texts may contain long context dependencies and ambiguous and domain-specific language. Valid semantic parses also require sequences of steps that constitute an executable plan. We build on recent progress in semantic parsing by leveraging large language models to learn parsers from small amounts of training data. During decoding, our method employs planning methods and domain information to rank and correct candidate parses. To validate our method, we evaluate on four domains: two household instruction-following domains and two cooking recipe interpretation domains. We present results for few-shot semantic parsing using leave-one-out cross-validation. We show that utilizing planning domain information improves the quality of generated plans. Through ablations we also explore the effects of our decoder design choices. 
+ 2023.nlrse-1.5 + cohen-mooney-2023-using + + + Reasoning Circuits: Few-shot Multi-hop Question Generation with Structured Rationales + SaurabhKulshreshthaUniversity of Massachusetts Lowell + AnnaRumshiskyUniversity of Massachusetts Lowell + 59-77 + Multi-hop Question Generation is the task of generating questions which require the reader to reason over and combine information spread across multiple passages employing several reasoning steps. Chain-of-thought rationale generation has been shown to improve performance on multi-step reasoning tasks and make model predictions more interpretable. However, few-shot performance gains from including rationales have been largely observed only in +100B language models, and otherwise require large-scale manual rationale annotation. In this paper, we introduce a new framework for applying chain-of-thought inspired structured rationale generation to multi-hop question generation under a very low supervision regime (8- to 128-shot). We propose to annotate a small number of examples following our proposed multi-step rationale schema, treating each reasoning step as a separate task to be performed by a generative language model. We show that our framework leads to improved control over the difficulty of the generated questions and better performance compared to baselines trained without rationales, both on automatic evaluation metrics and in human evaluation. Importantly, we show that this is achievable with a modest model size. + 2023.nlrse-1.6 + kulshreshtha-rumshisky-2023-reasoning + + + Knowledge-Augmented Language Model Prompting for Zero-Shot Knowledge Graph Question Answering + JinheonBaekKorea Advanced Institute of Science and Technology + Alham FikriAjiMBZUAI + AmirSaffariAmazon + 78-106 + Large Language Models (LLMs) are capable of performing zero-shot closed-book question answering tasks, based on their internal knowledge stored in parameters during pre-training. However, such internalized knowledge might be insufficient and incorrect, which could lead LLMs to generate factually wrong answers. Furthermore, fine-tuning LLMs to update their knowledge is expensive. To this end, we propose to augment the knowledge directly in the input of LLMs. Specifically, we first retrieve the relevant facts to the input question from the knowledge graph based on semantic similarities between the question and its associated facts. After that, we prepend the retrieved facts to the input question in the form of the prompt, which is then forwarded to LLMs to generate the answer. Our framework, Knowledge-Augmented language model PromptING (KAPING), requires no model training, thus completely zero-shot. We validate the performance of our KAPING framework on the knowledge graph question answering task, that aims to answer the user’s question based on facts over a knowledge graph, on which ours outperforms relevant zero-shot baselines by up to 48% in average, across multiple LLMs of various sizes. + 2023.nlrse-1.7 + baek-etal-2023-knowledge + + + Can In-context Learners Learn a Reasoning Concept from Demonstrations? 
+ MichalTefnikMasaryk University + MarekKadlcikFaculty of Informatics, Masaryk University + 107-115 + Large language models show an emergent ability to learn a new task from a small number of input-output demonstrations.However, recent work shows that in-context learners largely rely on their pre-trained knowledge, such as the sentiment of the labels, instead of finding new associations in the input.However, the commonly-used few-shot evaluation settings using a random selection of in-context demonstrations can not disentangle models’ ability to learn a new skill from demonstrations, as most of the randomly-selected demonstrations do not present relations informative for prediction beyond exposing the new task distribution.To disentangle models’ in-context learning ability independent of models’ memory, we introduce a Conceptual few-shot learning method selecting the demonstrations sharing a possibly-informative concept with the predicted sample. We extract a set of such concepts from annotated explanations and measure how much can models benefit from presenting these concepts in few-shot demonstrations.We find that smaller models are more sensitive to the presented concepts. While some of the models are able to benefit from concept-presenting demonstrations for each assessed concept, we find that none of the assessed in-context learners can benefit from all presented reasoning concepts consistently, leaving the in-context concept learning an open challenge. + 2023.nlrse-1.8 + tefnik-kadlcik-2023-context + + + Effect Graph: Effect Relation Extraction for Explanation Generation + JonathanKobbeUniversity of Mannheim + IoanaHulpuData and Web Science Group, University of Mannheim + HeinerStuckenschmidtUniversity of Mannheim + 116-127 + Argumentation is an important means of communication. For describing especially arguments about consequences, the notion of effect relations has been introduced recently. We propose a method to extract effect relations from large text resources and apply it on encyclopedic and argumentative texts. By connecting the extracted relations, we generate a knowledge graph which we call effect graph. For evaluating the effect graph, we perform crowd and expert annotations and create a novel dataset. We demonstrate a possible use case of the effect graph by proposing a method for explaining arguments from consequences. + 2023.nlrse-1.9 + kobbe-etal-2023-effect + + + <fixed-case>OPT</fixed-case>-<fixed-case>R</fixed-case>: Exploring the Role of Explanations in Finetuning and Prompting for Reasoning Skills of Large Language Models + BadrAlkhamissiMeta AI + SiddharthVermaSquare + PingYuUniversity at Buffalo + ZhijingJinMax Planck Institute & ETH Zurich + AsliCelikyilmazFAIR @ Meta + MonaDiabMeta Responsible AI + 128-138 + We conduct a thorough investigation into the reasoning capabilities of Large Language Models (LLMs), focusing specifically on the Open Pretrained Transformers (OPT) models as a representative of such models. Our study entails finetuning three different sizes of OPT on a carefully curated reasoning corpus, resulting in two sets of finetuned models: OPT-R, finetuned without explanations, and OPT-RE, finetuned with explanations. We then evaluate all models on 57 out-of-domain tasks drawn from the Super-NaturalInstructions benchmark, covering 26 distinct reasoning skills, utilizing three prompting techniques. 
Through a comprehensive grid of 27 configurations and 6,156 test evaluations, we investigate the dimensions of finetuning, prompting, and scale to understand the role of explanations on different reasoning skills. Our findings reveal that having explanations in the fewshot exemplar has no significant impact on the model’s performance when the model is finetuned, while positively affecting the non-finetuned counterpart. Moreover, we observe a slight yet consistent increase in classification accuracy as we incorporate explanations during prompting and finetuning, respectively. Finally, we offer insights on which reasoning skills benefit the most from incorporating explanations during finetuning and prompting, such as Numerical (+20.4%) and Analogical (+13.9%) reasoning, as well as skills that exhibit negligible or negative effects. + 2023.nlrse-1.10 + alkhamissi-etal-2023-opt + + + Deductive Additivity for Planning of Natural Language Proofs + ZayneSpragueUniversity of Texas at Austin + KajBostromUniversity of Texas at Austin + SwaratChaudhuriUT Austin + GregDurrettUT Austin + 139-156 + Current natural language systems designed for multi-step claim validation typically operate in two phases: retrieve a set of relevant premise statements using heuristics (planning), then generate novel conclusions from those statements using a large language model (deduction). The planning step often requires expensive Transformer operations and does not scale to arbitrary numbers of premise statements. In this paper, we investigate whether efficient planning heuristic is possible via embedding spaces compatible with deductive reasoning. Specifically, we evaluate whether embedding spaces exhibit a property we call deductive additivity: the sum of premise statement embeddings should be close to embeddings of conclusions based on those premises. We explore multiple sources of off-the-shelf dense embeddings in addition to fine-tuned embeddings from GPT3 and sparse embeddings from BM25. We study embedding models both intrinsically, evaluating whether the property of deductive additivity holds, and extrinsically, using them to assist planning in natural language proof generation. Lastly, we create a dataset, Single-Step Reasoning Contrast (SSRC), to further probe performance on various reasoning types. Our findings suggest that while standard embedding methods frequently embed conclusions near the sums of their premises, they fall short of being effective heuristics and lack the ability to model certain categories of reasoning. + 2023.nlrse-1.11 + sprague-etal-2023-deductive + + + Synthetic Dataset for Evaluating Complex Compositional Knowledge for Natural Language Inference + Sushma AnandAkojuUniversity of Arizona + RobertVacareanuUniversity of Arizona + EduardoBlancoUniversity of Arizona + HarisRiazUniversity of Arizona + MihaiSurdeanuUniversity of Arizona + 157-168 + We introduce a synthetic dataset called Sentences Involving Complex Compositional Knowledge (SICCK) and a novel analysis that investigates the performance of Natural Language Inference (NLI) models to understand compositionality in logic. We produce 1,304 sentence pairs by modifying 15 examples from the SICK dataset (Marelli et al., 2014). To this end, we modify the original texts using a set of phrases modifiers that correspond to universal quantifiers, existential quantifiers, negation, and other concept modifiers in Natural Logic (NL) (MacCartney, 2009). 
We use these phrases to modify the subject, verb, and object parts of the premise and hypothesis. Lastly, we annotate these modified texts with the corresponding entailment labels following NL rules. We conduct a preliminary verification of how well the change in the structural and semantic composition is captured by neural NLI models, in both zero-shot and fine-tuned scenarios. We found that the performance of NLI models under the zero-shot setting is poor, especially for modified sentences with negation and existential quantifiers. After fine-tuning this dataset, we observe that models continue to perform poorly over negation, existential and universal modifiers. + 2023.nlrse-1.12 + akoju-etal-2023-synthetic + +
+
diff --git a/data/xml/2023.repl4nlp.xml b/data/xml/2023.repl4nlp.xml new file mode 100644 index 0000000000..05fb0738ff --- /dev/null +++ b/data/xml/2023.repl4nlp.xml @@ -0,0 +1,307 @@ + + + + + Proceedings of the 8th Workshop on Representation Learning for NLP (RepL4NLP 2023) + BurcuCanUniversity of Stirling + MaximilianMozesUniversity College London + SamuelCahyawijayaHong Kong University of Science and Technology + NaomiSaphraNew York University + NoraKassnerMeta + ShauliRavfogelBar-Ilan University + AbhilashaRavichanderAllen Institute for Artificial Intelligence + ChenZhaoNew York University + IsabelleAugensteinUniversity of Copenhagen + AnnaRogersUniversity of Copenhagen + KyunghyunChoNew York University + EdwardGrefenstetteDeepMind + LenaVoitaMeta AI + Association for Computational Linguistics +
Toronto, Canada
+ July + 2023 + repl4nlp + + + 2023.repl4nlp-1.0 + repl4nlp-2023-representation + + + Adversarial Clean Label Backdoor Attacks and Defenses on Text Classification Systems + AshimGupta + AmrithKrishnaUniversity of Cambridge + 1-12 + Clean-label (CL) attack is a form of data poisoning attack where an adversary modifies only the textual input of the training data, without requiring access to the labeling function. CL attacks are relatively unexplored in NLP, as compared to label flipping (LF) attacks, where the latter additionally requires access to the labeling function as well. While CL attacks are more resilient to data sanitization and manual relabeling methods than LF attacks, they often demand as high as ten times the poisoning budget than LF attacks. In this work, we first introduce an Adversarial Clean Label attack which can adversarially perturb in-class training examples for poisoning the training set. We then show that an adversary can significantly bring down the data requirements for a CL attack, using the aforementioned approach, to as low as 20 % of the data otherwise required. We then systematically benchmark and analyze a number of defense methods, for both LF and CL attacks, some previously employed solely for LF attacks in the textual domain and others adapted from computer vision. We find that text-specific defenses greatly vary in their effectiveness depending on their properties. + 2023.repl4nlp-1.1 + gupta-krishna-2023-adversarial + + + Do not Mask Randomly: Effective Domain-adaptive Pre-training by Masking In-domain Keywords + ShahriarGolchinUniversity of Arizona + MihaiSurdeanuUniversity of Arizona + NazgolTavabiHarvard University + AtaKiapourHarvard University + 13-21 + We propose a novel task-agnostic in-domain pre-training method that sits between generic pre-training and fine-tuning. Our approach selectively masks in-domain keywords, i.e., words that provide a compact representation of the target domain. We identify such keywords using KeyBERT (Grootendorst, 2020). We evaluate our approach using six different settings: three datasets combined with two distinct pre-trained language models (PLMs). Our results reveal that the fine-tuned PLMs adapted using our in-domain pre-training strategy outperform PLMs that used in-domain pre-training with random masking as well as those that followed the common pre-train-then-fine-tune paradigm. Further, the overhead of identifying in-domain keywords is reasonable, e.g., 7-15% of the pre-training time (for two epochs) for BERT Large (Devlin et al., 2019). + 2023.repl4nlp-1.2 + golchin-etal-2023-mask + + + Grammatical information in <fixed-case>BERT</fixed-case> sentence embeddings as two-dimensional arrays + ViviNastaseUniversity of Geneva + PaolaMerloUppsala University and University of Geneva, Switzerland + 22-39 + Sentence embeddings induced with various transformer architectures encode much semantic and syntactic information in a distributed manner in a one-dimensional array. We investigate whether specific grammatical information can be accessed in these distributed representations. Using data from a task developed to test rule-like generalizations, our experiments on detecting subject-verb agreement yield several promising results. First, we show that while the usual sentence representations encoded as one-dimensional arrays do not easily support extraction of rule-like regularities, a two-dimensional reshaping of these vectors allows various learning architectures to access such information. 
Next, we show that various architectures can detect patterns in these two-dimensional reshaped sentence embeddings and successfully learn a model based on smaller amounts of simpler training data, which performs well on more complex test data. This indicates that current sentence embeddings contain information that is regularly distributed, and which can be captured when the embeddings are reshaped into higher dimensional arrays. Our results cast light on representations produced by language models and help move towards developing few-shot learning approaches. + 2023.repl4nlp-1.3 + nastase-merlo-2023-grammatical + + + A Multilingual Evaluation of <fixed-case>NER</fixed-case> Robustness to Adversarial Inputs + AkshaySrinivasan + SowmyaVajjalaNational Research Council Canada + 40-53 + Adversarial evaluations of language models typically focus on English alone. In this paper, we performed a multilingual evaluation of Named Entity Recognition (NER) in terms of its robustness to small perturbations in the input. Our results showed the NER models we explored across three languages (English, German and Hindi) are not very robust to such changes, as indicated by the fluctuations in the overall F1 score as well as in a more fine-grained evaluation. With that knowledge, we further explored whether it is possible to improve the existing NER models using a part of the generated adversarial data sets as augmented training data to train a new NER model or as fine-tuning data to adapt an existing NER model. Our results showed that both these approaches improve performance on the original as well as adversarial test sets. While there is no significant difference between the two approaches for English, re-training is significantly better than fine-tuning for German and Hindi. + 2023.repl4nlp-1.4 + srinivasan-vajjala-2023-multilingual + + + Retrieval-Augmented Domain Adaptation of Language Models + BenfengXu + ChunxuZhaoBeijing Language and Culture University + WenbinJiang + PengFeiZhuBaidu + SongtaiDaiBaidu + ChaoPangBaidu + ZhuoSunBaidu + ShuohuanWang + YuSun + 54-64 + Language models pretrained on general domain corpora usually exhibit considerable degradation when generalizing to downstream tasks of specialized domains. Existing approaches try to construct PLMs for each specific domains either from scratch or through further pretraining, which not only costs substantial resources, but also fails to cover all target domains at various granularity. In this work, we propose RADA, a novel Retrieval-Augmented framework for Domain Adaptation. We first construct a textual corpora that covers the downstream task at flexible domain granularity and resource availability. We employ it as a pluggable datastore to retrieve informative background knowledge, and integrate them into the standard language model framework to augment representations. We then propose a two-level selection scheme to integrate the most relevant information while alleviating irrelevant noises. Specifically, we introduce a differentiable sampling module as well as an attention mechanism to achieve both passage-level and word-level selection. Such a retrieval-augmented framework enables domain adaptation of language models with flexible domain coverage and fine-grained domain knowledge integration. We conduct comprehensive experiments across biomedical, science and legal domains to demonstrate the effectiveness of the overall framework, and its advantage over existing solutions. 
+ 2023.repl4nlp-1.5 + xu-etal-2023-retrieval + + + Fine-grained Text Style Transfer with Diffusion-Based Language Models + YiweiLyu + TiangeLuoUniversity of Michigan - Ann Arbor + JiachengShi + ToddHollonUniversity of Michigan + HonglakLeeLG AI Research and University of Michigan + 65-74 + Diffusion probabilistic models have shown great success in generating high-quality images controllably, and researchers have tried to utilize this controllability into text generation domain. Previous works on diffusion-based language models have shown that they can be trained without external knowledge (such as pre-trained weights) and still achieve stable performance and controllability. In this paper, we trained a diffusion-based model on StylePTB dataset, the standard benchmark for fine-grained text style transfers. The tasks in StylePTB requires much more refined control over the output text compared to tasks evaluated in previous works, and our model was able to achieve state-of-the-art performance on StylePTB on both individual and compositional transfers. Moreover, our model, trained on limited data from StylePTB without external knowledge, outperforms previous works that utilized pretrained weights, embeddings, and external grammar parsers, and this may indicate that diffusion-based language models have great potential under low-resource settings. + 2023.repl4nlp-1.6 + lyu-etal-2023-fine + + + Enhancing text comprehension for Question Answering with Contrastive Learning + SeungyeonLeeKyungpook National University + MinhoLeeKyungpook National University + 75-86 + Although Question Answering (QA) have advanced to the human-level language skills in NLP tasks, there is still a problem: the QA model gets confused when there are similar sentences or paragraphs. Existing studies focus on enhancing the text understanding of the candidate answers to improve the overall performance of the QA models. However, since these methods focus on re-ranking queries or candidate answers, they fail to resolve the confusion when many generated answers are similar to the expected answer. To address these issues, we propose a novel contrastive learning framework called ContrastiveQA that alleviates the confusion problem in answer extraction. We propose a supervised method where we generate positive and negative samples from the candidate answers and the given answer, respectively. We thus introduce ContrastiveQA, which uses contrastive learning with sampling data to reduce incorrect answers. Experimental results on four QA benchmarks show the effectiveness of the proposed method. + 2023.repl4nlp-1.7 + lee-lee-2023-enhancing + + + Towards Flow Graph Prediction of Open-Domain Procedural Texts + KeisukeShirai + HirotakaKamekoBaidu + ShinsukeMoriKyoto University + 87-96 + Machine comprehension of procedural texts is essential for reasoning about the steps and automating the procedures. However, this requires identifying entities within a text and resolving the relationships between the entities. Previous work focused on the cooking domain and proposed a framework to convert a recipe text into a flow graph (FG) representation. In this work, we propose a framework based on the recipe FG for flow graph prediction of open-domain procedural texts. To investigate flow graph prediction performance in non-cooking domains, we introduce the wikiHow-FG corpus from articles on wikiHow, a website of how-to instruction articles. 
In experiments, we consider using the existing recipe corpus and performing domain adaptation from the cooking to the target domain. Experimental results show that the domain adaptation models achieve higher performance than those trained only on the cooking or target domain data. + 2023.repl4nlp-1.8 + shirai-etal-2023-towards + + + One does not fit all! On the Complementarity of Vision Encoders for Vision and Language Tasks + GregorGeigleBayerische Julius-Maximilians-Universität Würzburg + ChenLiu + JonasPfeifferGoogle + IrynaGurevychTU Darmstadt + 97-117 + Current multimodal models, aimed at solving Vision and Language (V+L) tasks, predominantly repurpose Vision Encoders (VE) as feature extractors. While many VEs—of different architectures, trained on different data and objectives—are publicly available, they are not designed for the downstream V+L tasks. Nonetheless, most current work assumes that a single pre-trained VE can serve as a general-purpose encoder. In this work, we focus on analysis and aim to understand whether the information stored within different VEs is complementary, i.e. if providing the model with features from multiple VEs can improve the performance on a target task, and how they are combined. We exhaustively experiment with three popular VEs on six downstream V+L tasks and analyze the attention and VE-dropout patterns. Our analyses suggest that diverse VEs complement each other, resulting in improved downstream V+L task performance, where the improvements are not due to simple ensemble effects (i.e. the performance does not always improve when increasing the number of encoders). We demonstrate that future VEs, which are not repurposed, but explicitly designed for V+L tasks, have the potential of improving performance on the target V+L tasks. + 2023.repl4nlp-1.9 + geigle-etal-2023-one + + + <fixed-case>SPC</fixed-case>: Soft Prompt Construction for Cross Domain Generalization + WenboZhaoAmazon + ArpitGuptaAmazon + TagyoungChungAmazon + JingHuangAmazon Alexa AI + 118-130 + Recent advances in prompt tuning have proven effective as a new language modeling paradigm for various natural language understanding tasks. However, it is challenging to adapt the soft prompt embeddings to different domains or generalize to low-data settings when learning soft prompts itself is unstable, task-specific, and bias-prone. This paper proposes a principled learning framework—soft prompt construction (SPC)—to facilitate learning domain-adaptable soft prompts. Derived from the SPC framework is a simple loss that can plug into various models and tuning approaches to improve their cross-domain performance. We show SPC can improve upon SOTA for contextual query rewriting, summarization, and paraphrase detection by up to 5%, 19%, and 16%, respectively. + 2023.repl4nlp-1.10 + zhao-etal-2023-spc + + + Friendly Neighbors: Contextualized Sequence-to-Sequence Link Prediction + AdrianKochsiekUniversität Mannheim + ApoorvSaxenaIndian Institute of Science, Bangalore + InderjeetNairAdobe Systems + RainerGemullaUniversität Mannheim, Germany + 131-138 + We propose KGT5-context, a simple sequence-to-sequence model for link prediction (LP) in knowledge graphs (KG). Our work expands on KGT5, a recent LP model that exploits textual features of the KG, has small model size, and is scalable. To reach good predictive performance, however, KGT5 relies on an ensemble with a knowledge graph embedding model, which itself is excessively large and costly to use. 
In this short paper, we show empirically that adding contextual information — i.e., information about the direct neighborhood of the query entity — alleviates the need for a separate KGE model to obtain good performance. The resulting KGT5-context model is simple, reduces model size significantly, and obtains state-of-the-art performance in our experimental study. + 2023.repl4nlp-1.11 + kochsiek-etal-2023-friendly + + + Extracting Multi-valued Relations from Language Models + SnehaSinghaniaSaarland Informatics Campus, Max-Planck Institute for Informatics + SimonRazniewskiSaarland Informatics Campus, Max-Planck Institute + GerhardWeikumMax Planck Institute and Max-Planck Institute for Informatics + 139-154 + The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task, and pave the way for further research on extracting relational knowledge from latent language representations. + 2023.repl4nlp-1.12 + singhania-etal-2023-extracting + + + Hierarchical Multi-Instance Multi-Label Learning for Detecting Propaganda Techniques + AnniChen + BhuwanDhingra + 155-163 + Since the introduction of the SemEval 2020 Task 11 (CITATION), several approaches have been proposed in the literature for classifying propaganda based on the rhetorical techniques used to influence readers. These methods, however, classify one span at a time, ignoring dependencies from the labels of other spans within the same context. In this paper, we approach propaganda technique classification as a Multi-Instance Multi-Label (MIML) learning problem (CITATION) and propose a simple RoBERTa-based model (CITATION) for classifying all spans in an article simultaneously. Further, we note that, due to the annotation process where annotators classified the spans by following a decision tree, there is an inherent hierarchical relationship among the different techniques, which existing approaches ignore. We incorporate these hierarchical label dependencies by adding an auxiliary classifier for each node in the decision tree to the training objective and ensembling the predictions from the original and auxiliary classifiers at test time. Overall, our model leads to an absolute improvement of 2.47% micro-F1 over the model from the shared task winning team in a cross-validation setup and is the best performing non-ensemble model on the shared task leaderboard.
+ 2023.repl4nlp-1.13 + chen-dhingra-2023-hierarchical + + + Contrastive Loss is All You Need to Recover Analogies as Parallel Lines + NarutatsuRi + Fei-TzinLeeColumbia University + NakulVermaColumbia University + 164-173 + While static word embedding models are known to represent linguistic analogies as parallel lines in high-dimensional space, the underlying mechanism as to why they result in such geometric structures remains obscure. We find that an elementary contrastive-style method employed over distributional information performs competitively with popular word embedding models on analogy recovery tasks, while achieving dramatic speedups in training time. Further, we demonstrate that a contrastive loss is sufficient to create these parallel structures in word embeddings, and establish a precise relationship between the co-occurrence statistics and the geometric structure of the resulting word embeddings. + 2023.repl4nlp-1.14 + ri-etal-2023-contrastive + + + Syntax-Aware Graph-to-Graph Transformer for Semantic Role Labelling + AlirezaMohammadshahi + JamesHendersonIdiap Research Institute + 174-186 + Recent models have shown that incorporating syntactic knowledge into the semantic role labelling (SRL) task leads to a significant improvement. In this paper, we propose Syntax-aware Graph-to-Graph Transformer (SynG2G-Tr) model, which encodes the syntactic structure using a novel way to input graph relations as embeddings, directly into the self-attention mechanism of Transformer. This approach adds a soft bias towards attention patterns that follow the syntactic structure but also allows the model to use this information to learn alternative patterns. We evaluate our model on both span-based and dependency-based SRL datasets, and outperform previous alternative methods in both in-domain and out-of-domain settings, on CoNLL 2005 and CoNLL 2009 datasets. + 2023.repl4nlp-1.15 + mohammadshahi-henderson-2023-syntax + + + Improving Zero-shot Relation Classification via Automatically-acquired Entailment Templates + MahdiRahimiComputer Science Department, University of Arizona + MihaiSurdeanuUniversity of Arizona + 187-195 + While fully supervised relation classification (RC) models perform well on large-scale datasets, their performance drops drastically in low-resource settings. As generating annotated examples are expensive, recent zero-shot methods have been proposed that reformulate RC into other NLP tasks for which supervision exists such as textual entailment. However, these methods rely on templates that are manually created which is costly and requires domain expertise. In this paper, we present a novel strategy for template generation for relation classification, which is based on adapting Harris’ distributional similarity principle to templates encoded using contextualized representations. Further, we perform empirical evaluation of different strategies for combining the automatically acquired templates with manual templates. The experimental results on TACRED show that our approach not only performs better than the zero-shot RC methods that only use manual templates, but also that it achieves state-of-the-art performance for zero-shot TACRED at 64.3 F1 score. 
+ 2023.repl4nlp-1.16 + rahimi-surdeanu-2023-improving + + + <fixed-case>MUX</fixed-case>-<fixed-case>PLM</fixed-case>s: Pre-training Language Models with Data Multiplexing + VishvakMurahariPrinceton University + AmeetDeshpande + CarlosJimenez + IzhakShafranGoogle + MingqiuWang + YuanCaoGoogle Brain + KarthikNarasimhanPrinceton University + 196-211 + The widespread adoption of large language models such as ChatGPT and Bard has led to unprecedented demand for these technologies. The burgeoning cost of inference for ever-increasing model sizes coupled with hardware shortages has limited affordable access and poses a pressing need for efficiency approaches geared towards high throughput and performance. Multi-input multi-output (MIMO) algorithms such as data multiplexing, offer a promising solution with a many-fold increase in throughput by performing inference for multiple inputs at the cost of a single input. Yet these approaches are not currently performant enough to be deployed in modern systems. We change that by developing MUX-PLMs, a class of high throughput pre-trained language models (PLMs) trained with data multiplexing, that can be fine-tuned for any downstream task to yield high-throughput high-performance. Our novel multiplexing and demultiplexing modules proficiently entangle and disentangle inputs, and enable high-performance high throughput that are competitive with vanilla PLMs while achieving 2x/5x inference speedup with only a 1−4% drop on a broad suite of tasks. + 2023.repl4nlp-1.17 + murahari-etal-2023-mux + + + Mixed Orthographic/Phonemic Language Modeling: Beyond Orthographically Restricted Transformers (<fixed-case>BORT</fixed-case>) + RobertGaleOregon Health Sciences University + AlexandraSalemOregon Health Sciences University + GerasimosFergadiotisPortland State University + StevenBedrickOregon Health & Science University + 212-225 + Speech language pathologists rely on information spanning the layers of language, often drawing from multiple layers (e.g. phonology & semantics) at once. Recent innovations in large language models (LLMs) have been shown to build powerful representations for many complex language structures, especially syntax and semantics, unlocking the potential of large datasets through self-supervised learning techniques. However, these datasets are overwhelmingly orthographic, favoring writing systems like the English alphabet, a natural but phonetically imprecise choice. Meanwhile, LLM support for the international phonetic alphabet (IPA) ranges from poor to absent. Further, LLMs encode text at a word- or near-word level, and pre-training tasks have little to gain from phonetic/phonemic representations. In this paper, we introduce BORT, an LLM for mixed orthography/IPA meant to overcome these limitations. To this end, we extend the pre-training of an existing LLM with our own self-supervised pronunciation tasks. We then fine-tune for a clinical task that requires simultaneous phonological and semantic analysis. For an “easy” and “hard” version of these tasks, we show that fine-tuning from our models is more accurate by a relative 24% and 29%, and improved on character error rates by a relative 75% and 31%, respectively, than those starting from the original model. 
+ 2023.repl4nlp-1.18 + gale-etal-2023-mixed + + + Effectiveness of Data Augmentation for Parameter Efficient Tuning with Limited Data + StephenObadinmaQueen’s University + HongyuGuo + XiaodanZhuQueen’s University + 226-237 + Recent work has demonstrated that using parameter efficient tuning techniques such as prefix tuning (or P-tuning) on pretrained language models can yield performance that is comparable or superior to fine-tuning while dramatically reducing trainable parameters. Nevertheless, the effectiveness of such methods under the context of data augmentation, a common strategy to improve learning under low data regimes, has not been fully explored. In this paper, we examine the effectiveness of several popular task-agnostic data augmentation techniques, i.e., EDA, Back Translation, and Mixup, when using two general parameter efficient tuning methods, P-tuning v2 and LoRA, under data scarcity. We show that data augmentation can be used to boost the performance of P-tuning and LoRA models, but the effectiveness of each technique varies and certain methods can lead to a notable degradation in performance, particularly when using larger models and on harder tasks. We further analyze the sentence representations of P-tuning compared to fine-tuning to help understand the above behaviour, and reveal how P-tuning generally presents a more limited ability to separate the sentence embeddings from different classes of augmented data. In addition, it displays poorer performance on heavily altered data. However, we demonstrate that by adding a simple contrastive loss function it can help mitigate such issues for prefix tuning, resulting in sizable improvements to augmented data performance. + 2023.repl4nlp-1.19 + obadinma-etal-2023-effectiveness + + + Relational Sentence Embedding for Flexible Semantic Matching + BinWangNational University of Singapore, Singapore + HaizhouLiNational University of Singapore, Singapore and School of Data Science, The Chinese University of Hong Kong, Shenzhen, China and Shenzhen Research Institute of Big Data + 238-252 + 2023.repl4nlp-1.20 + wang-li-2023-relational + + + Tucker Decomposition with Frequency Attention for Temporal Knowledge Graph Completion + LikangXiaoSKLSDE, School of Computer Science and Engineering, Beihang University, Beijing, China and Shen Yuan Honors College, Beihang University, Beijing, China + RichongZhangSKLSDE, School of Computer Science and Engineering, Beihang University, Beijing, China + ZijieChenSchool of Electrical and Computer Engineering, University of Toronto, Toronto, Canada + JunfanChenSKLSDE, School of Computer Science and Engineering, Beihang University, Beijing, China + 253-265 + 2023.repl4nlp-1.21 + xiao-etal-2023-tucker-decomposition + + + <fixed-case>CLIP</fixed-case>-based image captioning via unsupervised cycle-consistency in the latent space + RomainBielawskiANITI, Université de Toulouse, France + RufinVanRullenCerCo, CNRS UMR5549, Toulouse + 266-275 + 2023.repl4nlp-1.22 + bielawski-vanrullen-2023-clip + + + Token-level Fitting Issues of Seq2seq Models + GuangshengBaoZhejiang University and School of Engineering, Westlake University + ZhiyangTengNanyang Technological University + YueZhangSchool of Engineering, Westlake University and Institute of Advanced Technology, Westlake Institute for Advanced Study + 276-288 + 2023.repl4nlp-1.23 + bao-etal-2023-token + + + Revealing the Blind Spot of Sentence Encoder Evaluation by <fixed-case>HEROS</fixed-case> + Cheng-HanChiangNational Taiwan University† + Hung-yiLeeNational 
Taiwan University† + Yung-SungChuangMassachusetts Institute of Technology + JamesGlassMassachusetts Institute of Technology + 289-302 + 2023.repl4nlp-1.24 + chiang-etal-2023-revealing + + + One-Shot Exemplification Modeling via Latent Sense Representations + JohnHarvillUniversity of Illinois Urbana-Champaign + MarkHasegawa-JohnsonUniversity of Illinois Urbana-Champaign + Hee SukYoonKorea Advanced Institute of Science and Technology + Chang D.YooKorea Advanced Institute of Science and Technology + EunseopYoonKorea Advanced Institute of Science and Technology + 303-314 + 2023.repl4nlp-1.25 + harvill-etal-2023-one + + + <fixed-case>S</fixed-case>en2<fixed-case>P</fixed-case>ro: A Probabilistic Perspective to Sentence Embedding from Pre-trained Language Model + LingfengShenTencent AI Lab + HaiyunJiangTencent AI Lab + LemaoLiuTencent AI Lab + ShumingShiTencent AI Lab + 315-333 + 2023.repl4nlp-1.26 + shen-etal-2023-sen2pro + + + Visual Coherence Loss for Coherent and Visually Grounded Story Generation + XudongHongMPI Informatics and Saarland University and Saarland Informatics Campus + VeraDembergSaarland University and Saarland Informatics Campus + AsadSayeedUniversity of Gothenburg + QiankunZhengSaarland University and Saarland Informatics Campus + BerntSchieleMPI Informatics and Saarland Informatics Campus + 334-346 + 2023.repl4nlp-1.27 + hong-etal-2023-visual-coherence + +
+
diff --git a/data/xml/2023.semeval.xml b/data/xml/2023.semeval.xml index ddb13649de..2993e04b5a 100644 --- a/data/xml/2023.semeval.xml +++ b/data/xml/2023.semeval.xml @@ -15,6 +15,10 @@ 2023 semeval + + 2023.semeval-1.0 + semeval-2023-international + <fixed-case>K</fixed-case>now<fixed-case>C</fixed-case>omp at <fixed-case>S</fixed-case>em<fixed-case>E</fixed-case>val-2023 Task 7: Fine-tuning Pre-trained Language Models for Clinical Trial Entailment Identification WeiqiWangHong Kong University of Science and Technology diff --git a/data/xml/2023.sicon.xml b/data/xml/2023.sicon.xml new file mode 100644 index 0000000000..f3dd03b204 --- /dev/null +++ b/data/xml/2023.sicon.xml @@ -0,0 +1,101 @@ + + + + + Proceedings of the First Workshop on Social Influence in Conversations (SICon 2023) + KushalChawla + WeiyanShi + Association for Computational Linguistics +
Toronto, Canada
+ July + 2023 + 2023.sicon-1 + sicon + + + 2023.sicon-1.0 + sicon-2023-social + + + Eliciting Rich Positive Emotions in Dialogue Generation + ZiweiGongColumbia University + QingkaiMin + YueZhangWestlake University + 1-8 + Positive emotion elicitation aims at evoking positive emotion states in human users in open-domain dialogue generation. However, most work focuses on inducing a single-dimension of positive sentiment using human annotated datasets, which limits the scale of the training dataset. In this paper, we propose to model various emotions in large unannotated conversations, such as joy, trust and anticipation, by leveraging a latent variable to control the emotional intention of the response. Our proposed emotion-eliciting-Conditional-Variational-AutoEncoder (EE-CVAE) model generates more diverse and emotionally-intelligent responses compared to single-dimension baseline models in human evaluation. + 2023.sicon-1.1 + gong-etal-2023-eliciting + + + Detoxifying Online Discourse: A Guided Response Generation Approach for Reducing Toxicity in User-Generated Text + RitwikBoseKnox College + IanPereraThe Institute for Human & Machine Cognition + BonnieDorrUniversity of Florida + 9-14 + The expression of opinions, stances, and moral foundations on social media often coincide with toxic, divisive, or inflammatory language that can make constructive discourse across communities difficult. Natural language generation methods could provide a means to reframe or reword such expressions in a way that fosters more civil discourse, yet current Large Language Model (LLM) methods tend towards language that is too generic or formal to seem authentic for social media discussions. We present preliminary work on training LLMs to maintain authenticity while presenting a community’s ideas and values in a constructive, non-toxic manner. + 2023.sicon-1.2 + bose-etal-2023-detoxifying + + + Large Language Models respond to Influence like Humans + LewisGriffinUniversity College London, University of London + BennettKleinbergTilburg University + MaximilianMozes + KimberlyMaiUniversity College London, University of London + Maria Do MarVau + MatthewCaldwellNA + AugustineMavor-Parker + 15-24 + Two studies tested the hypothesis that a Large Language Model (LLM) can be used to model psychological change following exposure to influential input. The first study tested a generic mode of influence - the Illusory Truth Effect (ITE) - where earlier exposure to a statement boosts a later truthfulness test rating. Analysis of newly collected data from human and LLM-simulated subjects (1000 of each) showed the same pattern of effects in both populations; although with greater per statement variability for the LLM. The second study concerns a specific mode of influence – populist framing of news to increase its persuasion and political mobilization. Newly collected data from simulated subjects was compared to previously published data from a 15 country experiment on 7286 human participants. Several effects from the human study were replicated by the simulated study, including ones that surprised the authors of the human study by contradicting their theoretical expectations; but some significant relationships found in human data were not present in the LLM data. Together the two studies support the view that LLMs have potential to act as models of the effect of influence. + 2023.sicon-1.3 + griffin-etal-2023-large + + + What Makes a Good Counter-Stereotype? 
Evaluating Strategies for Automated Responses to Stereotypical Text + KathleenFraserNational Research Council Canada + SvetlanaKiritchenkoNational Research Council Canada + IsarNejadgholi + AnnaKerkhof + 25-38 + When harmful social stereotypes are expressed on a public platform, they must be addressed in a way that educates and informs both the original poster and other readers, without causing offence or perpetuating new stereotypes. In this paper, we synthesize findings from psychology and computer science to propose a set of potential counter-stereotype strategies. We then automatically generate such counter-stereotypes using ChatGPT, and analyze their correctness and expected effectiveness at reducing stereotypical associations. We identify the strategies of denouncing stereotypes, warning of consequences, and using an empathetic tone as three promising strategies to be further tested. + 2023.sicon-1.4 + fraser-etal-2023-makes + + + <fixed-case>BC</fixed-case>ause: Reducing group bias and promoting cohesive discussion in online deliberation processes through a simple and engaging online deliberation tool + LucasAnastasiou + AnnaDe LibboNA + 39-49 + Facilitating healthy online deliberation in terms of sensemaking and collaboration of discussion participants proves extremely challenging due to a number of known negative effects of online communication on social media platforms. We start from concerns and aspirations about the use of existing online discussion systems as distilled in previous literature, we then combine them with lessons learned on design and engineering practices from our research team, to inform the design of an easy-to-use tool (BCause.app) that enables higher quality discussions than traditional social media. We describe the design of this tool, highlighting the main interaction features that distinguish it from common social media, namely: i. the low-cost argumentation structuring of the conversations with direct replies; ii. and the distinctive use of reflective feedback rather than appreciative-only feedback. We then present the results of a controlled A/B experiment in which we show that the presence of argumentative and cognitive reflective discussion elements produces better social interaction with less polarization and promotes a more cohesive discussion than common social media-like interactions. + 2023.sicon-1.5 + anastasiou-de-libbo-2023-bcause + + + Measuring Lexico-Semantic Alignment in Debates with Contextualized Word Representations + AinaGarí SolerTélécom-Paris + MatthieuLabeauTélécom ParisTech + ChloéClavelTélécom ParisTech and Télécom Paris + 50-63 + Dialog participants sometimes align their linguistic styles, e.g., they use the same words and syntactic constructions as their interlocutors. We propose to investigate the notion of lexico-semantic alignment: to what extent do speakers convey the same meaning when they use the same words? We design measures of lexico-semantic alignment relying on contextualized word representations. We show that they reflect interesting semantic differences between the two sides of a debate and that they can assist in the task of debate’s winner prediction. 
+ 2023.sicon-1.6 + gari-soler-etal-2023-measuring + + + Exploring Linguistic Style Matching in Online Communities: The Role of Social Context and Conversation Dynamics + AparnaAnanthasubramaniam + HongChen + JasonYanUniversity of Michigan - Ann Arbor + KenanAlkiek + JiaxinPeiUniversity of Michigan + AgrimaSethUniversity of Michigan + LaviniaDunagan + MinjeChoiUniversity of Michigan + BenjaminLittererNA + DavidJurgensUniversity of Michigan + 64-74 + Linguistic style matching (LSM) in conversations can be reflective of several aspects of social influence such as power or persuasion. However, how LSM relates to the outcomes of online communication on platforms such as Reddit is an unknown question. In this study, we analyze a large corpus of two-party conversation threads in Reddit where we identify all occurrences of LSM using two types of style: the use of function words and formality. Using this framework, we examine how levels of LSM differ in conversations depending on several social factors within Reddit: post and subreddit features, conversation depth, user tenure, and the controversiality of a comment. Finally, we measure the change of LSM following loss of status after community banning. Our findings reveal the interplay of LSM in Reddit conversations with several community metrics, suggesting the importance of understanding conversation engagement when understanding community dynamics. + 2023.sicon-1.7 + ananthasubramaniam-etal-2023-exploring + +
+
diff --git a/data/xml/2023.sigmorphon.xml b/data/xml/2023.sigmorphon.xml new file mode 100644 index 0000000000..c3ceb28079 --- /dev/null +++ b/data/xml/2023.sigmorphon.xml @@ -0,0 +1,306 @@ + + + + + Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology + GarrettNicolai + EleanorChodroff + FredericMailhot + ÇağrıÇöltekin + Association for Computational Linguistics +
Toronto, Canada
+ July + 2023 + 2023.sigmorphon-1 + sigmorphon + + + 2023.sigmorphon-1.0 + sigmorphon-2023-sigmorphon + + + Translating a low-resource language using <fixed-case>GPT</fixed-case>-3 and a human-readable dictionary + MichaElsnerThe Ohio State University + JordanNeedleThe Ohio State University + 1-13 + We investigate how well words in the polysynthetic language Inuktitut can be translated by combining dictionary definitions, without use of a neural machine translation model trained on parallel text. Such a translation system would allow natural language technology to benefit from resources designed for community use in a language revitalization or education program, rather than requiring a separate parallel corpus. We show that the text-to-text generation capabilities of GPT-3 allow it to perform this task with BLEU scores of up to 18.5. We investigate prompting GPT-3 to provide multiple translations, which can help slightly, and providing it with grammar information, which is mostly ineffective. Finally, we test GPT-3’s ability to derive morpheme definitions from whole-word translations, but find this process is prone to errors including hallucinations. + 2023.sigmorphon-1.2 + elsner-needle-2023-translating + + + Evaluating Cross Lingual Transfer for Morphological Analysis: a Case Study of <fixed-case>I</fixed-case>ndian Languages + SiddheshPawarGoogle + PushpakBhattacharyyaIndian Institute of Technology Bombay and Patna + ParthaTalukdarGoogle Research and IISc + 14-26 + Recent advances in pretrained multilingual models such as Multilingual T5 (mT5) have facilitated cross-lingual transfer by learning shared representations across languages. Leveraging pretrained multilingual models for scaling morphology analyzers to low-resource languages is a unique opportunity that has been under-explored so far. We investigate this line of research in the context of Indian languages, focusing on two important morphological sub-tasks: root word extraction and tagging morphosyntactic descriptions (MSD), viz., gender, number, and person (GNP). We experiment with six Indian languages from two language families (Dravidian and Indo-Aryan) to train a multilingual morphology analyzers for the first time for Indian languages. We demonstrate the usability of multilingual models for few-shot cross-lingual transfer through an average 7% increase in GNP tagging in a cross-lingual setting as compared to a monolingual setting through controlled experiments. We provide an overview of the state of the datasets available related to our tasks and point-out a few modeling limitations due to datasets. Lastly, we analyze the cross-lingual transfer of morphological tags for verbs and nouns, which provides a proxy for the quality of representations of word markings learned by the model. + 2023.sigmorphon-1.3 + pawar-etal-2023-evaluating + + + Joint Learning Model for Low-Resource Agglutinative Language Morphological Tagging + GulinigeerAbudouwailiSchool of Information Science and Engineering Xinjiang University + KahaerjiangAbiderexitiSchool of Information Science and Engineering, Xinjiang University + NianYiSchool of Information Science and Engineering Xinjiang University + AishanWumaierSchool of Science and Engineering, Xinjiang University; Xinjiang Provincial Key Laboratory of Multi-lingual Information Technology + 27-37 + Due to the lack of data resources, rule-based or transfer learning is mainly used in the morphological tagging of low-resource languages. 
However, these methods require expert knowledge, ignore contextual features, and have error propagation. Therefore, we propose a joint morphological tagger for low-resource agglutinative languages to alleviate the above challenges. First, we represent the contextual input with multi-dimensional features of agglutinative words. Second, joint training reduces the direct impact of part-of-speech errors on morphological features and increases the indirect influence between the two types of labels through a fusion mechanism. Finally, our model separately predicts part-of-speech and morphological features. Part-of-speech tagging is regarded as sequence tagging. When predicting morphological features, two-label adjacency graphs are dynamically reconstructed by integrating multilingual global features and monolingual local features. Then, a graph convolution network is used to learn the higher-order intersection of labels. A series of experiments show that the proposed model in this paper is superior to other comparative models. + 2023.sigmorphon-1.4 + abudouwaili-etal-2023-joint + + + Revisiting and Amending <fixed-case>C</fixed-case>entral <fixed-case>K</fixed-case>urdish Data on <fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>orph 4.0 + SinaAhmadiGeorge Mason University + AsoMahmudiIndependent + 38-48 + UniMorph–the Universal Morphology project is a collaborative initiative to create and maintain morphological data and organize numerous related tasks for various language processing communities. The morphological data is provided by linguists for over 160 languages in the latest version of UniMorph 4.0. This paper sheds light on the Central Kurdish data on UniMorph 4.0 by analyzing the existing data, its fallacies, and systematic morphological errors. It also presents an approach to creating more reliable morphological data by considering various specific phenomena in Central Kurdish that have not been addressed previously, such as Izafe and several enclitics. + 2023.sigmorphon-1.5 + ahmadi-mahmudi-2023-revisiting + + + Investigating Phoneme Similarity with Artificially Accented Speech + MargotMassonUniversity College Dublin + JulieCarson-berndsenUniversity College Dublin + 49-57 + While the deep learning revolution has led to significant performance improvements in speech recognition, accented speech remains a challenge. Current approaches to this challenge typically do not seek to understand and provide explanations for the variations of accented speech, whether they stem from native regional variation or non-native error patterns. This paper seeks to address non-native speaker variations from both a knowledge-based and a data-driven perspective. We propose to approximate non-native accented-speech pronunciation patterns by the means of two approaches: based on phonetic and phonological knowledge on the one hand and inferred from a text-to-speech system on the other. Artificial speech is then generated with a range of variants which have been captured in confusion matrices representing phoneme similarities. We then show that non-native accent confusions actually propagate to the transcription from the ASR, thus suggesting that the inference of accent specific phoneme confusions is achievable from artificial speech. 
+ 2023.sigmorphon-1.6 + masson-carson-berndsen-2023-investigating + + + Generalized Glossing Guidelines: An Explicit, Human- and Machine-Readable, Item-and-Process Convention for Morphological Annotation + David R.MortensenLanguage Technologies Institute, Carnegie Mellon University + ElaGulsenCarnegie Mellon University + TaiqiHeCarnegie Mellon University + NathanielRobinsonCarnegie Mellon University + JonathanAmithGettysburg College + LindiaTjuatjaCarnegie Mellon University + LoriLevinCarnegie Mellon University + 58-67 + Interlinear glossing provides a vital type of morphosyntactic annotation, both for linguists and language revitalists, and numerous conventions exist for representing it formally and computationally. Some of these formats are human readable; others are machine readable. Some are easy to edit with general-purpose tools. Few represent non-concatenative processes like infixation, reduplication, mutation, truncation, and tonal overwriting in a consistent and formally rigorous way (on par with affixation). We propose an annotation convention—Generalized Glossing Guidelines (GGG) that combines all of these positive properties using an Item-and-Process (IP) framework. We describe the format, demonstrate its linguistic adequacy, and compare it with two other interlinear glossed text annotation schemes. + 2023.sigmorphon-1.7 + mortensen-etal-2023-generalized + + + Jambu: A historical linguistic database for <fixed-case>S</fixed-case>outh <fixed-case>A</fixed-case>sian languages + AryamanAroraGeorgetown University + AdamFarrisStanford University + SamopriyaBasuSimon Fraser University + SureshKolichalaMicrosoft + 68-77 + We introduce JAMBU, a cognate database of South Asian languages which unifies dozens of previous sources in a structured and accessible format. The database includes nearly 287k lemmata from 602 lects, grouped together in 23k sets of cognates. We outline the data wrangling necessary to compile the dataset and train neural models for reflex prediction on the Indo-Aryan subset of the data. We hope that JAMBU is an invaluable resource for all historical linguists and Indologists, and look towards further improvement and expansion of the database. + 2023.sigmorphon-1.8 + arora-etal-2023-jambu + + + Lightweight morpheme labeling in context: Using structured linguistic representations to support linguistic analysis for the language documentation context + BhargavShandilyaUniversity of Colorado Boulder + AlexisPalmerUniversity of Colorado Boulder + 78-92 + Linguistic analysis is a core task in the process of documenting, analyzing, and describing endangered and less-studied languages. In addition to providing insight into the properties of the language being studied, having tools to automatically label words in a language for grammatical category and morphological features can support a range of applications useful for language pedagogy and revitalization. At the same time, most modern NLP methods for these tasks require both large amounts of data in the language and compute costs well beyond the capacity of most research groups and language communities. In this paper, we present a gloss-to-gloss (g2g) model for linguistic analysis (specifically, morphological analysis and part-of-speech tagging) that is lightweight in terms of both data requirements and computational expense.
The model is designed for the interlinear glossed text (IGT) format, in which we expect the source text of a sentence in a low-resource language, a translation of that sentence into a language of wider communication, and a detailed glossing of the morphological properties of each word in the sentence. We first produce silver standard parallel glossed data by automatically labeling the high-resource translation. The model then learns to transform source language morphological labels into output labels for the target language, mediated by a structured linguistic representation layer. We test the model on both low-resource and high-resource languages, and find that our simple CNN-based model achieves comparable performance to a state-of-the-art transformer-based model, at a fraction of the computational cost. + 2023.sigmorphon-1.9 + shandilya-palmer-2023-lightweight + + + Improving Automated Prediction of <fixed-case>E</fixed-case>nglish Lexical Blends Through the Use of Observable Linguistic Features + JaremSaundersUniversity of North Carolina at Chapel Hill + 93-97 + The process of lexical blending is difficult to reliably predict. This difficulty has been shown by machine learning approaches in blend modeling, including attempts using then state-of-the-art LSTM deep neural networks trained on character embeddings, which were able to predict lexical blends given the ordered constituent words in less than half of cases, at maximum. This project introduces a novel model architecture which dramatically increases the correct prediction rates for lexical blends, using only Polynomial regression and Random Forest models. This is achieved by generating multiple possible blend candidates for each input word pairing and evaluating them based on observable linguistic features. The success of this model architecture illustrates the potential usefulness of observable linguistic features for problems that elude more advanced models which utilize only features discovered in the latent space. + 2023.sigmorphon-1.10 + saunders-2023-improving + + + Colexifications for Bootstrapping Cross-lingual Datasets: The Case of Phonology, Concreteness, and Affectiveness + YiyiChenAalborg University + JohannesBjervaDepartment of Computer Science, Aalborg University + 98-109 + Colexification refers to the linguistic phenomenon where a single lexical form is used to convey multiple meanings. By studying cross-lingual colexifications, researchers have gained valuable insights into fields such as psycholinguistics and cognitive sciences (Jackson et al., 2019; Xu et al., 2020; Karjus et al., 2021; Schapper and Koptjevskaja-Tamm, 2022; François, 2022). While several multilingual colexification datasets exist, there is untapped potential in using this information to bootstrap datasets across such semantic features. In this paper, we aim to demonstrate how colexifications can be leveraged to create such cross-lingual datasets. We showcase curation procedures which result in a dataset covering 142 languages across 21 language families across the world. The dataset includes ratings of concreteness and affectiveness, mapped with phonemes and phonological features. We further analyze the dataset along different dimensions to demonstrate potential of the proposed procedures in facilitating further interdisciplinary research in psychology, cognitive science, and multilingual natural language processing (NLP).
Based on initial investigations, we observe that i) colexifications that are closer in concreteness/affectiveness are more likely to colexify ; ii) certain initial/last phonemes are significantly correlated with concreteness/affectiveness intra language families, such as /k/ as the initial phoneme in both Turkic and Tai-Kadai correlated with concreteness, and /p/ in Dravidian and Sino-Tibetan correlated with Valence; iii) the type-to-token ratio (TTR) of phonemes are positively correlated with concreteness across several language families, while the length of phoneme segments are negatively correlated with concreteness; iv) certain phonological features are negatively correlated with concreteness across languages. The dataset is made public online for further research. + 2023.sigmorphon-1.11 + chen-bjerva-2023-colexifications + + + Character alignment methods for dialect-to-standard normalization + YvesScherrerUniversity of Helsinki + 110-116 + This paper evaluates various character alignment methods on the task of sentence-level standardization of dialect transcriptions. We compare alignment methods from different scientific traditions (dialectometry, speech processing, machine translation) and apply them to Finnish, Norwegian and Swiss German dialect datasets. In the absence of gold alignments, we evaluate the methods on a set of characteristics that are deemed undesirable for the task. We find that trained alignment methods only show marginal benefits to simple Levenshtein distance. On this particular task, eflomal outperforms related methods such as GIZA++ or fast_align by a large margin. + 2023.sigmorphon-1.12 + scherrer-2023-character + + + <fixed-case>SIGMORPHON</fixed-case>–<fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>orph 2023 Shared Task 0: Typologically Diverse Morphological Inflection + OmerGoldmanBar-Ilan University + KhuyagbaatarBatsurenNational University of Mongolia + SalamKhalifaStony Brook University + AryamanAroraGeorgetown University + GarrettNicolaiUniversity of British Columbia + ReutTsarfatyBar-Ilan University + EkaterinaVylomovaUniversity of Melbourne + 117-125 + The 2023 SIGMORPHON–UniMorph shared task on typologically diverse morphological inflection included a wide range of languages: 26 languages from 9 primary language families. The data this year was all lemma-split, to allow testing models’ generalization ability, and structured along the new hierarchical schema presented in (Batsuren et al., 2022). The systems submitted this year, 9 in number, showed ingenuity and innovativeness, including hard attention for explainability and bidirectional decoding. Special treatment was also given by many participants to the newly-introduced data in Japanese, due to the high abundance of unseen Kanji characters in its test set. + 2023.sigmorphon-1.13 + goldman-etal-2023-sigmorphon + + + <fixed-case>SIGMORPHON</fixed-case>–<fixed-case>U</fixed-case>ni<fixed-case>M</fixed-case>orph 2023 Shared Task 0, Part 2: Cognitively Plausible Morphophonological Generalization in <fixed-case>K</fixed-case>orean + CanaanBreissMassachusetts Institute of Technology + JinyoungJoUniversity of California, Los Angeles + 126-131 + This paper summarises data collection and curation for Part 2 of the 2023 SIGMORPHON-UniMorph Shared Task 0, which focused on modeling speaker knowledge and generalization of a pair of interacting phonological processes in Korean. 
We briefly describe how modeling the generalization task could be of interest to researchers in both Natural Language Processing and linguistics, and then summarise the traditional description of the phonological processes that are at the center of the modeling challenge. We then describe the criteria we used to select and code cases of process application in two Korean speech corpora, which served as the primary learning data. We also report the technical details of the experiment we carried out that served as the primary test data. + 2023.sigmorphon-1.14 + breiss-jo-2023-sigmorphon + + + Morphological reinflection with weighted finite-state transducers + AliceKwakUniversity of Arizona + MichaelHammondUniversity of Arizona + CheyenneWingUniversity of Arizona + 132-137 + This paper describes the submission by the University of Arizona to the SIGMORPHON 2023 Shared Task on typologically diverse morphological (re-)inflection. In our submission, we investigate the role of frequency, length, and weighted transducers in addressing the challenge of morphological reinflection. We start with the non-neural baseline provided for the task and show how some improvement can be gained by integrating length and frequency in prefix selection. We also investigate using weighted finite-state transducers, jump-started from edit distance and directly augmented with frequency. Our specific technique is promising and quite simple, but we see only modest improvements for some languages here. + 2023.sigmorphon-1.15 + kwak-etal-2023-morphological + + + Linear Discriminative Learning: a competitive non-neural baseline for morphological inflection + CheonkamJeongUniversity of Arizona + DominicSchmitzHeinrich Heine University Düsseldorf, Germany + AkhileshKakolu RamaraoHeinrich-Heine-Universität Düsseldorf + AnnaSteinHeinrich Heine Universität + KevinTangHeinrich-Heine-Universität Düsseldorf + 138-150 + This paper presents our submission to the SIGMORPHON 2023 task 2 of Cognitively Plausible Morphophonological Generalization in Korean. We implemented both Linear Discriminative Learning and Transformer models and found that the Linear Discriminative Learning model trained on a combination of corpus and experimental data showed the best performance with the overall accuracy of around 83%. We found that the best model must be trained on both corpus data and the experimental data of one particular participant. Our examination of speaker-variability and speaker-specific information did not explain why a particular participant combined well with the corpus data. We recommend Linear Discriminative Learning models as a future non-neural baseline system, owing to its training speed, accuracy, model interpretability and cognitive plausibility. In order to improve the model performance, we suggest using bigger data and/or performing data augmentation and incorporating speaker- and item-specifics considerably. + 2023.sigmorphon-1.16 + jeong-etal-2023-linear + + + Tü-<fixed-case>CL</fixed-case> at <fixed-case>SIGMORPHON</fixed-case> 2023: Straight-Through Gradient Estimation for Hard Attention + LeanderGirrbachUniversity of Tübingen + 151-165 + This paper describes our systems participating in the 2023 SIGMORPHON Shared Task on Morphological Inflection and in the 2023 SIGMORPHON Shared Task on Interlinear Glossing. We propose methods to enrich predictions from neural models with discrete, i.e. interpretable, information.
For morphological inflection, our models learn deterministic mappings from subsets of source lemma characters and morphological tags to individual target characters, which introduces interpretability. For interlinear glossing, our models learn a shallow morpheme segmentation in an unsupervised way jointly with predicting glossing lines. Estimated segmentation may be useful when no ground-truth segmentation is available. As both methods introduce discreteness into neural models, our technical contribution is to show that straight-through gradient estimators are effective to train hard attention models. + 2023.sigmorphon-1.17 + girrbach-2023-tu + + + The <fixed-case>BGU</fixed-case>-<fixed-case>M</fixed-case>e<fixed-case>L</fixed-case>e<fixed-case>L</fixed-case> System for the <fixed-case>SIGMORPHON</fixed-case> 2023 Shared Task on Morphological Inflection + GalAstrachBen Gurion University + YuvalPinterBen Gurion University + 166-170 + This paper presents the submission by the MeLeL team to the SIGMORPHON–UniMorph Shared Task on Typologically Diverse and Acquisition-Inspired Morphological Inflection Generation Part 3: Models of Acquisition of Inflectional Noun Morphology in Polish, Estonian, and Finnish. This task requires us to produce the word form given a lemma and a grammatical case, while trying to produce the same error-rate as in children. We approach this task with a reduced-size character-based transformer model, multilingual training and an upsampling method to introduce bias. + 2023.sigmorphon-1.18 + astrach-pinter-2023-bgu + + + Tü-<fixed-case>CL</fixed-case> at <fixed-case>SIGMORPHON</fixed-case> 2023: Straight-Through Gradient Estimation for Hard Attention + LeanderGirrbachUniversity of Tübingen + 171-185 + This paper describes our systems participating in the 2023 SIGMORPHON Shared Task on Morphological Inflection and in the 2023 SIGMORPHON Shared Task on Interlinear Glossing. We propose methods to enrich predictions from neural models with discrete, i.e. interpretable, information. For morphological inflection, our models learn deterministic mappings from subsets of source lemma characters and morphological tags to individual target characters, which introduces interpretability. For interlinear glossing, our models learn a shallow morpheme segmentation in an unsupervised way jointly with predicting glossing lines. Estimated segmentation may be useful when no ground-truth segmentation is available. As both methods introduce discreteness into neural models, our technical contribution is to show that straight-through gradient estimators are effective to train hard attention models. + 2023.sigmorphon-1.19 + girrbach-2023-tu-cl + + + Findings of the <fixed-case>SIGMORPHON</fixed-case> 2023 Shared Task on Interlinear Glossing + MichaelGinnUniversity of Colorado Boulder + SarahMoellerUniversity of Florida + AlexisPalmerUniversity of Colorado Boulder + AnnaStaceyUniversity of British Columbia + GarrettNicolaiUniversity of British Columbia + MansHuldenUniversity of Colorado Boulder + MiikkaSilfverbergUniversity of British Columbia + 186-201 + This paper presents the findings of the SIGMORPHON 2023 Shared Task on Interlinear Glossing. This first iteration of the shared task explores glossing of a set of six typologically diverse languages: Arapaho, Gitksan, Lezgi, Natügu, Tsez and Uspanteko. The shared task encompasses two tracks: a resource-scarce closed track and an open track, where participants are allowed to utilize external data resources. 
Five teams participated in the shared task. The winning team Tü-CL achieved a 23.99%-point improvement over a baseline RoBERTa system in the closed track and a 17.42%-point improvement in the open track. + 2023.sigmorphon-1.20 + ginn-etal-2023-findings + + + <fixed-case>LISN</fixed-case> @ <fixed-case>SIGMORPHON</fixed-case> 2023 Shared Task on Interlinear Glossing + ShuOkabeLISN/CNRS, Université Paris-Saclay + FrançoisYvonISIR CNRS & Sorbonne Université + 202-208 + This paper describes LISN’s submission to the second track (open track) of the shared task on Interlinear Glossing for SIGMORPHON 2023. Our systems are based on Lost, a variation of linear Conditional Random Fields initially developed as a probabilistic translation model and then adapted to the glossing task. This model allows us to handle one of the main challenges posed by glossing, i.e. the fact that the list of potential labels for lexical morphemes is not fixed in advance and needs to be extended dynamically when labelling units are not seen in training. In such situations, we show how to make use of candidate lexical glosses found in the translation and discuss how such extension affects the training and inference procedures. The resulting automatic glossing systems prove to yield very competitive results, especially in low-resource settings. + 2023.sigmorphon-1.21 + okabe-yvon-2023-lisn + + + <fixed-case>S</fixed-case>ig<fixed-case>M</fixed-case>ore<fixed-case>F</fixed-case>un Submission to the <fixed-case>SIGMORPHON</fixed-case> Shared Task on Interlinear Glossing + TaiqiHeCarnegie Mellon University + LindiaTjuatjaCarnegie Mellon University + NathanielRobinsonCarnegie Mellon University + ShinjiWatanabeCarnegie Mellon University + David R.MortensenLanguage Technologies Institute, Carnegie Mellon University + GrahamNeubigCarnegie Mellon University + LoriLevinCarnegie Mellon University + 209-216 + In our submission to the SIGMORPHON 2023 Shared Task on interlinear glossing (IGT), we explore approaches to data augmentation and modeling across seven low-resource languages. For data augmentation, we explore two approaches: creating artificial data from the provided training data and utilizing existing IGT resources in other languages. On the modeling side, we test an enhanced version of the provided token classification baseline as well as a pretrained multilingual seq2seq model. Additionally, we apply post-correction using a dictionary for Gitksan, the language with the smallest amount of data. We find that our token classification models are the best performing, with the highest word-level accuracy for Arapaho and highest morpheme-level accuracy for Gitksan out of all submissions. We also show that data augmentation is an effective strategy, though applying artificial data pretraining has very different effects across both models tested. + 2023.sigmorphon-1.22 + he-etal-2023-sigmorefun + + + An Ensembled Encoder-Decoder System for Interlinear Glossed Text + EdithCoatesUBC Mathematics + 217-221 + This paper presents my submission to Track 1 of the 2023 SIGMORPHON shared task on interlinear glossed text (IGT). There are a wide amount of techniques for building and training IGT models (see Moeller and Hulden, 2018; McMillan-Major, 2020; Zhao et al., 2020). I describe my ensembled sequence-to-sequence approach, perform experiments, and share my submission’s test-set accuracy. I also discuss future areas of research in low-resource token classification methods for IGT.
+ 2023.sigmorphon-1.23 + coates-2023-ensembled + + + Glossy Bytes: Neural Glossing using Subword Encoding + ZiggyCrossUniversity of British Columbia + MichelleYunUniversity of British Columbia + AnanyaApparajuUniversity of British Columbia + JataMacCabeUniversity of British Columbia + GarrettNicolaiUniversity of British Columbia + MiikkaSilfverbergUniversity of British Columbia + 222-229 + This paper presents several different neural subword modelling based approaches to interlinear glossing for seven under-resourced languages as a part of the 2023 SIGMORPHON shared task on interlinear glossing. We experiment with various augmentation and tokenization strategies for both the open and closed tracks of data. We found that while byte-level models may perform well for greater amounts of data, character based approaches remain competitive in their performance in lower resource settings. + 2023.sigmorphon-1.24 + cross-etal-2023-glossy + + + The <fixed-case>SIGMORPHON</fixed-case> 2022 Shared Task on Cross-lingual and Low-Resource Grapheme-to-Phoneme Conversion + Arya D.McCarthyJohns Hopkins University + Jackson L.Lee + AlexandraDeLuciaJohns Hopkins University + TravisBartleyCity University of New York + MilindAgarwalGeorge Mason University + Lucas F.E.AshbyCity University of New York + LucaDel SignoreCity University of New York + CameronGibsonCity University of New York + ReubenRaffCity University of New York + WinstonWuUniversity of Michigan + 230-238 + Grapheme-to-phoneme conversion is an important component in many speech technologies, but until recently there were no multilingual benchmarks for this task. The third iteration of the SIGMORPHON shared task on multilingual grapheme-to-phoneme conversion features many improvements from the previous year’s task (Ashby et al., 2021), including additional languages, three subtasks varying the amount of available resources, extensive quality assurance procedures, and automated error analyses. Three teams submitted a total of fifteen systems, at best achieving relative reductions of word error rate of 14% in the crosslingual subtask and 14% in the very-low resource subtask. The generally consistent result is that cross-lingual transfer substantially helps grapheme-to-phoneme modeling, but not to the same degree as in-language examples. + 2023.sigmorphon-1.27 + mccarthy-etal-2023-sigmorphon + + + <fixed-case>SIGMORPHON</fixed-case> 2022 Shared Task on Grapheme-to-Phoneme Conversion Submission Description: Sequence Labelling for <fixed-case>G</fixed-case>2<fixed-case>P</fixed-case> + LeanderGirrbachThe University of Tübingen + 239-244 + This paper describes our participation in the Third SIGMORPHON Shared Task on Grapheme-to-Phoneme Conversion (Low-Resource and Cross-Lingual) (McCarthy et al.,2022). Our models rely on different sequence labelling methods. The main model predicts multiple phonemes from each grapheme and is trained using CTC loss (Graves et al., 2006). We find that sequence labelling methods yield worse performance than the baseline when enough data is available, but can still be used when very little data is available. Furthermore, we demonstrate that alignments learned by the sequence labelling models can be easily inspected. 
+ 2023.sigmorphon-1.28 + girrbach-2023-sigmorphon + + + Low-resource grapheme-to-phoneme mapping with phonetically-conditioned transfer + MichaelHammondThe University of Arizona + 245-248 + In this paper we explore a very simple non-neural approach to mapping orthography to phonetic transcription in a low-resource context with transfer data from a related language. We start from a baseline system and focus our efforts on data augmentation. We make three principal moves. First, we start with an HMM-based system (Novak et al., 2012). Second, we augment our basic system by recombining legal substrings in restricted fashion (Ryan and Hulden, 2020). Finally, we limit our transfer data by only using training pairs where the phonetic form shares all bigrams with the target language. + 2023.sigmorphon-1.29 + hammond-2023-low + + + A future for universal grapheme-phoneme transduction modeling with neuralized finite-state transducers + Chu-ChengLinJohns Hopkins University + 249-249 + We propose a universal grapheme-phoneme transduction model using neuralized finite-state transducers. Many computational models of grapheme-phoneme transduction nowadays are based on the (autoregressive) sequence-to-sequence string transduction paradigm. While such models have achieved state-of-the-art performance, they suffer from theoretical limitations of autoregressive models. On the other hand, neuralized finite-state transducers (NFSTs) have shown promising results on various string transduction tasks. NFSTs can be seen as a generalization of weighted finite-state transducers (WFSTs), and can be seen as pairs of a featurized finite-state machine (‘marked finite-state transducer’ or MFST in NFST terminology), and a string scoring function. Instead of taking a product of local contextual feature weights on FST arcs, NFSTs can employ arbitrary scoring functions to weight global contextual features of a string transduction, and therefore break the Markov property. Furthermore, NFSTs can be formally shown to be more expressive than (autoregressive) seq2seq models. Empirically, joint grapheme-phoneme transduction NFSTs have consistently outperformed vanilla seq2seq models on grapheme-to-phoneme and phoneme-to-grapheme transduction tasks for English. Furthermore, they provide interpretable aligned string transductions, thanks to their finite-state machine component. In this talk, we propose a multilingual extension of the joint grapheme-phoneme NFST. We achieve this goal by modeling typological and phylogenetic features of languages and scripts as optional latent variables using a finite-state machine. The result is a versatile grapheme-phoneme transduction model: in addition to standard monolingual and multilingual transduction, the proposed multilingual NFST can also be used in various controlled generation scenarios, such as phoneme-to-grapheme transduction of an unseen language-script pair. We also plan to release an NFST software package. + 2023.sigmorphon-1.30 + lin-2023-future + + + Fine-tuning m<fixed-case>SLAM</fixed-case> for the <fixed-case>SIGMORPHON</fixed-case> 2022 Shared Task on Grapheme-to-Phoneme Conversion + DanGarretteGoogle Research + 250-250 + Grapheme-to-phoneme (G2P) conversion is a task that is inherently related to both written and spoken language. Therefore, our submission to the G2P shared task builds off of mSLAM (Bapna et al., 2022), a 600M parameter encoder model pretrained simultaneously on text from 101 languages and speech from 51 languages.
For fine-tuning a G2P model, we combined mSLAM’s text encoder, which uses characters as its input tokens, with an uninitialized single-layer RNN-T decoder (Graves, 2012) whose vocabulary is the set of all 381 phonemes appearing in the shared task data. We took an explicitly multilingual approach to modeling the G2P tasks, fine-tuning and evaluating a single model that covered all the languages in each task, and adding language codes as prefixes to the input strings as a means of specifying the language of each example. Our models perform well in the shared task’s “high” setting (in which they were trained on 1,000 words from each language), though they do poorly in the “low” task setting (training on only 100 words from each language). Our models also perform reasonably in the “mixed” setting (training on 100 words in the target language and 1000 words in a related language), hinting that mSLAM’s multilingual pretraining may be enabling useful cross-lingual sharing. + 2023.sigmorphon-1.31 + garrette-2023-fine + +
+
diff --git a/data/xml/2023.sustainlp.xml b/data/xml/2023.sustainlp.xml new file mode 100644 index 0000000000..77925ede03 --- /dev/null +++ b/data/xml/2023.sustainlp.xml @@ -0,0 +1,248 @@ + + + + + Proceedings of The Fourth Workshop on Simple and Efficient Natural Language Processing (SustaiNLP) + NafiseSadat Moosavi + IrynaGurevych + YufangHou + GyuwanKim + Young JinKim + TalSchuster + AmeetaAgrawal + Association for Computational Linguistics +
Toronto, Canada (Hybrid)
+ July + 2023 + 2023.sustainlp-1 + sustainlp + + + 2023.sustainlp-1.0 + sustainlp-2023-simple + + + <fixed-case>K</fixed-case>wik<fixed-case>B</fixed-case>ucks: Correlation Clustering with Cheap-Weak and Expensive-Strong Signals + SandeepSilwalMIT + SaraAhmadianGoogle Research + AndrewNystromGoogle AI + AndrewMccallumUMass Amherst + DeepakRamachandranGoogle Research + MehranKazemiGoogle Research + 1-31 + 2023.sustainlp-1.1 + silwal-etal-2023-kwikbucks + + + Semantic-Oriented Unlabeled Priming for Large-Scale Language Models + YanchenLiuHarvard University + TimoSchickMeta AI + HinrichSchützeCenter for Information and Language Processing, University of Munich + 32-38 + 2023.sustainlp-1.2 + liu-etal-2023-semantic + + + o<fixed-case>BERT</fixed-case>a: Improving Sparse Transfer Learning via improved initialization, distillation, and pruning regimes + DanielCamposUniversity of Illinois Urbana Champaign + AlexandreMarquesNeural Magic + MarkKurtzNeural Magic + ChengXiang ZhaiUniversity of Illinois Urbana Champaign + 39-58 + 2023.sustainlp-1.3 + campos-etal-2023-oberta + + + Quick Dense Retrievers Consume <fixed-case>KALE</fixed-case>: Post Training <fixed-case>K</fixed-case>ullback<fixed-case>L</fixed-case>eibler Alignment of Embeddings for Asymmetrical dual encoders + DanielCamposUniversity of Illinois Urbana Champaign + AlessandroMagnaniWalmart Labs + ChengxiangZhaiUniversity of Illinois Urbana Champaign + 59-77 + 2023.sustainlp-1.4 + campos-etal-2023-quick + + + Lessons on Parameter Sharing across Layers in Transformers + ShoTakaseLINE Corporation + ShunKiyonoLINE Corporation + 78-90 + 2023.sustainlp-1.5 + takase-kiyono-2023-lessons + + + To Asymmetry and Beyond: Structured Pruning of Sequence to Sequence Models for Improved Inference Efficiency + DanielCamposUniversity of Illinois Urbana Champaign + ChengxiangZhaiUniversity of Illinois Urbana Champaign + 91-109 + 2023.sustainlp-1.6 + campos-zhai-2023-asymmetry + + + Small is the New Big: Pre-finetuned compact models are better for Asynchronous Active Learning + DantongLiuAmazon + KaushikPavaniAmazon + SunnyDasguptaAmazon + 110-120 + 2023.sustainlp-1.7 + liu-etal-2023-small + + + <fixed-case>ADEPT</fixed-case>: Adapter-based Efficient Prompt Tuning Approach for Language Models + AdityaShahVirginia Tech + SurendrabikramThapaVirginia Tech + AneeshJainVirginia Tech + LifuHuangVirginia Tech + 121-128 + 2023.sustainlp-1.8 + shah-etal-2023-adept + + + <fixed-case>NLU</fixed-case> on Data Diets: Dynamic Data Subset Selection for <fixed-case>NLP</fixed-case> Classification Tasks + Jean-michelAttenduNuance Communications + Jean-philippeCorbeilNuance Communications + 129-146 + 2023.sustainlp-1.9 + attendu-corbeil-2023-nlu + + + On the Interactions of Structural Constraints and Data Resources for Structured Prediction + ZhisongZhangCarnegie Mellon University + EmmaStrubellCarnegie Mellon University + EduardHovyUniversity of Melbourne + 147-157 + 2023.sustainlp-1.10 + zhang-etal-2023-interactions + + + Can we Pretrain a <fixed-case>S</fixed-case>ot<fixed-case>A</fixed-case> Legal Language Model on a Budget From Scratch? + JoelNiklausUniversity of Bern + DanieleGiofreThomson Reuters + 158-182 + 2023.sustainlp-1.11 + niklaus-giofre-2023-pretrain + + + Is a Video worth n × n Images? 
A Highly Efficient Approach to Transformer-based Video Question Answering + ChenyangLyuDublin City University + TianboJiNantong University + YvetteGrahamADAPT, Trinity College Dublin + JenniferFosterDublin City University + 183-189 + 2023.sustainlp-1.12 + lyu-etal-2023-video + + + How to Unleash the Power of Large Language Models for Few-shot Relation Extraction? + XinXuZhejiang University + YuqiZhuZhejiang University + XiaohanWangZhejiang University + NingyuZhangZhejiang University + 190-200 + 2023.sustainlp-1.13 + xu-etal-2023-unleash + + + Prompting language models improves performance in imbalanced setting + JayMohtaAmazon + 201-211 + 2023.sustainlp-1.14 + mohta-2023-prompting + + + <fixed-case>KGQA</fixed-case> Without Retraining + NickMckennaUniversity of Edinburgh, School of Informatics + PriyankaSenAmazon + 212-218 + 2023.sustainlp-1.15 + mckenna-sen-2023-kgqa + + + <fixed-case>MANER</fixed-case>: Mask Augmented Named Entity Recognition for Extreme Low-Resource Languages + ShashankSonkarRice University + ZichaoWangRice University + RichardBaraniukRice University + 219-226 + 2023.sustainlp-1.16 + sonkar-etal-2023-maner + + + Efficient and Interpretable Compressive Text Summarisation with Unsupervised Dual-Agent Reinforcement Learning + PeggyTangThe University of Sydney + JunbinGaoThe University of Sydney + LeiZhangInternational Digital Economy Academy (IDEA) + ZhiyongWangThe University of Sydney + 227-238 + 2023.sustainlp-1.17 + tang-etal-2023-efficient + + + Exploring the Effect of Frequency Resolution in <fixed-case>FN</fixed-case>et + GregorySzumelDuke University + GhazalKhalighinejadDuke University + RickardStureborgDuke University + SamWisemanDuke University + 239-244 + 2023.sustainlp-1.18 + szumel-etal-2023-exploring + + + Towards Adaptable and Interactive Image Captioning with Data Augmentation and Episodic Memory + AlikiAnagnostopoulouCarl von Ossietzky University of Oldenburg / German Research Center for Artificial Intelligence + MareikeHartmannSaarland University / German Research Center for Artificial Intelligence + DanielSonntagCarl von Ossietzky University of Oldenburg / German Research Center for Artificial Intelligence + 245-256 + 2023.sustainlp-1.19 + anagnostopoulou-etal-2023-towards + + + Corpus Complexity Matters in Pretraining Language Models + AmeetaAgrawalPortland State University + SureshSinghPortland State University + 257-263 + 2023.sustainlp-1.20 + agrawal-singh-2023-corpus + + + <fixed-case>P</fixed-case>ersona<fixed-case>PKT</fixed-case>: Building Personalized Dialogue Agents via Parameter-efficient Knowledge Transfer + XuHanUniversity of Colorado Boulder + BinGuoAmazon.com + YoonJungAmazon + BenjaminYaoAmazon + YuZhangAmazon.com + XiaohuLiuAmazon + ChenleiGuoAmazon + 264-273 + 2023.sustainlp-1.21 + han-etal-2023-personapkt + + + Small Character Models Match Large Word Models for Autocomplete Under Memory Constraints + GaneshJawaharThe University of British Columbia + SubhabrataMukherjeeMicrosoft Research + DebadeeptaDeyMicrosoft Research + MuhammadAbdul-mageedThe University of British Columbia + LaksLakshmanan, V.s.UBC + CaioMendesMicrosoft + GustavoDe RosaMicrosoft Research + ShitalShahMicrosoft Research + 274-289 + 2023.sustainlp-1.22 + jawahar-etal-2023-small + + + Query Encoder Distillation via Embedding Alignment is a Strong Baseline Method to Boost Dense Retriever Online Efficiency + YuxuanWangUniversity of Pennsylvania + LyuHongUniversity of Pennsylvania + 290-298 + 2023.sustainlp-1.23 + wang-hong-2023-query + + + Minimalist Entity Disambiguation 
for Mid-Resource Languages + BennoKruitVU Amsterdam + 299-306 + 2023.sustainlp-1.24 + kruit-2023-minimalist + +
+
diff --git a/data/xml/2023.ws.xml b/data/xml/2023.ws.xml index 8dda476745..74c40991ee 100644 --- a/data/xml/2023.ws.xml +++ b/data/xml/2023.ws.xml @@ -31,6 +31,14 @@ 2023.wnu-1 2023.semeval-1 2023.woah-1 + 2023.cawl-1 + 2023.clinicalnlp-1 + 2023.repl4nlp-1 + 2023.nlrse-1 + 2023.sustainlp-1 + 2023.dialdoc-1 + 2023.sicon-1 + 2023.americasnlp-1 diff --git a/data/xml/D18.xml b/data/xml/D18.xml index 221602f8f1..435d2fad35 100644 --- a/data/xml/D18.xml +++ b/data/xml/D18.xml @@ -2179,6 +2179,7 @@ 10.18653/v1/D18-1160 he-etal-2018-unsupervised jxhe/struct-learning-with-flow + PTB Diagnostic ECG Database Penn Treebank
diff --git a/data/xml/D19.xml b/data/xml/D19.xml index ebbe5639b5..da5316f79a 100644 --- a/data/xml/D19.xml +++ b/data/xml/D19.xml @@ -5092,6 +5092,7 @@ jiang-etal-2019-improved jiangyingjunn/i-darts CoNLL-2003 + PTB Diagnostic ECG Database Penn Treebank diff --git a/data/xml/N19.xml b/data/xml/N19.xml index 50bf847b51..6c7e9d4d3e 100644 --- a/data/xml/N19.xml +++ b/data/xml/N19.xml @@ -1580,6 +1580,7 @@ kim-etal-2019-unsupervised harvardnlp/urnng Billion Word Benchmark + PTB Diagnostic ECG Database Penn Treebank @@ -1614,6 +1615,7 @@ drozdov-etal-2019-unsupervised-latent diff --git a/data/xml/P19.xml b/data/xml/P19.xml index 130a39712e..f5f3970dbe 100644 --- a/data/xml/P19.xml +++ b/data/xml/P19.xml @@ -3276,6 +3276,7 @@ 10.18653/v1/P19-1228 kim-etal-2019-compound harvardnlp/compound-pcfg + PTB Diagnostic ECG Database Penn Treebank diff --git a/data/xml/W19.xml b/data/xml/W19.xml index c5e337bfd7..4f68816532 100644 --- a/data/xml/W19.xml +++ b/data/xml/W19.xml @@ -2061,6 +2061,8 @@ 10.18653/v1/W19-1803 pavlopoulos-etal-2019-survey nlpaueb/bio_image_caption + IU X-Ray + Peir Gross Revisiting Visual Grounding @@ -14121,7 +14123,6 @@ One of the references was wrong therefore it is corrected to cite the appropriat W19-5945 10.18653/v1/W19-5945 keizer-etal-2019-user - skeizer/madrigal Dialogue Act Classification in Team Communication for Robot Assisted Disaster Response diff --git a/data/yaml/sigs/sigmorphon.yaml b/data/yaml/sigs/sigmorphon.yaml index ed92704442..6792211be0 100644 --- a/data/yaml/sigs/sigmorphon.yaml +++ b/data/yaml/sigs/sigmorphon.yaml @@ -2,6 +2,8 @@ Name: Special Interest Group on Computational Morphology and Phonology (SIGMORPH ShortName: SIGMORPHON URL: https://sigmorphon.github.io/ Meetings: + - 2023: + - 2023.sigmorphon-1 - 2022: - 2022.sigmorphon-1 - 2021: diff --git a/data/yaml/venues/cawl.yaml b/data/yaml/venues/cawl.yaml new file mode 100644 index 0000000000..4adcd21731 --- /dev/null +++ b/data/yaml/venues/cawl.yaml @@ -0,0 +1,2 @@ +acronym: CAWL +name: Workshop on Computation and Written Language (CAWL) diff --git a/data/yaml/venues/nlrse.yaml b/data/yaml/venues/nlrse.yaml new file mode 100644 index 0000000000..d5b63f0d8f --- /dev/null +++ b/data/yaml/venues/nlrse.yaml @@ -0,0 +1,3 @@ +acronym: NLRSE +is_acl: true +name: Workshop on Natural Language Reasoning and Structured Explanations diff --git a/data/yaml/venues/sicon.yaml b/data/yaml/venues/sicon.yaml new file mode 100644 index 0000000000..0df0de3891 --- /dev/null +++ b/data/yaml/venues/sicon.yaml @@ -0,0 +1,2 @@ +acronym: SICon +name: Workshop on Social Influence in Conversations