Merge branch 'master' into more-explicit-xml

acl-org · Jul 12, 2023 · a356539 · a356539
2 parents fdbfb2c + f4c7b7d
commit a356539
Show file tree

Hide file tree

Showing 45 changed files with 33,098 additions and 134 deletions.
diff --git a/bin/find_mismatched_braces.py b/bin/find_mismatched_braces.py
diff --git a/bin/ingest_aclpub2.py b/bin/ingest_aclpub2.py
diff --git a/bin/requirements.txt b/bin/requirements.txt
@@ -4,6 +4,7 @@ citeproc-py-styles
 click
 docopt>=0.6.0
 filetype
+iso-639
 langcodes[data]
 latexcodec>=1.0.7
 lxml>=4.2.0

diff --git a/data/xml/2020.aacl.xml b/data/xml/2020.aacl.xml
@@ -1549,7 +1549,7 @@
       <abstract>We introduce fairseq S2T, a fairseq extension for speech-to-text (S2T) modeling tasks such as end-to-end speech recognition and speech-to-text translation. It follows fairseq’s careful design for scalability and extensibility. We provide end-to-end workflows from data pre-processing, model training to offline (online) inference. We implement state-of-the-art RNN-based as well as Transformer-based models and open-source detailed training recipes. Fairseq’s machine translation models and language models can be seamlessly integrated into S2T workflows for multi-task learning or transfer learning. Fairseq S2T is available at https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text.</abstract>
       <url hash="ba6e2aa3">2020.aacl-demo.6</url>
       <bibkey>wang-etal-2020-fairseq</bibkey>
-      <pwccode url="https://github.com/pytorch/fairseq" additional="true">pytorch/fairseq</pwccode>
+      <pwccode url="https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text" additional="true">pytorch/fairseq</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/librispeech">LibriSpeech</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/must-c">MuST-C</pwcdataset>
     </paper>

diff --git a/data/xml/2020.coling.xml b/data/xml/2020.coling.xml
@@ -6846,7 +6846,7 @@
       <pwcdataset url="https://paperswithcode.com/dataset/winogrande">WinoGrande</pwcdataset>
     </paper>
     <paper id="516">
-      <title>The Indigenous Languages Technology project at <fixed-case>NRC</fixed-case> <fixed-case>C</fixed-case>anada: An empowerment-oriented approach to developing language software</title>
+      <title>The <fixed-case>Indigenous</fixed-case> Languages Technology project at <fixed-case>NRC</fixed-case> <fixed-case>C</fixed-case>anada: An empowerment-oriented approach to developing language software</title>
       <author><first>Roland</first><last>Kuhn</last></author>
       <author><first>Fineen</first><last>Davis</last></author>
       <author><first>Alain</first><last>Désilets</last></author>
@@ -7632,6 +7632,7 @@
       <doi>10.18653/v1/2020.coling-main.573</doi>
       <bibkey>yoshimura-etal-2020-reference</bibkey>
       <pwccode url="https://github.com/kokeman/some" additional="false">kokeman/some</pwccode>
+      <pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/jfleg">JFLEG</pwcdataset>
     </paper>
     <paper id="574">

diff --git a/data/xml/2020.emnlp.xml b/data/xml/2020.emnlp.xml
@@ -5811,6 +5811,7 @@
       <revision id="2" href="2020.emnlp-main.385v2" hash="dd2a9d9a" date="2021-03-26">Modifed author order</revision>
       <video href="https://slideslive.com/38939346"/>
       <bibkey>berg-kirkpatrick-spokoyny-2020-empirical</bibkey>
+      <pwccode url="https://github.com/dspoka/mnm" additional="false">dspoka/mnm</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/drop">DROP</pwcdataset>
     </paper>
     <paper id="386">
@@ -7512,7 +7513,7 @@
       <doi>10.18653/v1/2020.emnlp-main.498</doi>
       <video href="https://slideslive.com/38938695"/>
       <bibkey>garg-ramakrishnan-2020-bae</bibkey>
-      <pwccode url="https://github.com/QData/TextAttack" additional="true">QData/TextAttack</pwccode>
+      <pwccode url="https://github.com/QData/TextAttack/blob/master/textattack/attack_recipes/bae_garg_2019.py" additional="true">QData/TextAttack</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/imdb-binary">IMDB-BINARY</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/mpqa-opinion-corpus">MPQA Opinion Corpus</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/mr">MR</pwcdataset>

diff --git a/data/xml/2020.nuse.xml b/data/xml/2020.nuse.xml
@@ -22,6 +22,7 @@
       <year>2020</year>
       <url hash="9075bb72">2020.nuse-1</url>
       <venue>nuse</venue>
+      <venue>wnu</venue>
     </meta>
     <frontmatter>
       <url hash="16e67b2a">2020.nuse-1.0</url>

diff --git a/data/xml/2021.conll.xml b/data/xml/2021.conll.xml
@@ -137,6 +137,7 @@
       <bibkey>dankers-etal-2021-generalising</bibkey>
       <doi>10.18653/v1/2021.conll-1.8</doi>
       <video href="2021.conll-1.8.mp4"/>
+      <pwccode url="https://github.com/i-machine-think/morphology_and_generalisation" additional="false">i-machine-think/morphology_and_generalisation</pwccode>
     </paper>
     <paper id="9">
       <title>Can Language Models Encode Perceptual Structure Without Grounding? A Case Study in Color</title>

diff --git a/data/xml/2021.emnlp.xml b/data/xml/2021.emnlp.xml
@@ -3753,6 +3753,7 @@
       <bibkey>islam-magnani-2021-end</bibkey>
       <doi>10.18653/v1/2021.emnlp-main.239</doi>
       <video href="2021.emnlp-main.239.mp4"/>
+      <pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
     </paper>
     <paper id="240">
       <title>Augmenting <fixed-case>BERT</fixed-case>-style Models with Predictive Coding to Improve Discourse-level Representations</title>

diff --git a/data/xml/2021.nuse.xml b/data/xml/2021.nuse.xml
@@ -15,6 +15,7 @@
       <year>2021</year>
       <url hash="85c83d62">2021.nuse-1</url>
       <venue>nuse</venue>
+      <venue>wnu</venue>
     </meta>
     <frontmatter>
       <url hash="d9b091ca">2021.nuse-1.0</url>

diff --git a/data/xml/2022.dravidianlangtech.xml b/data/xml/2022.dravidianlangtech.xml
@@ -240,9 +240,9 @@
     </paper>
     <paper id="18">
       <title><fixed-case>PANDAS</fixed-case>@Abusive Comment Detection in <fixed-case>T</fixed-case>amil Code-Mixed Data Using Custom Embeddings with <fixed-case>L</fixed-case>a<fixed-case>BSE</fixed-case></title>
+      <author><first>Gayathri</first><last>G L</last></author>
       <author><first>Krithika</first><last>Swaminathan</last></author>
       <author><first>Divyasri</first><last>K</last></author>
-      <author><first>Gayathri</first><last>G L</last></author>
       <author><first>Thenmozhi</first><last>Durairaj</last></author>
       <author><first>Bharathi</first><last>B</last></author>
       <pages>112-119</pages>

diff --git a/data/xml/2022.lrec.xml b/data/xml/2022.lrec.xml
@@ -6334,7 +6334,7 @@
       <author><first>Anara</first><last>Sandygulova</last></author>
       <pages>4767–4773</pages>
       <abstract>This paper presents a new handwritten dataset, Cyrillic-MNIST, a Cyrillic version of the MNIST dataset, comprising of 121,234 samples of 42 Cyrillic letters. The performance of Cyrillic-MNIST is evaluated using standard deep learning approaches and is compared to the Extended MNIST (EMNIST) dataset. The dataset is available at https://github.com/bolattleubayev/cmnist</abstract>
-      <url hash="36b92170">2022.lrec-1.510</url>
+      <url hash="5039c1b6">2022.lrec-1.510</url>
       <bibkey>tleubayev-etal-2022-cyrillic</bibkey>
       <pwcdataset url="https://paperswithcode.com/dataset/how2sign">How2Sign</pwcdataset>
     </paper>
@@ -7395,6 +7395,7 @@
       <abstract>In grammatical error correction (GEC), automatic evaluation is considered as an important factor for research and development of GEC systems. Previous studies on automatic evaluation have shown that quality estimation models built from datasets with manual evaluation can achieve high performance in automatic evaluation of English GEC. However, quality estimation models have not yet been studied in Japanese, because there are no datasets for constructing quality estimation models. In this study, therefore, we created a quality estimation dataset with manual evaluation to build an automatic evaluation model for Japanese GEC. By building a quality estimation model using this dataset and conducting a meta-evaluation, we verified the usefulness of the quality estimation model for Japanese GEC.</abstract>
       <url hash="14a3b423">2022.lrec-1.596</url>
       <bibkey>suzuki-etal-2022-construction</bibkey>
+      <pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
     </paper>
     <paper id="597">
       <title>Enhanced Distant Supervision with State-Change Information for Relation Extraction</title>

diff --git a/data/xml/2022.naacl.xml b/data/xml/2022.naacl.xml
@@ -2307,7 +2307,7 @@
       <bibkey>changpinyo-etal-2022-may</bibkey>
       <doi>10.18653/v1/2022.naacl-main.142</doi>
       <video href="2022.naacl-main.142.mp4"/>
-      <pwccode url="https://github.com/google-research-datasets/maverics" additional="false">google-research-datasets/maverics</pwccode>
+      <pwccode url="https://github.com/google-research-datasets/maverics" additional="true">google-research-datasets/maverics</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/maverics">MAVERICS</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/coco">COCO</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/coco-qa">COCO-QA</pwcdataset>
@@ -3307,7 +3307,6 @@
       <bibkey>ebert-etal-2022-trajectories</bibkey>
       <doi>10.18653/v1/2022.naacl-main.206</doi>
       <video href="2022.naacl-main.206.mp4"/>
-      <pwccode url="https://github.com/dylanebert/simulated" additional="false">dylanebert/simulated</pwccode>
     </paper>
     <paper id="207">
       <title>Long Context Question Answering via Supervised Contrastive Learning</title>
@@ -5809,6 +5808,7 @@
       <bibkey>si-etal-2022-mining</bibkey>
       <doi>10.18653/v1/2022.naacl-main.356</doi>
       <video href="2022.naacl-main.356.mp4"/>
+      <pwcdataset url="https://paperswithcode.com/dataset/canard">CANARD</pwcdataset>
     </paper>
     <paper id="357">
       <title>Domain-Oriented Prefix-Tuning: Towards Efficient and Generalizable Fine-tuning for Zero-Shot Dialogue Summarization</title>

diff --git a/data/xml/2022.nsurl.xml b/data/xml/2022.nsurl.xml
@@ -77,7 +77,7 @@
       <bibkey>lichouri-2022-empirical-comparison</bibkey>
     </paper>
     <paper id="7">
-      <title><fixed-case>ALRT</fixed-case>: Cutting Edge Tool for Automatic Generation of <fixed-case>A</fixed-case>rabic Lexical</title>
+      <title><fixed-case>ALRT</fixed-case>: Cutting Edge Tool for Automatic Generation of <fixed-case>A</fixed-case>rabic Lexical Recognition Tests</title>
       <author><first>Osama</first><last>Hamed</last></author>
       <author><first>Saeed</first><last>Salah</last></author>
       <author><first>Abed Alhakim</first><last>Freihat</last></author>

diff --git a/data/xml/2022.ws.xml b/data/xml/2022.ws.xml
@@ -67,6 +67,7 @@
       <volume-id>2022.nlp4pi-1</volume-id>
       <volume-id>2022.nlpcss-1</volume-id>
       <volume-id>2022.nlppower-1</volume-id>
+      <volume-id>2022.nsurl-1</volume-id>
       <volume-id>2022.pandl-1</volume-id>
       <volume-id>2022.privatenlp-1</volume-id>
       <volume-id>2022.repl4nlp-1</volume-id>