Skip to content

Commit

Permalink
Merge branch 'master' into more-explicit-xml
Browse files Browse the repository at this point in the history
  • Loading branch information
mbollmann committed Jul 12, 2023
2 parents fdbfb2c + f4c7b7d commit a356539
Show file tree
Hide file tree
Showing 45 changed files with 33,098 additions and 134 deletions.
Empty file modified bin/find_mismatched_braces.py
100644 → 100755
Empty file.
308 changes: 209 additions & 99 deletions bin/ingest_aclpub2.py
100644 → 100755

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions bin/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ citeproc-py-styles
click
docopt>=0.6.0
filetype
iso-639
langcodes[data]
latexcodec>=1.0.7
lxml>=4.2.0
Expand Down
2 changes: 1 addition & 1 deletion data/xml/2020.aacl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1549,7 +1549,7 @@
<abstract>We introduce fairseq S2T, a fairseq extension for speech-to-text (S2T) modeling tasks such as end-to-end speech recognition and speech-to-text translation. It follows fairseq’s careful design for scalability and extensibility. We provide end-to-end workflows from data pre-processing, model training to offline (online) inference. We implement state-of-the-art RNN-based as well as Transformer-based models and open-source detailed training recipes. Fairseq’s machine translation models and language models can be seamlessly integrated into S2T workflows for multi-task learning or transfer learning. Fairseq S2T is available at https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text.</abstract>
<url hash="ba6e2aa3">2020.aacl-demo.6</url>
<bibkey>wang-etal-2020-fairseq</bibkey>
<pwccode url="https://github.com/pytorch/fairseq" additional="true">pytorch/fairseq</pwccode>
<pwccode url="https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text" additional="true">pytorch/fairseq</pwccode>
<pwcdataset url="https://paperswithcode.com/dataset/librispeech">LibriSpeech</pwcdataset>
<pwcdataset url="https://paperswithcode.com/dataset/must-c">MuST-C</pwcdataset>
</paper>
Expand Down
3 changes: 2 additions & 1 deletion data/xml/2020.coling.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6846,7 +6846,7 @@
<pwcdataset url="https://paperswithcode.com/dataset/winogrande">WinoGrande</pwcdataset>
</paper>
<paper id="516">
<title>The Indigenous Languages Technology project at <fixed-case>NRC</fixed-case> <fixed-case>C</fixed-case>anada: An empowerment-oriented approach to developing language software</title>
<title>The <fixed-case>Indigenous</fixed-case> Languages Technology project at <fixed-case>NRC</fixed-case> <fixed-case>C</fixed-case>anada: An empowerment-oriented approach to developing language software</title>
<author><first>Roland</first><last>Kuhn</last></author>
<author><first>Fineen</first><last>Davis</last></author>
<author><first>Alain</first><last>Désilets</last></author>
Expand Down Expand Up @@ -7632,6 +7632,7 @@
<doi>10.18653/v1/2020.coling-main.573</doi>
<bibkey>yoshimura-etal-2020-reference</bibkey>
<pwccode url="https://github.com/kokeman/some" additional="false">kokeman/some</pwccode>
<pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
<pwcdataset url="https://paperswithcode.com/dataset/jfleg">JFLEG</pwcdataset>
</paper>
<paper id="574">
Expand Down
3 changes: 2 additions & 1 deletion data/xml/2020.emnlp.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5811,6 +5811,7 @@
<revision id="2" href="2020.emnlp-main.385v2" hash="dd2a9d9a" date="2021-03-26">Modified author order</revision>
<video href="https://slideslive.com/38939346"/>
<bibkey>berg-kirkpatrick-spokoyny-2020-empirical</bibkey>
<pwccode url="https://github.com/dspoka/mnm" additional="false">dspoka/mnm</pwccode>
<pwcdataset url="https://paperswithcode.com/dataset/drop">DROP</pwcdataset>
</paper>
<paper id="386">
Expand Down Expand Up @@ -7512,7 +7513,7 @@
<doi>10.18653/v1/2020.emnlp-main.498</doi>
<video href="https://slideslive.com/38938695"/>
<bibkey>garg-ramakrishnan-2020-bae</bibkey>
<pwccode url="https://github.com/QData/TextAttack" additional="true">QData/TextAttack</pwccode>
<pwccode url="https://github.com/QData/TextAttack/blob/master/textattack/attack_recipes/bae_garg_2019.py" additional="true">QData/TextAttack</pwccode>
<pwcdataset url="https://paperswithcode.com/dataset/imdb-binary">IMDB-BINARY</pwcdataset>
<pwcdataset url="https://paperswithcode.com/dataset/mpqa-opinion-corpus">MPQA Opinion Corpus</pwcdataset>
<pwcdataset url="https://paperswithcode.com/dataset/mr">MR</pwcdataset>
Expand Down
1 change: 1 addition & 0 deletions data/xml/2020.nuse.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
<year>2020</year>
<url hash="9075bb72">2020.nuse-1</url>
<venue>nuse</venue>
<venue>wnu</venue>
</meta>
<frontmatter>
<url hash="16e67b2a">2020.nuse-1.0</url>
Expand Down
1 change: 1 addition & 0 deletions data/xml/2021.conll.xml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@
<bibkey>dankers-etal-2021-generalising</bibkey>
<doi>10.18653/v1/2021.conll-1.8</doi>
<video href="2021.conll-1.8.mp4"/>
<pwccode url="https://github.com/i-machine-think/morphology_and_generalisation" additional="false">i-machine-think/morphology_and_generalisation</pwccode>
</paper>
<paper id="9">
<title>Can Language Models Encode Perceptual Structure Without Grounding? A Case Study in Color</title>
Expand Down
1 change: 1 addition & 0 deletions data/xml/2021.emnlp.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3753,6 +3753,7 @@
<bibkey>islam-magnani-2021-end</bibkey>
<doi>10.18653/v1/2021.emnlp-main.239</doi>
<video href="2021.emnlp-main.239.mp4"/>
<pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
</paper>
<paper id="240">
<title>Augmenting <fixed-case>BERT</fixed-case>-style Models with Predictive Coding to Improve Discourse-level Representations</title>
Expand Down
1 change: 1 addition & 0 deletions data/xml/2021.nuse.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<year>2021</year>
<url hash="85c83d62">2021.nuse-1</url>
<venue>nuse</venue>
<venue>wnu</venue>
</meta>
<frontmatter>
<url hash="d9b091ca">2021.nuse-1.0</url>
Expand Down
2 changes: 1 addition & 1 deletion data/xml/2022.dravidianlangtech.xml
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@
</paper>
<paper id="18">
<title><fixed-case>PANDAS</fixed-case>@Abusive Comment Detection in <fixed-case>T</fixed-case>amil Code-Mixed Data Using Custom Embeddings with <fixed-case>L</fixed-case>a<fixed-case>BSE</fixed-case></title>
<author><first>Gayathri</first><last>G L</last></author>
<author><first>Krithika</first><last>Swaminathan</last></author>
<author><first>Divyasri</first><last>K</last></author>
<author><first>Gayathri</first><last>G L</last></author>
<author><first>Thenmozhi</first><last>Durairaj</last></author>
<author><first>Bharathi</first><last>B</last></author>
<pages>112-119</pages>
Expand Down
3 changes: 2 additions & 1 deletion data/xml/2022.lrec.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6334,7 +6334,7 @@
<author><first>Anara</first><last>Sandygulova</last></author>
<pages>4767–4773</pages>
<abstract>This paper presents a new handwritten dataset, Cyrillic-MNIST, a Cyrillic version of the MNIST dataset, comprising of 121,234 samples of 42 Cyrillic letters. The performance of Cyrillic-MNIST is evaluated using standard deep learning approaches and is compared to the Extended MNIST (EMNIST) dataset. The dataset is available at https://github.com/bolattleubayev/cmnist</abstract>
<url hash="36b92170">2022.lrec-1.510</url>
<url hash="5039c1b6">2022.lrec-1.510</url>
<bibkey>tleubayev-etal-2022-cyrillic</bibkey>
<pwcdataset url="https://paperswithcode.com/dataset/how2sign">How2Sign</pwcdataset>
</paper>
Expand Down Expand Up @@ -7395,6 +7395,7 @@
<abstract>In grammatical error correction (GEC), automatic evaluation is considered as an important factor for research and development of GEC systems. Previous studies on automatic evaluation have shown that quality estimation models built from datasets with manual evaluation can achieve high performance in automatic evaluation of English GEC. However, quality estimation models have not yet been studied in Japanese, because there are no datasets for constructing quality estimation models. In this study, therefore, we created a quality estimation dataset with manual evaluation to build an automatic evaluation model for Japanese GEC. By building a quality estimation model using this dataset and conducting a meta-evaluation, we verified the usefulness of the quality estimation model for Japanese GEC.</abstract>
<url hash="14a3b423">2022.lrec-1.596</url>
<bibkey>suzuki-etal-2022-construction</bibkey>
<pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
</paper>
<paper id="597">
<title>Enhanced Distant Supervision with State-Change Information for Relation Extraction</title>
Expand Down
4 changes: 2 additions & 2 deletions data/xml/2022.naacl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2307,7 +2307,7 @@
<bibkey>changpinyo-etal-2022-may</bibkey>
<doi>10.18653/v1/2022.naacl-main.142</doi>
<video href="2022.naacl-main.142.mp4"/>
<pwccode url="https://github.com/google-research-datasets/maverics" additional="false">google-research-datasets/maverics</pwccode>
<pwccode url="https://github.com/google-research-datasets/maverics" additional="true">google-research-datasets/maverics</pwccode>
<pwcdataset url="https://paperswithcode.com/dataset/maverics">MAVERICS</pwcdataset>
<pwcdataset url="https://paperswithcode.com/dataset/coco">COCO</pwcdataset>
<pwcdataset url="https://paperswithcode.com/dataset/coco-qa">COCO-QA</pwcdataset>
Expand Down Expand Up @@ -3307,7 +3307,6 @@
<bibkey>ebert-etal-2022-trajectories</bibkey>
<doi>10.18653/v1/2022.naacl-main.206</doi>
<video href="2022.naacl-main.206.mp4"/>
<pwccode url="https://github.com/dylanebert/simulated" additional="false">dylanebert/simulated</pwccode>
</paper>
<paper id="207">
<title>Long Context Question Answering via Supervised Contrastive Learning</title>
Expand Down Expand Up @@ -5809,6 +5808,7 @@
<bibkey>si-etal-2022-mining</bibkey>
<doi>10.18653/v1/2022.naacl-main.356</doi>
<video href="2022.naacl-main.356.mp4"/>
<pwcdataset url="https://paperswithcode.com/dataset/canard">CANARD</pwcdataset>
</paper>
<paper id="357">
<title>Domain-Oriented Prefix-Tuning: Towards Efficient and Generalizable Fine-tuning for Zero-Shot Dialogue Summarization</title>
Expand Down
2 changes: 1 addition & 1 deletion data/xml/2022.nsurl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
<bibkey>lichouri-2022-empirical-comparison</bibkey>
</paper>
<paper id="7">
<title><fixed-case>ALRT</fixed-case>: Cutting Edge Tool for Automatic Generation of <fixed-case>A</fixed-case>rabic Lexical</title>
<title><fixed-case>ALRT</fixed-case>: Cutting Edge Tool for Automatic Generation of <fixed-case>A</fixed-case>rabic Lexical Recognition Tests</title>
<author><first>Osama</first><last>Hamed</last></author>
<author><first>Saeed</first><last>Salah</last></author>
<author><first>Abed Alhakim</first><last>Freihat</last></author>
Expand Down
1 change: 1 addition & 0 deletions data/xml/2022.ws.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
<volume-id>2022.nlp4pi-1</volume-id>
<volume-id>2022.nlpcss-1</volume-id>
<volume-id>2022.nlppower-1</volume-id>
<volume-id>2022.nsurl-1</volume-id>
<volume-id>2022.pandl-1</volume-id>
<volume-id>2022.privatenlp-1</volume-id>
<volume-id>2022.repl4nlp-1</volume-id>
Expand Down
Loading

0 comments on commit a356539

Please sign in to comment.