Merge remote-tracking branch 'origin/master' into siglex

acl-org · Jul 11, 2023 · 6e4be31 · 6e4be31
2 parents ad0908f + 3fa8913
commit 6e4be31
Show file tree

Hide file tree

Showing 51 changed files with 33,186 additions and 218 deletions.
diff --git a/bin/find_mismatched_braces.py b/bin/find_mismatched_braces.py
diff --git a/bin/ingest_aclpub2.py b/bin/ingest_aclpub2.py
diff --git a/bin/requirements.txt b/bin/requirements.txt
@@ -4,6 +4,7 @@ citeproc-py-styles
 click
 docopt>=0.6.0
 filetype
+iso-639
 langcodes[data]
 latexcodec>=1.0.7
 lxml>=4.2.0

diff --git a/data/xml/2020.aacl.xml b/data/xml/2020.aacl.xml
@@ -1549,7 +1549,7 @@
       <abstract>We introduce fairseq S2T, a fairseq extension for speech-to-text (S2T) modeling tasks such as end-to-end speech recognition and speech-to-text translation. It follows fairseq’s careful design for scalability and extensibility. We provide end-to-end workflows from data pre-processing, model training to offline (online) inference. We implement state-of-the-art RNN-based as well as Transformer-based models and open-source detailed training recipes. Fairseq’s machine translation models and language models can be seamlessly integrated into S2T workflows for multi-task learning or transfer learning. Fairseq S2T is available at https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text.</abstract>
       <url hash="ba6e2aa3">2020.aacl-demo.6</url>
       <bibkey>wang-etal-2020-fairseq</bibkey>
-      <pwccode url="https://github.com/pytorch/fairseq" additional="true">pytorch/fairseq</pwccode>
+      <pwccode url="https://github.com/pytorch/fairseq/tree/master/examples/speech_to_text" additional="true">pytorch/fairseq</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/librispeech">LibriSpeech</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/must-c">MuST-C</pwcdataset>
     </paper>

diff --git a/data/xml/2020.coling.xml b/data/xml/2020.coling.xml
@@ -7632,6 +7632,7 @@
       <doi>10.18653/v1/2020.coling-main.573</doi>
       <bibkey>yoshimura-etal-2020-reference</bibkey>
       <pwccode url="https://github.com/kokeman/some" additional="false">kokeman/some</pwccode>
+      <pwcdataset url="https://paperswithcode.com/dataset/gug">GUG</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/jfleg">JFLEG</pwcdataset>
     </paper>
     <paper id="574">

diff --git a/data/xml/2020.emnlp.xml b/data/xml/2020.emnlp.xml
@@ -5811,6 +5811,7 @@
       <revision id="2" href="2020.emnlp-main.385v2" hash="dd2a9d9a" date="2021-03-26">Modifed author order</revision>
       <video href="https://slideslive.com/38939346"/>
       <bibkey>berg-kirkpatrick-spokoyny-2020-empirical</bibkey>
+      <pwccode url="https://github.com/dspoka/mnm" additional="false">dspoka/mnm</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/drop">DROP</pwcdataset>
     </paper>
     <paper id="386">
@@ -5910,6 +5911,7 @@
       <doi>10.18653/v1/2020.emnlp-main.392</doi>
       <video href="https://slideslive.com/38939296"/>
       <bibkey>drozdov-etal-2020-unsupervised</bibkey>
+      <pwcdataset url="https://paperswithcode.com/dataset/penn-treebank">Penn Treebank</pwcdataset>
     </paper>
     <paper id="393">
       <title>Utility is in the Eye of the User: A Critique of <fixed-case>NLP</fixed-case> Leaderboards</title>
@@ -7511,7 +7513,7 @@
       <doi>10.18653/v1/2020.emnlp-main.498</doi>
       <video href="https://slideslive.com/38938695"/>
       <bibkey>garg-ramakrishnan-2020-bae</bibkey>
-      <pwccode url="https://github.com/QData/TextAttack" additional="true">QData/TextAttack</pwccode>
+      <pwccode url="https://github.com/QData/TextAttack/blob/master/textattack/attack_recipes/bae_garg_2019.py" additional="true">QData/TextAttack</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/imdb-binary">IMDB-BINARY</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/mpqa-opinion-corpus">MPQA Opinion Corpus</pwcdataset>
       <pwcdataset url="https://paperswithcode.com/dataset/mr">MR</pwcdataset>

diff --git a/data/xml/2020.findings.xml b/data/xml/2020.findings.xml
@@ -845,6 +845,7 @@
       <doi>10.18653/v1/2020.findings-emnlp.59</doi>
       <bibkey>jiang-etal-2020-cascaded</bibkey>
       <pwcdataset url="https://paperswithcode.com/dataset/yahoo-answers">Yahoo! Answers</pwcdataset>
+      <pwcdataset url="https://paperswithcode.com/dataset/yelp-review-polarity">Yelp Review Polarity</pwcdataset>
     </paper>
     <paper id="60">
       <title>Toward Recognizing More Entity Types in <fixed-case>NER</fixed-case>: An Efficient Implementation using Only Entity Lexicons</title>

diff --git a/data/xml/2020.icon.xml b/data/xml/2020.icon.xml
@@ -1096,7 +1096,7 @@
       <author><first>Nitin</first><last>Bansal</last></author>
       <author><first>Ajit</first><last>Kumar</last></author>
       <pages>32–34</pages>
-      <abstract>Abstract Development of Machine Translation System (MTS) for any language pair is a challenging task for several reasons. Lack of lexical resources for any language is one of the major issue arise while developing MTS using that language. For example, during the development of Punjabi to Urdu MTS, many issues were recognized while preparing lexical resources for both the language. Since there is no machine readable dictionary is available for Punjabi to Urdu which can be directly used for translation; however various dictionaries are available to explain the meaning of word. Along with this, handling of OOV (out of vocabulary words), handling of multiple sense Punjabi word in Urdu, identification of proper nouns, identification of collocations in the source sentence i.e. Punjabi sentence in our case, are the issues which we are facing during development of this system. Since MTSs are in great demand from the last one decade and are being widely used in applications such as in case of smart phones. Therefore, development of such a system becomes more demanding and more users friendly. There usage is mainly in large scale translations, automated translations; act as an instrument to bridge a digital divide.</abstract>
+      <abstract>Development of Machine Translation System (MTS) for any language pair is a challenging task for several reasons. Lack of lexical resources for any language is one of the major issue arise while developing MTS using that language. For example, during the development of Punjabi to Urdu MTS, many issues were recognized while preparing lexical resources for both the language. Since there is no machine readable dictionary is available for Punjabi to Urdu which can be directly used for translation; however various dictionaries are available to explain the meaning of word. Along with this, handling of OOV (out of vocabulary words), handling of multiple sense Punjabi word in Urdu, identification of proper nouns, identification of collocations in the source sentence i.e. Punjabi sentence in our case, are the issues which we are facing during development of this system. Since MTSs are in great demand from the last one decade and are being widely used in applications such as in case of smart phones. Therefore, development of such a system becomes more demanding and more users friendly. There usage is mainly in large scale translations, automated translations; act as an instrument to bridge a digital divide.</abstract>
       <url hash="51081841">2020.icon-demos.13</url>
       <bibkey>bansal-kumar-2020-punjabi</bibkey>
     </paper>

diff --git a/data/xml/2020.wnut.xml b/data/xml/2020.wnut.xml
@@ -461,6 +461,7 @@
       <attachment type="OptionalSupplementaryMaterial" hash="12fdd120">2020.wnut-1.37.OptionalSupplementaryMaterial.txt</attachment>
       <doi>10.18653/v1/2020.wnut-1.37</doi>
       <bibkey>acharya-2020-wnut</bibkey>
+      <pwccode url="https://github.com/kaushikacharya/wet_lab_protocols" additional="false">kaushikacharya/wet_lab_protocols</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/wnut-20-task-1-extracting-entities-and">WNUT 2020</pwcdataset>
     </paper>
     <paper id="38">

diff --git a/data/xml/2021.acl.xml b/data/xml/2021.acl.xml
@@ -2820,7 +2820,7 @@
       <doi>10.18653/v1/2021.acl-long.174</doi>
       <bibkey>ou-etal-2021-integrating</bibkey>
       <video href="2021.acl-long.174.mp4"/>
-      <pwccode url="https://github.com/J-zin/SNUH" additional="false">J-zin/SNUH</pwccode>
+      <pwccode url="https://github.com/J-zin/SNUH" additional="true">J-zin/SNUH</pwccode>
     </paper>
     <paper id="175">
       <title><fixed-case>SMURF</fixed-case>: <fixed-case>S</fixed-case>e<fixed-case>M</fixed-case>antic and linguistic <fixed-case>U</fixed-case>nde<fixed-case>R</fixed-case>standing Fusion for Caption Evaluation via Typicality Analysis</title>
@@ -4868,6 +4868,7 @@
       <bibkey>panwar-etal-2021-tan</bibkey>
       <video href="2021.acl-long.299.mp4"/>
       <pwcdataset url="https://paperswithcode.com/dataset/ag-news">AG News</pwcdataset>
+      <pwcdataset url="https://paperswithcode.com/dataset/yelp-review-polarity">Yelp Review Polarity</pwcdataset>
     </paper>
     <paper id="300">
       <title>Cross-language Sentence Selection via Data Augmentation and Rationale Training</title>
@@ -10796,7 +10797,7 @@
       <doi>10.18653/v1/2021.acl-short.103</doi>
       <bibkey>le-etal-2021-lightweight</bibkey>
       <video href="2021.acl-short.103.mp4"/>
-      <pwccode url="https://github.com/formiel/fairseq/blob/master/examples/speech_to_text/docs/adapters.md" additional="false">formiel/fairseq</pwccode>
+      <pwccode url="https://github.com/formiel/fairseq" additional="true">formiel/fairseq</pwccode>
       <pwcdataset url="https://paperswithcode.com/dataset/must-c">MuST-C</pwcdataset>
     </paper>
     <paper id="104">