diff --git a/paper/paper.bib b/paper/paper.bib index f53d323..a169ad1 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -1,8 +1,8 @@ -@misc{diaz-vico+ramos-carreno_2022_scikitdatasets, +@misc{diaz-vico+ramos-carreno_2023_scikitdatasets, title = {{{scikit-datasets}}: {{Scikit-learn-compatible}} Datasets}, author = {{D{\'i}az-Vico}, David and {Ramos-Carre{\~n}o}, Carlos}, - year = {2022}, - month = mar, + year = {2023}, + month = aug, doi = {10.5281/zenodo.6383047}, url = {https://github.com/daviddiazvico/scikit-datasets}, copyright = {MIT} @@ -13,7 +13,8 @@ @misc{fajardo_2024_pyreadr author = {Fajardo, Otto}, year = {2024}, month = jul, - doi = {10.5281/zenodo.13132498}, + publisher = {Zenodo}, + doi = {10.5281/zenodo.7110169}, url = {https://github.com/ofajardo/pyreadr} } @@ -25,11 +26,42 @@ @misc{gautier_2024_rpy2 url = {https://github.com/rpy2/rpy2} } -@software{pandasdevelopmentteam_2020_pandasdev, - title = {{{pandas-dev/pandas}}: {{pandas}}}, - author = {Pandas Development Team}, +@article{harris+_2020_numpy, + title = {Array programming with {NumPy}}, + author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J. + van der Walt and Ralf Gommers and Pauli Virtanen and David + Cournapeau and Eric Wieser and Julian Taylor and Sebastian + Berg and Nathaniel J. Smith and Robert Kern and Matti Picus + and Stephan Hoyer and Marten H. van Kerkwijk and Matthew + Brett and Allan Haldane and Jaime Fern{\'{a}}ndez del + R{\'{i}}o and Mark Wiebe and Pearu Peterson and Pierre + G{\'{e}}rard-Marchant and Kevin Sheppard and Tyler Reddy and + Warren Weckesser and Hameer Abbasi and Christoph Gohlke and + Travis E. 
Oliphant}, year = {2020}, - month = feb, + month = sep, + journal = {Nature}, + volume = {585}, + number = {7825}, + pages = {357--362}, + doi = {10.1038/s41586-020-2649-2}, +} + +@inproceedings{mckinney_2010_pandas, + author = {{W}es {M}c{K}inney}, + title = {{D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython}, + booktitle = {{P}roceedings of the 9th {P}ython in {S}cience {C}onference}, + pages = {56--61}, + year = {2010}, + editor = {{S}t{\'e}fan van der {W}alt and {J}arrod {M}illman}, + doi = {10.25080/Majora-92bf1922-00a}, +} + +@software{pandasdevelopmentteam_2024_pandasdev, + title = {{{pandas-dev/pandas}}: {{pandas}}}, + author = {{The Pandas Development Team}}, + year = {2024}, + month = apr, publisher = {Zenodo}, doi = {10.5281/zenodo.3509134}, url = {https://doi.org/10.5281/zenodo.3509134}, diff --git a/paper/paper.md b/paper/paper.md index 5985467..1eb3cfa 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -53,11 +53,11 @@ does not include the parsing of common objects such as R lists and S4 objects. The license can also be a problem, as it is part of the GPL family and does not allow commercial use. As existing solutions were unsuitable for our needs, the package `rdata` was developed to parse data in the RData format. -This is a small, extensible, efficient, and very complete implementation in pure Python of a RData parser, that is able to read and convert most datasets in the CRAN repository to equivalent Python objects. +This is a small, extensible, efficient, and very complete implementation in pure Python of an RData parser, that is able to read and convert most datasets in the CRAN repository to equivalent Python objects, such as the built-in types of The Python Standard Library, NumPy arrays [@harris+_2020_numpy], or Pandas dataframes [@mckinney_2010_pandas; @pandasdevelopmentteam_2024_pandasdev]. It has a permissive license and can be extended to support additional conversions from custom R classes. 
The package `rdata` has been designed as a pure Python package with minimal dependencies, so that it can be easily integrated inside other libraries and applications. -It currently powers the functionality offered in the `scikit-datasets` package [@diaz-vico+ramos-carreno_2022_scikitdatasets] for loading datasets from the CRAN repository of R packages. +It currently powers the functionality offered in the `scikit-datasets` package [@diaz-vico+ramos-carreno_2023_scikitdatasets] for loading datasets from the CRAN repository of R packages. This functionality is used for fetching the functional datasets provided in the `scikit-fda` library [@ramos-carreno+_2024_scikitfda], whose development was the main reason for the creation of the `rdata` package itself. # Features @@ -89,7 +89,7 @@ The function `convert()` of the conversion module transforms that representation Advanced users will probably require loading datasets which contain non standard S3 or S4 classes, translating each of them to a custom Python class. This is easy to achieve using `rdata` by simply creating a constructor function that receives the converted object representation and its attributes, and returns a Python object of the desired type. -As an example, consider the following simple code that constructs a `Pandas` [@pandasdevelopmentteam_2020_pandasdev] `Categorical` object from the internal representation of an R `factor`. +As an example, consider the following simple code that constructs a `Pandas` [@pandasdevelopmentteam_2024_pandasdev] `Categorical` object from the internal representation of an R `factor`. ```python import pandas