From 4a92a390f8ae4888a734d36b317f6c066677265f Mon Sep 17 00:00:00 2001 From: mtmorgan Date: Tue, 16 Jan 2024 23:16:24 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20mtmorgan?= =?UTF-8?q?/cellxgenedp@c3329f8f0aa51a4d24f6c897740e3425b89612dd=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- articles/using_cellxgenedp.html | 14 +++++++------- index.html | 3 +-- pkgdown.yml | 2 +- reference/query.html | 2 +- search.json | 2 +- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/articles/using_cellxgenedp.html b/articles/using_cellxgenedp.html index a46222e..c587ad4 100644 --- a/articles/using_cellxgenedp.html +++ b/articles/using_cellxgenedp.html @@ -413,10 +413,10 @@

Filtering faceted columns## 5 bcb61471-2a44-4… 39fca0ca-2b0f-47b5-9… https://cellx… <list> info@kpmp.org ## 6 72d37bc9-76cc-4… 3e396ffb-b0d8-4ce4-b… https://cellx… <list> m.sepp@zmbh.… ## 7 b953c942-f5d8-4… 7727e578-1805-47c8-b… https://cellx… <lgl [1]> icobos@stanf… -## 8 4195ab4c-20bd-4… 04c5b03f-d07e-423b-8… https://cellx… <list> nnavin@mdand… -## 9 62e8f058-9c37-4… addce074-53d2-4f21-9… https://cellx… <list> chanj3@mskcc… -## 10 71f4bccf-53d4-4… 5a524bd4-231b-4941-a… https://cellx… <list> kevinmbyrd@g… -## 11 e1fa9900-3fc9-4… 85624898-8006-4209-a… https://cellx… <lgl [1]> j.ma@yale.edu +## 8 62e8f058-9c37-4… addce074-53d2-4f21-9… https://cellx… <list> chanj3@mskcc… +## 9 71f4bccf-53d4-4… 5a524bd4-231b-4941-a… https://cellx… <list> kevinmbyrd@g… +## 10 e1fa9900-3fc9-4… 85624898-8006-4209-a… https://cellx… <lgl [1]> j.ma@yale.edu +## 11 4195ab4c-20bd-4… f7da9dd1-b0ec-401f-9… https://cellx… <list> nnavin@mdand… ## 12 6b701826-37bb-4… 95ab05df-9716-4fc8-a… https://cellx… <list> astreets@ber… ## 13 b9fc3d70-5a72-4… 6701e565-6dfe-4649-b… https://cellx… <list> bruce.aronow… ## # ℹ 13 more variables: contact_name <chr>, curator_name <chr>, @@ -506,7 +506,7 @@

Publication and other external data
 external_links <- links(db)
 external_links
-
## # A tibble: 715 × 4
+
## # A tibble: 716 × 4
 ##    collection_id                        link_name     link_type   link_url      
 ##    <chr>                                <chr>         <chr>       <chr>         
 ##  1 ceb895f4-ff9f-403a-b7c3-187a9657ac2c SCP1859       OTHER       https://singl…
@@ -519,7 +519,7 @@ 

Publication and other external data ## 8 af893e86-8e9f-41f1-a474-ef05359b1fb7 GSE226108 RAW_DATA https://www.n… ## 9 1d1c7275-476a-49e2-9022-ad1b1c793594 GSE148077 RAW_DATA https://www.n… ## 10 1d1c7275-476a-49e2-9022-ad1b1c793594 NA OTHER https://singl… -## # ℹ 705 more rows

+## # ℹ 706 more rows
 external_links |>
     count(link_type)
@@ -530,7 +530,7 @@

Publication and other external data ## 2 LAB_WEBSITE 38 ## 3 OTHER 329 ## 4 PROTOCOL 44 -## 5 RAW_DATA 269 +## 5 RAW_DATA 270
 external_links |>
     filter(collection_id == collection_id_of_interest)
diff --git a/index.html b/index.html index a3a00ab..3769fdd 100644 --- a/index.html +++ b/index.html @@ -83,8 +83,7 @@

Installation

-

This package is available in Bioconductor version 3.15 and later. The following code installs [cellxgenedp][]

-

cellxgenedp

+

This package is available in Bioconductor version 3.15 and later. The following code installs cellxgenedp

 if (!"BiocManager" %in% rownames(installed.packages()))
     install.packages("BiocManager", repos = "https://CRAN.R-project.org")
diff --git a/pkgdown.yml b/pkgdown.yml
index 536ddb7..3e223ab 100644
--- a/pkgdown.yml
+++ b/pkgdown.yml
@@ -3,7 +3,7 @@ pkgdown: 2.0.7
 pkgdown_sha: ~
 articles:
   using_cellxgenedp: using_cellxgenedp.html
-last_built: 2024-01-16T22:58Z
+last_built: 2024-01-16T23:14Z
 urls:
   reference: https://mtmorgan.github.io/cellxgenedp/reference
   article: https://mtmorgan.github.io/cellxgenedp/articles
diff --git a/reference/query.html b/reference/query.html
index 1ab322f..9327edf 100644
--- a/reference/query.html
+++ b/reference/query.html
@@ -255,7 +255,7 @@ 

Examples#> 2 LAB_WEBSITE 38 #> 3 OTHER 329 #> 4 PROTOCOL 44 -#> 5 RAW_DATA 269 +#> 5 RAW_DATA 270 ## authors per collection authors() |> diff --git a/search.json b/search.json index 02c2e86..3ff3537 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"installation-and-use","dir":"Articles","previous_headings":"","what":"Installation and use","title":"Discover and download datasets and files from the cellxgene data portal","text":"package available Bioconductor version 3.15 later. following code installs cellxgenedp well packages required vignette. Alternatively, install ‘development’ version GitHub also install additional packages required vignette, use Load package current R session. make extensive use dplyr packages, end vignette use SingleCellExperiment zellkonverter, load well.","code":"if (!\"BiocManager\" %in% rownames(installed.packages())) install.packages(\"BiocManager\", repos = \"https://CRAN.R-project.org\") BiocManager::install(\"cellxgenedp\") if (!\"remotes\" %in% rownames(installed.packages())) install.packages(\"remotes\", repos = \"https://CRAN.R-project.org\") remotes::install_github(\"mtmorgan/cellxgenedp\") pkgs <- c(\"zellkonverter\", \"SingleCellExperiment\", \"HDF5Array\") required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())] BiocManager::install(required_pkgs) library(zellkonverter) library(SingleCellExperiment) # load early to avoid masking dplyr::count() library(dplyr) library(cellxgenedp)"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"cxg-provides-a-shiny-interface","dir":"Articles","previous_headings":"","what":"cxg() Provides a ‘shiny’ interface","title":"Discover and download datasets and files from the cellxgene data portal","text":"following sections outline use cellxgenedp package R script; functionality also available cxg() shiny application, providing easy way identify, download, visualize one several datasets. Start app choose project first tab, dataset visualization, one datasets download!","code":"cxg()"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"collections-datasets-and-files","dir":"Articles","previous_headings":"","what":"Collections, datasets and files","title":"Discover and download datasets and files from the cellxgene data portal","text":"Retrieve metadata resources available cellxgene data portal using db(): Printing db object provides brief overview available data, well hints, form functions like collections(), exploration. portal organizes data hierarchically, ‘collections’ (research studies, approximately), ‘datasets’, ‘files’. Discover data using corresponding functions. resources unique primary identifier (e.g., file_id) well identifier describing relationship resource components database (e.g., dataset_id). identifiers can used ‘join’ information across tables.","code":"db <- db() db ## cellxgene_db ## number of collections(): 182 ## number of datasets(): 1167 ## number of files(): 2314 collections(db) ## # A tibble: 182 × 18 ## collection_id collection_version_id collection_url consortia contact_email ## ## 1 ceb895f4-ff9f-4… ee098b5a-4f33-473b-b… https://cellx… panagiotis.r… ## 2 af893e86-8e9f-4… 768170a6-c590-4900-a… https://cellx… ruichen@bcm.… ## 3 1d1c7275-476a-4… 609becde-c797-41bb-8… https://cellx… wey334@g.har… ## 4 1b014f39-f202-4… 1d88cb46-6e84-4b5b-b… https://cellx… kimberly.ald… ## 5 48d354f5-a5ca-4… 2862daa3-c933-43c8-9… https://cellx… Nathan.Salom… ## 6 43d4bb39-21af-4… 78360f02-1acc-415c-a… https://cellx… raymond.cho@… ## 7 f7cecffa-00b4-4… 43224f82-db2a-443c-9… https://cellx… st9@sanger.a… ## 8 f17b9205-f61f-4… 21ff4724-95e2-491b-8… https://cellx… genevieve.ko… ## 9 64b24fda-6591-4… e414854b-2666-4977-9… https://cellx… magness@med.… ## 10 48259aa8-f168-4… 44601b80-bd11-49d8-a… https://cellx… wtk22@cam.ac… ## # ℹ 172 more rows ## # ℹ 13 more variables: contact_name , curator_name , ## # description , doi , links , name , ## # publisher_metadata , revising_in , revision_of , ## # visibility , created_at , published_at , revised_at datasets(db) ## # A tibble: 1,167 × 31 ## dataset_id dataset_version_id collection_id donor_id assay batch_condition ## ## 1 53ce2631-36… 2f17c183-388a-4c0… ceb895f4-ff9… ## 2 1d4128f6-c2… 94762ee1-9f9f-49e… ceb895f4-ff9… ## 3 ed419b4e-db… 758b30a8-5fb0-46c… af893e86-8e9… ## 4 aad97cb5-f3… d6966985-89f9-485… af893e86-8e9… ## 5 8f10185b-e0… 63d7a3a3-9691-41d… af893e86-8e9… ## 6 359f7af4-87… 0f461193-282f-443… af893e86-8e9… ## 7 11ef37ee-21… 74253a67-927c-4cd… af893e86-8e9… ## 8 0129dbd9-a7… a970179d-2e9e-4d2… af893e86-8e9… ## 9 00e5dedd-b9… 94c0e74c-b269-4ce… af893e86-8e9… ## 10 d319af7f-be… 3c80a5bb-8c89-433… 1d1c7275-476… ## # ℹ 1,157 more rows ## # ℹ 25 more variables: cell_count , cell_type , citation , ## # development_stage , disease , embeddings , ## # explorer_url , feature_biotype , feature_count , ## # feature_reference , is_primary_data , ## # mean_genes_per_cell , organism , primary_cell_count , ## # raw_data_location , schema_version , … files(db) ## # A tibble: 2,314 × 4 ## dataset_id filesize filetype url ## ## 1 53ce2631-3646-4172-bbd9-38b0a44d8214 406108808 H5AD https://datasets.ce… ## 2 53ce2631-3646-4172-bbd9-38b0a44d8214 399752425 RDS https://datasets.ce… ## 3 1d4128f6-c27b-40c4-af77-b1c7e2b694e7 906795740 H5AD https://datasets.ce… ## 4 1d4128f6-c27b-40c4-af77-b1c7e2b694e7 1060800682 RDS https://datasets.ce… ## 5 ed419b4e-db9b-40f1-8593-68fdf8dfb076 1071401902 H5AD https://datasets.ce… ## 6 ed419b4e-db9b-40f1-8593-68fdf8dfb076 1419579253 RDS https://datasets.ce… ## 7 aad97cb5-f375-45ef-ae9d-178e7f5d5180 785137201 H5AD https://datasets.ce… ## 8 aad97cb5-f375-45ef-ae9d-178e7f5d5180 1025253758 RDS https://datasets.ce… ## 9 8f10185b-e0b3-46a5-8706-7f1799225d79 3077438912 H5AD https://datasets.ce… ## 10 8f10185b-e0b3-46a5-8706-7f1799225d79 4090930879 RDS https://datasets.ce… ## # ℹ 2,304 more rows"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"using-dplyr-to-navigate-data","dir":"Articles","previous_headings":"Collections, datasets and files","what":"Using dplyr to navigate data","title":"Discover and download datasets and files from the cellxgene data portal","text":"collection may several datasets, datasets may several files. instance, collection datasets can find collection joining collections() table. can take similar strategy identify datasets belonging collection","code":"collection_with_most_datasets <- datasets(db) |> count(collection_id, sort = TRUE) |> slice(1) left_join( collection_with_most_datasets |> select(collection_id), collections(db), by = \"collection_id\" ) |> glimpse() ## Rows: 1 ## Columns: 18 ## $ collection_id \"283d65eb-dd53-496d-adb7-7570c7caa443\" ## $ collection_version_id \"4c16c611-00a9-42f9-a8c4-7b42daa226fe\" ## $ collection_url \"https://cellxgene.cziscience.com/collections/28… ## $ consortia [\"BRAIN Initiative\", \"CZI Single-Cell Biology\"] ## $ contact_email \"kimberly.siletti@ki.se\" ## $ contact_name \"Kimberly Siletti\" ## $ curator_name \"James Chaffer\" ## $ description \"First draft atlas of human brain transcriptomic… ## $ doi \"10.1126/science.add7046\" ## $ links [[\"\", \"RAW_DATA\", \"http://data.nemoarchive.org/b… ## $ name \"Human Brain Cell Atlas v1.0\" ## $ publisher_metadata [[[\"Siletti\", \"Kimberly\"], [\"Hodge\", \"Rebecca\"]… ## $ revising_in NA ## $ revision_of NA ## $ visibility \"PUBLIC\" ## $ created_at 2023-12-12 ## $ published_at 2022-12-09 ## $ revised_at 2023-12-13 left_join( collection_with_most_datasets |> select(collection_id), datasets(db), by = \"collection_id\" ) ## # A tibble: 138 × 31 ## collection_id dataset_id dataset_version_id donor_id assay batch_condition ## ## 1 283d65eb-dd53-… ff7d15fa-… 51e05270-1f00-452… ## 2 283d65eb-dd53-… fe1a73ab-… 4e124ecc-7885-465… ## 3 283d65eb-dd53-… fbf173f9-… 5a52f557-aeaf-4fc… ## 4 283d65eb-dd53-… fa554686-… 6606e9aa-e4c4-452… ## 5 283d65eb-dd53-… f9034091-… 8f5b1977-8317-447… ## 6 283d65eb-dd53-… f8dda921-… 1ad58833-956c-454… ## 7 283d65eb-dd53-… f7d003d4-… 4d002ac1-4671-490… ## 8 283d65eb-dd53-… f6d9f2ad-… 2102f4b8-c1fe-4ee… ## 9 283d65eb-dd53-… f5a04dff-… b92375fd-dafe-44c… ## 10 283d65eb-dd53-… f502c312-… b750310e-1abb-4c7… ## # ℹ 128 more rows ## # ℹ 25 more variables: cell_count , cell_type , citation , ## # development_stage , disease , embeddings , ## # explorer_url , feature_biotype , feature_count , ## # feature_reference , is_primary_data , ## # mean_genes_per_cell , organism , primary_cell_count , ## # raw_data_location , schema_version , …"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"facets-provides-information-on-levels-present-in-specific-columns","dir":"Articles","previous_headings":"Collections, datasets and files","what":"facets() provides information on ‘levels’ present in specific columns","title":"Discover and download datasets and files from the cellxgene data portal","text":"Notice columns ‘lists’ rather atomic vectors like ‘character’ ‘integer’. indicates least datasets one type assay, cell_type, etc. facets() function provides convenient way discovering possible levels column, e.g., assay, organism, self_reported_ethnicity, sex, number datasets label.","code":"datasets(db) |> select(where(is.list)) ## # A tibble: 1,167 × 15 ## donor_id assay batch_condition cell_type development_stage disease ## ## 1 ## 2 ## 3 ## 4 ## 5 ## 6 ## 7 ## 8 ## 9 ## 10 ## # ℹ 1,157 more rows ## # ℹ 9 more variables: embeddings , feature_biotype , ## # feature_reference , is_primary_data , organism , ## # self_reported_ethnicity , sex , suspension_type , ## # tissue facets(db, \"assay\") ## # A tibble: 38 × 4 ## facet label ontology_term_id n ## ## 1 assay 10x 3' v3 EFO:0009922 563 ## 2 assay 10x 3' v2 EFO:0009899 254 ## 3 assay Slide-seqV2 EFO:0030062 223 ## 4 assay Visium Spatial Gene Expression EFO:0010961 108 ## 5 assay 10x 5' v1 EFO:0011025 81 ## 6 assay Smart-seq2 EFO:0008931 63 ## 7 assay 10x multiome EFO:0030059 61 ## 8 assay 10x 5' v2 EFO:0009900 23 ## 9 assay sci-RNA-seq3 EFO:0030028 15 ## 10 assay Drop-seq EFO:0008722 14 ## # ℹ 28 more rows facets(db, \"self_reported_ethnicity\") ## # A tibble: 32 × 4 ## facet label ontology_term_id n ## ## 1 self_reported_ethnicity European HANCESTRO:0005 499 ## 2 self_reported_ethnicity unknown unknown 411 ## 3 self_reported_ethnicity na na 314 ## 4 self_reported_ethnicity Asian HANCESTRO:0008 141 ## 5 self_reported_ethnicity African American HANCESTRO:0568 61 ## 6 self_reported_ethnicity Native American,Hispanic or L… HANCESTRO:0013,… 50 ## 7 self_reported_ethnicity Hispanic or Latin American HANCESTRO:0014 48 ## 8 self_reported_ethnicity African American or Afro-Cari… HANCESTRO:0016 26 ## 9 self_reported_ethnicity Greater Middle Eastern (Midd… HANCESTRO:0015 22 ## 10 self_reported_ethnicity South Asian HANCESTRO:0006 11 ## # ℹ 22 more rows facets(db, \"sex\") ## # A tibble: 3 × 4 ## facet label ontology_term_id n ## ## 1 sex male PATO:0000384 903 ## 2 sex female PATO:0000383 677 ## 3 sex unknown unknown 173"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"filtering-faceted-columns","dir":"Articles","previous_headings":"Collections, datasets and files","what":"Filtering faceted columns","title":"Discover and download datasets and files from the cellxgene data portal","text":"Suppose interested finding datasets 10x 3’ v3 assay (ontology_term_id EFO:0009922) containing individuals African American ethnicity, female sex. Use facets_filter() utility function filter data sets needed Use nrow(african_american_female) find number datasets satisfying criteria. looks like cells sequenced (dataset may contain cells several ethnicities, well males individuals unknown gender, know actual number cells available without downloading files). Use left_join identify corresponding collections:","code":"african_american_female <- datasets(db) |> filter( facets_filter(assay, \"ontology_term_id\", \"EFO:0009922\"), facets_filter(self_reported_ethnicity, \"label\", \"African American\"), facets_filter(sex, \"label\", \"female\") ) african_american_female |> summarise(total_cell_count = sum(cell_count)) ## # A tibble: 1 × 1 ## total_cell_count ## ## 1 4320736 ## collections left_join( african_american_female |> select(collection_id) |> distinct(), collections(db), by = \"collection_id\" ) ## # A tibble: 13 × 18 ## collection_id collection_version_id collection_url consortia contact_email ## ## 1 f17b9205-f61f-4… 21ff4724-95e2-491b-8… https://cellx… genevieve.ko… ## 2 625f6bf4-2f33-4… 0c0d607f-00b8-4f3d-8… https://cellx… a5wang@healt… ## 3 c9706a92-0e5f-4… bc627471-7137-4518-a… https://cellx… hnakshat@iup… ## 4 a98b828a-622a-4… cee0b899-009a-40ec-a… https://cellx… markusbi@med… ## 5 bcb61471-2a44-4… 39fca0ca-2b0f-47b5-9… https://cellx… info@kpmp.org ## 6 72d37bc9-76cc-4… 3e396ffb-b0d8-4ce4-b… https://cellx… m.sepp@zmbh.… ## 7 b953c942-f5d8-4… 7727e578-1805-47c8-b… https://cellx… icobos@stanf… ## 8 4195ab4c-20bd-4… 04c5b03f-d07e-423b-8… https://cellx… nnavin@mdand… ## 9 62e8f058-9c37-4… addce074-53d2-4f21-9… https://cellx… chanj3@mskcc… ## 10 71f4bccf-53d4-4… 5a524bd4-231b-4941-a… https://cellx… kevinmbyrd@g… ## 11 e1fa9900-3fc9-4… 85624898-8006-4209-a… https://cellx… j.ma@yale.edu ## 12 6b701826-37bb-4… 95ab05df-9716-4fc8-a… https://cellx… astreets@ber… ## 13 b9fc3d70-5a72-4… 6701e565-6dfe-4649-b… https://cellx… bruce.aronow… ## # ℹ 13 more variables: contact_name , curator_name , ## # description , doi , links , name , ## # publisher_metadata , revising_in , revision_of , ## # visibility , created_at , published_at , revised_at "},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"publication-and-other-external-data","dir":"Articles","previous_headings":"Collections, datasets and files","what":"Publication and other external data","title":"Discover and download datasets and files from the cellxgene data portal","text":"Many collections include publication information external data. information available return value collections(), helper function publisher_metadata(), authors(), links() may facilite access. Suppose one interested publication “single-cell atlas healthy breast tissues reveals clinically relevant clusters breast epithelial cells”. Discover collections Use collection_id extract publisher metadata (including DOI available) author information Collections may links additional external data, case DOI two links RAW_DATA. Conversely, knowledge DOI, etc., can used discover details corresponding collection.","code":"title_of_interest <- paste( \"A single-cell atlas of the healthy breast tissues reveals clinically\", \"relevant clusters of breast epithelial cells\" ) collection_of_interest <- collections(db) |> dplyr::filter(startsWith(name, title_of_interest)) collection_of_interest |> glimpse() ## Rows: 1 ## Columns: 18 ## $ collection_id \"c9706a92-0e5f-46c1-96d8-20e42467f287\" ## $ collection_version_id \"bc627471-7137-4518-a593-2f679bac054e\" ## $ collection_url \"https://cellxgene.cziscience.com/collections/c9… ## $ consortia [\"CZI Single-Cell Biology\"] ## $ contact_email \"hnakshat@iupui.edu\" ## $ contact_name \"Harikrishna Nakshatri\" ## $ curator_name \"Jennifer Yu-Sheng Chien\" ## $ description \"Single-cell RNA sequencing (scRNA-seq) is an ev… ## $ doi \"10.1016/j.xcrm.2021.100219\" ## $ links [[\"\", \"RAW_DATA\", \"https://data.humancellatlas.o… ## $ name \"A single-cell atlas of the healthy breast tiss… ## $ publisher_metadata [[[\"Bhat-Nakshatri\", \"Poornima\"], [\"Gao\", \"Hongy… ## $ revising_in NA ## $ revision_of NA ## $ visibility \"PUBLIC\" ## $ created_at 2023-12-12 ## $ published_at 2021-03-25 ## $ revised_at 2023-12-13 collection_id_of_interest <- pull(collection_of_interest, \"collection_id\") publisher_metadata(db) |> filter(collection_id == collection_id_of_interest) |> glimpse() ## Rows: 1 ## Columns: 9 ## $ collection_id \"c9706a92-0e5f-46c1-96d8-20e42467f287\" ## $ name \"A single-cell atlas of the healthy breast tissues rev… ## $ is_preprint FALSE ## $ journal \"Cell Reports Medicine\" ## $ published_at 2021-03-01 ## $ published_year 2021 ## $ published_month 3 ## $ published_day 1 ## $ doi NA authors(db) |> filter(collection_id == collection_id_of_interest) ## # A tibble: 12 × 4 ## collection_id family given consortium ## ## 1 c9706a92-0e5f-46c1-96d8-20e42467f287 Bhat-Nakshatri Poornima NA ## 2 c9706a92-0e5f-46c1-96d8-20e42467f287 Gao Hongyu NA ## 3 c9706a92-0e5f-46c1-96d8-20e42467f287 Sheng Liu NA ## 4 c9706a92-0e5f-46c1-96d8-20e42467f287 McGuire Patrick C. NA ## 5 c9706a92-0e5f-46c1-96d8-20e42467f287 Xuei Xiaoling NA ## 6 c9706a92-0e5f-46c1-96d8-20e42467f287 Wan Jun NA ## 7 c9706a92-0e5f-46c1-96d8-20e42467f287 Liu Yunlong NA ## 8 c9706a92-0e5f-46c1-96d8-20e42467f287 Althouse Sandra K. NA ## 9 c9706a92-0e5f-46c1-96d8-20e42467f287 Colter Austyn NA ## 10 c9706a92-0e5f-46c1-96d8-20e42467f287 Sandusky George NA ## 11 c9706a92-0e5f-46c1-96d8-20e42467f287 Storniolo Anna Maria NA ## 12 c9706a92-0e5f-46c1-96d8-20e42467f287 Nakshatri Harikrishna NA external_links <- links(db) external_links ## # A tibble: 715 × 4 ## collection_id link_name link_type link_url ## ## 1 ceb895f4-ff9f-403a-b7c3-187a9657ac2c SCP1859 OTHER https://singl… ## 2 ceb895f4-ff9f-403a-b7c3-187a9657ac2c NA LAB_WEBSITE https://labs.… ## 3 ceb895f4-ff9f-403a-b7c3-187a9657ac2c NA OTHER http://genome… ## 4 ceb895f4-ff9f-403a-b7c3-187a9657ac2c GSE204684 RAW_DATA https://www.n… ## 5 ceb895f4-ff9f-403a-b7c3-187a9657ac2c analysis code OTHER https://zenod… ## 6 af893e86-8e9f-41f1-a474-ef05359b1fb7 NA OTHER https://retin… ## 7 af893e86-8e9f-41f1-a474-ef05359b1fb7 NA RAW_DATA https://data.… ## 8 af893e86-8e9f-41f1-a474-ef05359b1fb7 GSE226108 RAW_DATA https://www.n… ## 9 1d1c7275-476a-49e2-9022-ad1b1c793594 GSE148077 RAW_DATA https://www.n… ## 10 1d1c7275-476a-49e2-9022-ad1b1c793594 NA OTHER https://singl… ## # ℹ 705 more rows external_links |> count(link_type) ## # A tibble: 5 × 2 ## link_type n ## ## 1 DATA_SOURCE 35 ## 2 LAB_WEBSITE 38 ## 3 OTHER 329 ## 4 PROTOCOL 44 ## 5 RAW_DATA 269 external_links |> filter(collection_id == collection_id_of_interest) ## # A tibble: 2 × 4 ## collection_id link_name link_type link_url ## ## 1 c9706a92-0e5f-46c1-96d8-20e42467f287 NA RAW_DATA https://data.humance… ## 2 c9706a92-0e5f-46c1-96d8-20e42467f287 NA RAW_DATA https://www.ncbi.nlm… doi_of_interest <- \"https://doi.org/10.1016/j.stem.2018.12.011\" links(db) |> filter(link_url == doi_of_interest) |> left_join(collections(db), by = \"collection_id\") |> glimpse() ## Rows: 1 ## Columns: 21 ## $ collection_id \"b1a879f6-5638-48d3-8f64-f6592c1b1561\" ## $ link_name \"PSC-ATO protocol\" ## $ link_type \"PROTOCOL\" ## $ link_url \"https://doi.org/10.1016/j.stem.2018.12.011\" ## $ collection_version_id \"aa814356-20ba-4066-88be-fcbf89c84899\" ## $ collection_url \"https://cellxgene.cziscience.com/collections/b1… ## $ consortia [\"CZI Single-Cell Biology\", \"Wellcome HCA Strate… ## $ contact_email \"st9@sanger.ac.uk\" ## $ contact_name \"Sarah Teichmann\" ## $ curator_name \"Batuhan Cakir\" ## $ description \"Single-cell genomics studies have decoded the i… ## $ doi \"10.1126/science.abo0510\" ## $ links [[\"scVI Models\", \"DATA_SOURCE\", \"https://develop… ## $ name \"Mapping the developing human immune system acro… ## $ publisher_metadata [[[\"Suo\", \"Chenqu\"], [\"Dann\", \"Emma\"], [\"Goh\", \"… ## $ revising_in NA ## $ revision_of NA ## $ visibility \"PUBLIC\" ## $ created_at 2023-12-11 ## $ published_at 2022-10-04 ## $ revised_at 2023-12-13"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"visualizing-data-in-cellxgene","dir":"Articles","previous_headings":"","what":"Visualizing data in cellxgene","title":"Discover and download datasets and files from the cellxgene data portal","text":"Visualization straight-forward dataset_id available. example, visualize first dataset african_american_female, use Visualization interactive process, datasets_visualize() open 5 browser tabs per call.","code":"african_american_female |> ## use criteria to identify a single dataset (here just the ## 'first' dataset), then visualize slice(1) |> datasets_visualize()"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"file-download-and-use","dir":"Articles","previous_headings":"","what":"File download and use","title":"Discover and download datasets and files from the cellxgene data portal","text":"Datasets usually contain H5AD (files produced python AnnData module), Rds (serialized files produced R Seurat package). Rds files may unreadable version Seurat used create file different version used read file. therefore focus H5AD files. illustration, find files associated studies African American females download one selected files. choose single dataset H5AD file download downloaded local cache (use internal function cellxgenedp:::.cellxgenedb_cache_path() location cache), process time-consuming first time. H5AD files can converted R / Bioconductor objects using zellkonverter package. SingleCellExperiment object matrix-like object rows corresponding genes columns cells. Thus can easily explore cells present data.","code":"selected_files <- left_join( african_american_female |> select(dataset_id), files(db), by = \"dataset_id\" ) local_file <- selected_files |> filter( dataset_id == \"de985818-285f-4f59-9dbd-d74968fddba3\", filetype == \"H5AD\" ) |> files_download(dry.run = FALSE) basename(local_file) ## [1] \"64f14a2b-d754-4bc9-b496-b26f05ebfe4e.h5ad\" h5ad <- readH5AD(local_file, use_hdf5 = TRUE, reader = \"R\") h5ad ## class: SingleCellExperiment ## dim: 33234 31696 ## metadata(5): citation default_embedding schema_reference schema_version ## title ## assays(1): X ## rownames(33234): ENSG00000243485 ENSG00000237613 ... ENSG00000277475 ## ENSG00000268674 ## rowData names(5): feature_is_filtered feature_name feature_reference ## feature_biotype feature_length ## colnames(31696): CMGpool_AAACCCAAGGACAACC CMGpool_AAACCCACAATCTCTT ... ## K109064_TTTGTTGGTTGCATCA K109064_TTTGTTGGTTGGACCC ## colData names(36): donor_id self_reported_ethnicity_ontology_term_id ## ... development_stage observation_joinid ## reducedDimNames(3): X_pca X_tsne X_umap ## mainExpName: NULL ## altExpNames(0): h5ad |> colData(h5ad) |> as_tibble() |> count(sex, donor_id) ## # A tibble: 7 × 3 ## sex donor_id n ## ## 1 female D1 2303 ## 2 female D2 864 ## 3 female D3 2517 ## 4 female D4 1771 ## 5 female D5 2244 ## 6 female D11 7454 ## 7 female pooled [D9,D7,D8,D10,D6] 14543"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"next-steps","dir":"Articles","previous_headings":"","what":"Next steps","title":"Discover and download datasets and files from the cellxgene data portal","text":"Orchestrating Single-Cell Analysis Bioconductor online resource provides excellent introduction analysis visualization single-cell data R / Bioconductor. Extensive opportunities working AnnData objects R using native python interface briefly described , e.g., ?AnnData2SCE help page zellkonverter. hca package provides programmatic access Human Cell Atlas data portal, allowing retrieval primary well derived single-cell data files.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"api-changes","dir":"Articles","previous_headings":"","what":"API changes","title":"Discover and download datasets and files from the cellxgene data portal","text":"Data access provided CELLxGENE changed new ‘Discover’ API. main functionality cellxgenedp package changed, specific columns removed, replaced added, follows: collections() Removed: access_type, data_submission_policy_version Replaced: updated_at replaced revised_at Added: collection_version_id, collection_url, doi, revising_in, revision_of datasets() Removed: is_valid, processing_status, published, revision, created_at Replaced: dataset_deployments replaced explorer_url, name replaced title, updated_at replaced revised_at Added: dataset_version_id, batch_condition, x_approximate_distribution files() Removed: file_id, filename, s3_uri, user_submitted, created_at, updated_at Added: filesize, url","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"session-info","dir":"Articles","previous_headings":"","what":"Session info","title":"Discover and download datasets and files from the cellxgene data portal","text":"","code":"## R version 4.3.2 (2023-10-31) ## Platform: x86_64-pc-linux-gnu (64-bit) ## Running under: Ubuntu 22.04.3 LTS ## ## Matrix products: default ## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 ## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 ## ## locale: ## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 ## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8 ## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C ## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C ## ## time zone: UTC ## tzcode source: system (glibc) ## ## attached base packages: ## [1] stats4 stats graphics grDevices utils datasets methods ## [8] base ## ## other attached packages: ## [1] cellxgenedp_1.7.1.9000 dplyr_1.1.4 ## [3] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0 ## [5] Biobase_2.62.0 GenomicRanges_1.54.1 ## [7] GenomeInfoDb_1.38.5 IRanges_2.36.0 ## [9] S4Vectors_0.40.2 BiocGenerics_0.48.1 ## [11] MatrixGenerics_1.14.0 matrixStats_1.2.0 ## [13] zellkonverter_1.12.1 BiocStyle_2.30.0 ## ## loaded via a namespace (and not attached): ## [1] tidyselect_1.2.0 filelock_1.0.3 bitops_1.0-7 ## [4] fastmap_1.1.1 RCurl_1.98-1.14 promises_1.2.1 ## [7] digest_0.6.34 mime_0.12 lifecycle_1.0.4 ## [10] ellipsis_0.3.2 magrittr_2.0.3 compiler_4.3.2 ## [13] rlang_1.1.3 sass_0.4.8 tools_4.3.2 ## [16] utf8_1.2.4 yaml_2.3.8 knitr_1.45 ## [19] S4Arrays_1.2.0 htmlwidgets_1.6.4 curl_5.2.0 ## [22] reticulate_1.34.0 DelayedArray_0.28.0 abind_1.4-5 ## [25] HDF5Array_1.30.0 withr_2.5.2 purrr_1.0.2 ## [28] desc_1.4.3 grid_4.3.2 fansi_1.0.6 ## [31] xtable_1.8-4 Rhdf5lib_1.24.1 cli_3.6.2 ## [34] rmarkdown_2.25 crayon_1.5.2 ragg_1.2.7 ## [37] generics_0.1.3 httr_1.4.7 rhdf5_2.46.1 ## [40] cachem_1.0.8 stringr_1.5.1 zlibbioc_1.48.0 ## [43] parallel_4.3.2 BiocManager_1.30.22 XVector_0.42.0 ## [46] basilisk_1.14.1 vctrs_0.6.5 Matrix_1.6-1.1 ## [49] jsonlite_1.8.8 dir.expiry_1.10.0 bookdown_0.37 ## [52] systemfonts_1.0.5 jquerylib_0.1.4 glue_1.7.0 ## [55] pkgdown_2.0.7 DT_0.31 stringi_1.8.3 ## [58] later_1.3.2 tibble_3.2.1 pillar_1.9.0 ## [61] rhdf5filters_1.14.1 basilisk.utils_1.14.1 htmltools_0.5.7 ## [64] GenomeInfoDbData_1.2.11 R6_2.5.1 textshaping_0.3.7 ## [67] evaluate_0.23 shiny_1.8.0 lattice_0.21-9 ## [70] png_0.1-8 memoise_2.0.1 httpuv_1.6.13 ## [73] bslib_0.6.1 rjsoncons_1.1.0 Rcpp_1.0.12 ## [76] SparseArray_1.2.3 xfun_0.41 fs_1.6.3 ## [79] pkgconfig_2.0.3"},{"path":"https://mtmorgan.github.io/cellxgenedp/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Martin Morgan. Author, maintainer. Kayla Interdonato. Author.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Morgan M, Interdonato K (2024). cellxgenedp: Discover Access Single Cell Data Sets cellxgene Data Portal. R package version 1.7.1.9000, https://github.com/mtmorgan/cellxgenedp, https://mtmorgan.github.io/cellxgenedp/.","code":"@Manual{, title = {cellxgenedp: Discover and Access Single Cell Data Sets in the cellxgene Data Portal}, author = {Martin Morgan and Kayla Interdonato}, year = {2024}, note = {R package version 1.7.1.9000, https://github.com/mtmorgan/cellxgenedp}, url = {https://mtmorgan.github.io/cellxgenedp/}, }"},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"introduction-to-cellxgenedp","dir":"","previous_headings":"","what":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"cellxgene data portal https://cellxgene.cziscience.com/ provides graphical user interface collections single-cell sequence data processed standard ways ‘count matrix’ summaries. cellxgenedp package provides alternative, R-based inteface, allowind data discovery, viewing, downloading.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"package available Bioconductor version 3.15 later. following code installs [cellxgenedp][] cellxgenedp Alternatively, install ‘development’ version GitHub also install additional packages required vignette, use","code":"if (!\"BiocManager\" %in% rownames(installed.packages())) install.packages(\"BiocManager\", repos = \"https://CRAN.R-project.org\") BiocManager::install(\"cellxgenedp\") if (!\"remotes\" %in% rownames(installed.packages())) install.packages(\"remotes\", repos = \"https://CRAN.R-project.org\") remotes::install_github(\"mtmorgan/cellxgenedp\") pkgs <- c(\"zellkonverter\", \"SingleCellExperiment\", \"HDF5Array\") required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())] BiocManager::install(required_pkgs)"},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"use","dir":"","previous_headings":"","what":"Use","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"Load package current R session. make extensive use dplyr packages, end vignette use SingleCellExperiment zellkonverter, load well.","code":"suppressPackageStartupMessages({ library(dplyr) library(cellxgenedp) })"},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"shiny","dir":"","previous_headings":"","what":"Shiny","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"cxg() provides ‘shiny’ interface allowing discovery collections datasets, visualization selected datasets cellxgene data portal, download datasets use R.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"next-steps","dir":"","previous_headings":"","what":"Next steps","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"View artcle Discover download datasets files cellxgene data portal.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":null,"dir":"Reference","previous_headings":"","what":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"Shiny application discovering, viewing, downloading cellxgene data","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"","code":"cxg(as = c(\"tibble\", \"sce\"))"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"character(1) Return value quiting shiny application. \"tibble\" returns tibble describing selected datasets (including location disk downloaded file). \"sce\" returns list dataset files imported R SingleCellExperiment objects.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"cxg() returns either tibble describing datasets selected shiny application, list datasets imported R SingleCellExperiment objects.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"","code":"# \\donttest{ if (interactive()) cxg() # }"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":null,"dir":"Reference","previous_headings":"","what":"Retrieve updated cellxgene database metadata — db","title":"Retrieve updated cellxgene database metadata — db","text":"Retrieve updated cellxgene database metadata","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Retrieve updated cellxgene database metadata — db","text":"","code":"db(overwrite = .db_online() && .db_first())"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Retrieve updated cellxgene database metadata — db","text":"overwrite logical(1) indicating whether database collections updated internet (default, internet available , interactive session, user requests update), read disk (assuming previous successful access internet). overwrite = FALSE might useful reproducibility, testing, working environment restricted internet access.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Retrieve updated cellxgene database metadata — db","text":"db() returns object class 'cellxgene_db', summarizing available collections, datasets, files.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Retrieve updated cellxgene database metadata — db","text":"database retrieved cellxgene data portal web site. 'collections' metadata retrieved call; metadata collection cached locally re-use.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Retrieve updated cellxgene database metadata — db","text":"","code":"db() #> cellxgene_db #> number of collections(): 182 #> number of datasets(): 1167 #> number of files(): 2314"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":null,"dir":"Reference","previous_headings":"","what":"Facets available for querying cellxgene data — FACETS","title":"Facets available for querying cellxgene data — FACETS","text":"FACETS character vector common fields used subset cellxgene data. facets() used query cellxgene database current values one facets. facets_filter() provides convenient way filter facets based label ontology term.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Facets available for querying cellxgene data — FACETS","text":"","code":"FACETS facets(cellxgene_db = db(), facets = FACETS) facets_filter(facet, key = c(\"label\", \"ontology_term_id\"), value, exact = TRUE)"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Facets available for querying cellxgene data — FACETS","text":"FACETS object class character length 8.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Facets available for querying cellxgene data — FACETS","text":"cellxgene_db (optional) cellxgene_db object, returned db(). facets character() vector corersponding one facets FACETS. facet column containing faceted information, e.g., sex datasets(db). key character(1) identifying whether value label ontology_term_id. value character() value label ontology term filter . value may vector length(value) > 0 exact matchs (exact = TRUE, default), character(1) regular expression. exact logical(1) whether values match exactly (default, TRUE) regular expression (FALSE).","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Facets available for querying cellxgene data — FACETS","text":"facets() returns tibble columns facet, label, ontology_term_id, n, number times facet label used database. facets_filter() returns logical vector length equal length (number rows) facet, TRUE indicating value key present dataset.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Facets available for querying cellxgene data — FACETS","text":"","code":"f <- facets() ## levels of each facet f |> dplyr::count(facet) #> # A tibble: 8 × 2 #> facet n #> #> 1 assay 38 #> 2 cell_type 772 #> 3 development_stage 231 #> 4 disease 102 #> 5 organism 7 #> 6 self_reported_ethnicity 32 #> 7 sex 3 #> 8 tissue 374 ## same as facets(, facets = \"organism\") f |> dplyr::filter(facet == \"organism\") #> # A tibble: 7 × 4 #> facet label ontology_term_id n #> #> 1 organism Homo sapiens NCBITaxon:9606 860 #> 2 organism Mus musculus NCBITaxon:10090 270 #> 3 organism Callithrix jacchus NCBITaxon:9483 28 #> 4 organism Macaca mulatta NCBITaxon:9544 19 #> 5 organism Sus scrofa domesticus NCBITaxon:9825 3 #> 6 organism Pan troglodytes NCBITaxon:9598 2 #> 7 organism Gorilla gorilla NCBITaxon:9593 1 db <- db() ds <- datasets(db) ## datasets with African American females ds |> dplyr::filter( facets_filter(self_reported_ethnicity, \"label\", \"African American\"), facets_filter(sex, \"label\", \"female\") ) #> # A tibble: 52 × 31 #> dataset_id dataset_version_id collection_id donor_id assay batch_condition #> #> 1 94c41723-b2… 4a9bcef2-02db-49f… f17b9205-f61… #> 2 3de0ad6d-43… 6934232e-4db4-423… 625f6bf4-2f3… #> 3 de985818-28… 64f14a2b-d754-4bc… c9706a92-0e5… #> 4 0b4a15a7-4e… 6616739c-0b4a-4a4… a98b828a-622… #> 5 32b9bdce-24… 04019af8-aaf3-446… bcb61471-2a4… #> 6 0b75c598-08… 6568e80d-b8d8-4a5… bcb61471-2a4… #> 7 07854d9c-53… bf5607d5-82c3-46f… bcb61471-2a4… #> 8 e40c6272-af… 23d7646e-5ed0-467… a48f5033-343… #> 9 d6dfdef1-40… 5a2b8d31-f043-48e… a48f5033-343… #> 10 6a270451-b4… 88830bd3-ddbe-41d… a48f5033-343… #> # ℹ 42 more rows #> # ℹ 25 more variables: cell_count , cell_type , citation , #> # development_stage , disease , embeddings , #> # explorer_url , feature_biotype , feature_count , #> # feature_reference , is_primary_data , #> # mean_genes_per_cell , organism , primary_cell_count , #> # raw_data_location , schema_version , … ## datasets with non-European, known ethnicity facets(db, \"self_reported_ethnicity\") #> # A tibble: 32 × 4 #> facet label ontology_term_id n #> #> 1 self_reported_ethnicity European HANCESTRO:0005 499 #> 2 self_reported_ethnicity unknown unknown 411 #> 3 self_reported_ethnicity na na 314 #> 4 self_reported_ethnicity Asian HANCESTRO:0008 141 #> 5 self_reported_ethnicity African American HANCESTRO:0568 61 #> 6 self_reported_ethnicity Native American,Hispanic or L… HANCESTRO:0013,… 50 #> 7 self_reported_ethnicity Hispanic or Latin American HANCESTRO:0014 48 #> 8 self_reported_ethnicity African American or Afro-Cari… HANCESTRO:0016 26 #> 9 self_reported_ethnicity Greater Middle Eastern (Midd… HANCESTRO:0015 22 #> 10 self_reported_ethnicity South Asian HANCESTRO:0006 11 #> # ℹ 22 more rows ds |> dplyr::filter( !facets_filter( self_reported_ethnicity, \"label\", c(\"European\", \"na\", \"unknown\") ) ) #> # A tibble: 25 × 31 #> dataset_id dataset_version_id collection_id donor_id assay batch_condition #> #> 1 e6a11140-25… 4866a804-37eb-436… e5f58829-1a6… #> 2 6ec405bb-47… c524a1cb-e823-478… e5f58829-1a6… #> 3 2ba40233-85… a1e426d5-f575-445… e5f58829-1a6… #> 4 2423ce2c-31… 80e4bc4f-e0a9-467… e5f58829-1a6… #> 5 2adb1f8a-a6… 1b7484e3-83a0-47f… 38833785-fac… #> 6 be884a28-0e… 7dd2aaf8-1714-49b… 10bf5c50-8d8… #> 7 c8f83821-a2… 10ecac59-8895-492… 9d63fcf1-5ca… #> 8 c05e6940-72… 80a4eb91-dab2-40d… 9d63fcf1-5ca… #> 9 894573ad-49… e385cf86-3c0b-422… 9d63fcf1-5ca… #> 10 84242d25-f6… 7ce8fb90-86fa-4ea… 9d63fcf1-5ca… #> # ℹ 15 more rows #> # ℹ 25 more variables: cell_count , cell_type , citation , #> # development_stage , disease , embeddings , #> # explorer_url , feature_biotype , feature_count , #> # feature_reference , is_primary_data , #> # mean_genes_per_cell , organism , primary_cell_count , #> # raw_data_location , schema_version , …"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":null,"dir":"Reference","previous_headings":"","what":"Query cellxgene collections, datasets, and files — collections","title":"Query cellxgene collections, datasets, and files — collections","text":"files_download() retrieves one cellxgene files cache local system. links(), authors() publisher_metadata() helper functions extract 'nested' information collections.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Query cellxgene collections, datasets, and files — collections","text":"","code":"collections(cellxgene_db = db()) datasets(cellxgene_db = db()) datasets_visualize(tbl) files(cellxgene_db = db()) files_download(tbl, dry.run = TRUE, cache.path = .cellxgene_cache_path()) links(cellxgene_db = db()) authors(cellxgene_db = db()) publisher_metadata(cellxgene_db = db())"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Query cellxgene collections, datasets, and files — collections","text":"cellxgene_db optional 'cellxgene_db' object, returned db(). tbl tibble() typically derived datasets(db) files(db) containing columns dataset_id (datasets_visualize()), columns dataset_id, file_id, filetype (files_download()). dry.run logical(1) indicating whether (often large) file(s) tbl downloaded local cache. Files downloaded dry.run = TRUE (default). cache.path character(1) directory cache downloaded files. directory must already exist. default tools::R_user_dir(\"cellxgenedp\", \"cache\"), package-specific path user home directory.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Query cellxgene collections, datasets, and files — collections","text":"function returns tibble describing corresponding component database. files_download() returns character() vector paths local files. links() returns tibble external links associated collection. Common links includ DOI, raw data / data sources, lab websites. authors() returns tibble authors associated collection. publisher_metadata() returns tibble publisher metadata (journal, publicate date, doi) associated collection.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Query cellxgene collections, datasets, and files — collections","text":"","code":"db <- db() collections(db) #> # A tibble: 182 × 18 #> collection_id collection_version_id collection_url consortia contact_email #> #> 1 ceb895f4-ff9f-4… ee098b5a-4f33-473b-b… https://cellx… panagiotis.r… #> 2 af893e86-8e9f-4… 768170a6-c590-4900-a… https://cellx… ruichen@bcm.… #> 3 1d1c7275-476a-4… 609becde-c797-41bb-8… https://cellx… wey334@g.har… #> 4 1b014f39-f202-4… 1d88cb46-6e84-4b5b-b… https://cellx… kimberly.ald… #> 5 48d354f5-a5ca-4… 2862daa3-c933-43c8-9… https://cellx… Nathan.Salom… #> 6 43d4bb39-21af-4… 78360f02-1acc-415c-a… https://cellx… raymond.cho@… #> 7 f7cecffa-00b4-4… 43224f82-db2a-443c-9… https://cellx… st9@sanger.a… #> 8 f17b9205-f61f-4… 21ff4724-95e2-491b-8… https://cellx… genevieve.ko… #> 9 64b24fda-6591-4… e414854b-2666-4977-9… https://cellx… magness@med.… #> 10 48259aa8-f168-4… 44601b80-bd11-49d8-a… https://cellx… wtk22@cam.ac… #> # ℹ 172 more rows #> # ℹ 13 more variables: contact_name , curator_name , #> # description , doi , links , name , #> # publisher_metadata , revising_in , revision_of , #> # visibility , created_at , published_at , revised_at collections(db) |> dplyr::glimpse() #> Rows: 182 #> Columns: 18 #> $ collection_id \"ceb895f4-ff9f-403a-b7c3-187a9657ac2c\", \"af893e8… #> $ collection_version_id \"ee098b5a-4f33-473b-b52a-0451de1f80ae\", \"768170a… #> $ collection_url \"https://cellxgene.cziscience.com/collections/ce… #> $ consortia [\"BRAIN Initiative\"], [\"CZI Single-Cell Biology… #> $ contact_email \"panagiotis.roussos@mssm.edu\", \"ruichen@bcm.edu\"… #> $ contact_name \"Panos Roussos\", \"Rui Chen\", \"Wenjun Yan\", \"Kimb… #> $ curator_name \"Corinn Sophia Small\", \"Jennifer Yu-Sheng Chien\"… #> $ description \"We simultaneously profiled gene expression and … #> $ doi \"10.1126/sciadv.adg3754\", \"10.1016/j.xgen.2023.1… #> $ links [[\"SCP1859\", \"OTHER\", \"https://singlecell.broad… #> $ name \"Multi-omic profiling of the developing human ce… #> $ publisher_metadata [[[\"Zhu\", \"Kaiyi\"], [\"Bendl\", \"Jaroslav\"], [\"Ra… #> $ revising_in NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, … #> $ revision_of NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, … #> $ visibility \"PUBLIC\", \"PUBLIC\", \"PUBLIC\", \"PUBLIC\", \"PUBLIC\"… #> $ created_at 2023-12-12, 2023-12-11, 2023-12-12, 2023-12-11,… #> $ published_at 2023-08-28, 2021-10-29, 2023-02-10, 2023-08-31,… #> $ revised_at 2023-12-13, 2023-12-13, 2023-12-13, 2023-12-13,… datasets(db) |> dplyr::glimpse() #> Rows: 1,167 #> Columns: 31 #> $ dataset_id \"53ce2631-3646-4172-bbd9-38b0a44d8214\", \"1d… #> $ dataset_version_id \"2f17c183-388a-4c08-9adb-a146833e57ab\", \"94… #> $ collection_id \"ceb895f4-ff9f-403a-b7c3-187a9657ac2c\", \"ce… #> $ donor_id [\"LaFet1\", \"LaFet2\", \"EaFet2\", \"EaFet1\", \"… #> $ assay [[\"10x multiome\", \"EFO:0030059\"]], [[\"10x … #> $ batch_condition [\"donor_id\", \"batch\"], [\"donor_id\", \"batch… #> $ cell_count 45549, 45549, 18011, 11617, 53040, 56507, 7… #> $ cell_type [[\"astrocyte\", \"CL:0000127\"], [\"caudal gan… #> $ citation \"Publication: https://doi.org/10.1126/sciad… #> $ development_stage [[\"14-year-old human stage\", \"HsapDv:00001… #> $ disease [[\"normal\", \"PATO:0000461\"]], [[\"normal\", … #> $ embeddings [\"X_joint_wnn_umap\", \"X_umap\"], [\"X_joint_… #> $ explorer_url \"https://cellxgene.cziscience.com/e/53ce263… #> $ feature_biotype [\"gene\"], [\"gene\"], [\"gene\"], [\"gene\"], [\"… #> $ feature_count 30113, 19492, 30933, 30933, 30933, 30933, 3… #> $ feature_reference [\"NCBITaxon:9606\"], [\"NCBITaxon:9606\"], [\"… #> $ is_primary_data [TRUE], [TRUE], [FALSE], [TRUE], [FALSE], … #> $ mean_genes_per_cell 1886.041, 4865.284, 2028.297, 4518.387, 222… #> $ organism [[\"Homo sapiens\", \"NCBITaxon:9606\"]], [[\"H… #> $ primary_cell_count 45549, 45549, 0, 11617, 0, 0, 0, 244474, 0,… #> $ raw_data_location \"raw.X\", \"raw.X\", \"raw.X\", \"raw.X\", \"raw.X\"… #> $ schema_version \"4.0.0\", \"4.0.0\", \"4.0.0\", \"4.0.0\", \"4.0.0\"… #> $ self_reported_ethnicity [[\"unknown\", \"unknown\"]], [[\"unknown\", \"un… #> $ sex [[\"female\", \"PATO:0000383\"], [\"male\", \"PAT… #> $ suspension_type [\"nucleus\"], [\"nucleus\"], [\"nucleus\"], [\"n… #> $ tissue [[\"cortical plate\", \"UBERON:0005343\", \"tis… #> $ title \"10x scRNA-seq from human cortex\", \"10x scA… #> $ tombstone FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F… #> $ x_approximate_distribution NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,… #> $ published_at 2023-08-28, 2023-08-28, 2021-10-29, 2021-1… #> $ revised_at 2023-12-13, 2023-12-13, 2023-12-13, 2023-1… # \\donttest{ if (interactive()) { ## visualize the first dataset datasets(db) |> dplyr::slice(1) |> datasets_visualize() } # } files(db) |> dplyr::glimpse() #> Rows: 2,314 #> Columns: 4 #> $ dataset_id \"53ce2631-3646-4172-bbd9-38b0a44d8214\", \"53ce2631-3646-4172… #> $ filesize 406108808, 399752425, 906795740, 1060800682, 1071401902, 14… #> $ filetype \"H5AD\", \"RDS\", \"H5AD\", \"RDS\", \"H5AD\", \"RDS\", \"H5AD\", \"RDS\",… #> $ url \"https://datasets.cellxgene.cziscience.com/2f17c183-388a-4c… if (FALSE) { files(db) |> dplyr::slice(1) |> files_download(dry.run = FALSE) } ## common links to external data links(db) |> dplyr::count(link_type) #> # A tibble: 5 × 2 #> link_type n #> #> 1 DATA_SOURCE 35 #> 2 LAB_WEBSITE 38 #> 3 OTHER 329 #> 4 PROTOCOL 44 #> 5 RAW_DATA 269 ## authors per collection authors() |> dplyr::count(collection_id, sort = TRUE) #> # A tibble: 172 × 2 #> collection_id n #> #> 1 e5f58829-1a66-40b5-a624-9046778e74f5 221 #> 2 8f126edf-5405-4731-8374-b5ce11f53e82 205 #> 3 bcb61471-2a44-4d00-a0af-ff085512674c 171 #> 4 4f586cb6-972b-4ef7-a4ef-3c3800a3c004 147 #> 5 0b9d8a04-bb9d-44da-aa27-705bb65b54eb 135 #> 6 367d95c0-0eb0-4dae-8276-9407239421ee 106 #> 7 6f6d381a-7701-4781-935c-db10d30de293 98 #> 8 1ca90a2d-2943-483d-b678-b809bf464c30 94 #> 9 0a839c4b-10d0-4d64-9272-684c49a2c8ba 90 #> 10 ae1420fe-6630-46ed-8b3d-cc6056a66467 83 #> # ℹ 162 more rows publisher_metadata() |> dplyr::glimpse() #> Rows: 172 #> Columns: 9 #> $ collection_id \"ceb895f4-ff9f-403a-b7c3-187a9657ac2c\", \"af893e86-8e9f… #> $ name \"Multi-omic profiling of the developing human cerebral… #> $ is_preprint FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE… #> $ journal \"Sci. Adv.\", \"Cell Genomics\", \"Sci Rep\", \"Nat Neurosci… #> $ published_at 2023-10-13, 2023-06-01, 2020-06-17, 2021-08-01, 2021-… #> $ published_year 2023, 2023, 2020, 2021, 2021, 2018, 2022, 2021, 2022, … #> $ published_month 10, 6, 6, 8, 3, 10, 12, 7, 1, 4, 12, 6, 9, 8, 10, 2, 1… #> $ published_day 13, 1, 17, 1, 12, 1, 1, 1, 1, 21, 1, 15, 6, 1, 1, 1, 1… #> $ doi NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…"},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-18","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.8","title":"cellxgenedp 1.8","text":"(v 1.7.1) Update vignette section dataset visualization accomodate changes ‘Discover’ API. https://github.com/mtmorgan/cellxgenedp/issues/15","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-16","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.6","title":"cellxgenedp 1.6","text":"(v 1.5.2) use CELLxGENE ‘Discover’ API, changing column names return values. See ‘API changes’ ‘Discover download datasets…’ vignette.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-14","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.4","title":"cellxgenedp 1.4","text":"SIGNIFICANT USER-VISIBLE CHANGES (v 1.3.3) add publisher_metadata(), authors(), links() make access nested ‘collections()’ data straight-forward","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-12","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.2","title":"cellxgenedp 1.2","text":"SIGNIFICANT USER-VISIBLE CHANGES (v. 1.1.4) allow custom files_download() cache. Thanks @stemangiola, https://github.com/mtmorgan/cellxgenedp/pull/9 (v. 1.1.6) datasets ethnicity field renamed self_reported_ethnicity (v. 1.1.7) use zellkonverter’s basilisk-based Python parser read H5AD files vignette, see https://github.com/theislab/zellkonverter/issues/78 (v. 1.1.2) reset cache build machines weekly (v. 1.1.6) use {rjsoncons} CRAN package queries, rather local implementation. Thanks @LiNk-NY, https://github.com/mtmorgan/cellxgenedp/pull/12","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-007","dir":"Changelog","previous_headings":"","what":"cellxgenedp 0.0.7","title":"cellxgenedp 0.0.7","text":"(v. 0.0.7) make errors local cache update accessible; see https://github.com/mtmorgan/cellxgenedp/issues/1","code":""}] +[{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"installation-and-use","dir":"Articles","previous_headings":"","what":"Installation and use","title":"Discover and download datasets and files from the cellxgene data portal","text":"package available Bioconductor version 3.15 later. following code installs cellxgenedp well packages required vignette. Alternatively, install ‘development’ version GitHub also install additional packages required vignette, use Load package current R session. make extensive use dplyr packages, end vignette use SingleCellExperiment zellkonverter, load well.","code":"if (!\"BiocManager\" %in% rownames(installed.packages())) install.packages(\"BiocManager\", repos = \"https://CRAN.R-project.org\") BiocManager::install(\"cellxgenedp\") if (!\"remotes\" %in% rownames(installed.packages())) install.packages(\"remotes\", repos = \"https://CRAN.R-project.org\") remotes::install_github(\"mtmorgan/cellxgenedp\") pkgs <- c(\"zellkonverter\", \"SingleCellExperiment\", \"HDF5Array\") required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())] BiocManager::install(required_pkgs) library(zellkonverter) library(SingleCellExperiment) # load early to avoid masking dplyr::count() library(dplyr) library(cellxgenedp)"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"cxg-provides-a-shiny-interface","dir":"Articles","previous_headings":"","what":"cxg() Provides a ‘shiny’ interface","title":"Discover and download datasets and files from the cellxgene data portal","text":"following sections outline use cellxgenedp package R script; functionality also available cxg() shiny application, providing easy way identify, download, visualize one several datasets. Start app choose project first tab, dataset visualization, one datasets download!","code":"cxg()"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"collections-datasets-and-files","dir":"Articles","previous_headings":"","what":"Collections, datasets and files","title":"Discover and download datasets and files from the cellxgene data portal","text":"Retrieve metadata resources available cellxgene data portal using db(): Printing db object provides brief overview available data, well hints, form functions like collections(), exploration. portal organizes data hierarchically, ‘collections’ (research studies, approximately), ‘datasets’, ‘files’. Discover data using corresponding functions. resources unique primary identifier (e.g., file_id) well identifier describing relationship resource components database (e.g., dataset_id). identifiers can used ‘join’ information across tables.","code":"db <- db() db ## cellxgene_db ## number of collections(): 182 ## number of datasets(): 1167 ## number of files(): 2314 collections(db) ## # A tibble: 182 × 18 ## collection_id collection_version_id collection_url consortia contact_email ## ## 1 ceb895f4-ff9f-4… ee098b5a-4f33-473b-b… https://cellx… panagiotis.r… ## 2 af893e86-8e9f-4… 768170a6-c590-4900-a… https://cellx… ruichen@bcm.… ## 3 1d1c7275-476a-4… 609becde-c797-41bb-8… https://cellx… wey334@g.har… ## 4 1b014f39-f202-4… 1d88cb46-6e84-4b5b-b… https://cellx… kimberly.ald… ## 5 48d354f5-a5ca-4… 2862daa3-c933-43c8-9… https://cellx… Nathan.Salom… ## 6 43d4bb39-21af-4… 78360f02-1acc-415c-a… https://cellx… raymond.cho@… ## 7 f7cecffa-00b4-4… 43224f82-db2a-443c-9… https://cellx… st9@sanger.a… ## 8 f17b9205-f61f-4… 21ff4724-95e2-491b-8… https://cellx… genevieve.ko… ## 9 64b24fda-6591-4… e414854b-2666-4977-9… https://cellx… magness@med.… ## 10 48259aa8-f168-4… 44601b80-bd11-49d8-a… https://cellx… wtk22@cam.ac… ## # ℹ 172 more rows ## # ℹ 13 more variables: contact_name , curator_name , ## # description , doi , links , name , ## # publisher_metadata , revising_in , revision_of , ## # visibility , created_at , published_at , revised_at datasets(db) ## # A tibble: 1,167 × 31 ## dataset_id dataset_version_id collection_id donor_id assay batch_condition ## ## 1 53ce2631-36… 2f17c183-388a-4c0… ceb895f4-ff9… ## 2 1d4128f6-c2… 94762ee1-9f9f-49e… ceb895f4-ff9… ## 3 ed419b4e-db… 758b30a8-5fb0-46c… af893e86-8e9… ## 4 aad97cb5-f3… d6966985-89f9-485… af893e86-8e9… ## 5 8f10185b-e0… 63d7a3a3-9691-41d… af893e86-8e9… ## 6 359f7af4-87… 0f461193-282f-443… af893e86-8e9… ## 7 11ef37ee-21… 74253a67-927c-4cd… af893e86-8e9… ## 8 0129dbd9-a7… a970179d-2e9e-4d2… af893e86-8e9… ## 9 00e5dedd-b9… 94c0e74c-b269-4ce… af893e86-8e9… ## 10 d319af7f-be… 3c80a5bb-8c89-433… 1d1c7275-476… ## # ℹ 1,157 more rows ## # ℹ 25 more variables: cell_count , cell_type , citation , ## # development_stage , disease , embeddings , ## # explorer_url , feature_biotype , feature_count , ## # feature_reference , is_primary_data , ## # mean_genes_per_cell , organism , primary_cell_count , ## # raw_data_location , schema_version , … files(db) ## # A tibble: 2,314 × 4 ## dataset_id filesize filetype url ## ## 1 53ce2631-3646-4172-bbd9-38b0a44d8214 406108808 H5AD https://datasets.ce… ## 2 53ce2631-3646-4172-bbd9-38b0a44d8214 399752425 RDS https://datasets.ce… ## 3 1d4128f6-c27b-40c4-af77-b1c7e2b694e7 906795740 H5AD https://datasets.ce… ## 4 1d4128f6-c27b-40c4-af77-b1c7e2b694e7 1060800682 RDS https://datasets.ce… ## 5 ed419b4e-db9b-40f1-8593-68fdf8dfb076 1071401902 H5AD https://datasets.ce… ## 6 ed419b4e-db9b-40f1-8593-68fdf8dfb076 1419579253 RDS https://datasets.ce… ## 7 aad97cb5-f375-45ef-ae9d-178e7f5d5180 785137201 H5AD https://datasets.ce… ## 8 aad97cb5-f375-45ef-ae9d-178e7f5d5180 1025253758 RDS https://datasets.ce… ## 9 8f10185b-e0b3-46a5-8706-7f1799225d79 3077438912 H5AD https://datasets.ce… ## 10 8f10185b-e0b3-46a5-8706-7f1799225d79 4090930879 RDS https://datasets.ce… ## # ℹ 2,304 more rows"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"using-dplyr-to-navigate-data","dir":"Articles","previous_headings":"Collections, datasets and files","what":"Using dplyr to navigate data","title":"Discover and download datasets and files from the cellxgene data portal","text":"collection may several datasets, datasets may several files. instance, collection datasets can find collection joining collections() table. can take similar strategy identify datasets belonging collection","code":"collection_with_most_datasets <- datasets(db) |> count(collection_id, sort = TRUE) |> slice(1) left_join( collection_with_most_datasets |> select(collection_id), collections(db), by = \"collection_id\" ) |> glimpse() ## Rows: 1 ## Columns: 18 ## $ collection_id \"283d65eb-dd53-496d-adb7-7570c7caa443\" ## $ collection_version_id \"4c16c611-00a9-42f9-a8c4-7b42daa226fe\" ## $ collection_url \"https://cellxgene.cziscience.com/collections/28… ## $ consortia [\"BRAIN Initiative\", \"CZI Single-Cell Biology\"] ## $ contact_email \"kimberly.siletti@ki.se\" ## $ contact_name \"Kimberly Siletti\" ## $ curator_name \"James Chaffer\" ## $ description \"First draft atlas of human brain transcriptomic… ## $ doi \"10.1126/science.add7046\" ## $ links [[\"\", \"RAW_DATA\", \"http://data.nemoarchive.org/b… ## $ name \"Human Brain Cell Atlas v1.0\" ## $ publisher_metadata [[[\"Siletti\", \"Kimberly\"], [\"Hodge\", \"Rebecca\"]… ## $ revising_in NA ## $ revision_of NA ## $ visibility \"PUBLIC\" ## $ created_at 2023-12-12 ## $ published_at 2022-12-09 ## $ revised_at 2023-12-13 left_join( collection_with_most_datasets |> select(collection_id), datasets(db), by = \"collection_id\" ) ## # A tibble: 138 × 31 ## collection_id dataset_id dataset_version_id donor_id assay batch_condition ## ## 1 283d65eb-dd53-… ff7d15fa-… 51e05270-1f00-452… ## 2 283d65eb-dd53-… fe1a73ab-… 4e124ecc-7885-465… ## 3 283d65eb-dd53-… fbf173f9-… 5a52f557-aeaf-4fc… ## 4 283d65eb-dd53-… fa554686-… 6606e9aa-e4c4-452… ## 5 283d65eb-dd53-… f9034091-… 8f5b1977-8317-447… ## 6 283d65eb-dd53-… f8dda921-… 1ad58833-956c-454… ## 7 283d65eb-dd53-… f7d003d4-… 4d002ac1-4671-490… ## 8 283d65eb-dd53-… f6d9f2ad-… 2102f4b8-c1fe-4ee… ## 9 283d65eb-dd53-… f5a04dff-… b92375fd-dafe-44c… ## 10 283d65eb-dd53-… f502c312-… b750310e-1abb-4c7… ## # ℹ 128 more rows ## # ℹ 25 more variables: cell_count , cell_type , citation , ## # development_stage , disease , embeddings , ## # explorer_url , feature_biotype , feature_count , ## # feature_reference , is_primary_data , ## # mean_genes_per_cell , organism , primary_cell_count , ## # raw_data_location , schema_version , …"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"facets-provides-information-on-levels-present-in-specific-columns","dir":"Articles","previous_headings":"Collections, datasets and files","what":"facets() provides information on ‘levels’ present in specific columns","title":"Discover and download datasets and files from the cellxgene data portal","text":"Notice columns ‘lists’ rather atomic vectors like ‘character’ ‘integer’. indicates least datasets one type assay, cell_type, etc. facets() function provides convenient way discovering possible levels column, e.g., assay, organism, self_reported_ethnicity, sex, number datasets label.","code":"datasets(db) |> select(where(is.list)) ## # A tibble: 1,167 × 15 ## donor_id assay batch_condition cell_type development_stage disease ## ## 1 ## 2 ## 3 ## 4 ## 5 ## 6 ## 7 ## 8 ## 9 ## 10 ## # ℹ 1,157 more rows ## # ℹ 9 more variables: embeddings , feature_biotype , ## # feature_reference , is_primary_data , organism , ## # self_reported_ethnicity , sex , suspension_type , ## # tissue facets(db, \"assay\") ## # A tibble: 38 × 4 ## facet label ontology_term_id n ## ## 1 assay 10x 3' v3 EFO:0009922 563 ## 2 assay 10x 3' v2 EFO:0009899 254 ## 3 assay Slide-seqV2 EFO:0030062 223 ## 4 assay Visium Spatial Gene Expression EFO:0010961 108 ## 5 assay 10x 5' v1 EFO:0011025 81 ## 6 assay Smart-seq2 EFO:0008931 63 ## 7 assay 10x multiome EFO:0030059 61 ## 8 assay 10x 5' v2 EFO:0009900 23 ## 9 assay sci-RNA-seq3 EFO:0030028 15 ## 10 assay Drop-seq EFO:0008722 14 ## # ℹ 28 more rows facets(db, \"self_reported_ethnicity\") ## # A tibble: 32 × 4 ## facet label ontology_term_id n ## ## 1 self_reported_ethnicity European HANCESTRO:0005 499 ## 2 self_reported_ethnicity unknown unknown 411 ## 3 self_reported_ethnicity na na 314 ## 4 self_reported_ethnicity Asian HANCESTRO:0008 141 ## 5 self_reported_ethnicity African American HANCESTRO:0568 61 ## 6 self_reported_ethnicity Native American,Hispanic or L… HANCESTRO:0013,… 50 ## 7 self_reported_ethnicity Hispanic or Latin American HANCESTRO:0014 48 ## 8 self_reported_ethnicity African American or Afro-Cari… HANCESTRO:0016 26 ## 9 self_reported_ethnicity Greater Middle Eastern (Midd… HANCESTRO:0015 22 ## 10 self_reported_ethnicity South Asian HANCESTRO:0006 11 ## # ℹ 22 more rows facets(db, \"sex\") ## # A tibble: 3 × 4 ## facet label ontology_term_id n ## ## 1 sex male PATO:0000384 903 ## 2 sex female PATO:0000383 677 ## 3 sex unknown unknown 173"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"filtering-faceted-columns","dir":"Articles","previous_headings":"Collections, datasets and files","what":"Filtering faceted columns","title":"Discover and download datasets and files from the cellxgene data portal","text":"Suppose interested finding datasets 10x 3’ v3 assay (ontology_term_id EFO:0009922) containing individuals African American ethnicity, female sex. Use facets_filter() utility function filter data sets needed Use nrow(african_american_female) find number datasets satisfying criteria. looks like cells sequenced (dataset may contain cells several ethnicities, well males individuals unknown gender, know actual number cells available without downloading files). Use left_join identify corresponding collections:","code":"african_american_female <- datasets(db) |> filter( facets_filter(assay, \"ontology_term_id\", \"EFO:0009922\"), facets_filter(self_reported_ethnicity, \"label\", \"African American\"), facets_filter(sex, \"label\", \"female\") ) african_american_female |> summarise(total_cell_count = sum(cell_count)) ## # A tibble: 1 × 1 ## total_cell_count ## ## 1 4320736 ## collections left_join( african_american_female |> select(collection_id) |> distinct(), collections(db), by = \"collection_id\" ) ## # A tibble: 13 × 18 ## collection_id collection_version_id collection_url consortia contact_email ## ## 1 f17b9205-f61f-4… 21ff4724-95e2-491b-8… https://cellx… genevieve.ko… ## 2 625f6bf4-2f33-4… 0c0d607f-00b8-4f3d-8… https://cellx… a5wang@healt… ## 3 c9706a92-0e5f-4… bc627471-7137-4518-a… https://cellx… hnakshat@iup… ## 4 a98b828a-622a-4… cee0b899-009a-40ec-a… https://cellx… markusbi@med… ## 5 bcb61471-2a44-4… 39fca0ca-2b0f-47b5-9… https://cellx… info@kpmp.org ## 6 72d37bc9-76cc-4… 3e396ffb-b0d8-4ce4-b… https://cellx… m.sepp@zmbh.… ## 7 b953c942-f5d8-4… 7727e578-1805-47c8-b… https://cellx… icobos@stanf… ## 8 62e8f058-9c37-4… addce074-53d2-4f21-9… https://cellx… chanj3@mskcc… ## 9 71f4bccf-53d4-4… 5a524bd4-231b-4941-a… https://cellx… kevinmbyrd@g… ## 10 e1fa9900-3fc9-4… 85624898-8006-4209-a… https://cellx… j.ma@yale.edu ## 11 4195ab4c-20bd-4… f7da9dd1-b0ec-401f-9… https://cellx… nnavin@mdand… ## 12 6b701826-37bb-4… 95ab05df-9716-4fc8-a… https://cellx… astreets@ber… ## 13 b9fc3d70-5a72-4… 6701e565-6dfe-4649-b… https://cellx… bruce.aronow… ## # ℹ 13 more variables: contact_name , curator_name , ## # description , doi , links , name , ## # publisher_metadata , revising_in , revision_of , ## # visibility , created_at , published_at , revised_at "},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"publication-and-other-external-data","dir":"Articles","previous_headings":"Collections, datasets and files","what":"Publication and other external data","title":"Discover and download datasets and files from the cellxgene data portal","text":"Many collections include publication information external data. information available return value collections(), helper function publisher_metadata(), authors(), links() may facilite access. Suppose one interested publication “single-cell atlas healthy breast tissues reveals clinically relevant clusters breast epithelial cells”. Discover collections Use collection_id extract publisher metadata (including DOI available) author information Collections may links additional external data, case DOI two links RAW_DATA. Conversely, knowledge DOI, etc., can used discover details corresponding collection.","code":"title_of_interest <- paste( \"A single-cell atlas of the healthy breast tissues reveals clinically\", \"relevant clusters of breast epithelial cells\" ) collection_of_interest <- collections(db) |> dplyr::filter(startsWith(name, title_of_interest)) collection_of_interest |> glimpse() ## Rows: 1 ## Columns: 18 ## $ collection_id \"c9706a92-0e5f-46c1-96d8-20e42467f287\" ## $ collection_version_id \"bc627471-7137-4518-a593-2f679bac054e\" ## $ collection_url \"https://cellxgene.cziscience.com/collections/c9… ## $ consortia [\"CZI Single-Cell Biology\"] ## $ contact_email \"hnakshat@iupui.edu\" ## $ contact_name \"Harikrishna Nakshatri\" ## $ curator_name \"Jennifer Yu-Sheng Chien\" ## $ description \"Single-cell RNA sequencing (scRNA-seq) is an ev… ## $ doi \"10.1016/j.xcrm.2021.100219\" ## $ links [[\"\", \"RAW_DATA\", \"https://data.humancellatlas.o… ## $ name \"A single-cell atlas of the healthy breast tiss… ## $ publisher_metadata [[[\"Bhat-Nakshatri\", \"Poornima\"], [\"Gao\", \"Hongy… ## $ revising_in NA ## $ revision_of NA ## $ visibility \"PUBLIC\" ## $ created_at 2023-12-12 ## $ published_at 2021-03-25 ## $ revised_at 2023-12-13 collection_id_of_interest <- pull(collection_of_interest, \"collection_id\") publisher_metadata(db) |> filter(collection_id == collection_id_of_interest) |> glimpse() ## Rows: 1 ## Columns: 9 ## $ collection_id \"c9706a92-0e5f-46c1-96d8-20e42467f287\" ## $ name \"A single-cell atlas of the healthy breast tissues rev… ## $ is_preprint FALSE ## $ journal \"Cell Reports Medicine\" ## $ published_at 2021-03-01 ## $ published_year 2021 ## $ published_month 3 ## $ published_day 1 ## $ doi NA authors(db) |> filter(collection_id == collection_id_of_interest) ## # A tibble: 12 × 4 ## collection_id family given consortium ## ## 1 c9706a92-0e5f-46c1-96d8-20e42467f287 Bhat-Nakshatri Poornima NA ## 2 c9706a92-0e5f-46c1-96d8-20e42467f287 Gao Hongyu NA ## 3 c9706a92-0e5f-46c1-96d8-20e42467f287 Sheng Liu NA ## 4 c9706a92-0e5f-46c1-96d8-20e42467f287 McGuire Patrick C. NA ## 5 c9706a92-0e5f-46c1-96d8-20e42467f287 Xuei Xiaoling NA ## 6 c9706a92-0e5f-46c1-96d8-20e42467f287 Wan Jun NA ## 7 c9706a92-0e5f-46c1-96d8-20e42467f287 Liu Yunlong NA ## 8 c9706a92-0e5f-46c1-96d8-20e42467f287 Althouse Sandra K. NA ## 9 c9706a92-0e5f-46c1-96d8-20e42467f287 Colter Austyn NA ## 10 c9706a92-0e5f-46c1-96d8-20e42467f287 Sandusky George NA ## 11 c9706a92-0e5f-46c1-96d8-20e42467f287 Storniolo Anna Maria NA ## 12 c9706a92-0e5f-46c1-96d8-20e42467f287 Nakshatri Harikrishna NA external_links <- links(db) external_links ## # A tibble: 716 × 4 ## collection_id link_name link_type link_url ## ## 1 ceb895f4-ff9f-403a-b7c3-187a9657ac2c SCP1859 OTHER https://singl… ## 2 ceb895f4-ff9f-403a-b7c3-187a9657ac2c NA LAB_WEBSITE https://labs.… ## 3 ceb895f4-ff9f-403a-b7c3-187a9657ac2c NA OTHER http://genome… ## 4 ceb895f4-ff9f-403a-b7c3-187a9657ac2c GSE204684 RAW_DATA https://www.n… ## 5 ceb895f4-ff9f-403a-b7c3-187a9657ac2c analysis code OTHER https://zenod… ## 6 af893e86-8e9f-41f1-a474-ef05359b1fb7 NA OTHER https://retin… ## 7 af893e86-8e9f-41f1-a474-ef05359b1fb7 NA RAW_DATA https://data.… ## 8 af893e86-8e9f-41f1-a474-ef05359b1fb7 GSE226108 RAW_DATA https://www.n… ## 9 1d1c7275-476a-49e2-9022-ad1b1c793594 GSE148077 RAW_DATA https://www.n… ## 10 1d1c7275-476a-49e2-9022-ad1b1c793594 NA OTHER https://singl… ## # ℹ 706 more rows external_links |> count(link_type) ## # A tibble: 5 × 2 ## link_type n ## ## 1 DATA_SOURCE 35 ## 2 LAB_WEBSITE 38 ## 3 OTHER 329 ## 4 PROTOCOL 44 ## 5 RAW_DATA 270 external_links |> filter(collection_id == collection_id_of_interest) ## # A tibble: 2 × 4 ## collection_id link_name link_type link_url ## ## 1 c9706a92-0e5f-46c1-96d8-20e42467f287 NA RAW_DATA https://data.humance… ## 2 c9706a92-0e5f-46c1-96d8-20e42467f287 NA RAW_DATA https://www.ncbi.nlm… doi_of_interest <- \"https://doi.org/10.1016/j.stem.2018.12.011\" links(db) |> filter(link_url == doi_of_interest) |> left_join(collections(db), by = \"collection_id\") |> glimpse() ## Rows: 1 ## Columns: 21 ## $ collection_id \"b1a879f6-5638-48d3-8f64-f6592c1b1561\" ## $ link_name \"PSC-ATO protocol\" ## $ link_type \"PROTOCOL\" ## $ link_url \"https://doi.org/10.1016/j.stem.2018.12.011\" ## $ collection_version_id \"aa814356-20ba-4066-88be-fcbf89c84899\" ## $ collection_url \"https://cellxgene.cziscience.com/collections/b1… ## $ consortia [\"CZI Single-Cell Biology\", \"Wellcome HCA Strate… ## $ contact_email \"st9@sanger.ac.uk\" ## $ contact_name \"Sarah Teichmann\" ## $ curator_name \"Batuhan Cakir\" ## $ description \"Single-cell genomics studies have decoded the i… ## $ doi \"10.1126/science.abo0510\" ## $ links [[\"scVI Models\", \"DATA_SOURCE\", \"https://develop… ## $ name \"Mapping the developing human immune system acro… ## $ publisher_metadata [[[\"Suo\", \"Chenqu\"], [\"Dann\", \"Emma\"], [\"Goh\", \"… ## $ revising_in NA ## $ revision_of NA ## $ visibility \"PUBLIC\" ## $ created_at 2023-12-11 ## $ published_at 2022-10-04 ## $ revised_at 2023-12-13"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"visualizing-data-in-cellxgene","dir":"Articles","previous_headings":"","what":"Visualizing data in cellxgene","title":"Discover and download datasets and files from the cellxgene data portal","text":"Visualization straight-forward dataset_id available. example, visualize first dataset african_american_female, use Visualization interactive process, datasets_visualize() open 5 browser tabs per call.","code":"african_american_female |> ## use criteria to identify a single dataset (here just the ## 'first' dataset), then visualize slice(1) |> datasets_visualize()"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"file-download-and-use","dir":"Articles","previous_headings":"","what":"File download and use","title":"Discover and download datasets and files from the cellxgene data portal","text":"Datasets usually contain H5AD (files produced python AnnData module), Rds (serialized files produced R Seurat package). Rds files may unreadable version Seurat used create file different version used read file. therefore focus H5AD files. illustration, find files associated studies African American females download one selected files. choose single dataset H5AD file download downloaded local cache (use internal function cellxgenedp:::.cellxgenedb_cache_path() location cache), process time-consuming first time. H5AD files can converted R / Bioconductor objects using zellkonverter package. SingleCellExperiment object matrix-like object rows corresponding genes columns cells. Thus can easily explore cells present data.","code":"selected_files <- left_join( african_american_female |> select(dataset_id), files(db), by = \"dataset_id\" ) local_file <- selected_files |> filter( dataset_id == \"de985818-285f-4f59-9dbd-d74968fddba3\", filetype == \"H5AD\" ) |> files_download(dry.run = FALSE) basename(local_file) ## [1] \"64f14a2b-d754-4bc9-b496-b26f05ebfe4e.h5ad\" h5ad <- readH5AD(local_file, use_hdf5 = TRUE, reader = \"R\") h5ad ## class: SingleCellExperiment ## dim: 33234 31696 ## metadata(5): citation default_embedding schema_reference schema_version ## title ## assays(1): X ## rownames(33234): ENSG00000243485 ENSG00000237613 ... ENSG00000277475 ## ENSG00000268674 ## rowData names(5): feature_is_filtered feature_name feature_reference ## feature_biotype feature_length ## colnames(31696): CMGpool_AAACCCAAGGACAACC CMGpool_AAACCCACAATCTCTT ... ## K109064_TTTGTTGGTTGCATCA K109064_TTTGTTGGTTGGACCC ## colData names(36): donor_id self_reported_ethnicity_ontology_term_id ## ... development_stage observation_joinid ## reducedDimNames(3): X_pca X_tsne X_umap ## mainExpName: NULL ## altExpNames(0): h5ad |> colData(h5ad) |> as_tibble() |> count(sex, donor_id) ## # A tibble: 7 × 3 ## sex donor_id n ## ## 1 female D1 2303 ## 2 female D2 864 ## 3 female D3 2517 ## 4 female D4 1771 ## 5 female D5 2244 ## 6 female D11 7454 ## 7 female pooled [D9,D7,D8,D10,D6] 14543"},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"next-steps","dir":"Articles","previous_headings":"","what":"Next steps","title":"Discover and download datasets and files from the cellxgene data portal","text":"Orchestrating Single-Cell Analysis Bioconductor online resource provides excellent introduction analysis visualization single-cell data R / Bioconductor. Extensive opportunities working AnnData objects R using native python interface briefly described , e.g., ?AnnData2SCE help page zellkonverter. hca package provides programmatic access Human Cell Atlas data portal, allowing retrieval primary well derived single-cell data files.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"api-changes","dir":"Articles","previous_headings":"","what":"API changes","title":"Discover and download datasets and files from the cellxgene data portal","text":"Data access provided CELLxGENE changed new ‘Discover’ API. main functionality cellxgenedp package changed, specific columns removed, replaced added, follows: collections() Removed: access_type, data_submission_policy_version Replaced: updated_at replaced revised_at Added: collection_version_id, collection_url, doi, revising_in, revision_of datasets() Removed: is_valid, processing_status, published, revision, created_at Replaced: dataset_deployments replaced explorer_url, name replaced title, updated_at replaced revised_at Added: dataset_version_id, batch_condition, x_approximate_distribution files() Removed: file_id, filename, s3_uri, user_submitted, created_at, updated_at Added: filesize, url","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/articles/using_cellxgenedp.html","id":"session-info","dir":"Articles","previous_headings":"","what":"Session info","title":"Discover and download datasets and files from the cellxgene data portal","text":"","code":"## R version 4.3.2 (2023-10-31) ## Platform: x86_64-pc-linux-gnu (64-bit) ## Running under: Ubuntu 22.04.3 LTS ## ## Matrix products: default ## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 ## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 ## ## locale: ## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 ## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8 ## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C ## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C ## ## time zone: UTC ## tzcode source: system (glibc) ## ## attached base packages: ## [1] stats4 stats graphics grDevices utils datasets methods ## [8] base ## ## other attached packages: ## [1] cellxgenedp_1.7.1.9000 dplyr_1.1.4 ## [3] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0 ## [5] Biobase_2.62.0 GenomicRanges_1.54.1 ## [7] GenomeInfoDb_1.38.5 IRanges_2.36.0 ## [9] S4Vectors_0.40.2 BiocGenerics_0.48.1 ## [11] MatrixGenerics_1.14.0 matrixStats_1.2.0 ## [13] zellkonverter_1.12.1 BiocStyle_2.30.0 ## ## loaded via a namespace (and not attached): ## [1] tidyselect_1.2.0 filelock_1.0.3 bitops_1.0-7 ## [4] fastmap_1.1.1 RCurl_1.98-1.14 promises_1.2.1 ## [7] digest_0.6.34 mime_0.12 lifecycle_1.0.4 ## [10] ellipsis_0.3.2 magrittr_2.0.3 compiler_4.3.2 ## [13] rlang_1.1.3 sass_0.4.8 tools_4.3.2 ## [16] utf8_1.2.4 yaml_2.3.8 knitr_1.45 ## [19] S4Arrays_1.2.0 htmlwidgets_1.6.4 curl_5.2.0 ## [22] reticulate_1.34.0 DelayedArray_0.28.0 abind_1.4-5 ## [25] HDF5Array_1.30.0 withr_2.5.2 purrr_1.0.2 ## [28] desc_1.4.3 grid_4.3.2 fansi_1.0.6 ## [31] xtable_1.8-4 Rhdf5lib_1.24.1 cli_3.6.2 ## [34] rmarkdown_2.25 crayon_1.5.2 ragg_1.2.7 ## [37] generics_0.1.3 httr_1.4.7 rhdf5_2.46.1 ## [40] cachem_1.0.8 stringr_1.5.1 zlibbioc_1.48.0 ## [43] parallel_4.3.2 BiocManager_1.30.22 XVector_0.42.0 ## [46] basilisk_1.14.1 vctrs_0.6.5 Matrix_1.6-1.1 ## [49] jsonlite_1.8.8 dir.expiry_1.10.0 bookdown_0.37 ## [52] systemfonts_1.0.5 jquerylib_0.1.4 glue_1.7.0 ## [55] pkgdown_2.0.7 DT_0.31 stringi_1.8.3 ## [58] later_1.3.2 tibble_3.2.1 pillar_1.9.0 ## [61] rhdf5filters_1.14.1 basilisk.utils_1.14.1 htmltools_0.5.7 ## [64] GenomeInfoDbData_1.2.11 R6_2.5.1 textshaping_0.3.7 ## [67] evaluate_0.23 shiny_1.8.0 lattice_0.21-9 ## [70] png_0.1-8 memoise_2.0.1 httpuv_1.6.13 ## [73] bslib_0.6.1 rjsoncons_1.1.0 Rcpp_1.0.12 ## [76] SparseArray_1.2.3 xfun_0.41 fs_1.6.3 ## [79] pkgconfig_2.0.3"},{"path":"https://mtmorgan.github.io/cellxgenedp/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Martin Morgan. Author, maintainer. Kayla Interdonato. Author.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Morgan M, Interdonato K (2024). cellxgenedp: Discover Access Single Cell Data Sets cellxgene Data Portal. R package version 1.7.1.9000, https://github.com/mtmorgan/cellxgenedp, https://mtmorgan.github.io/cellxgenedp/.","code":"@Manual{, title = {cellxgenedp: Discover and Access Single Cell Data Sets in the cellxgene Data Portal}, author = {Martin Morgan and Kayla Interdonato}, year = {2024}, note = {R package version 1.7.1.9000, https://github.com/mtmorgan/cellxgenedp}, url = {https://mtmorgan.github.io/cellxgenedp/}, }"},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"introduction-to-cellxgenedp","dir":"","previous_headings":"","what":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"cellxgene data portal https://cellxgene.cziscience.com/ provides graphical user interface collections single-cell sequence data processed standard ways ‘count matrix’ summaries. cellxgenedp package provides alternative, R-based inteface, allowind data discovery, viewing, downloading.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"package available Bioconductor version 3.15 later. following code installs cellxgenedp Alternatively, install ‘development’ version GitHub also install additional packages required vignette, use","code":"if (!\"BiocManager\" %in% rownames(installed.packages())) install.packages(\"BiocManager\", repos = \"https://CRAN.R-project.org\") BiocManager::install(\"cellxgenedp\") if (!\"remotes\" %in% rownames(installed.packages())) install.packages(\"remotes\", repos = \"https://CRAN.R-project.org\") remotes::install_github(\"mtmorgan/cellxgenedp\") pkgs <- c(\"zellkonverter\", \"SingleCellExperiment\", \"HDF5Array\") required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())] BiocManager::install(required_pkgs)"},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"use","dir":"","previous_headings":"","what":"Use","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"Load package current R session. make extensive use dplyr packages, end vignette use SingleCellExperiment zellkonverter, load well.","code":"suppressPackageStartupMessages({ library(dplyr) library(cellxgenedp) })"},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"shiny","dir":"","previous_headings":"","what":"Shiny","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"cxg() provides ‘shiny’ interface allowing discovery collections datasets, visualization selected datasets cellxgene data portal, download datasets use R.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/index.html","id":"next-steps","dir":"","previous_headings":"","what":"Next steps","title":"Discover and Access Single Cell Data Sets in the cellxgene Data Portal","text":"View artcle Discover download datasets files cellxgene data portal.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":null,"dir":"Reference","previous_headings":"","what":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"Shiny application discovering, viewing, downloading cellxgene data","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"","code":"cxg(as = c(\"tibble\", \"sce\"))"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"character(1) Return value quiting shiny application. \"tibble\" returns tibble describing selected datasets (including location disk downloaded file). \"sce\" returns list dataset files imported R SingleCellExperiment objects.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"cxg() returns either tibble describing datasets selected shiny application, list datasets imported R SingleCellExperiment objects.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/cxg.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Shiny application for discovering, viewing, and downloading\ncellxgene data — cxg","text":"","code":"# \\donttest{ if (interactive()) cxg() # }"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":null,"dir":"Reference","previous_headings":"","what":"Retrieve updated cellxgene database metadata — db","title":"Retrieve updated cellxgene database metadata — db","text":"Retrieve updated cellxgene database metadata","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Retrieve updated cellxgene database metadata — db","text":"","code":"db(overwrite = .db_online() && .db_first())"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Retrieve updated cellxgene database metadata — db","text":"overwrite logical(1) indicating whether database collections updated internet (default, internet available , interactive session, user requests update), read disk (assuming previous successful access internet). overwrite = FALSE might useful reproducibility, testing, working environment restricted internet access.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Retrieve updated cellxgene database metadata — db","text":"db() returns object class 'cellxgene_db', summarizing available collections, datasets, files.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Retrieve updated cellxgene database metadata — db","text":"database retrieved cellxgene data portal web site. 'collections' metadata retrieved call; metadata collection cached locally re-use.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/db.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Retrieve updated cellxgene database metadata — db","text":"","code":"db() #> cellxgene_db #> number of collections(): 182 #> number of datasets(): 1167 #> number of files(): 2314"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":null,"dir":"Reference","previous_headings":"","what":"Facets available for querying cellxgene data — FACETS","title":"Facets available for querying cellxgene data — FACETS","text":"FACETS character vector common fields used subset cellxgene data. facets() used query cellxgene database current values one facets. facets_filter() provides convenient way filter facets based label ontology term.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Facets available for querying cellxgene data — FACETS","text":"","code":"FACETS facets(cellxgene_db = db(), facets = FACETS) facets_filter(facet, key = c(\"label\", \"ontology_term_id\"), value, exact = TRUE)"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Facets available for querying cellxgene data — FACETS","text":"FACETS object class character length 8.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Facets available for querying cellxgene data — FACETS","text":"cellxgene_db (optional) cellxgene_db object, returned db(). facets character() vector corersponding one facets FACETS. facet column containing faceted information, e.g., sex datasets(db). key character(1) identifying whether value label ontology_term_id. value character() value label ontology term filter . value may vector length(value) > 0 exact matchs (exact = TRUE, default), character(1) regular expression. exact logical(1) whether values match exactly (default, TRUE) regular expression (FALSE).","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Facets available for querying cellxgene data — FACETS","text":"facets() returns tibble columns facet, label, ontology_term_id, n, number times facet label used database. facets_filter() returns logical vector length equal length (number rows) facet, TRUE indicating value key present dataset.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/facets.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Facets available for querying cellxgene data — FACETS","text":"","code":"f <- facets() ## levels of each facet f |> dplyr::count(facet) #> # A tibble: 8 × 2 #> facet n #> #> 1 assay 38 #> 2 cell_type 772 #> 3 development_stage 231 #> 4 disease 102 #> 5 organism 7 #> 6 self_reported_ethnicity 32 #> 7 sex 3 #> 8 tissue 374 ## same as facets(, facets = \"organism\") f |> dplyr::filter(facet == \"organism\") #> # A tibble: 7 × 4 #> facet label ontology_term_id n #> #> 1 organism Homo sapiens NCBITaxon:9606 860 #> 2 organism Mus musculus NCBITaxon:10090 270 #> 3 organism Callithrix jacchus NCBITaxon:9483 28 #> 4 organism Macaca mulatta NCBITaxon:9544 19 #> 5 organism Sus scrofa domesticus NCBITaxon:9825 3 #> 6 organism Pan troglodytes NCBITaxon:9598 2 #> 7 organism Gorilla gorilla NCBITaxon:9593 1 db <- db() ds <- datasets(db) ## datasets with African American females ds |> dplyr::filter( facets_filter(self_reported_ethnicity, \"label\", \"African American\"), facets_filter(sex, \"label\", \"female\") ) #> # A tibble: 52 × 31 #> dataset_id dataset_version_id collection_id donor_id assay batch_condition #> #> 1 94c41723-b2… 4a9bcef2-02db-49f… f17b9205-f61… #> 2 3de0ad6d-43… 6934232e-4db4-423… 625f6bf4-2f3… #> 3 de985818-28… 64f14a2b-d754-4bc… c9706a92-0e5… #> 4 0b4a15a7-4e… 6616739c-0b4a-4a4… a98b828a-622… #> 5 32b9bdce-24… 04019af8-aaf3-446… bcb61471-2a4… #> 6 0b75c598-08… 6568e80d-b8d8-4a5… bcb61471-2a4… #> 7 07854d9c-53… bf5607d5-82c3-46f… bcb61471-2a4… #> 8 e40c6272-af… 23d7646e-5ed0-467… a48f5033-343… #> 9 d6dfdef1-40… 5a2b8d31-f043-48e… a48f5033-343… #> 10 6a270451-b4… 88830bd3-ddbe-41d… a48f5033-343… #> # ℹ 42 more rows #> # ℹ 25 more variables: cell_count , cell_type , citation , #> # development_stage , disease , embeddings , #> # explorer_url , feature_biotype , feature_count , #> # feature_reference , is_primary_data , #> # mean_genes_per_cell , organism , primary_cell_count , #> # raw_data_location , schema_version , … ## datasets with non-European, known ethnicity facets(db, \"self_reported_ethnicity\") #> # A tibble: 32 × 4 #> facet label ontology_term_id n #> #> 1 self_reported_ethnicity European HANCESTRO:0005 499 #> 2 self_reported_ethnicity unknown unknown 411 #> 3 self_reported_ethnicity na na 314 #> 4 self_reported_ethnicity Asian HANCESTRO:0008 141 #> 5 self_reported_ethnicity African American HANCESTRO:0568 61 #> 6 self_reported_ethnicity Native American,Hispanic or L… HANCESTRO:0013,… 50 #> 7 self_reported_ethnicity Hispanic or Latin American HANCESTRO:0014 48 #> 8 self_reported_ethnicity African American or Afro-Cari… HANCESTRO:0016 26 #> 9 self_reported_ethnicity Greater Middle Eastern (Midd… HANCESTRO:0015 22 #> 10 self_reported_ethnicity South Asian HANCESTRO:0006 11 #> # ℹ 22 more rows ds |> dplyr::filter( !facets_filter( self_reported_ethnicity, \"label\", c(\"European\", \"na\", \"unknown\") ) ) #> # A tibble: 25 × 31 #> dataset_id dataset_version_id collection_id donor_id assay batch_condition #> #> 1 e6a11140-25… 4866a804-37eb-436… e5f58829-1a6… #> 2 6ec405bb-47… c524a1cb-e823-478… e5f58829-1a6… #> 3 2ba40233-85… a1e426d5-f575-445… e5f58829-1a6… #> 4 2423ce2c-31… 80e4bc4f-e0a9-467… e5f58829-1a6… #> 5 2adb1f8a-a6… 1b7484e3-83a0-47f… 38833785-fac… #> 6 be884a28-0e… 7dd2aaf8-1714-49b… 10bf5c50-8d8… #> 7 c8f83821-a2… 10ecac59-8895-492… 9d63fcf1-5ca… #> 8 c05e6940-72… 80a4eb91-dab2-40d… 9d63fcf1-5ca… #> 9 894573ad-49… e385cf86-3c0b-422… 9d63fcf1-5ca… #> 10 84242d25-f6… 7ce8fb90-86fa-4ea… 9d63fcf1-5ca… #> # ℹ 15 more rows #> # ℹ 25 more variables: cell_count , cell_type , citation , #> # development_stage , disease , embeddings , #> # explorer_url , feature_biotype , feature_count , #> # feature_reference , is_primary_data , #> # mean_genes_per_cell , organism , primary_cell_count , #> # raw_data_location , schema_version , …"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":null,"dir":"Reference","previous_headings":"","what":"Query cellxgene collections, datasets, and files — collections","title":"Query cellxgene collections, datasets, and files — collections","text":"files_download() retrieves one cellxgene files cache local system. links(), authors() publisher_metadata() helper functions extract 'nested' information collections.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Query cellxgene collections, datasets, and files — collections","text":"","code":"collections(cellxgene_db = db()) datasets(cellxgene_db = db()) datasets_visualize(tbl) files(cellxgene_db = db()) files_download(tbl, dry.run = TRUE, cache.path = .cellxgene_cache_path()) links(cellxgene_db = db()) authors(cellxgene_db = db()) publisher_metadata(cellxgene_db = db())"},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Query cellxgene collections, datasets, and files — collections","text":"cellxgene_db optional 'cellxgene_db' object, returned db(). tbl tibble() typically derived datasets(db) files(db) containing columns dataset_id (datasets_visualize()), columns dataset_id, file_id, filetype (files_download()). dry.run logical(1) indicating whether (often large) file(s) tbl downloaded local cache. Files downloaded dry.run = TRUE (default). cache.path character(1) directory cache downloaded files. directory must already exist. default tools::R_user_dir(\"cellxgenedp\", \"cache\"), package-specific path user home directory.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Query cellxgene collections, datasets, and files — collections","text":"function returns tibble describing corresponding component database. files_download() returns character() vector paths local files. links() returns tibble external links associated collection. Common links includ DOI, raw data / data sources, lab websites. authors() returns tibble authors associated collection. publisher_metadata() returns tibble publisher metadata (journal, publicate date, doi) associated collection.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/reference/query.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Query cellxgene collections, datasets, and files — collections","text":"","code":"db <- db() collections(db) #> # A tibble: 182 × 18 #> collection_id collection_version_id collection_url consortia contact_email #> #> 1 ceb895f4-ff9f-4… ee098b5a-4f33-473b-b… https://cellx… panagiotis.r… #> 2 af893e86-8e9f-4… 768170a6-c590-4900-a… https://cellx… ruichen@bcm.… #> 3 1d1c7275-476a-4… 609becde-c797-41bb-8… https://cellx… wey334@g.har… #> 4 1b014f39-f202-4… 1d88cb46-6e84-4b5b-b… https://cellx… kimberly.ald… #> 5 48d354f5-a5ca-4… 2862daa3-c933-43c8-9… https://cellx… Nathan.Salom… #> 6 43d4bb39-21af-4… 78360f02-1acc-415c-a… https://cellx… raymond.cho@… #> 7 f7cecffa-00b4-4… 43224f82-db2a-443c-9… https://cellx… st9@sanger.a… #> 8 f17b9205-f61f-4… 21ff4724-95e2-491b-8… https://cellx… genevieve.ko… #> 9 64b24fda-6591-4… e414854b-2666-4977-9… https://cellx… magness@med.… #> 10 48259aa8-f168-4… 44601b80-bd11-49d8-a… https://cellx… wtk22@cam.ac… #> # ℹ 172 more rows #> # ℹ 13 more variables: contact_name , curator_name , #> # description , doi , links , name , #> # publisher_metadata , revising_in , revision_of , #> # visibility , created_at , published_at , revised_at collections(db) |> dplyr::glimpse() #> Rows: 182 #> Columns: 18 #> $ collection_id \"ceb895f4-ff9f-403a-b7c3-187a9657ac2c\", \"af893e8… #> $ collection_version_id \"ee098b5a-4f33-473b-b52a-0451de1f80ae\", \"768170a… #> $ collection_url \"https://cellxgene.cziscience.com/collections/ce… #> $ consortia [\"BRAIN Initiative\"], [\"CZI Single-Cell Biology… #> $ contact_email \"panagiotis.roussos@mssm.edu\", \"ruichen@bcm.edu\"… #> $ contact_name \"Panos Roussos\", \"Rui Chen\", \"Wenjun Yan\", \"Kimb… #> $ curator_name \"Corinn Sophia Small\", \"Jennifer Yu-Sheng Chien\"… #> $ description \"We simultaneously profiled gene expression and … #> $ doi \"10.1126/sciadv.adg3754\", \"10.1016/j.xgen.2023.1… #> $ links [[\"SCP1859\", \"OTHER\", \"https://singlecell.broad… #> $ name \"Multi-omic profiling of the developing human ce… #> $ publisher_metadata [[[\"Zhu\", \"Kaiyi\"], [\"Bendl\", \"Jaroslav\"], [\"Ra… #> $ revising_in NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, … #> $ revision_of NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, … #> $ visibility \"PUBLIC\", \"PUBLIC\", \"PUBLIC\", \"PUBLIC\", \"PUBLIC\"… #> $ created_at 2023-12-12, 2023-12-11, 2023-12-12, 2023-12-11,… #> $ published_at 2023-08-28, 2021-10-29, 2023-02-10, 2023-08-31,… #> $ revised_at 2023-12-13, 2023-12-13, 2023-12-13, 2023-12-13,… datasets(db) |> dplyr::glimpse() #> Rows: 1,167 #> Columns: 31 #> $ dataset_id \"53ce2631-3646-4172-bbd9-38b0a44d8214\", \"1d… #> $ dataset_version_id \"2f17c183-388a-4c08-9adb-a146833e57ab\", \"94… #> $ collection_id \"ceb895f4-ff9f-403a-b7c3-187a9657ac2c\", \"ce… #> $ donor_id [\"LaFet1\", \"LaFet2\", \"EaFet2\", \"EaFet1\", \"… #> $ assay [[\"10x multiome\", \"EFO:0030059\"]], [[\"10x … #> $ batch_condition [\"donor_id\", \"batch\"], [\"donor_id\", \"batch… #> $ cell_count 45549, 45549, 18011, 11617, 53040, 56507, 7… #> $ cell_type [[\"astrocyte\", \"CL:0000127\"], [\"caudal gan… #> $ citation \"Publication: https://doi.org/10.1126/sciad… #> $ development_stage [[\"14-year-old human stage\", \"HsapDv:00001… #> $ disease [[\"normal\", \"PATO:0000461\"]], [[\"normal\", … #> $ embeddings [\"X_joint_wnn_umap\", \"X_umap\"], [\"X_joint_… #> $ explorer_url \"https://cellxgene.cziscience.com/e/53ce263… #> $ feature_biotype [\"gene\"], [\"gene\"], [\"gene\"], [\"gene\"], [\"… #> $ feature_count 30113, 19492, 30933, 30933, 30933, 30933, 3… #> $ feature_reference [\"NCBITaxon:9606\"], [\"NCBITaxon:9606\"], [\"… #> $ is_primary_data [TRUE], [TRUE], [FALSE], [TRUE], [FALSE], … #> $ mean_genes_per_cell 1886.041, 4865.284, 2028.297, 4518.387, 222… #> $ organism [[\"Homo sapiens\", \"NCBITaxon:9606\"]], [[\"H… #> $ primary_cell_count 45549, 45549, 0, 11617, 0, 0, 0, 244474, 0,… #> $ raw_data_location \"raw.X\", \"raw.X\", \"raw.X\", \"raw.X\", \"raw.X\"… #> $ schema_version \"4.0.0\", \"4.0.0\", \"4.0.0\", \"4.0.0\", \"4.0.0\"… #> $ self_reported_ethnicity [[\"unknown\", \"unknown\"]], [[\"unknown\", \"un… #> $ sex [[\"female\", \"PATO:0000383\"], [\"male\", \"PAT… #> $ suspension_type [\"nucleus\"], [\"nucleus\"], [\"nucleus\"], [\"n… #> $ tissue [[\"cortical plate\", \"UBERON:0005343\", \"tis… #> $ title \"10x scRNA-seq from human cortex\", \"10x scA… #> $ tombstone FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F… #> $ x_approximate_distribution NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,… #> $ published_at 2023-08-28, 2023-08-28, 2021-10-29, 2021-1… #> $ revised_at 2023-12-13, 2023-12-13, 2023-12-13, 2023-1… # \\donttest{ if (interactive()) { ## visualize the first dataset datasets(db) |> dplyr::slice(1) |> datasets_visualize() } # } files(db) |> dplyr::glimpse() #> Rows: 2,314 #> Columns: 4 #> $ dataset_id \"53ce2631-3646-4172-bbd9-38b0a44d8214\", \"53ce2631-3646-4172… #> $ filesize 406108808, 399752425, 906795740, 1060800682, 1071401902, 14… #> $ filetype \"H5AD\", \"RDS\", \"H5AD\", \"RDS\", \"H5AD\", \"RDS\", \"H5AD\", \"RDS\",… #> $ url \"https://datasets.cellxgene.cziscience.com/2f17c183-388a-4c… if (FALSE) { files(db) |> dplyr::slice(1) |> files_download(dry.run = FALSE) } ## common links to external data links(db) |> dplyr::count(link_type) #> # A tibble: 5 × 2 #> link_type n #> #> 1 DATA_SOURCE 35 #> 2 LAB_WEBSITE 38 #> 3 OTHER 329 #> 4 PROTOCOL 44 #> 5 RAW_DATA 270 ## authors per collection authors() |> dplyr::count(collection_id, sort = TRUE) #> # A tibble: 172 × 2 #> collection_id n #> #> 1 e5f58829-1a66-40b5-a624-9046778e74f5 221 #> 2 8f126edf-5405-4731-8374-b5ce11f53e82 205 #> 3 bcb61471-2a44-4d00-a0af-ff085512674c 171 #> 4 4f586cb6-972b-4ef7-a4ef-3c3800a3c004 147 #> 5 0b9d8a04-bb9d-44da-aa27-705bb65b54eb 135 #> 6 367d95c0-0eb0-4dae-8276-9407239421ee 106 #> 7 6f6d381a-7701-4781-935c-db10d30de293 98 #> 8 1ca90a2d-2943-483d-b678-b809bf464c30 94 #> 9 0a839c4b-10d0-4d64-9272-684c49a2c8ba 90 #> 10 ae1420fe-6630-46ed-8b3d-cc6056a66467 83 #> # ℹ 162 more rows publisher_metadata() |> dplyr::glimpse() #> Rows: 172 #> Columns: 9 #> $ collection_id \"ceb895f4-ff9f-403a-b7c3-187a9657ac2c\", \"af893e86-8e9f… #> $ name \"Multi-omic profiling of the developing human cerebral… #> $ is_preprint FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE… #> $ journal \"Sci. Adv.\", \"Cell Genomics\", \"Sci Rep\", \"Nat Neurosci… #> $ published_at 2023-10-13, 2023-06-01, 2020-06-17, 2021-08-01, 2021-… #> $ published_year 2023, 2023, 2020, 2021, 2021, 2018, 2022, 2021, 2022, … #> $ published_month 10, 6, 6, 8, 3, 10, 12, 7, 1, 4, 12, 6, 9, 8, 10, 2, 1… #> $ published_day 13, 1, 17, 1, 12, 1, 1, 1, 1, 21, 1, 15, 6, 1, 1, 1, 1… #> $ doi NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…"},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-18","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.8","title":"cellxgenedp 1.8","text":"(v 1.7.1) Update vignette section dataset visualization accomodate changes ‘Discover’ API. https://github.com/mtmorgan/cellxgenedp/issues/15","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-16","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.6","title":"cellxgenedp 1.6","text":"(v 1.5.2) use CELLxGENE ‘Discover’ API, changing column names return values. See ‘API changes’ ‘Discover download datasets…’ vignette.","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-14","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.4","title":"cellxgenedp 1.4","text":"SIGNIFICANT USER-VISIBLE CHANGES (v 1.3.3) add publisher_metadata(), authors(), links() make access nested ‘collections()’ data straight-forward","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-12","dir":"Changelog","previous_headings":"","what":"cellxgenedp 1.2","title":"cellxgenedp 1.2","text":"SIGNIFICANT USER-VISIBLE CHANGES (v. 1.1.4) allow custom files_download() cache. Thanks @stemangiola, https://github.com/mtmorgan/cellxgenedp/pull/9 (v. 1.1.6) datasets ethnicity field renamed self_reported_ethnicity (v. 1.1.7) use zellkonverter’s basilisk-based Python parser read H5AD files vignette, see https://github.com/theislab/zellkonverter/issues/78 (v. 1.1.2) reset cache build machines weekly (v. 1.1.6) use {rjsoncons} CRAN package queries, rather local implementation. Thanks @LiNk-NY, https://github.com/mtmorgan/cellxgenedp/pull/12","code":""},{"path":"https://mtmorgan.github.io/cellxgenedp/news/index.html","id":"cellxgenedp-007","dir":"Changelog","previous_headings":"","what":"cellxgenedp 0.0.7","title":"cellxgenedp 0.0.7","text":"(v. 0.0.7) make errors local cache update accessible; see https://github.com/mtmorgan/cellxgenedp/issues/1","code":""}]