From 13626ee49b7b660491b293d19fbfb1c82d543a21 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 13 Oct 2023 13:46:50 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20pages=20from=20@=20cytomining/?= =?UTF-8?q?CytoTable@241dd9a566d4a7b9e5e2ac19f07fc4ee01bd0dad=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _modules/cytotable/convert.html | 217 ++++++++++++-------------------- _sources/python-api.md.txt | 8 +- genindex.html | 8 +- index.html | 2 +- objects.inv | Bin 830 -> 835 bytes python-api.html | 68 +++++----- searchindex.js | 2 +- 7 files changed, 118 insertions(+), 187 deletions(-) diff --git a/_modules/cytotable/convert.html b/_modules/cytotable/convert.html index 66cf3bae..79cf4413 100644 --- a/_modules/cytotable/convert.html +++ b/_modules/cytotable/convert.html @@ -209,8 +209,9 @@

Source code for cytotable.convert

 
 @python_app
 def _get_table_chunk_offsets(
-    source: Dict[str, Any],
     chunk_size: int,
+    source: Optional[Dict[str, Any]] = None,
+    sql_stmt: Optional[str] = None,
 ) -> Union[List[int], None]:
     """
     Get table data chunk offsets for later use in capturing segments
@@ -241,39 +242,54 @@ 

Source code for cytotable.convert

 
     logger = logging.getLogger(__name__)
 
-    table_name = source["table_name"] if "table_name" in source.keys() else None
-    source_path = source["source_path"]
-    source_type = str(pathlib.Path(source_path).suffix).lower()
+    if source is not None:
+        table_name = source["table_name"] if "table_name" in source.keys() else None
+        source_path = source["source_path"]
+        source_type = str(pathlib.Path(source_path).suffix).lower()
 
-    try:
-        # for csv's, check that we have more than one row (a header and data values)
-        if (
-            source_type == ".csv"
-            and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
-        ):
-            raise NoInputDataException(
-                f"Data file has 0 rows of values. Error in file: {source_path}"
+        try:
+            # for csv's, check that we have more than one row (a header and data values)
+            if (
+                source_type == ".csv"
+                and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
+            ):
+                raise NoInputDataException(
+                    f"Data file has 0 rows of values. Error in file: {source_path}"
+                )
+
+            # gather the total rowcount from csv or sqlite data input sources
+            with _duckdb_reader() as ddb_reader:
+                rowcount = int(
+                    ddb_reader.execute(
+                        # nosec
+                        f"SELECT COUNT(*) from read_csv_auto('{source_path}', header=TRUE, delim=',')"
+                        if source_type == ".csv"
+                        else f"SELECT COUNT(*) from sqlite_scan('{source_path}', '{table_name}')"
+                    ).fetchone()[0]
+                )
+
+        # catch input errors which will result in skipped files
+        except (
+            duckdb.InvalidInputException,
+            NoInputDataException,
+        ) as invalid_input_exc:
+            logger.warning(
+                msg=f"Skipping file due to input file errors: {str(invalid_input_exc)}"
             )
 
+            return None
+
+    # find chunk offsets from sql statement
+    elif sql_stmt is not None:
         # gather the total rowcount from csv or sqlite data input sources
         with _duckdb_reader() as ddb_reader:
             rowcount = int(
                 ddb_reader.execute(
                     # nosec
-                    f"SELECT COUNT(*) from read_csv_auto('{source_path}', header=TRUE, delim=',')"
-                    if source_type == ".csv"
-                    else f"SELECT COUNT(*) from sqlite_scan('{source_path}', '{table_name}')"
+                    f"SELECT COUNT(*) FROM ({sql_stmt})"
                 ).fetchone()[0]
             )
 
-    # catch input errors which will result in skipped files
-    except (duckdb.InvalidInputException, NoInputDataException) as invalid_input_exc:
-        logger.warning(
-            msg=f"Skipping file due to input file errors: {str(invalid_input_exc)}"
-        )
-
-        return None
-
     return list(
         range(
             0,
@@ -292,7 +308,6 @@ 

Source code for cytotable.convert

     chunk_size: int,
     offset: int,
     dest_path: str,
-    data_type_cast_map: Optional[Dict[str, str]] = None,
 ) -> str:
     """
     Export source data to chunked parquet file using chunk size and offsets.
@@ -666,75 +681,51 @@ 

Source code for cytotable.convert

     return concatted
 
 
-@python_app
-def _get_join_chunks(
+@python_app()
+def _prepare_join_sql(
     sources: Dict[str, List[Dict[str, Any]]],
-    metadata: Union[List[str], Tuple[str, ...]],
-    chunk_columns: Union[List[str], Tuple[str, ...]],
-    chunk_size: int,
-) -> List[List[Dict[str, Any]]]:
+    joins: str,
+) -> str:
     """
-    Build groups of join keys for later join operations
+    Prepare join SQL statement with actual locations of data based on the sources.
 
     Args:
-        sources: Dict[List[Dict[str, Any]]]:
+        sources: Dict[str, List[Dict[str, Any]]]:
             Grouped datasets of files which will be used by other functions.
-        metadata: Union[List[str], Tuple[str, ...]]:
-            List of source data names which are used as metadata.
-        chunk_columns: Union[List[str], Tuple[str, ...]]:
-            Column names which appear in all compartments to use when performing join.
-        chunk_size: int:
-            Size of join chunks which is used to limit data size during join ops.
+            Includes the metadata concerning location of actual data.
+        joins: str:
+            DuckDB-compatible SQL which will be used to perform the join
+            operations using the join_group keys as a reference.
 
     Returns:
-        List[List[Dict[str, Any]]]]:
-            A list of lists with at most chunk size length that contain join keys.
+        str:
+            String representing the SQL to be used in later join work.
     """
-
     import pathlib
 
-    import pyarrow.parquet as parquet
-
-    from cytotable.utils import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
+    # replace with real location of sources for join sql
+    for key, val in sources.items():
+        if pathlib.Path(key).stem.lower() in joins.lower():
+            joins = joins.replace(
+                f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
+                str([str(table) for table in val[0]["table"]]),
+            )
 
-    # fetch the compartment concat result as the basis for join groups
-    for key, source in sources.items():
-        if any(name.lower() in pathlib.Path(key).stem.lower() for name in metadata):
-            first_result = source
-            break
-
-    # gather the workflow result for basis if it's not yet returned
-    basis = first_result
-
-    # read only the table's chunk_columns
-    join_column_rows = parquet.read_table(
-        source=basis[0]["table"],
-        columns=list(chunk_columns),
-        memory_map=CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
-    ).to_pylist()
-
-    # build and return the chunked join column rows
-    return [
-        join_column_rows[i : i + chunk_size]
-        for i in range(0, len(join_column_rows), chunk_size)
-    ]
+    return joins
 
 
 @python_app
 def _join_source_chunk(
-    sources: Dict[str, List[Dict[str, Any]]],
     dest_path: str,
     joins: str,
-    join_group: List[Dict[str, Any]],
+    chunk_size: int,
+    offset: int,
     drop_null: bool,
 ) -> str:
     """
     Join sources based on join group keys (group of specific join column values)
 
     Args:
-        sources: Dict[str, List[Dict[str, Any]]]:
-            Grouped datasets of files which will be used by other functions.
-            Includes the metadata concerning location of actual data.
         dest_path: str:
             Destination path to write file-based content.
         joins: str:
@@ -758,52 +749,18 @@ 

Source code for cytotable.convert

 
     from cytotable.utils import _duckdb_reader
 
-    # replace with real location of sources for join sql
-    for key, val in sources.items():
-        if pathlib.Path(key).stem.lower() in joins.lower():
-            joins = joins.replace(
-                f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
-                str([str(table) for table in val[0]["table"]]),
-            )
-
-    # update the join groups to include unique values per table
-    updated_join_group = []
-    for key in sources.keys():
-        updated_join_group.extend(
-            [
-                {
-                    f"{str(pathlib.Path(key).stem)}.{join_key}": val
-                    for join_key, val in chunk.items()
-                }
-                for chunk in join_group
-            ]
-        )
-
-    # form where clause for sql joins to filter the results
-    joins += (
-        "WHERE ("
-        + ") OR (".join(
-            [
-                " AND ".join(
-                    [
-                        # create groups of join column filters where values always
-                        # are expected to equal those within the join_group together
-                        f"{join_column} = {join_column_value}"
-                        if not isinstance(join_column_value, str)
-                        # account for string values
-                        else (f"{join_column} = " f"'{join_column_value}'")
-                        for join_column, join_column_value in chunk.items()
-                    ]
-                )
-                for chunk in updated_join_group
-            ]
-        )
-        + ")"
-    )
-
+    # Attempt to read the data to parquet file
+    # using duckdb for extraction and pyarrow for
+    # writing data to a parquet file.
+    # read data with chunk size + offset
+    # and export to parquet
     with _duckdb_reader() as ddb_reader:
-        # perform compartment joins using duckdb over parquet files
-        result = ddb_reader.execute(joins).arrow()
+        result = ddb_reader.execute(
+            f"""
+                {joins}
+                LIMIT {chunk_size} OFFSET {offset}
+                """
+        ).arrow()
 
     # drop nulls if specified
     if drop_null:
@@ -1046,7 +1003,6 @@ 

Source code for cytotable.convert

     concat: bool,
     join: bool,
     joins: Optional[str],
-    chunk_columns: Optional[Union[List[str], Tuple[str, ...]]],
     chunk_size: Optional[int],
     infer_common_schema: bool,
     drop_null: bool,
@@ -1082,8 +1038,6 @@ 

Source code for cytotable.convert

             Whether to join the compartment data together into one dataset.
         joins: str:
             DuckDB-compatible SQL which will be used to perform the join operations.
-        chunk_columns: Optional[Union[List[str], Tuple[str, ...]]],
-            Column names which appear in all compartments to use when performing join.
         chunk_size: Optional[int],
             Size of join chunks which is used to limit data size during join ops.
         infer_common_schema: bool:  (Default value = True)
@@ -1108,7 +1062,6 @@ 

Source code for cytotable.convert

     from cytotable.convert import (
         _concat_join_sources,
         _concat_source_group,
-        _get_join_chunks,
         _get_table_chunk_offsets,
         _infer_source_group_common_schema,
         _join_source_chunk,
@@ -1195,7 +1148,6 @@ 

Source code for cytotable.convert

                                 chunk_size=chunk_size,
                                 offset=offset,
                                 dest_path=expanded_dest_path,
-                                data_type_cast_map=data_type_cast_map,
                             ),
                             source_group_name=source_group_name,
                             identifying_columns=identifying_columns,
@@ -1244,6 +1196,8 @@ 

Source code for cytotable.convert

     # conditional section for merging
     # note: join implies a concat, but concat does not imply a join
     if join:
+        prepared_joins_sql = _prepare_join_sql(sources=results, joins=joins).result()
+
         # map joined results based on the join groups gathered above
         # note: after mapping we end up with a list of strings (task returns str)
         join_sources_result = [
@@ -1251,21 +1205,18 @@ 

Source code for cytotable.convert

                 # gather the result of concatted sources prior to
                 # join group merging as each mapped task run will need
                 # full concat results
-                sources=results,
                 dest_path=expanded_dest_path,
-                joins=joins,
-                # get merging chunks by join columns
-                join_group=join_group,
+                joins=prepared_joins_sql,
+                chunk_size=chunk_size,
+                offset=offset,
                 drop_null=drop_null,
             ).result()
             # create join group for querying the concatenated
             # data in order to perform memory-safe joining
             # per user chunk size specification.
-            for join_group in _get_join_chunks(
-                sources=results,
-                chunk_columns=chunk_columns,
+            for offset in _get_table_chunk_offsets(
+                sql_stmt=prepared_joins_sql,
                 chunk_size=chunk_size,
-                metadata=metadata,
             ).result()
         ]
 
@@ -1293,7 +1244,6 @@ 

Source code for cytotable.convert

     concat: bool = True,
     join: bool = True,
     joins: Optional[str] = None,
-    chunk_columns: Optional[Union[List[str], Tuple[str, ...]]] = None,
     chunk_size: Optional[int] = None,
     infer_common_schema: bool = True,
     drop_null: bool = False,
@@ -1337,9 +1287,6 @@ 

Source code for cytotable.convert

             Whether to join the compartment data together into one dataset
         joins: str: (Default value = None):
             DuckDB-compatible SQL which will be used to perform the join operations.
-        chunk_columns: Optional[Union[List[str], Tuple[str, ...]]]
-            (Default value = None)
-            Column names which appear in all compartments to use when performing join
         chunk_size: Optional[int] (Default value = None)
             Size of join chunks which is used to limit data size during join ops
         infer_common_schema: bool: (Default value = True)
@@ -1436,11 +1383,6 @@ 

Source code for cytotable.convert

             else identifying_columns
         )
         joins = cast(str, config[preset]["CONFIG_JOINS"]) if joins is None else joins
-        chunk_columns = (
-            cast(list, config[preset]["CONFIG_CHUNK_COLUMNS"])
-            if chunk_columns is None
-            else chunk_columns
-        )
         chunk_size = (
             cast(int, config[preset]["CONFIG_CHUNK_SIZE"])
             if chunk_size is None
@@ -1459,7 +1401,6 @@ 

Source code for cytotable.convert

             concat=concat,
             join=join,
             joins=joins,
-            chunk_columns=chunk_columns,
             chunk_size=chunk_size,
             infer_common_schema=infer_common_schema,
             drop_null=drop_null,
diff --git a/_sources/python-api.md.txt b/_sources/python-api.md.txt
index 669211a8..08ca05a3 100644
--- a/_sources/python-api.md.txt
+++ b/_sources/python-api.md.txt
@@ -21,10 +21,6 @@ Convert
 
 |
 
-.. autofunction:: _get_join_chunks
-
-|
-
 .. autofunction:: _get_table_columns_and_types
 
 |
@@ -41,6 +37,10 @@ Convert
 
 |
 
+.. autofunction:: _prepare_join_sql
+
+|
+
 .. autofunction:: _prep_cast_column_data_types
 
 |
diff --git a/genindex.html b/genindex.html
index 49bf3ccd..8540bb8d 100644
--- a/genindex.html
+++ b/genindex.html
@@ -66,15 +66,13 @@ 

_

  • _expand_path() (in module cytotable.utils)
  • _filter_source_filepaths() (in module cytotable.sources) -
  • -
  • _get_join_chunks() (in module cytotable.convert)
  • _get_source_filepaths() (in module cytotable.sources)
  • - - +
    -
    -
    -cytotable.convert._get_join_chunks(*args, **kwargs)
    -

    Build groups of join keys for later join operations

    -
    -
    Parameters:
    -
      -
    • sources – Dict[List[Dict[str, Any]]]: -Grouped datasets of files which will be used by other functions.

    • -
    • metadata – Union[List[str], Tuple[str, …]]: -List of source data names which are used as metadata.

    • -
    • chunk_columns – Union[List[str], Tuple[str, …]]: -Column names which appear in all compartments to use when performing join.

    • -
    • chunk_size – int: -Size of join chunks which is used to limit data size during join ops.

    • -
    -
    -
    Returns:
    -

    A list of lists with at most chunk size length that contain join keys.

    -
    -
    Return type:
    -

    List[List[Dict[str, Any]]]]

    -
    -
    -
    -

    @@ -345,9 +313,6 @@

    Python API
    Parameters:

    +
    +
    +cytotable.convert._prepare_join_sql(*args, **kwargs)
    +

    Prepare join SQL statement with actual locations of data based on the sources.

    +
    +
    Parameters:
    +
      +
    • sources – Dict[str, List[Dict[str, Any]]]: +Grouped datasets of files which will be used by other functions. +Includes the metadata concerning location of actual data.

    • +
    • joins – str: +DuckDB-compatible SQL which will be used to perform the join +operations using the join_group keys as a reference.

    • +
    +
    +
    Returns:
    +

    String representing the SQL to be used in later join work.

    +
    +
    Return type:
    +

    str

    +
    +
    +
    +

    @@ -536,8 +528,6 @@

    Python APIconvert()
  • _concat_join_sources()
  • _concat_source_group()
  • -
  • _get_join_chunks()
  • _get_table_columns_and_types()
  • _get_table_chunk_offsets()
  • _infer_source_group_common_schema()
  • _join_source_chunk()
  • +
  • _prepare_join_sql()
  • _prep_cast_column_data_types()
  • _prepend_column_name()
  • _return_future()
  • diff --git a/searchindex.js b/searchindex.js index 96d246a9..29c4f270 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["architecture", "architecture.data", "architecture.technical", "code_of_conduct", "contributing", "index", "overview", "python-api", "tutorial"], "filenames": ["architecture.md", "architecture.data.md", "architecture.technical.md", "code_of_conduct.md", "contributing.md", "index.md", "overview.md", "python-api.md", "tutorial.md"], "titles": ["Architecture", "Data Architecture", "Technical Architecture", "Contributor Covenant Code of Conduct", "Contributing", "CytoTable", "Overview", "Python API", "Tutorial"], "terms": {"document": [0, 1, 2, 5, 7], "cover": [0, 1, 2, 8], "variou": [0, 4, 6, 7, 8], "detail": [0, 1, 5], "pyctyomin": [0, 1, 4, 6, 7], "transform": [0, 1, 4, 5, 6, 7], "technic": [0, 5], "workflow": [0, 4, 5], "execut": 0, "data": [0, 4, 5, 7, 8], "technologi": [0, 5], "path": [0, 6, 7, 8], "In": 0, "process": [0, 5, 7], "format": [0, 4, 6], "sql": [0, 7], "base": [0, 1, 4, 7, 8], "manag": [0, 4, 7], "sourc": [0, 2, 4, 5], "structur": [0, 4, 5, 7], "compart": [0, 7, 8], "imag": [0, 5, 6], "identifi": [0, 7], "kei": [0, 7], "field": [0, 7], "relationship": [0, 4, 5], "cytoplasm": [0, 8], "ar": [1, 2, 3, 4, 6, 7], "measur": [1, 6], "creat": [1, 4, 5, 6, 7, 8], "from": [1, 2, 3, 4, 6, 7, 8], "other": [1, 2, 3, 5, 6, 7], "cell": [1, 2, 5, 6, 7, 8], "biologi": [1, 6], "analysi": [1, 5, 6], "tool": [1, 2, 4, 6], "see": [1, 2, 3, 4, 5, 6, 7, 8], "below": [1, 4, 8], "brief": [1, 6, 8], "overview": [1, 2, 5, 7], "type": [1, 2, 4, 5, 7], "cellprofil": [1, 2, 5, 7], "gener": [1, 4, 6, 7], "csv": [1, 5, 6, 7], "sqlite": [1, 5, 6, 7], "databas": [1, 2, 5, 6, 7], "cytomin": [1, 4, 5, 6], "which": [1, 2, 3, 4, 6, 7], "includ": [1, 2, 3, 4, 5, 7], "tabl": [1, 2, 7], "": [1, 2, 3, 4, 7], "mention": 1, "abov": [1, 4, 7], "deepprofil": [1, 5], "npz": [1, 5], "organ": [1, 2], "two": [1, 4], "categori": 1, "about": [1, 3, 4, 7], "itself": 1, "metadata": [1, 7], "nuclei": 1, "actin": 1, "specif": [1, 4, 6, 7], "an": [1, 2, 3, 4, 6, 7, 8], "aspect": [1, 2], "part": [1, 7], "found": 1, "within": [1, 2, 3, 4, 5, 6, 7, 8], "mai": [1, 2, 3, 4, 6, 7, 8], "follow": [1, 2, 3, 4, 5, 6, 7], "imagenumb": 1, "provid": [1, 2, 3, 4, 6, 7], "what": [1, 3, 4, 7], "i": [1, 2, 3, 4, 6, 7], "being": [1, 7], "referenc": [1, 5, 7], "mani": [1, 4, 7], "objectnumb": 1, "object": [1, 6, 7, 8], "parent_cel": 1, "relat": [1, 2, 4, 7], "thi": [1, 2, 3, 4, 5, 6, 7, 8], "canon": 1, "join": [1, 2, 7, 8], "greater": [1, 2], "parent_nuclei": 1, "The": [1, 3, 4, 5, 7, 8], "diagram": [1, 4, 5], "show": [1, 5], "exampl": [1, 2, 3, 4, 6, 7, 8], "us": [1, 2, 3, 4, 5, 6, 7, 8], "pycytomin": [1, 5, 7], "name": [1, 2, 7, 8], "each": [1, 7, 8], "zero": 1, "etc": [1, 2, 4, 7, 8], "via": [1, 3, 4, 6, 7], "These": [1, 6], "parent_": 1, "respect": [1, 3], "cytot": [2, 4, 6, 7, 8], "parsl": [2, 7], "collect": [2, 7], "task": [2, 4, 7], "python_app": [2, 7], "work": [2, 4, 7, 8], "isol": 2, "python": [2, 4, 5], "function": [2, 4, 7], "decor": [2, 7], "join_app": [2, 7], "one": [2, 4, 7], "more": [2, 5, 7], "app": [2, 7], "inform": [2, 3, 4, 7], "how": [2, 4, 6, 8], "procedur": 2, "executor": 2, "configur": [2, 4, 6, 7], "through": [2, 3, 4, 7], "pass": [2, 4, 7], "convert": [2, 5, 6, 8], "parsl_config": [2, 7], "config": [2, 5, 6, 7], "By": [2, 4, 6], "default": [2, 6, 7, 8], "assum": [2, 4, 7], "local": [2, 4, 6, 7, 8], "localprovid": 2, "For": [2, 3, 4, 6, 7, 8], "scalabl": 2, "highthroughputexecutor": 2, "handl": [2, 6], "pathlib": [2, 7], "modul": 2, "cloudpathlib": [2, 6, 7], "refer": [2, 4, 6, 7], "page": [2, 6, 8], "client": [2, 6, 7, 8], "argument": [2, 6, 7], "locat": [2, 5, 7, 8], "store": [2, 7], "download": 2, "cach": [2, 7], "capabl": 2, "perform": [2, 4, 6, 7], "queri": [2, 7], "wai": [2, 3, 6], "requir": [2, 4, 6], "addit": [2, 4], "paramet": [2, 7, 8], "storag": [2, 6, 7, 8], "set": [2, 3, 7], "directori": [2, 7, 8], "explicitli": [2, 6], "avoid": [2, 3, 4, 7], "limit": [2, 6, 7], "some": [2, 7], "temporari": [2, 5], "constrain": 2, "system": [2, 6], "import": [2, 4, 7, 8], "parquet": [2, 5, 6, 7], "source_path": [2, 7, 8], "s3": [2, 6, 7, 8], "bucket": 2, "singl": [2, 3, 5, 7, 8], "dest_path": [2, 6, 7, 8], "test": [2, 7, 8], "dest_datatyp": [2, 6, 7, 8], "dir": 2, "tmpdata": 2, "get": [2, 5, 7], "local_cache_dir": 2, "nativ": 2, "we": [2, 3, 4, 5, 6, 8], "also": [2, 3, 4, 6, 7], "accomplish": 2, "intern": 2, "pyarrow": [2, 7], "apach": [2, 6, 7], "compat": [2, 6, 7, 8], "intend": [2, 6, 7], "assist": [2, 6, 7], "cross": 2, "platform": 2, "util": [2, 4, 5], "encourag": [2, 4], "high": 2, "enabl": [2, 4, 5], "advanc": [2, 3], "integr": [2, 4], "non": 2, "malloc": 2, "jemalloc": 2, "mimalloc": 2, "depend": [2, 4], "oper": [2, 7], "avail": [2, 3, 4, 6], "overridden": 2, "develop": 2, "implement": [2, 4, 7], "help": [2, 4], "user": [2, 4, 7], "environ": [2, 3, 4, 7], "inherit": 2, "c": 2, "note": [2, 4, 6, 7, 8], "arrow_default_memory_pool": 2, "variabl": [2, 7], "static": 2, "defin": [2, 4], "when": [2, 3, 4, 7], "file": [2, 4, 5, 6, 7, 8], "read": [2, 4, 6, 7], "benefit": 2, "memory_map": 2, "you": [2, 4], "disabl": [2, 3], "cytotable_arrow_use_memory_map": 2, "0": [2, 3, 7], "export": [2, 4, 7], "duckdb": [2, 7], "api": [2, 5], "area": 2, "interfac": 2, "statement": 2, "dataset": [2, 7], "result": [2, 7, 8], "member": [3, 4], "leader": 3, "make": [3, 4], "particip": [3, 4], "commun": [3, 4], "harass": 3, "free": [3, 4], "experi": [3, 4], "everyon": 3, "regardless": 3, "ag": 3, "bodi": 3, "size": [3, 4, 7], "visibl": [3, 4], "invis": 3, "ethnic": 3, "sex": 3, "characterist": 3, "gender": 3, "ident": 3, "express": 3, "level": [3, 7], "educ": 3, "socio": 3, "econom": 3, "statu": [3, 4], "nation": 3, "person": 3, "appear": [3, 7], "race": 3, "religion": 3, "sexual": 3, "orient": [3, 6], "act": 3, "interact": 3, "contribut": 3, "open": [3, 4, 6], "welcom": [3, 4], "divers": 3, "inclus": 3, "healthi": 3, "behavior": [3, 4], "posit": 3, "demonstr": 3, "empathi": 3, "kind": [3, 7], "toward": 3, "peopl": 3, "Being": 3, "differ": [3, 4, 7], "opinion": 3, "viewpoint": 3, "give": 3, "gracefulli": 3, "accept": [3, 4], "construct": 3, "feedback": 3, "apolog": 3, "those": 3, "affect": [3, 6], "mistak": 3, "learn": 3, "focus": [3, 6], "best": [3, 4], "just": 3, "u": [3, 4], "individu": [3, 4], "overal": [3, 7], "unaccept": [3, 4], "languag": [3, 7], "imageri": 3, "attent": 3, "ani": [3, 4, 7], "troll": 3, "insult": 3, "derogatori": 3, "comment": [3, 4], "polit": 3, "attack": 3, "public": [3, 6], "privat": 3, "publish": [3, 5], "physic": 3, "email": 3, "address": 3, "without": 3, "explicit": 3, "permiss": 3, "could": 3, "reason": 3, "consid": 3, "inappropri": 3, "profession": 3, "clarifi": [3, 4], "take": [3, 7], "appropri": [3, 4, 5], "fair": 3, "action": [3, 4, 5], "thei": [3, 6], "deem": 3, "threaten": 3, "offens": 3, "harm": 3, "have": [3, 4, 5, 7, 8], "right": 3, "remov": 3, "edit": 3, "reject": 3, "commit": [3, 5], "wiki": 3, "issu": [3, 4], "align": [3, 7], "moder": 3, "decis": 3, "appli": 3, "all": [3, 4, 6, 7], "space": 3, "offici": 3, "repres": [3, 7], "e": [3, 4], "mail": 3, "post": 3, "social": 3, "media": 3, "account": 3, "appoint": 3, "onlin": 3, "offlin": 3, "event": 3, "instanc": 3, "abus": 3, "otherwis": 3, "report": [3, 5], "cytodata": [3, 4], "info": [3, 4], "gmail": [3, 4], "com": [3, 4, 5], "complaint": 3, "review": [3, 4], "investig": 3, "promptli": 3, "fairli": 3, "oblig": 3, "privaci": 3, "secur": 3, "incid": 3, "impact": 3, "determin": 3, "consequ": 3, "violat": 3, "unprofession": 3, "unwelcom": 3, "A": [3, 4, 6, 7], "written": [3, 4, 7], "clariti": 3, "around": 3, "natur": 3, "explan": 3, "why": [3, 4], "wa": [3, 7], "apologi": 3, "request": [3, 5, 6], "seri": 3, "continu": 3, "No": 3, "involv": [3, 4], "unsolicit": 3, "specifi": [3, 6, 7], "period": 3, "time": [3, 4, 7], "well": 3, "extern": 3, "channel": 3, "like": [3, 6], "term": [3, 6], "lead": 3, "seriou": 3, "sustain": 3, "sort": [3, 7], "allow": [3, 7], "dure": [3, 7], "pattern": 3, "aggress": 3, "disparag": 3, "class": 3, "adapt": 3, "version": [3, 4, 7], "http": [3, 4, 5, 7], "www": 3, "org": [3, 7], "code_of_conduct": 3, "html": [3, 4, 7], "were": [3, 4], "inspir": 3, "mozilla": 3, "ladder": 3, "answer": 3, "common": [3, 6, 7], "question": [3, 4], "faq": 3, "translat": 3, "thank": 4, "contain": [4, 6, 7], "guidelin": [4, 5], "most": [4, 7], "effect": 4, "codebas": 4, "If": 4, "stuck": 4, "pleas": [4, 5, 6], "feel": 4, "ask": 4, "project": [4, 5, 8], "govern": 4, "our": [4, 5], "expect": [4, 6], "uphold": 4, "github": [4, 5], "io": 4, "tracker": 4, "packag": 4, "poetri": 4, "blob": 4, "main": 4, "pyproject": 4, "toml": 4, "love": 4, "hear": 4, "case": 4, "softwar": 4, "doe": 4, "opportun": 4, "improv": 4, "howev": 4, "order": 4, "fix": 4, "need": [4, 6, 7], "tell": 4, "exactli": 4, "went": 4, "wrong": 4, "much": 4, "pertin": 4, "possibl": [4, 6, 7], "re": [4, 6], "input": [4, 5, 7], "copi": [4, 7], "past": 4, "piec": 4, "1": [4, 5, 7], "command": [4, 5], "2": [4, 5, 7], "error": 4, "ve": 4, "tri": 4, "overcom": 4, "run": [4, 7], "o": 4, "hardwar": 4, "repositori": 4, "search": 4, "exist": 4, "solut": 4, "It": [4, 6], "solv": 4, "alreadi": [4, 7], "find": 4, "describ": 4, "add": 4, "instead": 4, "new": [4, 7, 8], "deepli": 4, "simpl": [4, 7], "intuit": 4, "support": 4, "core": 4, "profil": 4, "pipelin": [4, 8], "good": 4, "propos": 4, "figur": 4, "out": 4, "next": 4, "check": [4, 7], "someon": 4, "els": 4, "ha": [4, 7], "mind": 4, "do": [4, 6], "interest": 4, "clearli": 4, "would": 4, "particular": 4, "can": 4, "daunt": 4, "strive": 4, "newcom": 4, "while": 4, "ensur": 4, "rigor": 4, "practic": 4, "thing": 4, "go": 4, "futur": [4, 7], "contributor": 4, "tag": 4, "beginn": 4, "want": 4, "haven": 4, "t": 4, "outlin": 4, "discuss": 4, "befor": 4, "write": [4, 7], "reduc": 4, "merg": 4, "plu": 4, "belong": 4, "wast": 4, "after": [4, 7], "decid": 4, "up": 4, "fork": 4, "model": [4, 5], "clone": 4, "featur": 4, "branch": 4, "onc": 4, "necessari": 4, "chang": 4, "should": 4, "incorpor": 4, "content": [4, 6, 7], "descript": 4, "directli": [4, 7], "speed": 4, "abl": 4, "approv": 4, "To": [4, 7], "effici": [4, 6], "step": 4, "instruct": 4, "templat": 4, "tripl": 4, "ad": 4, "small": 4, "bite": 4, "move": 4, "so": 4, "faster": 4, "than": [4, 7], "larg": 4, "submit": 4, "g": 4, "least": 4, "maintain": 4, "fashion": 4, "increas": 4, "abil": 4, "accuraci": 4, "scope": [4, 5], "short": 4, "phrase": 4, "normal": 4, "method": 4, "string": [4, 7], "prefer": 4, "number": 4, "primarili": [4, 7], "pytest": 4, "autom": 4, "instal": [4, 7], "black": 4, "isort": 4, "googl": 4, "docstr": [4, 7], "repo": 4, "automat": 4, "pre": 4, "alongsid": 4, "hook": 4, "same": [4, 7], "manual": [4, 5], "unit": 4, "sphinx": [4, 7], "websit": 4, "w": 4, "turn": 4, "warn": [4, 5, 7], "doc": [4, 7], "cffconvert": 4, "citat": 4, "cff": 4, "valid": [4, 5, 7], "treat": [4, 7], "failur": 4, "cov": 4, "made": 4, "htmlcov": 4, "index": 4, "myst": 4, "markedli": 4, "text": [4, 6], "markdown": 4, "docsit": 4, "sentenc": 4, "per": [4, 7, 8], "line": 4, "extens": [4, 7], "mermaid": 4, "miss": 4, "autodoc": 4, "updat": [4, 7], "push": 4, "trigger": 4, "doctest": 4, "presum": [4, 7], "primari": 4, "b": 4, "portion": 4, "guid": 4, "semant": 4, "semver": 4, "distinguish": 4, "between": [4, 6], "major": 4, "minor": 4, "patch": 4, "here": 4, "pypi": 4, "There": 4, "sever": 4, "prepar": 4, "increment": 4, "under": [4, 6], "label": 4, "drafter": 4, "108": 4, "On": 4, "draft": 4, "yml": 4, "leverag": 4, "modifi": [4, 7], "modif": 4, "anoth": 4, "deploi": 4, "earlier": [4, 7], "flow": [5, 7], "rel": 5, "morphologi": 5, "clean": 5, "output": [5, 6, 7], "scale": 5, "both": 5, "independ": 5, "unifi": [5, 6], "schema": [5, 7], "where": [5, 7], "pip": 5, "git": 5, "ubuntu": 5, "latest": 5, "maco": 5, "runner": 5, "md": 5, "eval": 5, "preset": [5, 8], "overrid": 5, "destin": [5, 7], "tutori": [5, 6], "code": 5, "conduct": 5, "quick": 5, "link": 5, "bug": 5, "suggest": 5, "enhanc": [5, 6], "your": 5, "first": [5, 7], "pull": 5, "messag": 5, "start": 5, "style": 5, "lint": 5, "build": [5, 7], "attribut": 5, "releas": 5, "pledg": 5, "standard": [5, 7], "enforc": 5, "respons": 5, "correct": 5, "3": 5, "ban": 5, "4": 5, "perman": 5, "architectur": 5, "_concat_join_sourc": [5, 7], "_concat_source_group": [5, 7], "_get_join_chunk": [5, 7], "_get_table_columns_and_typ": [5, 7], "_get_table_chunk_offset": [5, 7], "_infer_source_group_common_schema": [5, 7], "_join_source_chunk": [5, 7], "_prep_cast_column_data_typ": [5, 7], "_prepend_column_nam": [5, 7], "_return_futur": [5, 7], "_source_chunk_to_parquet": [5, 7], "_to_parquet": [5, 7], "_build_path": [5, 7], "_filter_source_filepath": [5, 7], "_get_source_filepath": [5, 7], "_infer_source_datatyp": [5, 7], "parsl_appbase_init_for_doc": [5, 7], "_arrow_type_cast_if_specifi": [5, 7], "_cache_cloudpath_to_loc": [5, 7], "_column_sort": [5, 7], "_default_parsl_config": [5, 7], "_duckdb_read": [5, 7], "_expand_path": [5, 7], "_parsl_load": [5, 7], "_sqlite_mixed_type_query_to_parquet": [5, 7], "except": 5, "cytotableexcept": [5, 7], "datatypeexcept": [5, 7], "noinputdataexcept": [5, 7], "schemaexcept": [5, 7], "topic": 6, "introduct": 6, "produc": [6, 8], "cellprofiler_csv": [6, 7, 8], "captur": [6, 7], "outcom": 6, "focu": [6, 7], "filepath": [6, 7], "remot": [6, 7], "aw": 6, "gcp": 6, "cloud": [6, 7], "azur": 6, "hood": 6, "whether": [6, 7], "authent": 6, "special": 6, "s3client": 6, "azureblobcli": 6, "gsclient": 6, "kwarg": [6, 7], "face": 6, "similar": [6, 7, 8], "sign": 6, "no_sign_request": [6, 7, 8], "true": [6, 7, 8], "comma": 6, "separ": 6, "valu": [6, 7], "delimit": 6, "exporttospreadsheet": 6, "source_datatyp": [6, 7, 8], "commonli": 6, "transfer": 6, "rich": 6, "long": 6, "archiv": 6, "exporttodatabas": 6, "cellprofiler_sqlit": [6, 7], "onli": [6, 7], "contrast": 6, "column": [6, 7], "design": 6, "retriev": 6, "compress": 6, "encod": 6, "scheme": 6, "complex": 6, "bulk": 6, "str": 7, "liter": 7, "none": 7, "list": 7, "tupl": 7, "identifying_column": 7, "concat": [7, 8], "bool": 7, "chunk_column": 7, "chunk_siz": 7, "int": 7, "infer_common_schema": 7, "drop_nul": 7, "fals": [7, 8], "data_type_cast_map": 7, "dict": 7, "convent": 7, "intermediari": [7, 8], "must": [7, 8], "datatyp": 7, "option": 7, "convers": 7, "union": 7, "id": 7, "ignor": 7, "regard": 7, "renam": 7, "concaten": [7, 8], "togeth": [7, 8], "chunk": 7, "op": 7, "infer": 7, "drop": 7, "nan": 7, "null": 7, "group": 7, "multipl": 7, "return": 7, "examplehuman": [7, 8], "signatur": [7, 8], "s3path": [7, 8], "s3_local_result": [7, 8], "arg": 7, "arrow": 7, "concat_t": 7, "concern": 7, "actual": 7, "join_sourc": 7, "multi": 7, "tree": 7, "root": 7, "subdir_1": 7, "subdir_2": 7, "becom": 7, "read_data": 7, "source_group_nam": 7, "source_group": 7, "common_schema": 7, "amongst": 7, "slightli": 7, "dictionari": 7, "later": 7, "length": 7, "gather": 7, "offset": 7, "segment": 7, "chanc": 7, "catch": 7, "problemat": 7, "rowcount": 7, "integ": 7, "map": 7, "roughli": 7, "basi": 7, "form": 7, "join_group": 7, "joinabl": 7, "filter": 7, "row": 7, "cast": 7, "receiv": 7, "cast_map": 7, "column_id": 7, "column_nam": 7, "colnam": 7, "column_dtyp": 7, "doubl": 7, "float": 7, "float32": 7, "real": 7, "dtype": 7, "equival": 7, "table_path": 7, "eventu": 7, "data_typ": 7, "synonym": 7, "match": 7, "panda": 7, "data_type_synonym": 7, "filenam": 7, "target": 7, "wrapper": 7, "compliant": 7, "context": 7, "wrap": 7, "purpos": 7, "ex": 7, "along": 7, "count": 7, "keyword": 7, "relev": 7, "seek": 7, "cloudpath": 7, "anypath": 7, "either": 7, "detect": 7, "self": 7, "func": 7, "extend": 7, "appbas": 7, "rather": 7, "attempt": 7, "idx": 7, "potenti": 7, "scenario": 7, "custom": 7, "duckdbpyconnect": 7, "connect": 7, "sqlite_scann": 7, "load": 7, "close": 7, "subsequ": 7, "ddb_reader": 7, "expand": 7, "home": 7, "expans": 7, "resolv": 7, "absolut": 7, "been": 7, "table_nam": 7, "extract": 7, "mismatch": 7, "hierarchi": 7, "challeng": 7, "nest": 8, "sub": 8, "folder": 8, "append": 8, "end": 8, "subfolder_a": 8, "subfolder_b": 8, "unless": 8}, "objects": {"cytotable": [[7, 0, 0, "-", "convert"], [7, 0, 0, "-", "exceptions"], [7, 0, 0, "-", "sources"], [7, 0, 0, "-", "utils"]], "cytotable.convert": [[7, 1, 1, "", "_concat_join_sources"], [7, 1, 1, "", "_concat_source_group"], [7, 1, 1, "", "_get_join_chunks"], [7, 1, 1, "", "_get_table_chunk_offsets"], [7, 1, 1, "", "_get_table_columns_and_types"], [7, 1, 1, "", "_infer_source_group_common_schema"], [7, 1, 1, "", "_join_source_chunk"], [7, 1, 1, "", "_prep_cast_column_data_types"], [7, 1, 1, "", "_prepend_column_name"], [7, 1, 1, "", "_return_future"], [7, 1, 1, "", "_source_chunk_to_parquet"], [7, 1, 1, "", "_to_parquet"], [7, 1, 1, "", "convert"]], "cytotable.exceptions": [[7, 2, 1, "", "CytoTableException"], [7, 2, 1, "", "DatatypeException"], [7, 2, 1, "", "NoInputDataException"], [7, 2, 1, "", "SchemaException"]], "cytotable.presets": [[7, 3, 1, "", "config"]], "cytotable.sources": [[7, 1, 1, "", "_build_path"], [7, 1, 1, "", "_filter_source_filepaths"], [7, 1, 1, "", "_get_source_filepaths"], [7, 1, 1, "", "_infer_source_datatype"]], "cytotable.utils": [[7, 1, 1, "", "Parsl_AppBase_init_for_docs"], [7, 1, 1, "", "_arrow_type_cast_if_specified"], [7, 1, 1, "", "_cache_cloudpath_to_local"], [7, 1, 1, "", "_column_sort"], [7, 1, 1, "", "_default_parsl_config"], [7, 1, 1, "", "_duckdb_reader"], [7, 1, 1, "", "_expand_path"], [7, 1, 1, "", "_parsl_loaded"], [7, 1, 1, "", "_sqlite_mixed_type_query_to_parquet"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:exception", "3": "py:data"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "exception", "Python exception"], "3": ["py", "data", "Python data"]}, "titleterms": {"architectur": [0, 1, 2], "content": [0, 5], "data": [1, 2, 6], "sourc": [1, 6, 7], "structur": 1, "compart": 1, "imag": 1, "identifi": 1, "kei": 1, "field": 1, "relationship": 1, "cytoplasm": 1, "technic": 2, "workflow": 2, "execut": 2, "technologi": 2, "path": 2, "cloud": 2, "base": 2, "sqlite": 2, "In": 2, "process": [2, 4], "format": 2, "arrow": 2, "memori": 2, "alloc": 2, "select": 2, "map": 2, "sql": 2, "manag": 2, "contributor": 3, "coven": 3, "code": [3, 4], "conduct": [3, 4], "our": 3, "pledg": 3, "standard": 3, "enforc": 3, "respons": 3, "scope": 3, "guidelin": 3, "1": 3, "correct": 3, "2": 3, "warn": 3, "3": 3, "temporari": 3, "ban": 3, "4": 3, "perman": 3, "attribut": [3, 4], "contribut": [4, 5], "quick": 4, "link": 4, "bug": 4, "report": 4, "suggest": 4, "enhanc": 4, "your": 4, "first": 4, "pull": 4, "request": 4, "git": 4, "commit": 4, "messag": 4, "develop": [4, 5], "overview": [4, 6], "get": 4, "start": 4, "style": 4, "lint": 4, "test": [4, 5], "coverag": 4, "document": 4, "build": 4, "publish": 4, "releas": 4, "locat": [4, 6], "cytot": 5, "summari": 5, "instal": 5, "refer": 5, "preset": [6, 7], "manual": 6, "overrid": 6, "type": 6, "cellprofil": [6, 8], "destin": 6, "python": 7, "api": 7, "convert": 7, "util": 7, "except": 7, "tutori": 8, "csv": 8, "output": 8, "parquet": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Architecture": [[0, "architecture"]], "Contents:": [[0, null], [5, null]], "Data Architecture": [[1, "data-architecture"]], "Sources": [[1, "sources"], [7, "module-cytotable.sources"]], "Data structure": [[1, "data-structure"]], "Compartments and Images": [[1, "compartments-and-images"]], "Identifying or Key Fields": [[1, "identifying-or-key-fields"]], "Relationships": [[1, "relationships"]], "Image Data Relationships": [[1, "image-data-relationships"]], "Cytoplasm Compartment Data Relationships": [[1, "cytoplasm-compartment-data-relationships"]], "Technical Architecture": [[2, "technical-architecture"]], "Workflows": [[2, "workflows"]], "Workflow Execution": [[2, "workflow-execution"]], "Data Technologies": [[2, "data-technologies"]], "Data Paths": [[2, "data-paths"]], "Data Paths - Cloud-based SQLite": [[2, "data-paths-cloud-based-sqlite"]], "In-process Data Format": [[2, "in-process-data-format"]], "Arrow Memory Allocator Selection": [[2, "arrow-memory-allocator-selection"]], "Arrow Memory Mapping Selection": [[2, "arrow-memory-mapping-selection"]], "SQL-based Data Management": [[2, "sql-based-data-management"]], "Contributor Covenant Code of Conduct": [[3, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[3, "our-pledge"]], "Our Standards": [[3, "our-standards"]], "Enforcement Responsibilities": [[3, "enforcement-responsibilities"]], "Scope": [[3, "scope"]], "Enforcement": [[3, "enforcement"]], "Enforcement Guidelines": [[3, "enforcement-guidelines"]], "1. Correction": [[3, "correction"]], "2. Warning": [[3, "warning"]], "3. Temporary Ban": [[3, "temporary-ban"]], "4. Permanent Ban": [[3, "permanent-ban"]], "Attribution": [[3, "attribution"], [4, "attribution"]], "Contributing": [[4, "contributing"]], "Code of conduct": [[4, "code-of-conduct"]], "Quick links": [[4, "quick-links"]], "Process": [[4, "process"]], "Bug reporting": [[4, "bug-reporting"]], "Suggesting enhancements": [[4, "suggesting-enhancements"]], "Your first code contribution": [[4, "your-first-code-contribution"]], "Pull requests": [[4, "pull-requests"]], "Git commit messages": [[4, "git-commit-messages"]], "Development": [[4, "development"]], "Overview": [[4, "overview"], [6, "overview"]], "Getting started": [[4, "getting-started"]], "Code style": [[4, "code-style"]], "Linting": [[4, "linting"]], "Testing": [[4, "testing"]], "Test Coverage": [[4, "test-coverage"]], "Documentation": [[4, "documentation"]], "Documentation Linting": [[4, "documentation-linting"]], "Documentation Builds": [[4, "documentation-builds"]], "Publishing Releases": [[4, "publishing-releases"]], "Release Locations": [[4, "release-locations"]], "Publishing Process": [[4, "publishing-process"]], "CytoTable": [[5, "cytotable"]], "Summary": [[5, "summary"]], "Installation": [[5, "installation"]], "Contributing, Development, and Testing": [[5, "contributing-development-and-testing"]], "References": [[5, "references"]], "Presets and Manual Overrides": [[6, "presets-and-manual-overrides"]], "Data Sources": [[6, "data-sources"]], "Data Source Locations": [[6, "data-source-locations"]], "Data Source Types": [[6, "data-source-types"]], "CellProfiler Data Sources": [[6, "cellprofiler-data-sources"]], "Data Destinations": [[6, "data-destinations"]], "Data Destination Locations": [[6, "data-destination-locations"]], "Data Destination Types": [[6, "data-destination-types"]], "Python API": [[7, "python-api"]], "Convert": [[7, "module-cytotable.convert"]], "Utils": [[7, "module-cytotable.utils"]], "Presets": [[7, "presets"]], "Exceptions": [[7, "module-cytotable.exceptions"]], "Tutorial": [[8, "tutorial"]], "CellProfiler CSV Output to Parquet": [[8, "cellprofiler-csv-output-to-parquet"]]}, "indexentries": {"cytotableexception": [[7, "cytotable.exceptions.CytoTableException"]], "datatypeexception": [[7, "cytotable.exceptions.DatatypeException"]], "noinputdataexception": [[7, "cytotable.exceptions.NoInputDataException"]], "parsl_appbase_init_for_docs() (in module cytotable.utils)": [[7, "cytotable.utils.Parsl_AppBase_init_for_docs"]], "schemaexception": [[7, "cytotable.exceptions.SchemaException"]], "_arrow_type_cast_if_specified() (in module cytotable.utils)": [[7, "cytotable.utils._arrow_type_cast_if_specified"]], "_build_path() (in module cytotable.sources)": [[7, "cytotable.sources._build_path"]], "_cache_cloudpath_to_local() (in module cytotable.utils)": [[7, "cytotable.utils._cache_cloudpath_to_local"]], "_column_sort() (in module cytotable.utils)": [[7, "cytotable.utils._column_sort"]], "_concat_join_sources() (in module cytotable.convert)": [[7, "cytotable.convert._concat_join_sources"]], "_concat_source_group() (in module cytotable.convert)": [[7, "cytotable.convert._concat_source_group"]], "_default_parsl_config() (in module cytotable.utils)": [[7, "cytotable.utils._default_parsl_config"]], "_duckdb_reader() (in module cytotable.utils)": [[7, "cytotable.utils._duckdb_reader"]], "_expand_path() (in module cytotable.utils)": [[7, "cytotable.utils._expand_path"]], "_filter_source_filepaths() (in module cytotable.sources)": [[7, "cytotable.sources._filter_source_filepaths"]], "_get_join_chunks() (in module cytotable.convert)": [[7, "cytotable.convert._get_join_chunks"]], "_get_source_filepaths() (in module cytotable.sources)": [[7, "cytotable.sources._get_source_filepaths"]], "_get_table_chunk_offsets() (in module cytotable.convert)": [[7, "cytotable.convert._get_table_chunk_offsets"]], "_get_table_columns_and_types() (in module cytotable.convert)": [[7, "cytotable.convert._get_table_columns_and_types"]], "_infer_source_datatype() (in module cytotable.sources)": [[7, "cytotable.sources._infer_source_datatype"]], "_infer_source_group_common_schema() (in module cytotable.convert)": [[7, "cytotable.convert._infer_source_group_common_schema"]], "_join_source_chunk() (in module cytotable.convert)": [[7, "cytotable.convert._join_source_chunk"]], "_parsl_loaded() (in module cytotable.utils)": [[7, "cytotable.utils._parsl_loaded"]], "_prep_cast_column_data_types() (in module cytotable.convert)": [[7, "cytotable.convert._prep_cast_column_data_types"]], "_prepend_column_name() (in module cytotable.convert)": [[7, "cytotable.convert._prepend_column_name"]], "_return_future() (in module cytotable.convert)": [[7, "cytotable.convert._return_future"]], "_source_chunk_to_parquet() (in module cytotable.convert)": [[7, "cytotable.convert._source_chunk_to_parquet"]], "_sqlite_mixed_type_query_to_parquet() (in module cytotable.utils)": [[7, "cytotable.utils._sqlite_mixed_type_query_to_parquet"]], "_to_parquet() (in module cytotable.convert)": [[7, "cytotable.convert._to_parquet"]], "config (in module cytotable.presets)": [[7, "cytotable.presets.config"]], "convert() (in module cytotable.convert)": [[7, "cytotable.convert.convert"]], "cytotable.convert": [[7, "module-cytotable.convert"]], "cytotable.exceptions": [[7, "module-cytotable.exceptions"]], "cytotable.sources": [[7, "module-cytotable.sources"]], "cytotable.utils": [[7, "module-cytotable.utils"]], "module": [[7, "module-cytotable.convert"], [7, "module-cytotable.exceptions"], [7, "module-cytotable.sources"], [7, "module-cytotable.utils"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["architecture", "architecture.data", "architecture.technical", "code_of_conduct", "contributing", "index", "overview", "python-api", "tutorial"], "filenames": ["architecture.md", "architecture.data.md", "architecture.technical.md", "code_of_conduct.md", "contributing.md", "index.md", "overview.md", "python-api.md", "tutorial.md"], "titles": ["Architecture", "Data Architecture", "Technical Architecture", "Contributor Covenant Code of Conduct", "Contributing", "CytoTable", "Overview", "Python API", "Tutorial"], "terms": {"document": [0, 1, 2, 5, 7], "cover": [0, 1, 2, 8], "variou": [0, 4, 6, 7, 8], "detail": [0, 1, 5], "pyctyomin": [0, 1, 4, 6, 7], "transform": [0, 1, 4, 5, 6, 7], "technic": [0, 5], "workflow": [0, 4, 5], "execut": 0, "data": [0, 4, 5, 7, 8], "technologi": [0, 5], "path": [0, 6, 7, 8], "In": 0, "process": [0, 5, 7], "format": [0, 4, 6], "sql": [0, 7], "base": [0, 1, 4, 7, 8], "manag": [0, 4, 7], "sourc": [0, 2, 4, 5], "structur": [0, 4, 5, 7], "compart": [0, 7, 8], "imag": [0, 5, 6], "identifi": [0, 7], "kei": [0, 7], "field": [0, 7], "relationship": [0, 4, 5], "cytoplasm": [0, 8], "ar": [1, 2, 3, 4, 6, 7], "measur": [1, 6], "creat": [1, 4, 5, 6, 7, 8], "from": [1, 2, 3, 4, 6, 7, 8], "other": [1, 2, 3, 5, 6, 7], "cell": [1, 2, 5, 6, 7, 8], "biologi": [1, 6], "analysi": [1, 5, 6], "tool": [1, 2, 4, 6], "see": [1, 2, 3, 4, 5, 6, 7, 8], "below": [1, 4, 8], "brief": [1, 6, 8], "overview": [1, 2, 5, 7], "type": [1, 2, 4, 5, 7], "cellprofil": [1, 2, 5, 7], "gener": [1, 4, 6, 7], "csv": [1, 5, 6, 7], "sqlite": [1, 5, 6, 7], "databas": [1, 2, 5, 6, 7], "cytomin": [1, 4, 5, 6], "which": [1, 2, 3, 4, 6, 7], "includ": [1, 2, 3, 4, 5, 7], "tabl": [1, 2, 7], "": [1, 2, 3, 4, 7], "mention": 1, "abov": [1, 4, 7], "deepprofil": [1, 5], "npz": [1, 5], "organ": [1, 2], "two": [1, 4], "categori": 1, "about": [1, 3, 4, 7], "itself": 1, "metadata": [1, 7], "nuclei": 1, "actin": 1, "specif": [1, 4, 6, 7], "an": [1, 2, 3, 4, 6, 7, 8], "aspect": [1, 2], "part": [1, 7], "found": 1, "within": [1, 2, 3, 4, 5, 6, 7, 8], "mai": [1, 2, 3, 4, 6, 7, 8], "follow": [1, 2, 3, 4, 5, 6, 7], "imagenumb": 1, "provid": [1, 2, 3, 4, 6, 7], "what": [1, 3, 4, 7], "i": [1, 2, 3, 4, 6, 7], "being": [1, 7], "referenc": [1, 5, 7], "mani": [1, 4, 7], "objectnumb": 1, "object": [1, 6, 7, 8], "parent_cel": 1, "relat": [1, 2, 4, 7], "thi": [1, 2, 3, 4, 5, 6, 7, 8], "canon": 1, "join": [1, 2, 7, 8], "greater": [1, 2], "parent_nuclei": 1, "The": [1, 3, 4, 5, 7, 8], "diagram": [1, 4, 5], "show": [1, 5], "exampl": [1, 2, 3, 4, 6, 7, 8], "us": [1, 2, 3, 4, 5, 6, 7, 8], "pycytomin": [1, 5, 7], "name": [1, 2, 7, 8], "each": [1, 7, 8], "zero": 1, "etc": [1, 2, 4, 7, 8], "via": [1, 3, 4, 6, 7], "These": [1, 6], "parent_": 1, "respect": [1, 3], "cytot": [2, 4, 6, 7, 8], "parsl": [2, 7], "collect": [2, 7], "task": [2, 4, 7], "python_app": [2, 7], "work": [2, 4, 7, 8], "isol": 2, "python": [2, 4, 5], "function": [2, 4, 7], "decor": [2, 7], "join_app": [2, 7], "one": [2, 4, 7], "more": [2, 5, 7], "app": [2, 7], "inform": [2, 3, 4, 7], "how": [2, 4, 6, 8], "procedur": 2, "executor": 2, "configur": [2, 4, 6, 7], "through": [2, 3, 4, 7], "pass": [2, 4, 7], "convert": [2, 5, 6, 8], "parsl_config": [2, 7], "config": [2, 5, 6, 7], "By": [2, 4, 6], "default": [2, 6, 7, 8], "assum": [2, 4, 7], "local": [2, 4, 6, 7, 8], "localprovid": 2, "For": [2, 3, 4, 6, 7, 8], "scalabl": 2, "highthroughputexecutor": 2, "handl": [2, 6], "pathlib": [2, 7], "modul": 2, "cloudpathlib": [2, 6, 7], "refer": [2, 4, 6, 7], "page": [2, 6, 8], "client": [2, 6, 7, 8], "argument": [2, 6, 7], "locat": [2, 5, 7, 8], "store": [2, 7], "download": 2, "cach": [2, 7], "capabl": 2, "perform": [2, 4, 6, 7], "queri": [2, 7], "wai": [2, 3, 6], "requir": [2, 4, 6], "addit": [2, 4], "paramet": [2, 7, 8], "storag": [2, 6, 7, 8], "set": [2, 3, 7], "directori": [2, 7, 8], "explicitli": [2, 6], "avoid": [2, 3, 4, 7], "limit": [2, 6, 7], "some": [2, 7], "temporari": [2, 5], "constrain": 2, "system": [2, 6], "import": [2, 4, 7, 8], "parquet": [2, 5, 6, 7], "source_path": [2, 7, 8], "s3": [2, 6, 7, 8], "bucket": 2, "singl": [2, 3, 5, 7, 8], "dest_path": [2, 6, 7, 8], "test": [2, 7, 8], "dest_datatyp": [2, 6, 7, 8], "dir": 2, "tmpdata": 2, "get": [2, 5, 7], "local_cache_dir": 2, "nativ": 2, "we": [2, 3, 4, 5, 6, 8], "also": [2, 3, 4, 6, 7], "accomplish": 2, "intern": 2, "pyarrow": [2, 7], "apach": [2, 6, 7], "compat": [2, 6, 7, 8], "intend": [2, 6, 7], "assist": [2, 6, 7], "cross": 2, "platform": 2, "util": [2, 4, 5], "encourag": [2, 4], "high": 2, "enabl": [2, 4, 5], "advanc": [2, 3], "integr": [2, 4], "non": 2, "malloc": 2, "jemalloc": 2, "mimalloc": 2, "depend": [2, 4], "oper": [2, 7], "avail": [2, 3, 4, 6], "overridden": 2, "develop": 2, "implement": [2, 4, 7], "help": [2, 4], "user": [2, 4, 7], "environ": [2, 3, 4, 7], "inherit": 2, "c": 2, "note": [2, 4, 6, 7, 8], "arrow_default_memory_pool": 2, "variabl": [2, 7], "static": 2, "defin": [2, 4], "when": [2, 3, 4, 7], "file": [2, 4, 5, 6, 7, 8], "read": [2, 4, 6, 7], "benefit": 2, "memory_map": 2, "you": [2, 4], "disabl": [2, 3], "cytotable_arrow_use_memory_map": 2, "0": [2, 3, 7], "export": [2, 4, 7], "duckdb": [2, 7], "api": [2, 5], "area": 2, "interfac": 2, "statement": [2, 7], "dataset": [2, 7], "result": [2, 7, 8], "member": [3, 4], "leader": 3, "make": [3, 4], "particip": [3, 4], "commun": [3, 4], "harass": 3, "free": [3, 4], "experi": [3, 4], "everyon": 3, "regardless": 3, "ag": 3, "bodi": 3, "size": [3, 4, 7], "visibl": [3, 4], "invis": 3, "ethnic": 3, "sex": 3, "characterist": 3, "gender": 3, "ident": 3, "express": 3, "level": [3, 7], "educ": 3, "socio": 3, "econom": 3, "statu": [3, 4], "nation": 3, "person": 3, "appear": 3, "race": 3, "religion": 3, "sexual": 3, "orient": [3, 6], "act": 3, "interact": 3, "contribut": 3, "open": [3, 4, 6], "welcom": [3, 4], "divers": 3, "inclus": 3, "healthi": 3, "behavior": [3, 4], "posit": 3, "demonstr": 3, "empathi": 3, "kind": [3, 7], "toward": 3, "peopl": 3, "Being": 3, "differ": [3, 4, 7], "opinion": 3, "viewpoint": 3, "give": 3, "gracefulli": 3, "accept": [3, 4], "construct": 3, "feedback": 3, "apolog": 3, "those": 3, "affect": [3, 6], "mistak": 3, "learn": 3, "focus": [3, 6], "best": [3, 4], "just": 3, "u": [3, 4], "individu": [3, 4], "overal": [3, 7], "unaccept": [3, 4], "languag": [3, 7], "imageri": 3, "attent": 3, "ani": [3, 4, 7], "troll": 3, "insult": 3, "derogatori": 3, "comment": [3, 4], "polit": 3, "attack": 3, "public": [3, 6], "privat": 3, "publish": [3, 5], "physic": 3, "email": 3, "address": 3, "without": 3, "explicit": 3, "permiss": 3, "could": 3, "reason": 3, "consid": 3, "inappropri": 3, "profession": 3, "clarifi": [3, 4], "take": [3, 7], "appropri": [3, 4, 5], "fair": 3, "action": [3, 4, 5], "thei": [3, 6], "deem": 3, "threaten": 3, "offens": 3, "harm": 3, "have": [3, 4, 5, 7, 8], "right": 3, "remov": 3, "edit": 3, "reject": 3, "commit": [3, 5], "wiki": 3, "issu": [3, 4], "align": [3, 7], "moder": 3, "decis": 3, "appli": 3, "all": [3, 4, 6, 7], "space": 3, "offici": 3, "repres": [3, 7], "e": [3, 4], "mail": 3, "post": 3, "social": 3, "media": 3, "account": 3, "appoint": 3, "onlin": 3, "offlin": 3, "event": 3, "instanc": 3, "abus": 3, "otherwis": 3, "report": [3, 5], "cytodata": [3, 4], "info": [3, 4], "gmail": [3, 4], "com": [3, 4, 5], "complaint": 3, "review": [3, 4], "investig": 3, "promptli": 3, "fairli": 3, "oblig": 3, "privaci": 3, "secur": 3, "incid": 3, "impact": 3, "determin": 3, "consequ": 3, "violat": 3, "unprofession": 3, "unwelcom": 3, "A": [3, 4, 6, 7], "written": [3, 4, 7], "clariti": 3, "around": 3, "natur": 3, "explan": 3, "why": [3, 4], "wa": [3, 7], "apologi": 3, "request": [3, 5, 6], "seri": 3, "continu": 3, "No": 3, "involv": [3, 4], "unsolicit": 3, "specifi": [3, 6, 7], "period": 3, "time": [3, 4, 7], "well": 3, "extern": 3, "channel": 3, "like": [3, 6], "term": [3, 6], "lead": 3, "seriou": 3, "sustain": 3, "sort": [3, 7], "allow": [3, 7], "dure": [3, 7], "pattern": 3, "aggress": 3, "disparag": 3, "class": 3, "adapt": 3, "version": [3, 4, 7], "http": [3, 4, 5, 7], "www": 3, "org": [3, 7], "code_of_conduct": 3, "html": [3, 4, 7], "were": [3, 4], "inspir": 3, "mozilla": 3, "ladder": 3, "answer": 3, "common": [3, 6, 7], "question": [3, 4], "faq": 3, "translat": 3, "thank": 4, "contain": [4, 6, 7], "guidelin": [4, 5], "most": 4, "effect": 4, "codebas": 4, "If": 4, "stuck": 4, "pleas": [4, 5, 6], "feel": 4, "ask": 4, "project": [4, 5, 8], "govern": 4, "our": [4, 5], "expect": [4, 6], "uphold": 4, "github": [4, 5], "io": 4, "tracker": 4, "packag": 4, "poetri": 4, "blob": 4, "main": 4, "pyproject": 4, "toml": 4, "love": 4, "hear": 4, "case": 4, "softwar": 4, "doe": 4, "opportun": 4, "improv": 4, "howev": 4, "order": 4, "fix": 4, "need": [4, 6, 7], "tell": 4, "exactli": 4, "went": 4, "wrong": 4, "much": 4, "pertin": 4, "possibl": [4, 6, 7], "re": [4, 6], "input": [4, 5, 7], "copi": [4, 7], "past": 4, "piec": 4, "1": [4, 5, 7], "command": [4, 5], "2": [4, 5, 7], "error": 4, "ve": 4, "tri": 4, "overcom": 4, "run": [4, 7], "o": 4, "hardwar": 4, "repositori": 4, "search": 4, "exist": 4, "solut": 4, "It": [4, 6], "solv": 4, "alreadi": [4, 7], "find": 4, "describ": 4, "add": 4, "instead": 4, "new": [4, 7, 8], "deepli": 4, "simpl": [4, 7], "intuit": 4, "support": 4, "core": 4, "profil": 4, "pipelin": [4, 8], "good": 4, "propos": 4, "figur": 4, "out": 4, "next": 4, "check": [4, 7], "someon": 4, "els": 4, "ha": [4, 7], "mind": 4, "do": [4, 6], "interest": 4, "clearli": 4, "would": 4, "particular": 4, "can": 4, "daunt": 4, "strive": 4, "newcom": 4, "while": 4, "ensur": 4, "rigor": 4, "practic": 4, "thing": 4, "go": 4, "futur": [4, 7], "contributor": 4, "tag": 4, "beginn": 4, "want": 4, "haven": 4, "t": 4, "outlin": 4, "discuss": 4, "befor": 4, "write": [4, 7], "reduc": 4, "merg": 4, "plu": 4, "belong": 4, "wast": 4, "after": [4, 7], "decid": 4, "up": 4, "fork": 4, "model": [4, 5], "clone": 4, "featur": 4, "branch": 4, "onc": 4, "necessari": 4, "chang": 4, "should": 4, "incorpor": 4, "content": [4, 6, 7], "descript": 4, "directli": [4, 7], "speed": 4, "abl": 4, "approv": 4, "To": [4, 7], "effici": [4, 6], "step": 4, "instruct": 4, "templat": 4, "tripl": 4, "ad": 4, "small": 4, "bite": 4, "move": 4, "so": 4, "faster": 4, "than": [4, 7], "larg": 4, "submit": 4, "g": 4, "least": 4, "maintain": 4, "fashion": 4, "increas": 4, "abil": 4, "accuraci": 4, "scope": [4, 5], "short": 4, "phrase": 4, "normal": 4, "method": 4, "string": [4, 7], "prefer": 4, "number": 4, "primarili": [4, 7], "pytest": 4, "autom": 4, "instal": [4, 7], "black": 4, "isort": 4, "googl": 4, "docstr": [4, 7], "repo": 4, "automat": 4, "pre": 4, "alongsid": 4, "hook": 4, "same": [4, 7], "manual": [4, 5], "unit": 4, "sphinx": [4, 7], "websit": 4, "w": 4, "turn": 4, "warn": [4, 5, 7], "doc": [4, 7], "cffconvert": 4, "citat": 4, "cff": 4, "valid": [4, 5, 7], "treat": [4, 7], "failur": 4, "cov": 4, "made": 4, "htmlcov": 4, "index": 4, "myst": 4, "markedli": 4, "text": [4, 6], "markdown": 4, "docsit": 4, "sentenc": 4, "per": [4, 7, 8], "line": 4, "extens": [4, 7], "mermaid": 4, "miss": 4, "autodoc": 4, "updat": [4, 7], "push": 4, "trigger": 4, "doctest": 4, "presum": [4, 7], "primari": 4, "b": 4, "portion": 4, "guid": 4, "semant": 4, "semver": 4, "distinguish": 4, "between": [4, 6], "major": 4, "minor": 4, "patch": 4, "here": 4, "pypi": 4, "There": 4, "sever": 4, "prepar": [4, 7], "increment": 4, "under": [4, 6], "label": 4, "drafter": 4, "108": 4, "On": 4, "draft": 4, "yml": 4, "leverag": 4, "modifi": [4, 7], "modif": 4, "anoth": 4, "deploi": 4, "earlier": [4, 7], "flow": [5, 7], "rel": 5, "morphologi": 5, "clean": 5, "output": [5, 6, 7], "scale": 5, "both": 5, "independ": 5, "unifi": [5, 6], "schema": [5, 7], "where": [5, 7], "pip": 5, "git": 5, "ubuntu": 5, "latest": 5, "maco": 5, "runner": 5, "md": 5, "eval": 5, "preset": [5, 8], "overrid": 5, "destin": [5, 7], "tutori": [5, 6], "code": 5, "conduct": 5, "quick": 5, "link": 5, "bug": 5, "suggest": 5, "enhanc": [5, 6], "your": 5, "first": [5, 7], "pull": 5, "messag": 5, "start": 5, "style": 5, "lint": 5, "build": [5, 7], "attribut": 5, "releas": 5, "pledg": 5, "standard": [5, 7], "enforc": 5, "respons": 5, "correct": 5, "3": 5, "ban": 5, "4": 5, "perman": 5, "architectur": 5, "_concat_join_sourc": [5, 7], "_concat_source_group": [5, 7], "_get_table_columns_and_typ": [5, 7], "_get_table_chunk_offset": [5, 7], "_infer_source_group_common_schema": [5, 7], "_join_source_chunk": [5, 7], "_prepare_join_sql": [5, 7], "_prep_cast_column_data_typ": [5, 7], "_prepend_column_nam": [5, 7], "_return_futur": [5, 7], "_source_chunk_to_parquet": [5, 7], "_to_parquet": [5, 7], "_build_path": [5, 7], "_filter_source_filepath": [5, 7], "_get_source_filepath": [5, 7], "_infer_source_datatyp": [5, 7], "parsl_appbase_init_for_doc": [5, 7], "_arrow_type_cast_if_specifi": [5, 7], "_cache_cloudpath_to_loc": [5, 7], "_column_sort": [5, 7], "_default_parsl_config": [5, 7], "_duckdb_read": [5, 7], "_expand_path": [5, 7], "_parsl_load": [5, 7], "_sqlite_mixed_type_query_to_parquet": [5, 7], "except": 5, "cytotableexcept": [5, 7], "datatypeexcept": [5, 7], "noinputdataexcept": [5, 7], "schemaexcept": [5, 7], "topic": 6, "introduct": 6, "produc": [6, 8], "cellprofiler_csv": [6, 7, 8], "captur": [6, 7], "outcom": 6, "focu": [6, 7], "filepath": [6, 7], "remot": [6, 7], "aw": 6, "gcp": 6, "cloud": [6, 7], "azur": 6, "hood": 6, "whether": [6, 7], "authent": 6, "special": 6, "s3client": 6, "azureblobcli": 6, "gsclient": 6, "kwarg": [6, 7], "face": 6, "similar": [6, 7, 8], "sign": 6, "no_sign_request": [6, 7, 8], "true": [6, 7, 8], "comma": 6, "separ": 6, "valu": [6, 7], "delimit": 6, "exporttospreadsheet": 6, "source_datatyp": [6, 7, 8], "commonli": 6, "transfer": 6, "rich": 6, "long": 6, "archiv": 6, "exporttodatabas": 6, "cellprofiler_sqlit": [6, 7], "onli": [6, 7], "contrast": 6, "column": [6, 7], "design": 6, "retriev": 6, "compress": 6, "encod": 6, "scheme": 6, "complex": 6, "bulk": 6, "str": 7, "liter": 7, "none": 7, "list": 7, "tupl": 7, "identifying_column": 7, "concat": [7, 8], "bool": 7, "chunk_siz": 7, "int": 7, "infer_common_schema": 7, "drop_nul": 7, "fals": [7, 8], "data_type_cast_map": 7, "dict": 7, "convent": 7, "intermediari": [7, 8], "must": [7, 8], "datatyp": 7, "option": 7, "convers": 7, "union": 7, "id": 7, "ignor": 7, "regard": 7, "renam": 7, "concaten": [7, 8], "togeth": [7, 8], "chunk": 7, "op": 7, "infer": 7, "drop": 7, "nan": 7, "null": 7, "group": 7, "multipl": 7, "return": 7, "examplehuman": [7, 8], "signatur": [7, 8], "s3path": [7, 8], "s3_local_result": [7, 8], "arg": 7, "arrow": 7, "concat_t": 7, "concern": 7, "actual": 7, "join_sourc": 7, "multi": 7, "tree": 7, "root": 7, "subdir_1": 7, "subdir_2": 7, "becom": 7, "read_data": 7, "source_group_nam": 7, "source_group": 7, "common_schema": 7, "amongst": 7, "slightli": 7, "dictionari": 7, "gather": 7, "offset": 7, "later": 7, "segment": 7, "chanc": 7, "catch": 7, "problemat": 7, "rowcount": 7, "integ": 7, "map": 7, "roughli": 7, "basi": 7, "form": 7, "join_group": 7, "joinabl": 7, "filter": 7, "row": 7, "cast": 7, "receiv": 7, "cast_map": 7, "column_id": 7, "column_nam": 7, "colnam": 7, "column_dtyp": 7, "doubl": 7, "float": 7, "float32": 7, "real": 7, "dtype": 7, "equival": 7, "table_path": 7, "eventu": 7, "data_typ": 7, "synonym": 7, "match": 7, "panda": 7, "data_type_synonym": 7, "filenam": 7, "target": 7, "wrapper": 7, "compliant": 7, "context": 7, "wrap": 7, "purpos": 7, "ex": 7, "along": 7, "count": 7, "keyword": 7, "relev": 7, "seek": 7, "cloudpath": 7, "anypath": 7, "either": 7, "detect": 7, "self": 7, "func": 7, "extend": 7, "appbas": 7, "rather": 7, "attempt": 7, "idx": 7, "potenti": 7, "scenario": 7, "custom": 7, "duckdbpyconnect": 7, "connect": 7, "sqlite_scann": 7, "load": 7, "close": 7, "subsequ": 7, "ddb_reader": 7, "expand": 7, "home": 7, "expans": 7, "resolv": 7, "absolut": 7, "been": 7, "table_nam": 7, "extract": 7, "mismatch": 7, "hierarchi": 7, "challeng": 7, "nest": 8, "sub": 8, "folder": 8, "append": 8, "end": 8, "subfolder_a": 8, "subfolder_b": 8, "unless": 8}, "objects": {"cytotable": [[7, 0, 0, "-", "convert"], [7, 0, 0, "-", "exceptions"], [7, 0, 0, "-", "sources"], [7, 0, 0, "-", "utils"]], "cytotable.convert": [[7, 1, 1, "", "_concat_join_sources"], [7, 1, 1, "", "_concat_source_group"], [7, 1, 1, "", "_get_table_chunk_offsets"], [7, 1, 1, "", "_get_table_columns_and_types"], [7, 1, 1, "", "_infer_source_group_common_schema"], [7, 1, 1, "", "_join_source_chunk"], [7, 1, 1, "", "_prep_cast_column_data_types"], [7, 1, 1, "", "_prepare_join_sql"], [7, 1, 1, "", "_prepend_column_name"], [7, 1, 1, "", "_return_future"], [7, 1, 1, "", "_source_chunk_to_parquet"], [7, 1, 1, "", "_to_parquet"], [7, 1, 1, "", "convert"]], "cytotable.exceptions": [[7, 2, 1, "", "CytoTableException"], [7, 2, 1, "", "DatatypeException"], [7, 2, 1, "", "NoInputDataException"], [7, 2, 1, "", "SchemaException"]], "cytotable.presets": [[7, 3, 1, "", "config"]], "cytotable.sources": [[7, 1, 1, "", "_build_path"], [7, 1, 1, "", "_filter_source_filepaths"], [7, 1, 1, "", "_get_source_filepaths"], [7, 1, 1, "", "_infer_source_datatype"]], "cytotable.utils": [[7, 1, 1, "", "Parsl_AppBase_init_for_docs"], [7, 1, 1, "", "_arrow_type_cast_if_specified"], [7, 1, 1, "", "_cache_cloudpath_to_local"], [7, 1, 1, "", "_column_sort"], [7, 1, 1, "", "_default_parsl_config"], [7, 1, 1, "", "_duckdb_reader"], [7, 1, 1, "", "_expand_path"], [7, 1, 1, "", "_parsl_loaded"], [7, 1, 1, "", "_sqlite_mixed_type_query_to_parquet"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:exception", "3": "py:data"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "exception", "Python exception"], "3": ["py", "data", "Python data"]}, "titleterms": {"architectur": [0, 1, 2], "content": [0, 5], "data": [1, 2, 6], "sourc": [1, 6, 7], "structur": 1, "compart": 1, "imag": 1, "identifi": 1, "kei": 1, "field": 1, "relationship": 1, "cytoplasm": 1, "technic": 2, "workflow": 2, "execut": 2, "technologi": 2, "path": 2, "cloud": 2, "base": 2, "sqlite": 2, "In": 2, "process": [2, 4], "format": 2, "arrow": 2, "memori": 2, "alloc": 2, "select": 2, "map": 2, "sql": 2, "manag": 2, "contributor": 3, "coven": 3, "code": [3, 4], "conduct": [3, 4], "our": 3, "pledg": 3, "standard": 3, "enforc": 3, "respons": 3, "scope": 3, "guidelin": 3, "1": 3, "correct": 3, "2": 3, "warn": 3, "3": 3, "temporari": 3, "ban": 3, "4": 3, "perman": 3, "attribut": [3, 4], "contribut": [4, 5], "quick": 4, "link": 4, "bug": 4, "report": 4, "suggest": 4, "enhanc": 4, "your": 4, "first": 4, "pull": 4, "request": 4, "git": 4, "commit": 4, "messag": 4, "develop": [4, 5], "overview": [4, 6], "get": 4, "start": 4, "style": 4, "lint": 4, "test": [4, 5], "coverag": 4, "document": 4, "build": 4, "publish": 4, "releas": 4, "locat": [4, 6], "cytot": 5, "summari": 5, "instal": 5, "refer": 5, "preset": [6, 7], "manual": 6, "overrid": 6, "type": 6, "cellprofil": [6, 8], "destin": 6, "python": 7, "api": 7, "convert": 7, "util": 7, "except": 7, "tutori": 8, "csv": 8, "output": 8, "parquet": 8}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Architecture": [[0, "architecture"]], "Contents:": [[0, null], [5, null]], "Data Architecture": [[1, "data-architecture"]], "Sources": [[1, "sources"], [7, "module-cytotable.sources"]], "Data structure": [[1, "data-structure"]], "Compartments and Images": [[1, "compartments-and-images"]], "Identifying or Key Fields": [[1, "identifying-or-key-fields"]], "Relationships": [[1, "relationships"]], "Image Data Relationships": [[1, "image-data-relationships"]], "Cytoplasm Compartment Data Relationships": [[1, "cytoplasm-compartment-data-relationships"]], "Technical Architecture": [[2, "technical-architecture"]], "Workflows": [[2, "workflows"]], "Workflow Execution": [[2, "workflow-execution"]], "Data Technologies": [[2, "data-technologies"]], "Data Paths": [[2, "data-paths"]], "Data Paths - Cloud-based SQLite": [[2, "data-paths-cloud-based-sqlite"]], "In-process Data Format": [[2, "in-process-data-format"]], "Arrow Memory Allocator Selection": [[2, "arrow-memory-allocator-selection"]], "Arrow Memory Mapping Selection": [[2, "arrow-memory-mapping-selection"]], "SQL-based Data Management": [[2, "sql-based-data-management"]], "Contributor Covenant Code of Conduct": [[3, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[3, "our-pledge"]], "Our Standards": [[3, "our-standards"]], "Enforcement Responsibilities": [[3, "enforcement-responsibilities"]], "Scope": [[3, "scope"]], "Enforcement": [[3, "enforcement"]], "Enforcement Guidelines": [[3, "enforcement-guidelines"]], "1. Correction": [[3, "correction"]], "2. Warning": [[3, "warning"]], "3. Temporary Ban": [[3, "temporary-ban"]], "4. Permanent Ban": [[3, "permanent-ban"]], "Attribution": [[3, "attribution"], [4, "attribution"]], "Contributing": [[4, "contributing"]], "Code of conduct": [[4, "code-of-conduct"]], "Quick links": [[4, "quick-links"]], "Process": [[4, "process"]], "Bug reporting": [[4, "bug-reporting"]], "Suggesting enhancements": [[4, "suggesting-enhancements"]], "Your first code contribution": [[4, "your-first-code-contribution"]], "Pull requests": [[4, "pull-requests"]], "Git commit messages": [[4, "git-commit-messages"]], "Development": [[4, "development"]], "Overview": [[4, "overview"], [6, "overview"]], "Getting started": [[4, "getting-started"]], "Code style": [[4, "code-style"]], "Linting": [[4, "linting"]], "Testing": [[4, "testing"]], "Test Coverage": [[4, "test-coverage"]], "Documentation": [[4, "documentation"]], "Documentation Linting": [[4, "documentation-linting"]], "Documentation Builds": [[4, "documentation-builds"]], "Publishing Releases": [[4, "publishing-releases"]], "Release Locations": [[4, "release-locations"]], "Publishing Process": [[4, "publishing-process"]], "CytoTable": [[5, "cytotable"]], "Summary": [[5, "summary"]], "Installation": [[5, "installation"]], "Contributing, Development, and Testing": [[5, "contributing-development-and-testing"]], "References": [[5, "references"]], "Presets and Manual Overrides": [[6, "presets-and-manual-overrides"]], "Data Sources": [[6, "data-sources"]], "Data Source Locations": [[6, "data-source-locations"]], "Data Source Types": [[6, "data-source-types"]], "CellProfiler Data Sources": [[6, "cellprofiler-data-sources"]], "Data Destinations": [[6, "data-destinations"]], "Data Destination Locations": [[6, "data-destination-locations"]], "Data Destination Types": [[6, "data-destination-types"]], "Python API": [[7, "python-api"]], "Convert": [[7, "module-cytotable.convert"]], "Utils": [[7, "module-cytotable.utils"]], "Presets": [[7, "presets"]], "Exceptions": [[7, "module-cytotable.exceptions"]], "Tutorial": [[8, "tutorial"]], "CellProfiler CSV Output to Parquet": [[8, "cellprofiler-csv-output-to-parquet"]]}, "indexentries": {"cytotableexception": [[7, "cytotable.exceptions.CytoTableException"]], "datatypeexception": [[7, "cytotable.exceptions.DatatypeException"]], "noinputdataexception": [[7, "cytotable.exceptions.NoInputDataException"]], "parsl_appbase_init_for_docs() (in module cytotable.utils)": [[7, "cytotable.utils.Parsl_AppBase_init_for_docs"]], "schemaexception": [[7, "cytotable.exceptions.SchemaException"]], "_arrow_type_cast_if_specified() (in module cytotable.utils)": [[7, "cytotable.utils._arrow_type_cast_if_specified"]], "_build_path() (in module cytotable.sources)": [[7, "cytotable.sources._build_path"]], "_cache_cloudpath_to_local() (in module cytotable.utils)": [[7, "cytotable.utils._cache_cloudpath_to_local"]], "_column_sort() (in module cytotable.utils)": [[7, "cytotable.utils._column_sort"]], "_concat_join_sources() (in module cytotable.convert)": [[7, "cytotable.convert._concat_join_sources"]], "_concat_source_group() (in module cytotable.convert)": [[7, "cytotable.convert._concat_source_group"]], "_default_parsl_config() (in module cytotable.utils)": [[7, "cytotable.utils._default_parsl_config"]], "_duckdb_reader() (in module cytotable.utils)": [[7, "cytotable.utils._duckdb_reader"]], "_expand_path() (in module cytotable.utils)": [[7, "cytotable.utils._expand_path"]], "_filter_source_filepaths() (in module cytotable.sources)": [[7, "cytotable.sources._filter_source_filepaths"]], "_get_source_filepaths() (in module cytotable.sources)": [[7, "cytotable.sources._get_source_filepaths"]], "_get_table_chunk_offsets() (in module cytotable.convert)": [[7, "cytotable.convert._get_table_chunk_offsets"]], "_get_table_columns_and_types() (in module cytotable.convert)": [[7, "cytotable.convert._get_table_columns_and_types"]], "_infer_source_datatype() (in module cytotable.sources)": [[7, "cytotable.sources._infer_source_datatype"]], "_infer_source_group_common_schema() (in module cytotable.convert)": [[7, "cytotable.convert._infer_source_group_common_schema"]], "_join_source_chunk() (in module cytotable.convert)": [[7, "cytotable.convert._join_source_chunk"]], "_parsl_loaded() (in module cytotable.utils)": [[7, "cytotable.utils._parsl_loaded"]], "_prep_cast_column_data_types() (in module cytotable.convert)": [[7, "cytotable.convert._prep_cast_column_data_types"]], "_prepare_join_sql() (in module cytotable.convert)": [[7, "cytotable.convert._prepare_join_sql"]], "_prepend_column_name() (in module cytotable.convert)": [[7, "cytotable.convert._prepend_column_name"]], "_return_future() (in module cytotable.convert)": [[7, "cytotable.convert._return_future"]], "_source_chunk_to_parquet() (in module cytotable.convert)": [[7, "cytotable.convert._source_chunk_to_parquet"]], "_sqlite_mixed_type_query_to_parquet() (in module cytotable.utils)": [[7, "cytotable.utils._sqlite_mixed_type_query_to_parquet"]], "_to_parquet() (in module cytotable.convert)": [[7, "cytotable.convert._to_parquet"]], "config (in module cytotable.presets)": [[7, "cytotable.presets.config"]], "convert() (in module cytotable.convert)": [[7, "cytotable.convert.convert"]], "cytotable.convert": [[7, "module-cytotable.convert"]], "cytotable.exceptions": [[7, "module-cytotable.exceptions"]], "cytotable.sources": [[7, "module-cytotable.sources"]], "cytotable.utils": [[7, "module-cytotable.utils"]], "module": [[7, "module-cytotable.convert"], [7, "module-cytotable.exceptions"], [7, "module-cytotable.sources"], [7, "module-cytotable.utils"]]}}) \ No newline at end of file