Commit dc685db

Merge branch 'develop' into features/phenov2-1394

v-rocheleau committed Sep 11, 2023
2 parents e0d27cb + 1b0dfed
Showing 120 changed files with 3,685 additions and 2,318 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
@@ -9,8 +9,8 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v1
-      - uses: actions/setup-python@v2
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
         name: Set up Python
         with:
           python-version: "3.8"
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
@@ -44,5 +44,5 @@ jobs:
       run: |
         export POSTGRES_USER="postgres" && export POSTGRES_PASSWORD="postgres" && export POSTGRES_PORT=5432
         coverage run ./manage.py test
-    - name: Codecov
-      run: codecov
+    - name: Upload Coverage to Codecov
+      uses: codecov/codecov-action@v3
3 changes: 3 additions & 0 deletions .gitignore
@@ -40,3 +40,6 @@ config.json

 # MacOS
 .DS_Store
+
+tmp/
+chord_metadata_service/vrs
8 changes: 6 additions & 2 deletions .readthedocs.yml
@@ -4,6 +4,11 @@
 # Required
 version: 2

+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.10"
+
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
   configuration: docs/conf.py
@@ -16,7 +21,6 @@ formats:

 # Optionally set the version of Python and requirements required to build your docs
 python:
-  version: 3.6
   install:
     - requirements: docs/requirements.txt
     - requirements: requirements.txt
     - path: .
15 changes: 7 additions & 8 deletions .vscode/launch.json
@@ -1,18 +1,17 @@
 {
     // Use IntelliSense to learn about possible attributes.
     // Hover to view descriptions of existing attributes.
     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Python: Django mgmt commands",
+            "name": "Python: Django Test",
             "type": "python",
             "request": "launch",
             "program": "${workspaceFolder}/manage.py",
             "args": [
-                "makemigrations"
+                "test",
+                "${relativeFileDirname}"
             ],
             "django": true,
             "justMyCode": true
         },
         {
             "name": "Python: Run Django",
@@ -28,11 +27,11 @@
             "django": true
         },
         {
-            "name": "Python: Attach Debugger",
+            "name": "Python: Attach Debugger (Bento)",
             "type": "python",
             "request": "attach",
             "port": 5678,
             "host": "0.0.0.0"
-        },
+        }
     ]
 }
6 changes: 0 additions & 6 deletions MANIFEST.in

This file was deleted.

15 changes: 15 additions & 0 deletions README.md
@@ -4,6 +4,11 @@
 ![Lint Status](https://github.com/bento-platform/katsu/workflows/Lint/badge.svg)
 [![codecov](https://codecov.io/gh/bento-platform/katsu/branch/master/graph/badge.svg)](https://codecov.io/gh/bento-platform/katsu)

+<img src="docs/_static/katsu_logo_final.png" width="298" height="50" alt="Katsu logo" />
+
+A Phenopackets-based clinical and phenotypic metadata service for the Bento platform.
+
+
 ## Table of Contents

 - [Katsu Metadata Service](#katsu-metadata-service)
@@ -271,6 +276,16 @@ tox
 coverage html
 ```

+### Developing and debugging inside a container with VS Code (*Bento*)
+
+The development Docker image includes metadata for the
+[`devcontainer.json`](https://code.visualstudio.com/docs/devcontainers/attach-container)
+specification. Using VS Code, you can attach to a running instance of a `*-dev` Katsu
+container and launch the `Attach Debugger (Bento)` task to set breakpoints and step through
+code, as well as interact with, and Git-commit inside, the container via a remote terminal
+using the pre-configured `bento_user` account (provided the `BENTO_GIT_NAME` and
+`BENTO_GIT_EMAIL` environment variables are set).
+
 ### Terminal Commands

 Katsu ships with a variety of command-line helpers to facilitate common actions
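For reference, the `Attach Debugger (Bento)` configuration in `.vscode/launch.json` above attaches to port 5678 inside the container. A minimal sketch of the container-side counterpart, assuming the dev entrypoint starts a `debugpy` server (the entrypoint itself is not shown in this diff):

```python
# Container-side sketch: start a debugpy server for the VS Code
# "Python: Attach Debugger (Bento)" configuration (port 5678) to attach to.
import debugpy

debugpy.listen(("0.0.0.0", 5678))  # bind all interfaces so the IDE can reach the container
debugpy.wait_for_client()          # optional: block until VS Code attaches
```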
2 changes: 1 addition & 1 deletion bento.Dockerfile
@@ -1,4 +1,4 @@
-FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.27
+FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.03.22

 SHELL ["/bin/bash", "-c"]
18 changes: 13 additions & 5 deletions bento.dev.Dockerfile
@@ -1,15 +1,23 @@
-FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.27
+FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.03.22

+LABEL org.opencontainers.image.description="Local development image for Katsu."
+LABEL devcontainer.metadata='[{ \
+    "remoteUser": "bento_user", \
+    "customizations": { \
+        "vscode": { \
+            "extensions": ["ms-python.python", "eamodio.gitlens"], \
+            "settings": {"workspaceFolder": "/app"} \
+        } \
+    } \
+}]'
+
 SHELL ["/bin/bash", "-c"]

 # Install Postgres client for checking if database is ready
 # Install Poetry for dependency management
 # - For development, install dependencies inside a venv so the developer can interact / change them
 RUN apt-get update -y && \
     apt-get install -y postgresql-client && \
     rm -rf /var/lib/apt/lists/* && \
     python -m venv /env && \
     source /env/bin/activate && \
     pip install --no-cache-dir "uvicorn[standard]==0.20.0"

 # Backwards-compatible with old BentoV2 container layout
@@ -22,7 +30,7 @@ COPY poetry.toml .
 # Install production + development dependencies
 # Without --no-root, we get errors related to the code not being copied in yet.
 # But we don't want the code here, otherwise Docker cache doesn't work well.
-RUN source /env/bin/activate && poetry install --no-root
+RUN poetry install --no-root

 # Create temporary directory for downloading files etc.
 RUN mkdir -p /app/tmp
12 changes: 1 addition & 11 deletions chord_metadata_service/chord/admin.py
@@ -1,6 +1,6 @@
 from django.contrib import admin

-from .models import Project, Dataset, TableOwnership, Table
+from .models import Project, Dataset


 @admin.register(Project)
@@ -11,13 +11,3 @@ class ProjectAdmin(admin.ModelAdmin):
 @admin.register(Dataset)
 class DatasetAdmin(admin.ModelAdmin):
     pass
-
-
-@admin.register(TableOwnership)
-class TableOwnershipAdmin(admin.ModelAdmin):
-    pass
-
-
-@admin.register(Table)
-class TableAdmin(admin.ModelAdmin):
-    pass
72 changes: 36 additions & 36 deletions chord_metadata_service/chord/api_views.py
@@ -1,25 +1,33 @@
 import logging
+import json
+
+from asgiref.sync import async_to_sync, sync_to_async

 from rest_framework import status, viewsets
 from rest_framework.permissions import BasePermission, SAFE_METHODS
 from rest_framework.response import Response
 from rest_framework.settings import api_settings
+from rest_framework.decorators import action

 from django_filters.rest_framework import DjangoFilterBackend
+from chord_metadata_service.cleanup.run_all import run_all_cleanup

-from chord_metadata_service.cleanup import run_all_cleanup
 from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, JSONLDDatasetRenderer, RDFDatasetRenderer
 from chord_metadata_service.restapi.pagination import LargeResultsSetPagination

-from .models import Project, Dataset, TableOwnership, Table
+from .models import Project, Dataset, ProjectJsonSchema
 from .permissions import OverrideOrSuperUserOnly
-from .serializers import ProjectSerializer, DatasetSerializer, TableOwnershipSerializer, TableSerializer
+from .serializers import (
+    ProjectJsonSchemaSerializer,
+    ProjectSerializer,
+    DatasetSerializer
+)
 from .filters import AuthorizedDatasetFilter

 logger = logging.getLogger(__name__)


-__all__ = ["ProjectViewSet", "DatasetViewSet", "TableOwnershipViewSet", "TableViewSet"]
+__all__ = ["ProjectViewSet", "DatasetViewSet"]


 class ReadOnly(BasePermission):
@@ -61,51 +69,43 @@ class DatasetViewSet(CHORDPublicModelViewSet):

     filter_backends = [DjangoFilterBackend]
     filterset_class = AuthorizedDatasetFilter
+    lookup_url_kwarg = "dataset_id"

     serializer_class = DatasetSerializer
     renderer_classes = tuple(CHORDModelViewSet.renderer_classes) + (JSONLDDatasetRenderer, RDFDatasetRenderer,)
     queryset = Dataset.objects.all().order_by("title")

+    @action(detail=True, methods=['get'])
+    def dats(self, _request, *_args, **_kwargs):
+        """
+        Retrieve a specific DATS file for a given dataset.
+        Return the DATS file as a JSON response or an error if not found.
+        """
+        dataset = self.get_object()
+        return Response(json.loads(dataset.dats_file))

-class TableOwnershipViewSet(CHORDPublicModelViewSet):
-    """
-    get:
-    Return a list of table-(dataset|dataset,biosample) relationships
-
-    post:
-    Create a new relationship between a dataset (and optionally a specific biosample) and a table
-    in a data service
-    """
-
-    queryset = TableOwnership.objects.all().order_by("table_id")
-    serializer_class = TableOwnershipSerializer
+    @async_to_sync
+    async def destroy(self, request, *args, **kwargs):
+        get_obj_async = sync_to_async(self.get_object)
+
+        dataset = await get_obj_async()
+        await dataset.adelete()
+
+        logger.info(f"Running cleanup after deleting dataset {dataset.identifier} via DRF API")
+        n_removed = await run_all_cleanup()
+        logger.info(f"Cleanup: removed {n_removed} objects in total")
+        return Response(status=status.HTTP_204_NO_CONTENT)


-class TableViewSet(CHORDPublicModelViewSet):
+class ProjectJsonSchemaViewSet(CHORDPublicModelViewSet):
     """
     get:
-    Return a list of tables
+    Return list of ProjectJsonSchema
     post:
-    Create a new table
+    Create a new ProjectJsonSchema
     """

-    # TODO: Create TableOwnership if needed - here or model?
-
-    queryset = Table.objects.all().prefetch_related("ownership_record").order_by("ownership_record_id")
-    serializer_class = TableSerializer
-
-    def destroy(self, request, *args, **kwargs):
-        # First, delete the table record itself
-        # - use the cascade from the ownership record rather than the default DRF behaviour
-        table = self.get_object()
-        table_id = table.ownership_record_id
-        table.ownership_record.delete()
-        table.delete()
-
-        # Then, run cleanup
-        logger.info(f"Running cleanup after deleting table {table_id} via DRF API")
-        n_removed = run_all_cleanup()
-        logger.info(f"Cleanup: removed {n_removed} objects in total")
-
-        return Response(status=status.HTTP_204_NO_CONTENT)
+    queryset = ProjectJsonSchema.objects.all().order_by("project_id")
+    serializer_class = ProjectJsonSchemaSerializer
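The async `destroy` override above uses asgiref's bridging utilities: DRF invokes `destroy()` synchronously, so the async body is wrapped with `async_to_sync`, while the sync-only `self.get_object` is bridged the other way with `sync_to_async`. A minimal standalone sketch of the same pattern (names here are illustrative, not from Katsu):

```python
from asgiref.sync import async_to_sync, sync_to_async

def load_record():
    # Hypothetical blocking helper, standing in for self.get_object()
    return {"id": 1}

@async_to_sync
async def destroy_record():
    # Bridge the sync helper into the async body, as the view does for get_object()
    record = await sync_to_async(load_record)()
    return f"deleted record {record['id']}"

print(destroy_record())  # callable synchronously despite the async body
```

With `lookup_url_kwarg = "dataset_id"` and the detail-level `@action`, the new DATS endpoint presumably resolves to something like `/datasets/<dataset_id>/dats` under DRF's default routing, though the exact URL prefix is not shown in this diff.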
5 changes: 5 additions & 0 deletions chord_metadata_service/chord/data_types.py
@@ -14,6 +14,7 @@
     "DATA_TYPES",
 ]

+
 DATA_TYPE_EXPERIMENT = "experiment"
 DATA_TYPE_EXPERIMENT_RESULT = "experiment_result"
 DATA_TYPE_PHENOPACKET = "phenopacket"
@@ -23,13 +24,15 @@
 DATA_TYPES = {
     DATA_TYPE_EXPERIMENT: {
         "label": "Experiments",
+        "queryable": True,
         "schema": EXPERIMENT_SEARCH_SCHEMA,
         "metadata_schema": {
            "type": "object",  # TODO
         },
     },
     DATA_TYPE_PHENOPACKET: {
         "label": settings.KATSU_PHENOPACKET_LABEL,
+        "queryable": True,
         "schema": PHENOPACKET_SEARCH_SCHEMA,
         "metadata_schema": {
             "type": "object",  # TODO
@@ -44,6 +47,7 @@
     # },
     DATA_TYPE_READSET: {
         "label": "Readsets",
+        "queryable": False,
         "schema": {
             "file_format": EXPERIMENT_RESULT_SCHEMA["properties"]["file_format"]
         },
@@ -53,6 +57,7 @@
     },
     DATA_TYPE_EXPERIMENT_RESULT: {
         "label": "Experiment Results",
+        "queryable": False,
         "schema": EXPERIMENT_RESULT_SCHEMA,
         "metadata_schema": {
             "type": "object"
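The new `queryable` flag marks which data types support search; a hypothetical consumer of it (not part of this diff) might look like:

```python
# Sketch: list the data types that can be searched, assuming DATA_TYPES as defined above.
from chord_metadata_service.chord.data_types import DATA_TYPES

queryable_types = [dt for dt, spec in DATA_TYPES.items() if spec.get("queryable")]
print(queryable_types)  # expected: ['experiment', 'phenopacket']
```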
6 changes: 3 additions & 3 deletions chord_metadata_service/chord/export/cbioportal.py
@@ -90,15 +90,15 @@ def study_export(get_path: Callable[[str], str], dataset_id: str):
     # Export patients.
     with open(get_path(PATIENT_DATA_FILENAME), "w", newline="\n") as file_patient:
         # Note: plural in `phenopackets` is intentional (related_name property in model)
-        indiv = Individual.objects.filter(phenopackets__table__ownership_record__dataset_id=dataset.identifier)
+        indiv = Individual.objects.filter(phenopackets__dataset_id=dataset.identifier)
         individual_export(indiv, file_patient)

     with open(get_path(PATIENT_META_FILENAME), "w", newline="\n") as file_patient_meta:
         clinical_meta_export(cbio_study_id, PATIENT_DATATYPE, file_patient_meta)

     # Export samples
     with open(get_path(SAMPLE_DATA_FILENAME), "w", newline="\n") as file_sample:
-        sampl = pm.Biosample.objects.filter(phenopacket__table__ownership_record__dataset_id=dataset.identifier)
+        sampl = pm.Biosample.objects.filter(phenopacket__dataset_id=dataset.identifier)
         sample_export(sampl, file_sample)

     with open(get_path(SAMPLE_META_FILENAME), "w", newline="\n") as file_sample_meta:
@@ -109,7 +109,7 @@ def study_export(get_path: Callable[[str], str], dataset_id: str):
          open(get_path(CASE_LIST_SEQUENCED), "w", newline="\n") as file_case_list:
         exp_res = (
             ExperimentResult.objects
-            .filter(experiment__table__ownership_record__dataset_id=dataset.identifier, file_format="MAF")
+            .filter(experiment__dataset_id=dataset.identifier, file_format="MAF")
             .annotate(biosample_id=F("experiment__biosample"))
         )
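The shortened ORM paths above reflect the removal of the table layer: exports now traverse a direct `dataset` foreign key instead of `table__ownership_record__dataset_id`. A sketch of the flattened relationship these filters assume (field names and app labels inferred from the diff, not a verbatim copy of the Katsu models):

```python
from django.db import models

class Phenopacket(models.Model):
    # related_name="phenopackets" is what makes
    # Individual.objects.filter(phenopackets__...) traversals work.
    subject = models.ForeignKey("patients.Individual", on_delete=models.CASCADE,
                                related_name="phenopackets")
    # New direct link to Dataset, replacing the old
    # phenopacket -> table -> ownership_record -> dataset chain.
    dataset = models.ForeignKey("chord.Dataset", on_delete=models.CASCADE)
```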
