From 3a5ac95284d4fcfd9f8a83c5824813cf1fcc3a6a Mon Sep 17 00:00:00 2001 From: Enrique Garcia <40355845+garciagenrique@users.noreply.github.com> Date: Thu, 13 Jun 2024 10:36:18 +0200 Subject: [PATCH] VRE - interTwin DL integration (#45) * build image for vre-interwtin integration * add asyncssh script and lib into the container * fix permissions on docker file * rename asycn ssh script inside the image * add jupyterhub package in the singluser image * update python executer for asynchssh image * fix jhub version to 1.5.1 * fix jhub version to 1.5.0 * create rucio config file in hooks * fix build * fix paths in dockerfile * fix permissions * fix permissions 2 * touch config file * comment creation of rucio config file * rebuild wiht latests version * rebuild * rebuild 2 * fix server instance config * build with rucio cfg * build with rucio cfg 2 * build with rucio cfg 2.5 * build with rucio cfg 2.5 * build with rucio cfg 3 * update configure * add pre hooks in the ssh script * create rucio.cfg file within the setup.sh file * fix permissions and rucio URLs * hardcode jupyter config * hardcode jupyter config * hardcode jupyter config 2 * hardcode jupyter config 3 * hardcode jupyter config 4 * hardcode jupyter config 3 * hardcode jupyter config 2 * hardcode jupyter config * hardcode jupyter config json * test with download mode * test with writable /opt/rucio dir * not trigger dump of config * replica mode and update anchors * replica mode and update trust certs 2 * replica mode * rename dir and add README with documentation * restore CI to main --- vre-singleuser-interTwin/Dockerfile | 99 ++++++++++++++ vre-singleuser-interTwin/README.md | 119 +++++++++++++++++ vre-singleuser-interTwin/asyncssh_config.py | 55 ++++++++ vre-singleuser-interTwin/configure.py | 126 ++++++++++++++++++ .../ipython_kernel_config.json | 6 + vre-singleuser-interTwin/setup.sh | 21 +++ 6 files changed, 426 insertions(+) create mode 100644 vre-singleuser-interTwin/Dockerfile create mode 100644 
vre-singleuser-interTwin/README.md create mode 100644 vre-singleuser-interTwin/asyncssh_config.py create mode 100644 vre-singleuser-interTwin/configure.py create mode 100644 vre-singleuser-interTwin/ipython_kernel_config.json create mode 100644 vre-singleuser-interTwin/setup.sh diff --git a/vre-singleuser-interTwin/Dockerfile b/vre-singleuser-interTwin/Dockerfile new file mode 100644 index 0000000..4491ee6 --- /dev/null +++ b/vre-singleuser-interTwin/Dockerfile @@ -0,0 +1,99 @@ +FROM jupyter/scipy-notebook:python-3.9.13 +LABEL author="Image based on the work by Muhammad Aditya Hilmy - ESCAPE WP2 2020" +LABEL maintainer="VRE Team @ CERN 23/24 - E. Garcia, G. Guerrieri" +LABEL org.opencontainers.image.source="https://github.com/vre-hub/environments" +ARG BUILD_DATE +LABEL org.label-schema.build-date=$BUILD_DATE + +RUN pip install --no-cache-dir --upgrade pip + +USER $NB_UID + +RUN conda install -y -n base mamba \ + && mamba install -y -c conda-forge python-gfal2 asyncssh jupyterhub==1.5.0 \ + && conda clean --all -f -y + +# Install rucio-jupyterlab (pinned to 0.10.0, the release used with jlab v=3) +RUN pip install --no-cache-dir rucio-jupyterlab==0.10.0 \ + && jupyter serverextension enable --py rucio_jupyterlab --sys-prefix + +USER root + +RUN apt-get update -y \ + && apt-get install -y build-essential curl voms-clients-java software-properties-common \ + && apt-get clean -y \ + && rm /opt/conda/bin/voms-proxy-init \ + && ln -s /usr/bin/voms-proxy-init /opt/conda/bin/voms-proxy-init + +# ESCAPE grid-security and VOMS setup +RUN wget -q -O - https://dist.eugridpma.info/distribution/igtf/current/GPG-KEY-EUGridPMA-RPM-3 | apt-key add - + +RUN apt-get update \ + && add-apt-repository 'deb http://repository.egi.eu/sw/production/cas/1/current egi-igtf core' \ + && apt-get -y install ca-policy-egi-core + +RUN mkdir -p /etc/vomses \ + && wget https://indigo-iam.github.io/escape-docs/voms-config/voms-escape.cloud.cnaf.infn.it.vomses -O /etc/vomses/voms-escape.cloud.cnaf.infn.it.vomses \ + && mkdir -p /etc/grid-security/vomsdir/escape \ + && wget 
https://indigo-iam.github.io/escape-docs/voms-config/voms-escape.cloud.cnaf.infn.it.lsc -O /etc/grid-security/vomsdir/escape/voms-escape.cloud.cnaf.infn.it.lsc + +# Setup merged CERN CA file on Ubuntu based images: the three CERN CA certificates fetched below are concatenated into /certs/rucio_ca.pem. +# NOTE(review): the original comment referenced a `CERN-bundle.pem` download but was left unfinished; nothing below is copied into a system CA directory, so confirm the final update-ca-certificates call is still needed. +RUN mkdir /certs \ + && touch /certs/rucio_ca.pem \ + && curl -fsSL 'https://cafiles.cern.ch/cafiles/certificates/CERN%20Root%20Certification%20Authority%202.crt' | openssl x509 -inform DER -out /tmp/cernrootca2.crt \ + && curl -fsSL 'https://cafiles.cern.ch/cafiles/certificates/CERN%20Grid%20Certification%20Authority(1).crt' -o /tmp/cerngridca.crt \ + && curl -fsSL 'https://cafiles.cern.ch/cafiles/certificates/CERN%20Certification%20Authority.crt' -o /tmp/cernca.crt \ + && cat /tmp/cernrootca2.crt >> /certs/rucio_ca.pem \ + && cat /tmp/cerngridca.crt >> /certs/rucio_ca.pem \ + && cat /tmp/cernca.crt >> /certs/rucio_ca.pem \ + && rm /tmp/*.crt \ + && update-ca-certificates + +# # Install trust anchors +# RUN curl https://ca.cern.ch/cafiles/certificates/CERN%20Root%20Certification%20Authority%202.crt -o /etc/pki/ca-trust/source/anchors/1.crt &&\ +# curl https://ca.cern.ch/cafiles/certificates/CERN%20Grid%20Certification%20Authority.crt -o /etc/pki/ca-trust/source/anchors/2.crt &&\ +# curl https://ca.cern.ch/cafiles/certificates/CERN%20Grid%20Certification%20Authority\(1\).crt -o /etc/pki/ca-trust/source/anchors/3.crt &&\ +# curl http://signet-ca.ijs.si/pub/cacert/signet02cacert.crt -o /etc/pki/ca-trust/source/anchors/4.crt &&\ +# curl https://doku.tid.dfn.de/_media/de:dfnpki:ca:tcs-server-certificate-ca-bundle.tar -o geant-bundle.tar &&\ +# tar xf geant-bundle.tar &&\ +# cp tcs-server-certificate-ca-bundle/*.pem /etc/pki/ca-trust/source/anchors/ &&\ +# rm -rf geant-bundle.tar tcs-server-certificate-ca-bundle &&\ +# update-ca-trust + +# Add async ssh script +COPY asyncssh_config.py /opt/ssh/jupyterhub-singleuser +RUN fix-permissions /opt/ssh/jupyterhub-singleuser \ + && chown -R 
$NB_UID /opt/ssh/jupyterhub-singleuser \ + && chmod +x /opt/ssh/jupyterhub-singleuser + +# Setup extension Rucio instance config +COPY configure.py /opt/setup-rucio-jupyterlab/configure.py +RUN fix-permissions /opt/setup-rucio-jupyterlab/configure.py \ + && chown -R $NB_UID /opt/setup-rucio-jupyterlab/configure.py \ + && chmod +x /opt/setup-rucio-jupyterlab/configure.py + +COPY setup.sh /usr/local/bin/setup.sh +RUN fix-permissions /usr/local/bin/setup.sh \ + && sed -i -e 's/\r$//' /usr/local/bin/setup.sh \ + && chmod +x /usr/local/bin/setup.sh + +RUN mkdir -p /opt/rucio/etc \ +# && touch /opt/rucio/etc/rucio.cfg \ + && fix-permissions /opt/rucio/etc \ + && chown -R ${NB_UID}:${NB_GID} /opt/rucio/etc + +# && /usr/local/bin/setup.sh +#RUN chown -R $NB_UID $HOME/.jupyter/jupyter_notebook_config.json +# && chown -R $NB_UID /etc/jupyter/jupyter_notebook_config.py + + +#ENV IPYTHONDIR=/etc/ipython +#ADD ipython_kernel_config.json /etc/ipython/profile_default/ipython_kernel_config.json +#RUN chown -R $NB_UID /etc/ipython +ENV JUPYTER_ENABLE_LAB=yes + +WORKDIR $HOME +USER $NB_UID + +CMD ["setup.sh", "start-notebook.sh"] diff --git a/vre-singleuser-interTwin/README.md b/vre-singleuser-interTwin/README.md new file mode 100644 index 0000000..64bebdc --- /dev/null +++ b/vre-singleuser-interTwin/README.md @@ -0,0 +1,119 @@ +# Integration of the Rucio JupyterLab extension in the interTwin infrastructure + +The extension interacts with the interTwin RUCIO instance, running at DESY. The Jupyter session is spawned by [interLink](https://github.com/interTwin-eu/interLink), a package able to run a k8s pod (and thus any containerised application) on remote resources. 
+ +Please note that this image is still under development and will be constantly evolving. + +## Try this image + +**Needs both an EGI CheckIn and an interTwin Rucio active account** + + - Connect to the [cluster](https://jhub.131.154.98.40.myip.cloud.infn.it/) + - Use the following image within the `Select your desired image` field, and select the desired offload mode. You must enable an offloading mode to be able to interact with Rucio. + ```bash + docker://ghcr.io/vre-hub/vre-singleuser-intertwin_dev:sha-7d09a10 + ``` + +### Setup your image to be compatible with interLink + +In the Dockerfile of your image, install the following packages and add the `asyncssh_config.py` to the container. You can also have a look at the [Dockerfile](./Dockerfile). + +```Dockerfile +# Spawning a jupyterLab session needs any jupyter BASE layer +FROM jupyter/scipy-notebook:python-3.9.13 + +RUN conda install -y -n base mamba \ + && mamba install -y -c conda-forge asyncssh jupyterhub==1.5.0 \ + && conda clean --all -f -y + +ADD asyncssh_config.py /opt/ssh/jupyterhub-singleuser +RUN fix-permissions /opt/ssh/jupyterhub-singleuser \ + && chown -R $NB_UID /opt/ssh/jupyterhub-singleuser \ + && chmod +x /opt/ssh/jupyterhub-singleuser +``` +with +```python +$ cat asyncssh_config.py + +#!/opt/conda/bin/python +# -*- coding: utf-8 -*- +# +# D. 
Ciangottini +# +import re +import sys +from jupyterhub.singleuser import main +import asyncssh +import logging +import sys +import asyncio +import os +from subprocess import Popen + +ssh_host = os.environ.get("JHUB_HOST") +ssh_url_port = os.environ.get("SSH_PORT") +username = os.environ.get("JUPYTERHUB_USER") +token = os.environ.get("JUPYTERHUB_API_TOKEN") + +fwd_port = os.environ.get("FWD_PORT") + +async def run_client(): + async with asyncssh.connect( + host=ssh_host, + port=int(ssh_url_port), + username=username, + password=token, + known_hosts=None, + ) as conn: + conn.set_keepalive(interval=14.0, count_max=10) + listener = await conn.forward_remote_port( + "0.0.0.0", + int(fwd_port), + "0.0.0.0", + int(fwd_port), + ) + await listener.wait_closed() + + +if __name__ == '__main__': + print("Connecting ssh...") + loop = asyncio.get_event_loop() + loop.create_task(run_client()) + + print("Starting JLAB") + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) +``` + +### Install the Rucio [JupyterLab extension](https://github.com/rucio/jupyterlab-extension) in the container + +Within your Dockerfile add the following lines. Check, and adapt if needed, the content of the bash and python files that also need to be added to the container. 
+ +```Dockerfile +# Installation of the RUCIO JupyterLab extension + +RUN conda install -y -n base mamba \ + && mamba install -y -c conda-forge python-gfal2 + +# jupyterlab extension compatible with jlab v=3.X.Y +RUN pip install rucio-jupyterlab==0.10.0 \ + && jupyter serverextension enable --py rucio_jupyterlab --sys-prefix + +ADD configure.py /opt/setup-rucio-jupyterlab/configure.py +RUN fix-permissions /opt/setup-rucio-jupyterlab/configure.py \ + && chown -R $NB_UID /opt/setup-rucio-jupyterlab/configure.py \ + && chmod +x /opt/setup-rucio-jupyterlab/configure.py + +ADD setup.sh /usr/local/bin/setup.sh +RUN fix-permissions /usr/local/bin/setup.sh \ + && sed -i -e 's/\r$//' /usr/local/bin/setup.sh \ + && chmod +x /usr/local/bin/setup.sh + +RUN mkdir -p /opt/rucio/etc \ + && fix-permissions /opt/rucio/etc \ + && chown -R ${NB_UID}:${NB_GID} /opt/rucio/etc + +ENV JUPYTER_ENABLE_LAB=yes + +CMD ["setup.sh", "start-notebook.sh"] +``` \ No newline at end of file diff --git a/vre-singleuser-interTwin/asyncssh_config.py b/vre-singleuser-interTwin/asyncssh_config.py new file mode 100644 index 0000000..29119ab --- /dev/null +++ b/vre-singleuser-interTwin/asyncssh_config.py @@ -0,0 +1,55 @@ +#!/opt/conda/bin/python +# -*- coding: utf-8 -*- +# +# D. 
Ciangottini +# +import re +import sys +from jupyterhub.singleuser import main +import asyncssh +import logging +import sys +import asyncio +import os +from subprocess import Popen + +ssh_host = os.environ.get("JHUB_HOST") +ssh_url_port = os.environ.get("SSH_PORT") +username = os.environ.get("JUPYTERHUB_USER") +token = os.environ.get("JUPYTERHUB_API_TOKEN") + +fwd_port = os.environ.get("FWD_PORT") + +async def run_client(): + async with asyncssh.connect( + host=ssh_host, + port=int(ssh_url_port), + username=username, + password=token, + known_hosts=None, # NOTE(review): None disables SSH host-key verification - confirm this is acceptable + ) as conn: + conn.set_keepalive(interval=14.0, count_max=10) + listener = await conn.forward_remote_port( + "0.0.0.0", + int(fwd_port), + "0.0.0.0", + int(fwd_port), + ) + await listener.wait_closed() + + +if __name__ == '__main__': + print("Connecting ssh...") + loop = asyncio.get_event_loop() + loop.create_task(run_client()) + + print("Configuring Rucio extension...") + p = Popen(['/usr/local/bin/setup.sh']) + # Block until setup.sh exits; wait() sleeps instead of busy-polling a CPU core. + p.wait() + + print("Starting JLAB") + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) + + diff --git a/vre-singleuser-interTwin/configure.py b/vre-singleuser-interTwin/configure.py new file mode 100644 index 0000000..4235fbf --- /dev/null +++ b/vre-singleuser-interTwin/configure.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# Derived from https://gitlab.cern.ch/escape-wp2/docker-images/-/blob/master/datalake-singleuser/bin/configure.py (restricted access). 
+ +import os +import json +import configparser + +#HOME = '/home/jovyan' +#HOME = '/ceph/hpc/home/ciangottinid' + +def write_jupyterlab_config(): + HOME = os.getenv('HOME', '/ceph/hpc/home/ciangottinid') + + file_path = HOME + '/.jupyter/jupyter_notebook_config.json' + if not os.path.isfile(file_path): + os.makedirs(HOME + '/.jupyter/', exist_ok=True) + else: + config_file = open(file_path, 'r') + config_payload = config_file.read() + config_file.close() + + try: + config_json = json.loads(config_payload) + except: # NOTE(review): bare except; presumably it also covers config_payload being unset when the file did not exist - confirm + config_json = {} + +# Looking at the rucio-jupyterlab configuration schema: https://github.com/rucio/jupyterlab-extension/blob/master/rucio_jupyterlab/config/schema.py#L101 +# either ("destination_rse", "rse_mount_path") or ("rucio_ca_cert") are required env vars, even if they are defined in the jhub manifest. +# Adding 'rucio_base_url' too - from debugging experience + + # instance_config = { + # "name": os.getenv('RUCIO_NAME', 'default'), + # "display_name": os.getenv('RUCIO_DISPLAY_NAME', 'Default Instance'), + # "rucio_base_url": os.getenv('RUCIO_BASE_URL', 'DEFAULT rucio base url'), + # "rucio_auth_url": os.getenv('RUCIO_AUTH_URL'), + # "rucio_webui_url": os.getenv('RUCIO_WEBUI_URL'), + # "rucio_ca_cert": os.getenv('RUCIO_CA_CERT', 'cacert.pem'), + # "site_name": os.getenv('RUCIO_SITE_NAME'), + # "vo": os.getenv('RUCIO_VO'), + # "voms_enabled": os.getenv('RUCIO_VOMS_ENABLED', '0') == '1', + # "voms_vomses_path": os.getenv('RUCIO_VOMS_VOMSES_PATH'), + # "voms_certdir_path": os.getenv('RUCIO_VOMS_CERTDIR_PATH'), + # "voms_vomsdir_path": os.getenv('RUCIO_VOMS_VOMSDIR_PATH'), + # "destination_rse": os.getenv('RUCIO_DESTINATION_RSE', 'DEFAULT rse destination'), + # "rse_mount_path": os.getenv('RUCIO_RSE_MOUNT_PATH', 'DEFAULT rse mount path'), + # "replication_rule_lifetime_days": int(os.getenv('RUCIO_REPLICATION_RULE_LIFETIME_DAYS')) if os.getenv('RUCIO_REPLICATION_RULE_LIFETIME_DAYS') else None, + # "path_begins_at": int(os.getenv('RUCIO_PATH_BEGINS_AT', 
'0')), + # "mode": os.getenv('RUCIO_MODE', 'replica'), + # "wildcard_enabled": os.getenv('RUCIO_WILDCARD_ENABLED', '0') == '1', + # "oidc_auth": os.getenv('RUCIO_OIDC_AUTH'), + # "oidc_env_name": os.getenv('RUCIO_OIDC_ENV_NAME'), + # "oidc_file_name": os.getenv('RUCIO_OIDC_FILE_NAME'), + # } + + # instance_config = {k: v for k, + # v in instance_config.items() if v is not None} + # config_json['RucioConfig'] = { + # 'instances': [instance_config], + # "default_instance": os.getenv('RUCIO_DEFAULT_INSTANCE',), + # "default_auth_type": os.getenv('RUCIO_DEFAULT_AUTH_TYPE'), + # } + + # Debugging and hardcoding from here + + instance_config = { + "name": "rucio-intertwin-testbed.desy.de", + "display_name": "interTwinRUCIO", + "rucio_base_url": "https://rucio-intertwin-testbed.desy.de", + "rucio_auth_url": "https://rucio-intertwin-testbed-auth.desy.de", + "rucio_ca_cert": "/opt/conda/lib/python3.9/site-packages/certifi/cacert.pem", + "site_name": "VEGA", + "voms_enabled": os.getenv('RUCIO_VOMS_ENABLED', '0') == '1', + "destination_rse": "VEGA-DCACHE", + "rse_mount_path": "/dcache/sling.si/projects/intertwin", + "path_begins_at": 4, + "mode": "replica", + #"mode": "download", + "wildcard_enabled": os.getenv('RUCIO_WILDCARD_ENABLED', '0') == '0', # NOTE(review): compares with '0' while the commented-out config above uses '1', so wildcards are on unless the variable is set to something else - confirm intended + "oidc_auth": "env", + "oidc_env_name": "RUCIO_ACCESS_TOKEN" + } + + instance_config = {k: v for k, + v in instance_config.items() if v is not None} + config_json['RucioConfig'] = { + 'instances': [instance_config], + "default_instance": os.getenv('RUCIO_DEFAULT_INSTANCE', 'rucio-intertwin-testbed.desy.de'), + "default_auth_type": os.getenv('RUCIO_DEFAULT_AUTH_TYPE', 'oidc'), + } + + # up to here + + config_file = open(file_path, 'w') + config_file.write(json.dumps(config_json, indent=2)) + config_file.close() + +def write_rucio_config(): + + rucio_config = configparser.ConfigParser() + + client_config = { + 'rucio_host': os.getenv('RUCIO_BASE_URL', 'https://rucio-intertwin-testbed.desy.de'), + 'auth_host': 
os.getenv('RUCIO_AUTH_URL', 'https://rucio-intertwin-testbed-auth.desy.de'), + 'ca_cert': os.getenv('RUCIO_CA_CERT', '/certs/rucio_ca.pem'), + 'auth_type': os.getenv('RUCIO_AUTH_TYPE', 'oidc'), # 'x509' or 'oidc' + 'account': os.getenv('RUCIO_ACCOUNT', '$RUCIO_ACCOUNT'), # This is the RUCIO account name, need to be mapped from idp + 'oidc_polling': 'true', + 'oidc_scope': 'openid profile offline_access eduperson_entitlement', + #'username': os.getenv('RUCIO_USERNAME', ''), + #'password': os.getenv('RUCIO_PASSWORD', ''), + 'auth_token_file_path': '/tmp/rucio_oauth.token', + 'request_retries': 3, + 'protocol_stat_retries': 6 + } + client_config = dict((k, v) for k, v in client_config.items() if v) + rucio_config['client'] = client_config + + if not os.path.isfile('/opt/rucio/etc/rucio.cfg'): + os.makedirs('/opt/rucio/etc/', exist_ok=True) + + with open('/opt/rucio/etc/rucio.cfg', 'w') as f: + rucio_config.write(f) + +if __name__ == '__main__': + write_jupyterlab_config() + #write_rucio_config() \ No newline at end of file diff --git a/vre-singleuser-interTwin/ipython_kernel_config.json b/vre-singleuser-interTwin/ipython_kernel_config.json new file mode 100644 index 0000000..f5e15bb --- /dev/null +++ b/vre-singleuser-interTwin/ipython_kernel_config.json @@ -0,0 +1,6 @@ +{ + "IPKernelApp": { + "extensions": ["rucio_jupyterlab.kernels.ipython"] + } + } + diff --git a/vre-singleuser-interTwin/setup.sh b/vre-singleuser-interTwin/setup.sh new file mode 100644 index 0000000..1571291 --- /dev/null +++ b/vre-singleuser-interTwin/setup.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e +python /opt/setup-rucio-jupyterlab/configure.py + +# Store the OIDC access token in the token file (the rucio.cfg creation below is currently disabled) +mkdir -p /certs /tmp; +printf '%s' "$RUCIO_ACCESS_TOKEN" > /tmp/rucio_oauth.token; +# mkdir -p /opt/rucio/etc; +# echo "[client]" >> /opt/rucio/etc/rucio.cfg; +# echo "rucio_host = https://rucio-intertwin-testbed.desy.de" >> /opt/rucio/etc/rucio.cfg; +# echo "auth_host = https://rucio-intertwin-testbed-auth.desy.de" >> 
/opt/rucio/etc/rucio.cfg; +# #echo "ca_cert = /certs/rucio_ca.pem" >> /opt/rucio/etc/rucio.cfg; +# echo "ca_cert = /opt/conda/lib/python3.9/site-packages/certifi/cacert.pem" >> /opt/rucio/etc/rucio.cfg; +# echo "account = $JUPYTERHUB_USER" >> /opt/rucio/etc/rucio.cfg; +# echo "auth_type = oidc" >> /opt/rucio/etc/rucio.cfg; +# echo "oidc_audience = rucio-testbed" >> /opt/rucio/etc/rucio.cfg; +# echo "oidc_polling = true" >> /opt/rucio/etc/rucio.cfg; +# echo "oidc_scope = openid profile offline_access eduperson_entitlement" >> /opt/rucio/etc/rucio.cfg; +# echo "auth_token_file_path = /tmp/rucio_oauth.token" >> /opt/rucio/etc/rucio.cfg; + +exec "$@" \ No newline at end of file