Skip to content

Commit

Permalink
and, why not. Share arbitrary sets of files via HTTP/FTP/SFTP/WEBDAV/…
Browse files Browse the repository at this point in the history
…RESTIC/... w/some access controls and expiring access-- poc w/rclone
  • Loading branch information
John Major committed Jul 24, 2024
1 parent 11a66cf commit 47cfa4f
Show file tree
Hide file tree
Showing 9 changed files with 257 additions and 36 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ _bloom natively will support arbitrarily defined labware, a 96w plate is just on
### Hardware Supported
_see build test badges above for all supported platforms_
* Mac (14+)
* `brew install coreutils` is required: it provides the `gtimeout` command that some rclone functionality relies on. Run `alias timeout=gtimeout` so that `timeout` resolves to `gtimeout` under zsh.
* Ubuntu 22+
* Centos 9

Expand Down
85 changes: 78 additions & 7 deletions bloom_lims/bdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import sys
import re
import subprocess

import random
import string
Expand Down Expand Up @@ -3663,22 +3664,25 @@ class BloomFileReference(BloomObj):
def __init__(self, bdb):
super().__init__(bdb)

def create_file_reference(self, file_euid, reference_type, visibility, valid_duration, start_datetime=None, end_datetime=None, comments="", status="active", presigned_url="", file_set_euid=None):


def create_file_reference(self, file_euid=None, reference_type='presigned', visibility='public', valid_duration=0, start_datetime=None, end_datetime=None, comments="", status="active", presigned_url="", file_set_euid=None, rclone_config={}):
"""
Create a shared file reference.
:param file_euid: EUID of the file.
:param reference_type: Type of reference. 'presigned'.
:param reference_type: Type of reference. 'presigned' or 'rclone http'.
:param visibility: 'public' or 'controlled'.
:param valid_duration: Duration in seconds for which the reference is valid.
:param start_datetime: (Optional) Start datetime for the reference. Defaults to now.
:param end_datetime: (Optional) End datetime for the reference. Calculated from valid_duration if not provided.
:param comments: Additional comments for the reference.
:param status: Status of the reference. Defaults to 'active'.
:param rclone_config: Configuration for rclone http serve {'port': 8080, 'host': '0.0.0.0', 'user': 'user', 'passwd': 'passwd', 'bucket':'xxx-dewey-0'}.
:return: Created file reference instance.
"""

start_datetime = start_datetime or datetime.utcnow()
start_datetime = start_datetime or datetime.now(UTC)
end_datetime = end_datetime or (start_datetime + timedelta(seconds=valid_duration))

file_reference_metadata = {
Expand All @@ -3689,7 +3693,8 @@ def create_file_reference(self, file_euid, reference_type, visibility, valid_dur
"valid_duration": valid_duration,
"start_datetime": start_datetime.isoformat(),
"end_datetime": end_datetime.isoformat(),
"presigned_url": presigned_url
"presigned_url": presigned_url,
"rclone_config": rclone_config
}

file_reference = self.create_instance(
Expand All @@ -3698,13 +3703,79 @@ def create_file_reference(self, file_euid, reference_type, visibility, valid_dur
)[0].euid,
{"properties": file_reference_metadata},
)
self.create_generic_instance_lineage_by_euids(
file_euid, file_reference.euid, reference_type
)

if reference_type.startswith('rclone'):
# Start the rclone http serve

filter_fn = f"logs/{file_reference.euid}_filter.txt"
fh = open(filter_fn, "w")

fs = self.get_by_euid(file_set_euid)
for x in fs.parent_of_lineages:
print(x.child_instance.euid)
fh.write(f"+ {x.child_instance.json_addl['properties']['current_s3_key']}\n")
fh.write('- *\n')
fh.close()


cmd = f"timeout {valid_duration} {reference_type} blms3:{rclone_config['bucket']} --filter-from logs/{file_reference.euid}_filter.txt --addr {rclone_config['host']}:{rclone_config['port']} --user {rclone_config['user']} --pass {rclone_config['passwd']} 2>&1 > logs/{file_reference.euid}_rclone.log &"
logging.info(f"Starting rclone http serve with command: {cmd}")

file_reference.json_addl['properties']['rclone_cmd'] = cmd
flag_modified(file_reference, "json_addl")

try:
# Start the command in the background
process = subprocess.Popen(
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)

# Log that the process has started
logging.info(f"rclone command started with PID: {process.pid}")

# Optionally, you can wait a moment and then check if the process is still running
process.communicate(timeout=5)
if process.poll() is None:
logging.info(f"rclone is running successfully in the background.")
file_reference.json_addl['properties']['rclone_pid'] = process.pid
file_reference.json_addl['properties']['rclone_status'] = 'running'
flag_modified(file_reference, "json_addl")
else:
file_reference.json_addl['properties']['rclone_status'] = 'error'
flag_modified(file_reference, "json_addl")
logging.error(f"rclone command failed to start properly. Error: {process.stderr.read().decode().strip()}")

except subprocess.TimeoutExpired:
logging.info(f"rclone command started and is running in the background.")
file_reference.json_addl['properties']['rclone_pid'] = process.pid
file_reference.json_addl['properties']['rclone_status'] = 'running bkgrnd'
flag_modified(file_reference, "json_addl")
except Exception as e:
logging.error(f"An error occurred while starting rclone: {str(e)}")
file_reference.json_addl['properties']['rclone_status'] = 'error'
flag_modified(file_reference, "json_addl")

self.session.commit()
logging.info(f"{cmd} was executed... see logs")




if file_euid not in [None]:
self.create_generic_instance_lineage_by_euids(
file_euid, file_reference.euid, reference_type
)

if file_set_euid not in [None]:
self.create_generic_instance_lineage_by_euids(
file_set_euid, file_reference.euid, "from_set"
)



self.session.commit()
return file_reference

Expand Down
43 changes: 43 additions & 0 deletions bloom_lims/bin/read_rclone_envs.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Set the path to your .aws directory
AWS_DIR="$HOME/.aws"
CREDENTIALS_FILE="$AWS_DIR/credentials"
CONFIG_FILE="$AWS_DIR/config"

# Function to read the AWS credentials
#
# Exports AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for one profile, read
# from the INI-style file named by $CREDENTIALS_FILE.
#
# Arguments:
#   $1 - profile name, i.e. the text between the [brackets] of a section header.
#
# The whole section is scanned (up to the next [header]) rather than a fixed
# number of lines after the header, and both "key = value" and "key=value"
# spellings are accepted. A key that is not found is exported as an empty
# string.
read_aws_credentials() {
    local profile=$1
    local aws_access_key_id aws_secret_access_key

    # awk program: track whether we are inside the requested section and print
    # the value of the first line in it whose key matches. Splitting on the
    # first "=" keeps values that themselves contain "=" intact.
    local ini_lookup='
        /^[ \t]*\[/ {
            header = $0
            gsub(/^[ \t]+|[ \t]+$/, "", header)
            in_section = (header == section)
            next
        }
        in_section {
            eq = index($0, "=")
            if (eq == 0) next
            name = substr($0, 1, eq - 1)
            value = substr($0, eq + 1)
            gsub(/^[ \t]+|[ \t]+$/, "", name)
            gsub(/^[ \t]+|[ \t]+$/, "", value)
            if (name == key) { print value; exit }
        }
    '

    aws_access_key_id=$(awk -v section="[$profile]" -v key="aws_access_key_id" \
        "$ini_lookup" "$CREDENTIALS_FILE")
    aws_secret_access_key=$(awk -v section="[$profile]" -v key="aws_secret_access_key" \
        "$ini_lookup" "$CREDENTIALS_FILE")

    export AWS_ACCESS_KEY_ID="$aws_access_key_id"
    export AWS_SECRET_ACCESS_KEY="$aws_secret_access_key"
}

# Function to read the AWS config
#
# Exports AWS_REGION for one profile, read from the INI-style file named by
# $CONFIG_FILE.
#
# Arguments:
#   $1 - profile name.
#
# The AWS config file names non-default profiles "[profile NAME]" while the
# credentials file uses "[NAME]", so both header spellings are accepted here.
# The whole section is scanned (up to the next [header]), and both
# "region = x" and "region=x" are understood. If no region is found,
# AWS_REGION is exported as an empty string.
read_aws_config() {
    local profile=$1
    local region

    region=$(awk -v section="[$profile]" -v alt_section="[profile $profile]" '
        /^[ \t]*\[/ {
            header = $0
            gsub(/^[ \t]+|[ \t]+$/, "", header)
            in_section = (header == section || header == alt_section)
            next
        }
        in_section {
            eq = index($0, "=")
            if (eq == 0) next
            name = substr($0, 1, eq - 1)
            value = substr($0, eq + 1)
            gsub(/^[ \t]+|[ \t]+$/, "", name)
            gsub(/^[ \t]+|[ \t]+$/, "", value)
            if (name == "region") { print value; exit }
        }
    ' "$CONFIG_FILE")

    export AWS_REGION="$region"
}

# Read the default profile by default, or use the AWS_PROFILE environment variable if set
PROFILE="${AWS_PROFILE:-default}"

# Read and export the AWS credentials and config.
# The profile name is quoted so it always reaches the functions as a single argument.
read_aws_credentials "$PROFILE"
read_aws_config "$PROFILE"

# Print out the values to confirm.
# Only the last four characters of the secret key are shown, so the full
# secret does not end up in terminal scrollback or captured logs.
echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
if [ -n "$AWS_SECRET_ACCESS_KEY" ]; then
    echo "AWS_SECRET_ACCESS_KEY: ****${AWS_SECRET_ACCESS_KEY: -4}"
else
    echo "AWS_SECRET_ACCESS_KEY: "
fi
echo "AWS_REGION: $AWS_REGION"

# Example usage of rclone with the set environment variables
# rclone ls s3:your-bucket-name
1 change: 1 addition & 0 deletions bloom_lims/env/install_postgres.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ if [[ "$1" == "" ]]; then
else
echo "Conda environment BLOOM created successfully."
fi
mkdir -p ~/.config/rclone/ && touch ~/.config/rclone/rclone.conf && cat bloom_lims/env/rclone.conf >> ~/.config/rclone/rclone.conf

conda activate BLOOM
if [[ $? -ne 0 ]]; then
Expand Down
6 changes: 6 additions & 0 deletions bloom_lims/env/rclone.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[blms3]
type = s3
provider = AWS
env_auth = true
region = us-west-2
location_constraint = us-west-2
57 changes: 38 additions & 19 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def setup_logging():
BloomWorkflowStep,
BloomFile,
BloomFileSet,
BloomFileReference
)

from bloom_lims.bvars import BloomVars
Expand Down Expand Up @@ -1884,7 +1885,9 @@ async def dewey(request: Request, _auth=Depends(require_auth)):
ui_fields=ui_form_fields,
controlled_properties=f_template.json_addl.get("controlled_properties", {}),
has_ui_form_properties=bool(ui_form_properties),
searchable_properties=sorted(f_template.json_addl['properties'].keys()),
searchable_properties=sorted(f_template.json_addl['properties'].keys()),
s3_bucket_prefix=os.environ.get("BLOOM_DEWEY_S3_BUCKET_PREFIX", "NEEDS TO BE SET!")+"0",

)

return HTMLResponse(content=content)
Expand Down Expand Up @@ -2287,7 +2290,7 @@ async def search_files(
columns=columns,
table_data=table_data,
style=style,
udat=user_data,
udat=user_data,s3_bucket_prefix=os.environ.get("BLOOM_DEWEY_S3_BUCKET_PREFIX", "NEEDS TO BE SET!")+"0",
)
return HTMLResponse(content=content)

Expand Down Expand Up @@ -2332,25 +2335,34 @@ async def create_file_set(
file_set_tag: str = Form(...),
comments: str = Form(None),
file_euids: str = Form(...),
create_presigned_urls: str = Form("no"),
presigned_url_duration: float = Form(0)
):
if create_presigned_urls == "on":
create_presigned_urls = True
else:
create_presigned_urls = False

ref_type: str = Form("na"),
duration: float = Form(0),
bucket: str = Form(""),
host: str = Form(""),
port: int = Form(0),
user: str = Form(""),
passwd: str = Form("")
):
rclone_config = {
"bucket": bucket,
"host": host,
"port": port,
"user": user,
"passwd": passwd
}
try:
bf = BloomFile(BLOOMdb3(app_username=request.session["user_data"]["email"]))
bfs = BloomFileSet(BLOOMdb3(app_username=request.session["user_data"]["email"]))

bfr = BloomFileReference(BLOOMdb3(app_username=request.session["user_data"]["email"]))

file_set_metadata = {
"name": file_set_name,
"description": file_set_description,
"tag": file_set_tag,
"comments": comments,
"create_presigned_urls": create_presigned_urls,
"presigned_url_duration": presigned_url_duration
"ref_type": ref_type,
"duration": duration,
"rclone_config": rclone_config
}

# Create the file set
Expand All @@ -2362,14 +2374,21 @@ async def create_file_set(
file_set_euid=new_file_set.euid, file_euids=file_euids_list
)

if create_presigned_urls:
for f_euid in file_euids_list:
# where presugned_url_duration is in days
presigned_url_duration_sec = presigned_url_duration * 24 * 60 * 60

# duration is given in days; convert it to seconds
duration_sec = duration * 24 * 60 * 60

if ref_type == "presigned_url":
for f_euid in file_euids_list:
bf.create_presigned_url(file_euid=f_euid, file_set_euid=new_file_set.euid,
valid_duration=presigned_url_duration_sec
valid_duration=duration_sec
)
elif ref_type.startswith('rclone'):
bfr.create_file_reference(reference_type=ref_type,valid_duration=duration_sec,file_set_euid=new_file_set.euid, rclone_config=rclone_config)
elif ref_type.startswith("na"):
pass
else:
raise ValueError(f"UNSUPPORTED ref_type: {ref_type}")


return RedirectResponse(
url=f"/euid_details?euid={new_file_set.euid}", status_code=303
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="bloom_lims",
version="0.9.10",
version="0.9.11",
packages=find_packages(),
install_requires=[
# Add dependencies here,
Expand Down
46 changes: 42 additions & 4 deletions templates/create_file_set.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,52 @@ <h3>Create A File Set</h3>
<textarea id="file_euids" name="file_euids" required></textarea>

<br><br>
<label for="create_presigned_urls"> Create Presigned URLs: </label>
<input type="checkbox" id="create_presigned_urls" name="create_presigned_urls">
<label for="ref_type">Share Files In Set? </label>
<select id="ref_type" name="ref_type" onchange="toggleRcloneOptions()">
<option value="na">Just Create File Set</option>
<option value="presigned_url">Create Presigned S3 URLS Per File</option>
<option value="rclone serve http">Share File Set Via HTTP</option>
<option value="rclone serve ftp">Share File Set Via FTP </option>
<option value="rclone serve sftp">Share File Set Via SFTP</option>
<option value="rclone serve webdav">Share File Set Via WEBDAV</option>
<option value="rclone serve restic">Share File Set Via RESTIC</option>
<option value="ne rclone nfs">Share File Set Via NFS (not enabled)</option>
<option value="ne rclone s3">Share File Set Via S3 (not enabled)</option>
<option value="ne rclone dlna">Share File Set Via DLNA (not enabled)</option>
<option value="ne rclone docker">Share File Set Via DOCKER (not enabled)</option>
</select><br>
<div id="rclone-options" style="display: none;">
<small><a href=https://rclone.org/commands/rclone_serve/>rclone serve</a> is magic & free</small>
<br><br>
bucket:<input type="text" name="bucket" value="{{ s3_bucket_prefix }}"> //
host:<input type="text" name="host" value="0.0.0.0"> // port:<input type="text" name="port" value="8080"> //
user:<input type="text" name="user" value="user"> // pass:<input type="text" name="passwd" value="passwd">
</div>

<br>
<label for="presigned_url_duration"> Presigned URL Duration (days, float acceptable): </label>
<input id="presigned_url_duration" name="presigned_url_duration" value="1" >
<label for="duration"> Share Duration (days, float acceptable): </label>
<input id="duration" name="duration" value="1" >

<br><br>
<button type="submit">Create File Set</button>
</form>
</ul>
</div>

<script>
/**
 * Show the rclone settings panel only while the selected share type
 * (the value of the #ref_type select) starts with "rclone"; hide it otherwise.
 */
function toggleRcloneOptions() {
    const selectedType = document.getElementById("ref_type").value;
    const panel = document.getElementById("rclone-options");

    panel.style.display = selectedType.startsWith("rclone") ? "block" : "none";
}

// Sync the rclone panel with the initially selected share type once the DOM is ready.
// toggleRcloneOptions takes no arguments, so it can be registered directly as the handler.
document.addEventListener("DOMContentLoaded", toggleRcloneOptions);
</script>
Loading

0 comments on commit 47cfa4f

Please sign in to comment.