From 798009b500f93a07d4c14e0ea84a490e75b55c56 Mon Sep 17 00:00:00 2001 From: Milen Kouylekov Date: Tue, 29 Oct 2024 16:02:23 +0100 Subject: [PATCH] Quote of Remote path + fix: path href split --- tsdapiclient/fileapi.py | 35 ++++++++++++++++------------------- tsdapiclient/guide.py | 19 ++++++++++--------- tsdapiclient/sync.py | 11 +++++++---- tsdapiclient/tacl.py | 24 ++++++++++++------------ 4 files changed, 45 insertions(+), 44 deletions(-) diff --git a/tsdapiclient/fileapi.py b/tsdapiclient/fileapi.py index 186c49b..2cf4373 100644 --- a/tsdapiclient/fileapi.py +++ b/tsdapiclient/fileapi.py @@ -115,12 +115,12 @@ def format_filename(filename: str) -> str: return os.path.basename(filename) -def upload_resource_name(filename: str, is_dir: bool, group: Optional[str] = None, remote_path: Optional[str] =None) -> str: +def upload_resource_name(filename: str, is_dir: bool, group: Optional[str] = None, remote_path: Optional[str] = None) -> str: if not is_dir: debug_step('uploading file') resource = quote(format_filename(filename)) if remote_path: - resource = f'{remote_path}{resource}' + resource = f'{quote(remote_path)}{resource}' else: resource = f'/{resource}' if group: @@ -132,7 +132,7 @@ def upload_resource_name(filename: str, is_dir: bool, group: Optional[str] = Non else: target = filename if remote_path: - resource = f'{group}{remote_path}{quote(target)}' + resource = f'{group}{quote(remote_path)}{quote(target)}' else: resource = f'{group}/{quote(target)}' return resource @@ -157,7 +157,7 @@ def lazy_reader( - verify the hash of a given chunk, between given offsets - create the iterator from a given offset - Dependig on how the function is called it can return either bytes + Depending on how the function is called it can return either bytes or tuples. 1) When the caller provides the public_key, but _not_ a nonce and key, then the function will generate the nonce and key, and return a tuple with the encrypted nonce and key, along with the data and chunksize @@ -334,9 +334,9 @@ def import_list( per_page: number of files to list per page """ - resource = directory if directory else '' + resource = quote(directory) if directory else '' if remote_path: - endpoint=f"stream/{group}{remote_path}{resource}" + endpoint=f"stream/{group}{quote(remote_path)}{resource}" else: endpoint=f"stream/{group}{resource}" url = f'{file_api_url(env, pnum, backend, endpoint=endpoint , page=page, per_page=per_page)}' @@ -405,9 +405,9 @@ def import_delete( tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target) token = tokens.get("access_token") if tokens else token if remote_path: - endpoint = f'stream/{group}{remote_path}{filename}' + endpoint = f'stream/{group}{quote(remote_path)}{quote(filename)}' else: - endpoint = f'stream/{group}{filename}' + endpoint = f'stream/{group}{quote(filename)}' url = f'{file_api_url(env, pnum, "files", endpoint=endpoint)}' headers = {'Authorization': f'Bearer {token}'} print(f'deleting: {filename}') @@ -431,9 +431,9 @@ def export_delete( tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target) token = tokens.get("access_token") if tokens else token if remote_path: - endpoint = f'export{remote_path}{filename}' + endpoint = f'export{quote(remote_path)}{quote(filename)}' else: - endpoint = f'export/{filename}' + endpoint = f'export/{quote(filename)}' url = f'{file_api_url(env, pnum, "files", endpoint=endpoint)}' headers = {'Authorization': f'Bearer {token}'} print(f'deleting: {filename}') @@ -490,7 +490,7 @@ def export_list( sys.exit(f'{remote_path} is a file, not a directory') if not exists: sys.exit(f'{remote_path} does not exist') - endpoint = f"export{remote_path}{resource}" + endpoint = f"export{quote(remote_path)}{resource}" else: endpoint = f'export/{resource}' url = f'{file_api_url(env, pnum, backend, endpoint=endpoint, page=page, per_page=per_page)}' @@ -515,9 +515,9 @@ def export_head( ) -> requests.Response: headers = {'Authorization': 'Bearer {0}'.format(token), "Accept-Encoding": "*"} if remote_path: - endpoint = f"export{remote_path}{filename}" + endpoint = f"export{quote(remote_path)}{quote(filename)}" else: - endpoint = f'export/{filename}' + endpoint = f'export/{quote(filename)}' url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}' resp = session.head(url, headers=headers) return resp @@ -586,11 +586,11 @@ def export_get( if dev_url: url = dev_url else: - if backend == 'files': + if backend == 'survey': urlpath = '' else: if remote_path: - urlpath = f"export{remote_path}" + urlpath = f"export{quote(remote_path)}" else: urlpath = 'export/' endpoint = f'{urlpath}{filename}' @@ -670,10 +670,7 @@ def _resumable_url( ) -> str: resource = upload_resource_name(filename, is_dir, group=group, remote_path=remote_path) if not dev_url: - if remote_path: - endpoint = f"stream{remote_path}{resource}" - else: - endpoint = f"stream/{resource}" + endpoint = f"stream/{resource}" url = f'{file_api_url(env, pnum, backend, endpoint=endpoint)}' else: url = dev_url diff --git a/tsdapiclient/guide.py b/tsdapiclient/guide.py index 5947d2a..efcd878 100644 --- a/tsdapiclient/guide.py +++ b/tsdapiclient/guide.py @@ -160,16 +160,17 @@ The instances are currently only used for uploading data. To upload data to with an instance, you can use the following command: - tacl p11 --api-key --link-id (--secret-challenge )? --upload myfile + tacl p11 --api-key --link-id (--secret-challenge-file )? --upload myfile where is the link id provided to you by the project owner, -and is the secret challenge provided to you by the -project owner. The secret challenge is used to verify that the instance -is the correct one. The link id is used to identify the instance it can -be provided as UUID or a https link. Example of an https link is: +and is a file that contains secret challenge provided +to you by the project owner. The secret challenge is used to verify that +the instance is the correct one. The link id is used to identify the +instance it can be provided as UUID or a https link. Example of an https +link is: tacl p11 --api-key --link-id https://data.tsd.usit.no/i/ --upload myfile - tacl p11 --api-key --link-id https://data.tsd.usit.no/c/ --secret-challenge @path-to-secret-file --upload myfile + tacl p11 --api-key --link-id https://data.tsd.usit.no/c/ --secret-challenge-file @path-to-secret-file --upload myfile where the 'c' and 'i' are the type of the link. The 'c' for instance that requires a secret challenge and the 'i' for instance that does not @@ -177,15 +178,15 @@ but without the https and the domain. tacl p11 --api-key --link-id --upload myfile - tacl p11 --api-key --link-id --secret-challenge @path-to-secret-file --upload myfile + tacl p11 --api-key --link-id --secret-challenge-file @path-to-secret-file --upload myfile You can also store the instance in a file (only the instance or url no newlines), and invoke tacl as such: - tacl p11 --link-id @path-to-instance-file --secret @path-to-secret-file --upload myfile + tacl p11 --link-id @path-to-instance-file --secret-challenge-file @path-to-secret-file --upload myfile While the secret challenge is optional, it has to provided always as a file to avoid leaking secrets in the shared -machines process. If not provided the secret challenge will be asked for in the terminal. +machines process. If not provided the secret challenge will be asked for in the terminal if needed. """ debugging = f""" diff --git a/tsdapiclient/sync.py b/tsdapiclient/sync.py index 82618e2..adfa4f4 100644 --- a/tsdapiclient/sync.py +++ b/tsdapiclient/sync.py @@ -1,4 +1,3 @@ - import os import time import shutil @@ -305,6 +304,7 @@ def sync(self) -> bool: self.delete_cache.add_many(key=self.directory, items=deletes) # 3. transfer resources for resource, integrity_reference in resources: + print(f'transferring: {resource}') self._transfer(resource, integrity_reference=integrity_reference) if self.use_cache: self.transfer_cache.remove(key=self.directory, item=resource) @@ -366,8 +366,9 @@ def _find_remote_resources(self, path: str) -> list: Recursively list a remote path. Ignore prefixes and suffixes if they exist. Collect integrity references for all resources. - """ + + print(f'finding remote resources for {path}') list_funcs = { 'export': { 'func': export_list, @@ -403,8 +404,9 @@ def _find_remote_resources(self, path: str) -> list: next_page = out.get('page') if found: for entry in found: - subdir_and_resource = entry.get("href").split(f"/{path}")[-1] - ref = f'{path}{subdir_and_resource}' + import os + subdir_and_resource = os.path.basename(entry.get("href")) + ref = f'{path}/{subdir_and_resource}' ignore_prefix = False # check if we should ignore it for prefix in self.ignore_prefixes: @@ -516,6 +518,7 @@ def _transfer_remote_to_local( if not os.path.lexists(target): debug_step(f'creating directory: {target}') os.makedirs(target) + print(f'downloading: {resource}') resp = export_get( self.env, self.pnum, diff --git a/tsdapiclient/tacl.py b/tsdapiclient/tacl.py index 4f6b45a..7a98418 100644 --- a/tsdapiclient/tacl.py +++ b/tsdapiclient/tacl.py @@ -477,10 +477,11 @@ def construct_correct_remote_path(path: str) -> str: help='Pass a download link obtained from the TSD API. This must be used with --api-key as well as it requires a specific client' ) @click.option( - '--secret-challenge', + '--secret-challenge-file', required=False, default=None, - help='Pass a secret challenge for instance authentication' + type=click.Path(exists=True), + help='Pass a secret challenge for instance authentication if needed as a file: --secret-challenge-file @path-to-file' ) @click.option( '--encrypt', @@ -503,7 +504,7 @@ def construct_correct_remote_path(path: str) -> str: @click.option( '--remote-path', required=False, - help='Specify a path on the remote server' + help='Specify a path on the remote server. For example a directory in the file-import/ directory or file-export directory of the TSD project' ) def cli( pnum: str, @@ -543,7 +544,7 @@ def cli( download_delete: str, api_key: str, link_id: str, - secret_challenge: str, + secret_challenge_file: str, encrypt: bool, chunk_size: int, resumable_threshold: int, @@ -649,14 +650,12 @@ def cli( sys.exit(f"link id file not found: {link_id_file}") debug_step(f'reading link id from {link_id_file}') with open(link_id_file, "r") as f: - link_id = f.read() - if secret_challenge and secret_challenge.startswith("@"): - secret_challenge_file = secret_challenge.split("@")[-1] - if not os.path.lexists(secret_challenge_file): - sys.exit(f"secret challenge not found: {secret_challenge_file}") + link_id = f.read().strip() + secret_challenge = None + if secret_challenge_file: debug_step(f'reading secret challenge from {secret_challenge_file}') with open(secret_challenge_file, "r") as f: - secret_challenge = f.read() + secret_challenge = f.read().strip() if link_id.startswith("https://"): click.echo("extracting link_id from URL") patten = r"https://(?P.+)/(?Pc|i)/(?P[a-f\d0-9-]{36})" @@ -666,12 +665,12 @@ def cli( if not secret_challenge: click.echo("instance requires a secret challenge") secret_challenge = click.prompt("secret challenge > ", hide_input=True) - print(secret_challenge) if not secret_challenge: click.echo("instance authentication requires a secret challenge") sys.exit(1) else: link_id = uuid.UUID(link_id) + print(link_id, secret_challenge ) token, refresh_token = get_jwt_instance_auth(env, pnum, api_key, link_id, secret_challenge, token_type) else: token, refresh_token = get_jwt_basic_auth(env, pnum, api_key, token_type) @@ -809,6 +808,7 @@ def cli( filename = download debug_step('starting file export') resp = export_head(env, pnum, filename, token, remote_path=remote_path) + print(resp) if resp.headers.get('Content-Type') == 'directory': click.echo(f'downloading directory: {download}') downloader = SerialDirectoryDownloader( @@ -847,7 +847,7 @@ def cli( elif download_sync: filename = download_sync debug_step('starting directory sync') - resp = export_head(env, pnum, filename, token) + resp = export_head(env, pnum, filename, token, remote_path=remote_path) if resp.headers.get('Content-Type') != 'directory': sys.exit('directory sync does not apply to files') syncer = SerialDirectoryDownloadSynchroniser(