Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Introduce basics for modification detection #9

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,20 @@ To test a nest with more levels, simply paste a nested folder structure inside `

Run

`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=nested`

Check the downloads folder in `test-tree/downloads` and ensure that the `downloads/nested` directory has a structure like the nested directory uploaded in the [nested uploads test](#nested-uploads).
`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=misc/nested`

To verify that everything in the nested folder was downloaded correctly, run `./verify.py --nested-complete`.

### What file types and scenarios are left out?

Anything not included in the section above describing what is currently covered is by implication excluded from these tests.

## Hash verification

### Modification Detection

To verify that files which were successfully uploaded and then downloaded have remained unchanged, as we would expect, run `./verify.py --succeeded`.

## Troubleshooting

- Remember that the commands are examples and some of the arguments may not apply to your specific environment.
Expand Down
119 changes: 119 additions & 0 deletions verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
import os
import sys
import hashlib
import argparse

# Directory tree that is walked to collect the paths of downloaded files.
DOWNLOAD_MISC_DIR = "test-tree/downloads/misc"
# Directory tree that is walked to collect the paths of uploaded (local) files.
MISC_DIR = "test-tree/misc"
# Number of bytes read from a file per iteration while hashing it.
CHUNK_SIZE = 1024

# ANSI escape sequences used to colour the messages printed to the terminal;
# ENDC resets the colour at the end of each message.
OKBLUE = "\033[94m"
OKGREEN = "\033[92m"
FAIL = "\033[91m"
WARNING = "\033[93m"
ENDC = "\033[0m"


def hash_file(file_path):
    """Compute the SHA-1 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed CHUNK_SIZE bytes at a
    time, so the whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The digest as a hexadecimal string.
    """
    digest = hashlib.sha1()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(CHUNK_SIZE)
            # The final read returns b""; feeding it to the digest is a no-op.
            digest.update(block)
            if block == b"":
                break
    return digest.hexdigest()


def crawl_upload_and_download_paths():
    """Collect the path of every file under the upload and download trees.

    Returns:
        A ``(uploaded_paths, downloaded_paths)`` tuple of lists. The first
        holds the files found by walking MISC_DIR, the second those found by
        walking DOWNLOAD_MISC_DIR; each entry is the directory joined with
        the file name.
    """

    def _files_under(root):
        # Flatten the os.walk() result into one list of file paths.
        return [
            os.path.join(folder, name)
            for folder, _, names in os.walk(root)
            for name in names
        ]

    return _files_under(MISC_DIR), _files_under(DOWNLOAD_MISC_DIR)


def make_file_to_harsh_maps():
    """Hash every uploaded and every downloaded file.

    Returns:
        A ``(pre_upload_hashes, post_upload_hashes)`` tuple. Each element is
        a list of ``{"path": ..., "hash": ...}`` dicts, one per file, in the
        order the paths were returned by crawl_upload_and_download_paths().
    """
    sources, downloads = crawl_upload_and_download_paths()
    before = [{"path": item, "hash": hash_file(item)} for item in sources]
    after = [{"path": item, "hash": hash_file(item)} for item in downloads]
    return before, after


def parse_cli():
    """Build the command-line parser for the verification script.

    The parser defines three boolean flags: ``--misc-complete``,
    ``--nested-complete`` and ``--succeeded``.

    Returns:
        The configured ``argparse.ArgumentParser``. Arguments are not parsed
        here; the caller invokes ``parse_args()`` on the returned parser.
    """
    parser = argparse.ArgumentParser(
        prog="verify", description="Check results of upload/download operations"
    )
    parser.add_argument(
        "--misc-complete",
        help="Verify that both the upload and download of the complete misc folder was successful",
        action="store_true",
    )
    parser.add_argument(
        "--nested-complete",
        help="Verify that both the upload and download of the complete nested folder was successful",
        action="store_true",
    )
    parser.add_argument(
        "--succeeded",
        # Help text previously read "uploaded where downloaded".
        help="Verify that files that were successfully uploaded were downloaded successfully",
        action="store_true",
    )

    return parser


def main():
    """Parse the command line and run the requested verification.

    With ``--succeeded``, every downloaded file is hashed and its hash is
    looked up among the hashes of the uploaded files. A warning is printed
    for each downloaded file whose hash is not found, followed by a summary.
    ``--misc-complete`` and ``--nested-complete`` are accepted but currently
    do nothing. With no flag, the help text is printed.
    """
    parser = parse_cli()
    args = parser.parse_args()

    if args.succeeded:
        # Hash the trees only when the result is actually used.
        pre_upload_hash_data, post_upload_hash_data = make_file_to_harsh_maps()
        # Build a real set once. A lazy map() object is consumed by each
        # `in` test, so later lookups would miss hashes that were already
        # iterated past and falsely report files as missing.
        pre_upload_hashes = {entry.get("hash") for entry in pre_upload_hash_data}

        failed_once = False
        for file_data in post_upload_hash_data:
            print(f"{OKBLUE}Verifying hash for {file_data.get('path')} ...{ENDC}")
            if file_data.get("hash") not in pre_upload_hashes:
                print(
                    f"{WARNING}The hash to the path {file_data.get('path')} is missing!{ENDC}"
                )
                print(
                    f"{WARNING}File has either been modified (on disk or permanent) or is missing!{ENDC}\n"
                )
                failed_once = True
        if not failed_once:
            print(f"{OKGREEN}\nVerification complete!{ENDC}\n")
            print(
                f"{OKGREEN}All downloaded files have matching hashes in pre-uploaded file hashes.{ENDC}\n"
            )
        else:
            print(
                f"{FAIL}\nVerification complete but failed! Missing hash(es) detected.\n{ENDC}"
            )
            print(
                f"{FAIL}At least one missing hash detected, check the logs above.\n{ENDC}"
            )
    elif args.misc_complete:
        # Not implemented yet.
        pass
    elif args.nested_complete:
        # Not implemented yet.
        pass
    else:
        print("Not sure what to do!\n\n")
        parser.print_help()


# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()