Skip to content

Commit

Permalink
dump_workbaskets: output the number of seconds taken to render. (#596)
Browse files Browse the repository at this point in the history
Add option to disable envelope splitting by size.
Add option to specify the maximum envelope size.
Add option to use the next available envelope id (not super useful yet as we don't use envelope upload)
  • Loading branch information
stuaxo authored Jun 14, 2022
1 parent c8ba099 commit 23ba2bc
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 13 deletions.
12 changes: 11 additions & 1 deletion common/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,19 @@ def __init__(
self,
output: IO,
envelope_id: int,
transaction_counter: Counter = counter_generator(),
message_counter: Counter = counter_generator(),
max_envelope_size: Optional[int] = None,
format: str = "xml",
newline: bool = False,
) -> None:
"""
:param output: The output stream to write to.
:param envelope_id: The id of the envelope.
:param message_counter: A counter for the message ids.
:param max_envelope_size: The maximum size of an envelope, if None then no limit.
:param format: Format to serialize to, defaults to xml.
:param newline: Whether to add a newline after the envelope.
"""
self.output = output
self.message_counter = message_counter
self.envelope_id = envelope_id
Expand Down Expand Up @@ -247,11 +254,13 @@ def __exit__(self, *_) -> None:
self.write(self.render_envelope_end())

def render_file_header(self) -> str:
"""Output the file header."""
return render_to_string(
template_name="common/taric/start_file.xml",
)

def render_envelope_start(self) -> str:
"""Output the envelope start."""
return render_to_string(
template_name="common/taric/start_envelope.xml",
context={"envelope_id": self.envelope_id},
Expand All @@ -278,6 +287,7 @@ def render_envelope_body(
)

def render_envelope_end(self) -> str:
"""Output the envelope end."""
return render_to_string(template_name="common/taric/end_envelope.xml")

def start_next_envelope(self) -> None:
Expand Down
45 changes: 35 additions & 10 deletions exporter/management/commands/dump_transactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from common.serializers import validate_envelope
from exporter.serializers import MultiFileEnvelopeTransactionSerializer
from exporter.util import dit_file_generator
from exporter.util import item_timer
from taric.models import Envelope
from workbaskets.models import WorkBasket
from workbaskets.validators import WorkflowStatus
Expand All @@ -29,11 +30,11 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"envelope_id",
help="Override first envelope id [6 digit number].",
type=int,
default=None,
help="Override first envelope id [6 digit number] or auto for to use the next available.",
type=str,
default="auto",
action="store",
nargs="?",
nargs=1,
)

parser.add_argument(
Expand All @@ -56,6 +57,21 @@ def add_arguments(self, parser):
action="store",
)

parser.add_argument(
"--max-envelope-size",
help=f"Set the maximum envelope size in bytes, defaults to settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE [{settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE}].",
type=int,
default=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
action="store",
)

parser.add_argument(
"--disable-splitting",
help="Do not split envelopes larger than MAX_ENVELOPE_SIZE, overrides --max-envelope-size.",
default=False,
action="store_true",
)

@atomic
def handle(self, *args, **options):
workbasket_ids = options.get("workbasket_ids")
Expand All @@ -76,21 +92,30 @@ def handle(self, *args, **options):
f"Nothing to upload: {workbaskets.count()} Workbaskets APPROVED but none contain any transactions.",
)

if options.get("envelope_id") is not None:
envelope_id = int(options.get("envelope_id"))
else:
if options.get("envelope_id") == ["auto"]:
envelope_id = int(Envelope.next_envelope_id())
else:
envelope_id = int(options.get("envelope_id")[0])

# Setting max_envelope_size to 0, also disables splitting - so normalise 0 to None:
max_envelope_size = (
None
if options.get("disable_splitting")
else int(options.get("max_envelope_size") or None)
)

directory = options.get("directory", ".")

output_file_constructor = dit_file_generator(directory, envelope_id)
serializer = MultiFileEnvelopeTransactionSerializer(
output_file_constructor,
envelope_id=envelope_id,
max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
max_envelope_size=max_envelope_size,
)
errors = False
for rendered_envelope in serializer.split_render_transactions(transactions):
for time_to_render, rendered_envelope in item_timer(
serializer.split_render_transactions(transactions),
):
envelope_file = rendered_envelope.output
if not rendered_envelope.transactions:
self.stdout.write(
Expand All @@ -108,7 +133,7 @@ def handle(self, *args, **options):
else:
total_transactions = len(rendered_envelope.transactions)
self.stdout.write(
f"{envelope_file.name} \N{WHITE HEAVY CHECK MARK} XML valid. {total_transactions} transactions in {envelope_file.tell()} bytes.",
f"{envelope_file.name} \N{WHITE HEAVY CHECK MARK} XML valid. {total_transactions} transactions, serialized in {time_to_render:.2f} seconds using {envelope_file.tell()} bytes.",
)
if errors:
sys.exit(1)
9 changes: 8 additions & 1 deletion exporter/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,19 @@ def __init__(
self,
output_constructor: callable,
envelope_id=1,
benchmark=False,
*args,
**kwargs,
) -> None:
"""
:param output_constructor: callable that returns a file like object to write to, called each time a new envelope is started.
:param envelope_id: Envelope ID, to use later, when creating Envelope objects in the database.
:param args: Passed through to EnvelopeSerializer.
:param kwargs: Passed through to EnvelopeSerializer.
"""
self.output_constructor = output_constructor
EnvelopeSerializer.__init__(
self, self.output_constructor(), envelope_id=envelope_id, *args, **kwargs
self, self.output_constructor(), envelope_id=envelope_id, **kwargs
)

def start_next_envelope(self):
Expand Down
34 changes: 34 additions & 0 deletions exporter/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from exporter.util import exceptions_as_messages
from exporter.util import item_timer


def test_exceptions_as_messages():
    """Verify that every exception is rendered as a "raised an ..." message,
    grouped under the same key it was supplied with."""
    raised = {
        "first_exception": [Exception("test")],
        "second_exception": [Exception("test2")],
    }
    expected = {
        "first_exception": ["raised an test"],
        "second_exception": ["raised an test2"],
    }

    assert exceptions_as_messages(raised) == expected


def test_item_timer():
    """Verify that item_timer yields a tuple containing the time to retrieve
    each item and the item itself, in the order of the input."""
    expected_items = [1, 2]
    timed_items = item_timer(expected_items)

    for expected in expected_items:
        time_taken, item = next(timed_items)

        assert item == expected
        assert isinstance(time_taken, float)
        # Pulling an item out of an in-memory list can take less than one
        # clock tick, so a measured duration of exactly 0.0 is legitimate;
        # asserting a strictly positive value makes the test flaky.
        assert time_taken >= 0.0

    # Nothing further is yielded once the input is exhausted.
    assert next(timed_items, None) is None
20 changes: 19 additions & 1 deletion exporter/util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import sys
import time
from itertools import count
from pathlib import Path
from typing import Any
from typing import Dict
from typing import Generator
from typing import Iterable
from typing import List
from typing import Sequence
from typing import Tuple


def dit_filename_generator(start=1):
Expand Down Expand Up @@ -128,9 +133,22 @@ def exceptions_as_messages(
) -> Dict[int, List[str]]:
"""
:param error_dict: dict of lists of exceptions.
:return: dict of lists of human readable strings containing the exception name.
:return: dict of lists of human-readable strings containing the exception name.
"""
new_errors = {}
for k, errors in error_dict.items():
new_errors[k] = [f"raised an {exc}" for exc in errors]
return new_errors


def item_timer(items: Iterable[Any]) -> Generator[Tuple[float, Any], None, None]:
    """
    Iterate over items, timing how long each one takes to be produced.

    :param items: Any iterable of items; it is only iterated, so a lazy
      generator works as well as a sequence.
    :return: Generator of (time_taken, item) tuples, where time_taken is the
      number of seconds spent retrieving that item from ``items``.

    Time the consumer spends between iterations is not counted, because the
    clock is restarted only when this generator is resumed after a yield.
    """
    # perf_counter is monotonic and high resolution, so a system clock
    # adjustment can never produce a negative or wildly wrong duration.
    start_time = time.perf_counter()
    for item in items:
        time_taken = time.perf_counter() - start_time
        yield time_taken, item
        # Restart the clock on resume so consumer time is excluded.
        start_time = time.perf_counter()
1 change: 1 addition & 0 deletions pii-ner-exclude.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1138,3 +1138,4 @@ Checkers
self.linked_model
"Attach BusinessRules
WorkBasketOutputFormat Enum
param kwargs:

0 comments on commit 23ba2bc

Please sign in to comment.