Skip to content

Commit

Permalink
Add StreamDescriptor as params of AirbyteTracedException.__init__ (#3…
Browse files Browse the repository at this point in the history
  • Loading branch information
maxi297 authored Apr 12, 2024
1 parent 6d5ecca commit a900c78
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 25 deletions.
11 changes: 6 additions & 5 deletions airbyte-cdk/python/airbyte_cdk/sources/abstract_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,14 @@ def read(
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
display_message = stream_instance.get_error_display_message(e)
stream_descriptor = StreamDescriptor(name=configured_stream.stream.name)
if display_message:
traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
traced_exception = AirbyteTracedException.from_exception(
e, message=display_message, stream_descriptor=stream_descriptor
)
else:
traced_exception = AirbyteTracedException.from_exception(e)
yield traced_exception.as_sanitized_airbyte_message(
stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
)
traced_exception = AirbyteTracedException.from_exception(e, stream_descriptor=stream_descriptor)
yield traced_exception.as_sanitized_airbyte_message()
stream_name_to_exception[stream_instance.name] = traced_exception
if self.stop_sync_on_stream_failure:
logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,22 @@ def on_partition_complete_sentinel(self, sentinel: PartitionCompleteSentinel) ->
3. Emit messages that were added to the message repository
"""
partition = sentinel.partition
partition.close()
partitions_running = self._streams_to_running_partitions[partition.stream_name()]
if partition in partitions_running:
partitions_running.remove(partition)
# If all partitions were generated and this was the last one, the stream is done
if partition.stream_name() not in self._streams_currently_generating_partitions and len(partitions_running) == 0:
yield from self._on_stream_is_done(partition.stream_name())
yield from self._message_repository.consume_queue()

try:
partition.close()
except Exception as exception:
self._flag_exception(partition.stream_name(), exception)
yield AirbyteTracedException.from_exception(
exception, stream_descriptor=StreamDescriptor(name=partition.stream_name())
).as_sanitized_airbyte_message()
finally:
partitions_running = self._streams_to_running_partitions[partition.stream_name()]
if partition in partitions_running:
partitions_running.remove(partition)
# If all partitions were generated and this was the last one, the stream is done
if partition.stream_name() not in self._streams_currently_generating_partitions and len(partitions_running) == 0:
yield from self._on_stream_is_done(partition.stream_name())
yield from self._message_repository.consume_queue()

def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
"""
Expand Down Expand Up @@ -136,11 +144,14 @@ def on_exception(self, exception: StreamThreadException) -> Iterable[AirbyteMess
1. Stop all running streams
2. Raise the exception
"""
self._exceptions_per_stream_name.setdefault(exception.stream_name, []).append(exception.exception)
self._flag_exception(exception.stream_name, exception.exception)
self._logger.exception(f"Exception while syncing stream {exception.stream_name}", exc_info=exception.exception)
yield AirbyteTracedException.from_exception(exception).as_airbyte_message(
stream_descriptor=StreamDescriptor(name=exception.stream_name)
)
yield AirbyteTracedException.from_exception(
exception, stream_descriptor=StreamDescriptor(name=exception.stream_name)
).as_airbyte_message()

def _flag_exception(self, stream_name: str, exception: Exception) -> None:
self._exceptions_per_stream_name.setdefault(stream_name, []).append(exception)

def start_next_partition_generator(self) -> Optional[AirbyteMessage]:
"""
Expand Down
26 changes: 18 additions & 8 deletions airbyte-cdk/python/airbyte_cdk/utils/traced_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,28 @@ def __init__(
message: Optional[str] = None,
failure_type: FailureType = FailureType.system_error,
exception: Optional[BaseException] = None,
stream_descriptor: Optional[StreamDescriptor] = None,
):
"""
:param internal_message: the internal error that caused the failure
:param message: a user-friendly message that indicates the cause of the error
:param failure_type: the type of error
:param exception: the exception that caused the error, from which the stack trace should be retrieved
:param stream_descriptor: describe the stream from which the exception comes from
"""
self.internal_message = internal_message
self.message = message
self.failure_type = failure_type
self._exception = exception
self._stream_descriptor = stream_descriptor
super().__init__(internal_message)

def as_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
def as_airbyte_message(self, stream_descriptor: Optional[StreamDescriptor] = None) -> AirbyteMessage:
"""
Builds an AirbyteTraceMessage from the exception
:param stream_descriptor is deprecated, please use the stream_description in `__init__ or `from_exception`. If many
stream_descriptors are defined, the one from `as_airbyte_message` will be discarded.
"""
now_millis = datetime.now().timestamp() * 1000.0

Expand All @@ -61,18 +67,18 @@ def as_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> Airb
internal_message=self.internal_message,
failure_type=self.failure_type,
stack_trace=stack_trace_str,
stream_descriptor=stream_descriptor,
stream_descriptor=self._stream_descriptor if self._stream_descriptor is not None else stream_descriptor,
),
)

return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)

def as_connection_status_message(self) -> AirbyteMessage:
def as_connection_status_message(self) -> Optional[AirbyteMessage]:
if self.failure_type == FailureType.config_error:
output_message = AirbyteMessage(
return AirbyteMessage(
type=MessageType.CONNECTION_STATUS, connectionStatus=AirbyteConnectionStatus(status=Status.FAILED, message=self.message)
)
return output_message
return None

def emit_message(self) -> None:
"""
Expand All @@ -84,16 +90,20 @@ def emit_message(self) -> None:
print(filtered_message)

@classmethod
def from_exception(cls, exc: BaseException, *args, **kwargs) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs
def from_exception(cls, exc: BaseException, stream_descriptor: Optional[StreamDescriptor] = None, *args, **kwargs) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs
"""
Helper to create an AirbyteTracedException from an existing exception
:param exc: the exception that caused the error
:param stream_descriptor: describe the stream from which the exception comes from
"""
return cls(internal_message=str(exc), exception=exc, *args, **kwargs) # type: ignore # ignoring because of args and kwargs
return cls(internal_message=str(exc), exception=exc, stream_descriptor=stream_descriptor, *args, **kwargs) # type: ignore # ignoring because of args and kwargs

def as_sanitized_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
def as_sanitized_airbyte_message(self, stream_descriptor: Optional[StreamDescriptor] = None) -> AirbyteMessage:
"""
Builds an AirbyteTraceMessage from the exception and sanitizes any secrets from the message body
:param stream_descriptor is deprecated, please use the stream_description in `__init__ or `from_exception`. If many
stream_descriptors are defined, the one from `as_sanitized_airbyte_message` will be discarded.
"""
error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor)
if error_message.trace.error.message:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,41 @@ def test_handle_on_partition_complete_sentinel_yields_status_message_if_the_stre
assert messages == expected_messages
self._a_closed_partition.close.assert_called_once()

@freezegun.freeze_time("2020-01-01T00:00:00")
def test_given_exception_on_partition_complete_sentinel_then_yield_error_trace_message_and_stream_is_incomplete(self) -> None:
self._a_closed_partition.stream_name.return_value = self._stream.name
self._a_closed_partition.close.side_effect = ValueError

handler = ConcurrentReadProcessor(
[self._stream],
self._partition_enqueuer,
self._thread_pool_manager,
self._logger,
self._slice_logger,
self._message_repository,
self._partition_reader,
)
handler.start_next_partition_generator()
handler.on_partition(self._a_closed_partition)
list(handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._stream)))
messages = list(handler.on_partition_complete_sentinel(PartitionCompleteSentinel(self._a_closed_partition)))

expected_status_message = AirbyteMessage(
type=MessageType.TRACE,
trace=AirbyteTraceMessage(
type=TraceType.STREAM_STATUS,
stream_status=AirbyteStreamStatusTraceMessage(
stream_descriptor=StreamDescriptor(
name=self._stream.name,
),
status=AirbyteStreamStatus.INCOMPLETE,
),
emitted_at=1577836800000.0,
),
)
assert list(map(lambda message: message.trace.type, messages)) == [TraceType.ERROR, TraceType.STREAM_STATUS]
assert messages[1] == expected_status_message

@freezegun.freeze_time("2020-01-01T00:00:00")
def test_handle_on_partition_complete_sentinel_yields_no_status_message_if_the_stream_is_not_done(self):
stream_instances_to_read_from = [self._stream]
Expand Down
29 changes: 29 additions & 0 deletions airbyte-cdk/python/unit_tests/utils/test_traced_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
)
from airbyte_cdk.models.airbyte_protocol import Type as MessageType
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
from airbyte_protocol.models import StreamDescriptor

_AN_EXCEPTION = ValueError("An exception")
_A_STREAM_DESCRIPTOR = StreamDescriptor(name="a_stream")
_ANOTHER_STREAM_DESCRIPTOR = StreamDescriptor(name="another_stream")


@pytest.fixture
Expand Down Expand Up @@ -105,3 +110,27 @@ def test_emit_message(capsys):
printed_message.trace.emitted_at = 0.0

assert printed_message == expected_message


def test_given_both_init_and_as_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> None:
traced_exc = AirbyteTracedException(stream_descriptor=_A_STREAM_DESCRIPTOR)
message = traced_exc.as_airbyte_message(stream_descriptor=_ANOTHER_STREAM_DESCRIPTOR)
assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR


def test_given_both_init_and_as_sanitized_airbyte_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> None:
traced_exc = AirbyteTracedException(stream_descriptor=_A_STREAM_DESCRIPTOR)
message = traced_exc.as_sanitized_airbyte_message(stream_descriptor=_ANOTHER_STREAM_DESCRIPTOR)
assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR


def test_given_both_from_exception_and_as_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> None:
traced_exc = AirbyteTracedException.from_exception(_AN_EXCEPTION, stream_descriptor=_A_STREAM_DESCRIPTOR)
message = traced_exc.as_airbyte_message(stream_descriptor=_ANOTHER_STREAM_DESCRIPTOR)
assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR


def test_given_both_from_exception_and_as_sanitized_airbyte_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> None:
traced_exc = AirbyteTracedException.from_exception(_AN_EXCEPTION, stream_descriptor=_A_STREAM_DESCRIPTOR)
message = traced_exc.as_sanitized_airbyte_message(stream_descriptor=_ANOTHER_STREAM_DESCRIPTOR)
assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR

0 comments on commit a900c78

Please sign in to comment.