From f7b0bcd2b5dfc55b73d2a1d6eac203b7d76ae040 Mon Sep 17 00:00:00 2001 From: Pete MacKinnon Date: Wed, 7 Feb 2024 14:26:41 -0800 Subject: [PATCH 01/18] Update mlflow plugin version for deployments fix --- models/mlflow/docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/mlflow/docker/Dockerfile b/models/mlflow/docker/Dockerfile index 4baa5d1c40..c6e9f41e2b 100644 --- a/models/mlflow/docker/Dockerfile +++ b/models/mlflow/docker/Dockerfile @@ -15,7 +15,7 @@ ARG FROM_IMAGE="condaforge/miniforge-pypy3" ARG PYTHON_VER=3.11 -ARG TAG=23.1.0-1 +ARG TAG=23.11.0-0 FROM ${FROM_IMAGE}:${TAG} AS base WORKDIR /mlflow @@ -44,7 +44,7 @@ RUN sed -i 's/conda activate base/conda activate mlflow/g' ~/.bashrc SHELL ["/opt/conda/bin/conda", "run", "-n", "mlflow", "/bin/bash", "-c"] ARG TRITON_DIR=/mlflow/triton-inference-server -ARG TRITON_VER=r23.05 +ARG TRITON_VER=r24.01 RUN mkdir ${TRITON_DIR} && \ cd ${TRITON_DIR} && \ From cb62e07225e1d9a567cd3d5202d8860e5365c311 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:31:57 -0800 Subject: [PATCH 02/18] Fix logging of sleep time (#1493) * Currently the `request_with_retry` method incorrectly logs the starting value of sleep time, not the actual sleep time. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1493 --- morpheus/utils/http_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/morpheus/utils/http_utils.py b/morpheus/utils/http_utils.py index de1274b66f..9ecb17b538 100644 --- a/morpheus/utils/http_utils.py +++ b/morpheus/utils/http_utils.py @@ -133,16 +133,16 @@ def request_with_retry( raise e if retry_after_header is not None: - sleep_time_ = retry_after_header + actual_sleep_time = retry_after_header else: - sleep_time_ = (2**(try_count - 1)) * sleep_time + actual_sleep_time = (2**(try_count - 1)) * sleep_time logger.error("Error occurred performing %s request to %s: %s", request_kwargs['method'], request_kwargs['url'], e) - logger.debug("Sleeping for %s seconds before retrying request again", sleep_time) - time.sleep(sleep_time_) + logger.debug("Sleeping for %s seconds before retrying request again", actual_sleep_time) + time.sleep(actual_sleep_time) def prepare_url(url: str) -> str: From 33751a51b07b269038de951d642f1a9822572371 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Fri, 9 Feb 2024 14:44:43 -0800 Subject: [PATCH 03/18] Adopt updated builds of CI runners (#1503) * per request from ops ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Eli Fajardo (https://github.com/efajardo-nv)
  - Christopher Harris (https://github.com/cwharris)

URL: https://github.com/nv-morpheus/Morpheus/pull/1503
---
 .github/workflows/pr.yaml  | 4 ++--
 ci/scripts/run_ci_local.sh | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c420f25497..e2ccaa59ea 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -94,7 +94,7 @@ jobs:
       # Disable conda upload for now, once we have morpheus packages in conda forge set the value to
       # !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || ''
       conda_upload_label: ""
-      container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240129
-      test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240129
+      container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240209
+      test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240209
     secrets:
       NGC_API_KEY: ${{ secrets.NGC_API_KEY }}

diff --git a/ci/scripts/run_ci_local.sh b/ci/scripts/run_ci_local.sh
index 353a493803..8da5630043 100755
--- a/ci/scripts/run_ci_local.sh
+++ b/ci/scripts/run_ci_local.sh
@@ -51,7 +51,7 @@ GIT_BRANCH=$(git branch --show-current)
 GIT_COMMIT=$(git log -n 1 --pretty=format:%H)
 
 LOCAL_CI_TMP=${LOCAL_CI_TMP:-${MORPHEUS_ROOT}/.tmp/local_ci_tmp}
-CONTAINER_VER=${CONTAINER_VER:-240129}
+CONTAINER_VER=${CONTAINER_VER:-240209}
 CUDA_VER=${CUDA_VER:-12.1}
 DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""}

From 77cc0e56d5f4410bf8c6e8c32f959bd8af19cb6d Mon Sep 17 00:00:00 2001
From: David Gardner <96306125+dagardner-nv@users.noreply.github.com>
Date: Mon, 12 Feb 2024 10:15:22 -0800
Subject: [PATCH 04/18] Support ControlMessage output in the C++ impl of DeserializeStage (#1478)

* C++ impl of `DeserializeStage` now templates the output message type
* Add the ability to cast a Python instance of a `MessageMeta` to the C++ instance of `MessageMeta`. This removes the need to explicitly import the C++ impl of `MessageMeta` in order to set the payload of a `ControlMessage`.
* Use C++ mode by default for LLM examples
* Support cudf DataFrames for both `WebScraperStage` and `RSSController`
* Utility method `show_warning_message` is now marked visible in the lib
* Move `tests/test_deserialize_stage_pipe.py` -> `tests/stages/test_deserialize_stage_pipe.py` & `tests/test_message_meta.py` -> `tests/messages/test_message_meta.py`
* Update docstrings

Closes #1328
Closes #1480
Closes #1481
Closes #1342

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.
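To make the casting change concrete, here is a minimal sketch based on the tests added in this PR (the DataFrame contents are stand-ins):

```python
import cudf

from morpheus.messages import ControlMessage
from morpheus.messages import MessageMeta

df = cudf.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})

# The Python MessageMeta is converted to the C++ impl behind the scenes, so
# explicitly importing morpheus._lib.messages.MessageMeta is no longer needed.
message = ControlMessage()
message.payload(MessageMeta(df))
```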
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1478 --- ci/iwyu/mappings.imp | 2 - examples/llm/cli.py | 2 +- .../vdb_upload/module/web_scraper_module.py | 6 +- .../include/morpheus/messages/control.hpp | 10 +- .../_lib/include/morpheus/messages/meta.hpp | 9 ++ .../include/morpheus/stages/deserialize.hpp | 147 ++++++++++++++---- .../morpheus/utilities/python_util.hpp | 2 + morpheus/_lib/messages/__init__.pyi | 2 + morpheus/_lib/messages/module.cpp | 14 +- morpheus/_lib/src/messages/control.cpp | 7 + morpheus/_lib/src/messages/meta.cpp | 28 +++- morpheus/_lib/src/stages/deserialize.cpp | 122 ++++++--------- morpheus/_lib/stages/__init__.pyi | 8 +- morpheus/_lib/stages/module.cpp | 21 ++- morpheus/controllers/rss_controller.py | 7 +- morpheus/messages/message_meta.py | 12 +- morpheus/modules/preprocess/deserialize.py | 10 +- .../stages/preprocess/deserialize_stage.py | 13 +- tests/controllers/test_rss_controller.py | 11 +- tests/llm/nodes/test_extractor_node.py | 6 +- .../nodes/test_langchain_agent_node_pipe.py | 3 - .../llm/nodes/test_llm_generate_node_pipe.py | 3 - .../llm/nodes/test_llm_retriever_node_pipe.py | 2 - .../nodes/test_prompt_template_node_pipe.py | 1 - tests/llm/nodes/test_rag_node_pipe.py | 3 - tests/llm/services/test_llm_service_pipe.py | 4 - .../task_handlers/test_simple_task_handler.py | 6 +- tests/llm/test_agents_simple_pipe.py | 2 - tests/llm/test_completion_pipe.py | 4 - ...test_extractor_simple_task_handler_pipe.py | 3 - tests/llm/test_rag_standalone_pipe.py | 4 - tests/messages/test_control_message.py | 8 +- tests/{ => messages}/test_message_meta.py | 72 ++++++++- .../test_deserialize_stage_pipe.py | 33 ++-- 34 files changed, 378 insertions(+), 209 deletions(-) rename tests/{ => messages}/test_message_meta.py (68%) rename tests/{ => stages}/test_deserialize_stage_pipe.py (74%) diff --git a/ci/iwyu/mappings.imp b/ci/iwyu/mappings.imp index 27bae40b50..a8d955dbe9 100644 --- a/ci/iwyu/mappings.imp +++ b/ci/iwyu/mappings.imp @@ -118,7 +118,6 @@ { "symbol": ["nlohmann::json", "private", "", "public"] }, # pybind11 -{ "symbol": ["pybind11", "private", "", "public"] }, { "symbol": ["pybind11", "private", "", "public"] }, { "symbol": ["pybind11", "private", "", "public"] }, { "symbol": ["pybind11", "private", "", "public"] }, @@ -133,7 +132,6 @@ { "symbol": ["PyObject", "private", "", "public"] }, { "symbol": ["PySequence_GetItem", "private", "", "public"] }, { "symbol": ["pybind11::arg", "private", "", "public"] }, -{ "symbol": ["pybind11::detail::get_type_info", "private", "", "public"] }, { "symbol": ["pybind11::detail::key_error", "private", "", "public"] }, { "symbol": ["pybind11::detail::overload_cast_impl", "private", "", "public"] }, { "symbol": ["pybind11::detail::str_attr_accessor", "private", "", "public"] }, diff --git a/examples/llm/cli.py b/examples/llm/cli.py index 1ea9198dc1..c8aea20320 100644 --- a/examples/llm/cli.py +++ b/examples/llm/cli.py @@ -32,7 +32,7 @@ callback=parse_log_level, help="Specify the logging level to use.") @click.option('--use_cpp', - default=False, + default=True, type=bool, help=("Whether or not to use C++ node and message types or to prefer python. 
" "Only use as a last resort if bugs are encountered")) diff --git a/examples/llm/vdb_upload/module/web_scraper_module.py b/examples/llm/vdb_upload/module/web_scraper_module.py index 83cb7ed8a2..c361339d49 100644 --- a/examples/llm/vdb_upload/module/web_scraper_module.py +++ b/examples/llm/vdb_upload/module/web_scraper_module.py @@ -18,12 +18,11 @@ import mrc import mrc.core.operators as ops -import pandas as pd import requests import requests_cache from bs4 import BeautifulSoup from langchain.text_splitter import RecursiveCharacterTextSplitter -from pydantic import BaseModel +from pydantic import BaseModel # pylint: disable=no-name-in-module from pydantic import ValidationError import cudf @@ -60,9 +59,6 @@ def download_and_split(msg: MessageMeta, text_splitter, link_column, session) -> df = msg.copy_dataframe() - if isinstance(df, cudf.DataFrame): - df: pd.DataFrame = df.to_pandas() - # Convert the dataframe into a list of dictionaries df_dicts = df.to_dict(orient="records") diff --git a/morpheus/_lib/include/morpheus/messages/control.hpp b/morpheus/_lib/include/morpheus/messages/control.hpp index 7ef4807aba..9adb568f90 100644 --- a/morpheus/_lib/include/morpheus/messages/control.hpp +++ b/morpheus/_lib/include/morpheus/messages/control.hpp @@ -17,7 +17,6 @@ #pragma once -#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include @@ -29,7 +28,6 @@ #include namespace morpheus { -class MessageMeta; #pragma GCC visibility push(default) enum class ControlMessageType @@ -246,7 +244,7 @@ class ControlMessage const nlohmann::json list_metadata() const; /** - * @brief Set the payload object for the control message. + * @brief Get the payload object for the control message. * @param payload * A shared pointer to the message payload. 
 */

@@ -308,6 +306,12 @@ struct ControlMessageProxy
     static pybind11::object get_metadata(ControlMessage& self, std::optional<std::string> const& key);
 
     static pybind11::dict list_metadata(ControlMessage& self);
+
+    /**
+     * @brief Set the payload object given a Python instance of MessageMeta
+     * @param meta
+     */
+    static void payload_from_python_meta(ControlMessage& self, const pybind11::object& meta);
 };
 
 #pragma GCC visibility pop
diff --git a/morpheus/_lib/include/morpheus/messages/meta.hpp b/morpheus/_lib/include/morpheus/messages/meta.hpp
index 7a91dfe769..24687013a8 100644
--- a/morpheus/_lib/include/morpheus/messages/meta.hpp
+++ b/morpheus/_lib/include/morpheus/messages/meta.hpp
@@ -171,6 +171,15 @@ struct MessageMetaInterfaceProxy
      */
     static std::shared_ptr<MessageMeta> init_python(pybind11::object&& data_frame);
 
+    /**
+     * @brief Initialize a MessageMeta cpp object given a Python MessageMeta object and return a shared pointer as
+     * the result
+     *
+     * @param meta : Python MessageMeta object
+     * @return std::shared_ptr<MessageMeta>
+     */
+    static std::shared_ptr<MessageMeta> init_python_meta(const pybind11::object& meta);
+
     /**
      * @brief Get messages count
      *
diff --git a/morpheus/_lib/include/morpheus/stages/deserialize.hpp b/morpheus/_lib/include/morpheus/stages/deserialize.hpp
index 1d62cfcacd..9b93547132 100644
--- a/morpheus/_lib/include/morpheus/stages/deserialize.hpp
+++ b/morpheus/_lib/include/morpheus/stages/deserialize.hpp
@@ -17,32 +17,31 @@
 
 #pragma once
 
+#include "morpheus/messages/control.hpp"
 #include "morpheus/messages/meta.hpp"
 #include "morpheus/messages/multi.hpp"
-#include "morpheus/types.hpp"  // for TensorIndex
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
+#include "morpheus/types.hpp"                  // for TensorIndex
+#include "morpheus/utilities/python_util.hpp"  // for show_warning_message
+#include "morpheus/utilities/string_util.hpp"  // for MORPHEUS_CONCAT_STR
+
+#include 
 #include 
 #include 
-#include 
+#include 
+#include   // for object
+#include   // for PyExc_RuntimeWarning
 #include 
 #include 
-// IWYU pragma: no_include "rxcpp/sources/rx-iterate.hpp"
-#include 
+#include   // IWYU pragma: keep for std::min
+#include   // for exception_ptr
 #include 
+#include   // IWYU pragma: keep for glog
 #include 
-#include 
-#include 
+#include   // for pair
 
 namespace morpheus {
 /****** Component public implementations *******************/
-/****** DeserializationStage********************************/
 
 /**
  * @addtogroup stages
@@ -51,15 +50,26 @@ namespace morpheus {
  */
 #pragma GCC visibility push(default)
 
-/**
- * @brief Slices incoming Dataframes into smaller `batch_size`'d chunks. This stage accepts the `MessageMeta` output
- * from `FileSourceStage`/`KafkaSourceStage` stages breaking them up into into `MultiMessage`'s. This should be one of
- * the first stages after the `Source` object. 
- */ -class DeserializeStage : public mrc::pymrc::PythonNode, std::shared_ptr> +using cm_task_t = std::pair; + +void make_output_message(std::shared_ptr& incoming_message, + TensorIndex start, + TensorIndex stop, + cm_task_t* task, + std::shared_ptr& windowed_message); + +void make_output_message(std::shared_ptr& incoming_message, + TensorIndex start, + TensorIndex stop, + cm_task_t* task, + std::shared_ptr& windowed_message); + +/****** DeserializationStage********************************/ +template +class DeserializeStage : public mrc::pymrc::PythonNode, std::shared_ptr> { public: - using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; + using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; using typename base_t::sink_type_t; using typename base_t::source_type_t; using typename base_t::subscribe_fn_t; @@ -69,17 +79,22 @@ class DeserializeStage : public mrc::pymrc::PythonNode task = nullptr) : + base_t(base_t::op_factory_from_sub_fn(build_operator())), + m_batch_size(batch_size), + m_ensure_sliceable_index(ensure_sliceable_index), + m_task(std::move(task)){}; private: - /** - * TODO(Documentation) - */ subscribe_fn_t build_operator(); TensorIndex m_batch_size; bool m_ensure_sliceable_index{true}; + std::unique_ptr m_task{nullptr}; }; /****** DeserializationStageInterfaceProxy******************/ @@ -89,18 +104,88 @@ class DeserializeStage : public mrc::pymrc::PythonNode> + * @param ensure_sliceable_index Whether or not to call `ensure_sliceable_index()` on all incoming `MessageMeta` + * @return std::shared_ptr>> */ - static std::shared_ptr> init(mrc::segment::Builder& builder, - const std::string& name, - TensorIndex batch_size, - bool ensure_sliceable_index); + static std::shared_ptr>> init_multi( + mrc::segment::Builder& builder, const std::string& name, TensorIndex batch_size, bool ensure_sliceable_index); + + /** + * @brief Create and initialize a DeserializationStage that emits ControlMessage's, and return the result. + * If `task_type` is not None, `task_payload` must also be not None, and vice versa. + * + * @param builder : Pipeline context object reference + * @param name : Name of a stage reference + * @param batch_size : Number of messages to be divided into each batch + * @param ensure_sliceable_index Whether or not to call `ensure_sliceable_index()` on all incoming `MessageMeta` + * @param task_type : Optional task type to be added to all outgoing messages + * @param task_payload : Optional json object describing the task to be added to all outgoing messages + * @return std::shared_ptr>> + */ + static std::shared_ptr>> init_cm( + mrc::segment::Builder& builder, + const std::string& name, + TensorIndex batch_size, + bool ensure_sliceable_index, + const pybind11::object& task_type, + const pybind11::object& task_payload); }; + +template +typename DeserializeStage::subscribe_fn_t DeserializeStage::build_operator() +{ + return [this](rxcpp::observable input, rxcpp::subscriber output) { + return input.subscribe(rxcpp::make_observer( + [this, &output](sink_type_t incoming_message) { + if (!incoming_message->has_sliceable_index()) + { + if (m_ensure_sliceable_index) + { + auto old_index_name = incoming_message->ensure_sliceable_index(); + + if (old_index_name.has_value()) + { + // Generate a warning + LOG(WARNING) << MORPHEUS_CONCAT_STR( + "Incoming MessageMeta does not have a unique and monotonic index. Updating index " + "to be unique. 
Existing index will be retained in column '" + << *old_index_name << "'"); + } + } + else + { + utilities::show_warning_message( + "Detected a non-sliceable index on an incoming MessageMeta. Performance when taking slices " + "of messages may be degraded. Consider setting `ensure_sliceable_index==True`", + PyExc_RuntimeWarning); + } + } + // Loop over the MessageMeta and create sub-batches + for (TensorIndex i = 0; i < incoming_message->count(); i += this->m_batch_size) + { + std::shared_ptr windowed_message{nullptr}; + make_output_message(incoming_message, + i, + std::min(i + this->m_batch_size, incoming_message->count()), + m_task.get(), + windowed_message); + output.on_next(std::move(windowed_message)); + } + }, + [&](std::exception_ptr error_ptr) { + output.on_error(error_ptr); + }, + [&]() { + output.on_completed(); + })); + }; +} + #pragma GCC visibility pop /** @} */ // end of group } // namespace morpheus diff --git a/morpheus/_lib/include/morpheus/utilities/python_util.hpp b/morpheus/_lib/include/morpheus/utilities/python_util.hpp index 3bc7b99276..6fc258d912 100644 --- a/morpheus/_lib/include/morpheus/utilities/python_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/python_util.hpp @@ -27,6 +27,7 @@ using PyObject = _object; // NOLINT(readability-identifier-naming) namespace morpheus::utilities { +#pragma GCC visibility push(default) /** * @brief Shows a python warning using the `warnings.warn` module. These warnings can be suppressed and work different * than `logger.warn()` @@ -38,5 +39,6 @@ namespace morpheus::utilities { void show_warning_message(const std::string& deprecation_message, PyObject* category = nullptr, ssize_t stack_level = 1); +#pragma GCC visibility pop } // namespace morpheus::utilities diff --git a/morpheus/_lib/messages/__init__.pyi b/morpheus/_lib/messages/__init__.pyi index 759d34a9a8..4f7137a60a 100644 --- a/morpheus/_lib/messages/__init__.pyi +++ b/morpheus/_lib/messages/__init__.pyi @@ -58,6 +58,8 @@ class ControlMessage(): def payload(self) -> MessageMeta: ... @typing.overload def payload(self, arg0: MessageMeta) -> None: ... + @typing.overload + def payload(self, meta: object) -> None: ... def remove_task(self, task_type: str) -> dict: ... def set_metadata(self, key: str, value: object) -> None: ... 
@typing.overload diff --git a/morpheus/_lib/messages/module.cpp b/morpheus/_lib/messages/module.cpp index 2f4361a574..7aa21f24d1 100644 --- a/morpheus/_lib/messages/module.cpp +++ b/morpheus/_lib/messages/module.cpp @@ -35,19 +35,11 @@ #include "morpheus/messages/multi_tensor.hpp" #include "morpheus/objects/data_table.hpp" #include "morpheus/objects/mutable_table_ctx_mgr.hpp" -#include "morpheus/types.hpp" // for TensorIndex #include "morpheus/utilities/cudf_util.hpp" #include "morpheus/utilities/string_util.hpp" #include "morpheus/version.hpp" -#include -#include // for Status #include -#include -#include -#include -#include -#include #include // IWYU pragma: keep #include #include @@ -57,9 +49,7 @@ #include // for pymrc::import #include -#include #include -#include #include #include #include @@ -386,6 +376,10 @@ PYBIND11_MODULE(messages, _module) .def("list_metadata", &ControlMessageProxy::list_metadata) .def("payload", pybind11::overload_cast<>(&ControlMessage::payload), py::return_value_policy::move) .def("payload", pybind11::overload_cast&>(&ControlMessage::payload)) + .def( + "payload", + pybind11::overload_cast(&ControlMessageProxy::payload_from_python_meta), + py::arg("meta")) .def("remove_task", &ControlMessageProxy::remove_task, py::arg("task_type")) .def("set_metadata", &ControlMessageProxy::set_metadata, py::arg("key"), py::arg("value")) .def("task_type", pybind11::overload_cast<>(&ControlMessage::task_type)) diff --git a/morpheus/_lib/src/messages/control.cpp b/morpheus/_lib/src/messages/control.cpp index 2e902a6d53..f1413c2650 100644 --- a/morpheus/_lib/src/messages/control.cpp +++ b/morpheus/_lib/src/messages/control.cpp @@ -17,6 +17,8 @@ #include "morpheus/messages/control.hpp" +#include "morpheus/messages/meta.hpp" + #include #include #include @@ -261,4 +263,9 @@ void ControlMessageProxy::config(ControlMessage& self, py::dict& config) self.config(mrc::pymrc::cast_from_pyobject(config)); } +void ControlMessageProxy::payload_from_python_meta(ControlMessage& self, const pybind11::object& meta) +{ + self.payload(MessageMetaInterfaceProxy::init_python_meta(meta)); +} + } // namespace morpheus diff --git a/morpheus/_lib/src/messages/meta.cpp b/morpheus/_lib/src/messages/meta.cpp index c779b56dad..eedce67439 100644 --- a/morpheus/_lib/src/messages/meta.cpp +++ b/morpheus/_lib/src/messages/meta.cpp @@ -65,7 +65,6 @@ MutableTableInfo MessageMeta::get_mutable_info() const std::vector MessageMeta::get_column_names() const { - pybind11::gil_scoped_release no_gil; return m_data->get_info().get_column_names(); } @@ -145,13 +144,37 @@ std::shared_ptr MessageMetaInterfaceProxy::init_python(py::object&& } else { - throw pybind11::value_error("Dataframe is not a cudf or pandas dataframe"); + // check to see if its a Python MessageMeta object + auto msg_meta_cls = py::module_::import("morpheus.messages").attr("MessageMeta"); + if (py::isinstance(data_frame, msg_meta_cls)) + { + return init_python_meta(data_frame); + } + else + { + throw pybind11::value_error("Dataframe is not a cudf or pandas dataframe"); + } } } return MessageMeta::create_from_python(std::move(data_frame)); } +std::shared_ptr MessageMetaInterfaceProxy::init_python_meta(const py::object& meta) +{ + // check to see if its a Python MessageMeta object + auto msg_meta_cls = py::module_::import("morpheus.messages").attr("MessageMeta"); + if (py::isinstance(meta, msg_meta_cls)) + { + DVLOG(10) << "Converting Python impl of MessageMeta to C++ impl"; + return init_python(meta.attr("copy_dataframe")()); + } + else + { + throw 
pybind11::value_error("meta is not a Python instance of MestageMeta"); + } +} + TensorIndex MessageMetaInterfaceProxy::count(MessageMeta& self) { return self.count(); @@ -159,6 +182,7 @@ TensorIndex MessageMetaInterfaceProxy::count(MessageMeta& self) std::vector MessageMetaInterfaceProxy::get_column_names(MessageMeta& self) { + pybind11::gil_scoped_release no_gil; return self.get_column_names(); } diff --git a/morpheus/_lib/src/stages/deserialize.cpp b/morpheus/_lib/src/stages/deserialize.cpp index 43bc952155..c5356251e0 100644 --- a/morpheus/_lib/src/stages/deserialize.cpp +++ b/morpheus/_lib/src/stages/deserialize.cpp @@ -17,91 +17,69 @@ #include "morpheus/stages/deserialize.hpp" -#include "mrc/node/rx_sink_base.hpp" -#include "mrc/node/rx_source_base.hpp" -#include "mrc/node/sink_properties.hpp" -#include "mrc/node/source_properties.hpp" -#include "mrc/segment/object.hpp" -#include "mrc/types.hpp" - +#include "morpheus/messages/control.hpp" #include "morpheus/types.hpp" -#include "morpheus/utilities/python_util.hpp" -#include "morpheus/utilities/string_util.hpp" - -#include -#include -#include -#include -#include -#include // for min -#include -#include -#include -#include -#include -#include +#include +#include // for cast_from_pyobject +// IWYU pragma: no_include "rxcpp/sources/rx-iterate.hpp" namespace morpheus { -// Component public implementations -// ************ DeserializationStage **************************** // -DeserializeStage::DeserializeStage(TensorIndex batch_size, bool ensure_sliceable_index) : - PythonNode(base_t::op_factory_from_sub_fn(build_operator())), - m_batch_size(batch_size), - m_ensure_sliceable_index(ensure_sliceable_index) -{} -DeserializeStage::subscribe_fn_t DeserializeStage::build_operator() +void make_output_message(std::shared_ptr& incoming_message, + TensorIndex start, + TensorIndex stop, + cm_task_t* task, + std::shared_ptr& windowed_message) { - return [this](rxcpp::observable input, rxcpp::subscriber output) { - return input.subscribe(rxcpp::make_observer( - [this, &output](sink_type_t x) { - if (!x->has_sliceable_index()) - { - if (m_ensure_sliceable_index) - { - auto old_index_name = x->ensure_sliceable_index(); - - if (old_index_name.has_value()) - { - // Generate a warning - LOG(WARNING) << MORPHEUS_CONCAT_STR( - "Incoming MessageMeta does not have a unique and monotonic index. Updating index " - "to be unique. Existing index will be retained in column '" - << *old_index_name << "'"); - } - } - else - { - utilities::show_warning_message( - "Detected a non-sliceable index on an incoming MessageMeta. Performance when taking slices " - "of messages may be degraded. 
Consider setting `ensure_sliceable_index==True`", - PyExc_RuntimeWarning); - } - } - - // Make one large MultiMessage - auto full_message = std::make_shared(x, 0, x->count()); + DCHECK_EQ(task, nullptr) << "Task is not supported for MultiMessage"; + auto sliced_msg = std::make_shared(incoming_message, start, stop - start); + windowed_message.swap(sliced_msg); +} - // Loop over the MessageMeta and create sub-batches - for (TensorIndex i = 0; i < x->count(); i += this->m_batch_size) - { - auto next = full_message->get_slice(i, std::min(i + this->m_batch_size, x->count())); +void make_output_message(std::shared_ptr& incoming_message, + TensorIndex start, + TensorIndex stop, + cm_task_t* task, + std::shared_ptr& windowed_message) +{ + auto slidced_meta = std::make_shared(incoming_message, start, stop); + auto message = std::make_shared(); + message->payload(slidced_meta); + if (task) + { + message->add_task(task->first, task->second); + } - output.on_next(std::move(next)); - } - }, - [&](std::exception_ptr error_ptr) { output.on_error(error_ptr); }, - [&]() { output.on_completed(); })); - }; + windowed_message.swap(message); } -// ************ DeserializationStageInterfaceProxy ************* // -std::shared_ptr> DeserializeStageInterfaceProxy::init( +std::shared_ptr>> DeserializeStageInterfaceProxy::init_multi( mrc::segment::Builder& builder, const std::string& name, TensorIndex batch_size, bool ensure_sliceable_index) { - auto stage = builder.construct_object(name, batch_size, ensure_sliceable_index); + return builder.construct_object>(name, batch_size, ensure_sliceable_index, nullptr); +} + +std::shared_ptr>> DeserializeStageInterfaceProxy::init_cm( + mrc::segment::Builder& builder, + const std::string& name, + TensorIndex batch_size, + bool ensure_sliceable_index, + const pybind11::object& task_type, + const pybind11::object& task_payload) +{ + std::unique_ptr task{nullptr}; + + if (!task_type.is_none() && !task_payload.is_none()) + { + task = std::make_unique(pybind11::cast(task_type), + mrc::pymrc::cast_from_pyobject(task_payload)); + } + + auto stage = builder.construct_object>( + name, batch_size, ensure_sliceable_index, std::move(task)); return stage; } + } // namespace morpheus diff --git a/morpheus/_lib/stages/__init__.pyi b/morpheus/_lib/stages/__init__.pyi index 723f870574..580a7a8357 100644 --- a/morpheus/_lib/stages/__init__.pyi +++ b/morpheus/_lib/stages/__init__.pyi @@ -15,7 +15,8 @@ import mrc.core.segment __all__ = [ "AddClassificationsStage", "AddScoresStage", - "DeserializeStage", + "DeserializeControlMessageStage", + "DeserializeMultiMessageStage", "FileSourceStage", "FilterDetectionsStage", "FilterSource", @@ -37,7 +38,10 @@ class AddClassificationsStage(mrc.core.segment.SegmentObject): class AddScoresStage(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, idx2label: typing.Dict[int, str]) -> None: ... pass -class DeserializeStage(mrc.core.segment.SegmentObject): +class DeserializeControlMessageStage(mrc.core.segment.SegmentObject): + def __init__(self, builder: mrc.core.segment.Builder, name: str, batch_size: int, ensure_sliceable_index: bool = True, task_type: object = None, task_payload: object = None) -> None: ... + pass +class DeserializeMultiMessageStage(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, batch_size: int, ensure_sliceable_index: bool = True) -> None: ... 
pass class FileSourceStage(mrc.core.segment.SegmentObject): diff --git a/morpheus/_lib/stages/module.cpp b/morpheus/_lib/stages/module.cpp index 29243331ed..d5f8b6bad0 100644 --- a/morpheus/_lib/stages/module.cpp +++ b/morpheus/_lib/stages/module.cpp @@ -15,6 +15,7 @@ * limitations under the License. */ +#include "morpheus/messages/control.hpp" // for ControlMessage #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/objects/file_types.hpp" // for FileTypes @@ -84,16 +85,28 @@ PYBIND11_MODULE(stages, _module) .def( py::init<>(&AddScoresStageInterfaceProxy::init), py::arg("builder"), py::arg("name"), py::arg("idx2label")); - py::class_, + py::class_>, mrc::segment::ObjectProperties, - std::shared_ptr>>( - _module, "DeserializeStage", py::multiple_inheritance()) - .def(py::init<>(&DeserializeStageInterfaceProxy::init), + std::shared_ptr>>>( + _module, "DeserializeMultiMessageStage", py::multiple_inheritance()) + .def(py::init<>(&DeserializeStageInterfaceProxy::init_multi), py::arg("builder"), py::arg("name"), py::arg("batch_size"), py::arg("ensure_sliceable_index") = true); + py::class_>, + mrc::segment::ObjectProperties, + std::shared_ptr>>>( + _module, "DeserializeControlMessageStage", py::multiple_inheritance()) + .def(py::init<>(&DeserializeStageInterfaceProxy::init_cm), + py::arg("builder"), + py::arg("name"), + py::arg("batch_size"), + py::arg("ensure_sliceable_index") = true, + py::arg("task_type") = py::none(), + py::arg("task_payload") = py::none()); + py::class_, mrc::segment::ObjectProperties, std::shared_ptr>>( diff --git a/morpheus/controllers/rss_controller.py b/morpheus/controllers/rss_controller.py index e13a9c0f8f..88bba4ca0b 100644 --- a/morpheus/controllers/rss_controller.py +++ b/morpheus/controllers/rss_controller.py @@ -19,10 +19,11 @@ from dataclasses import dataclass from urllib.parse import urlparse -import pandas as pd import requests import requests_cache +import cudf + logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None @@ -293,14 +294,14 @@ def fetch_dataframes(self): entry_accumulator.append(entry) if self._batch_size > 0 and len(entry_accumulator) >= self._batch_size: - yield pd.DataFrame(entry_accumulator) + yield cudf.DataFrame(entry_accumulator) entry_accumulator.clear() self._previous_entries = current_entries # Yield any remaining entries. if entry_accumulator: - yield pd.DataFrame(entry_accumulator) + yield cudf.DataFrame(entry_accumulator) else: logger.debug("No new entries found.") diff --git a/morpheus/messages/message_meta.py b/morpheus/messages/message_meta.py index 929465e0c8..4609795591 100644 --- a/morpheus/messages/message_meta.py +++ b/morpheus/messages/message_meta.py @@ -24,6 +24,7 @@ import morpheus._lib.messages as _messages from morpheus.messages.message_base import MessageBase +from morpheus.utils.type_aliases import DataFrameType logger = logging.getLogger(__name__) @@ -78,23 +79,26 @@ class MessageMeta(MessageBase, cpp_class=_messages.MessageMeta): Input rows in dataframe. 
""" - _df: pd.DataFrame = dataclasses.field(init=False) + _df: DataFrameType = dataclasses.field(init=False) _mutex: threading.RLock = dataclasses.field(init=False, repr=False) - def __init__(self, df: pd.DataFrame) -> None: + def __init__(self, df: DataFrameType) -> None: super().__init__() + if isinstance(df, MessageMeta): + df = df.copy_dataframe() + self._mutex = threading.RLock() self._df = df @property - def df(self) -> pd.DataFrame: + def df(self) -> DataFrameType: msg = ("Warning the df property returns a copy, please use the copy_dataframe method or the mutable_dataframe " "context manager to modify the DataFrame in-place instead.") warnings.warn(msg, DeprecationWarning) return self.copy_dataframe() - def copy_dataframe(self) -> pd.DataFrame: + def copy_dataframe(self) -> DataFrameType: return self._df.copy(deep=True) def mutable_dataframe(self): diff --git a/morpheus/modules/preprocess/deserialize.py b/morpheus/modules/preprocess/deserialize.py index 6f993a4ed2..6fe3f0df66 100644 --- a/morpheus/modules/preprocess/deserialize.py +++ b/morpheus/modules/preprocess/deserialize.py @@ -128,12 +128,6 @@ def _process_dataframe_to_control_message(message: MessageMeta, list of ControlMessage A list of ControlMessage objects. """ - - # Because ControlMessages only have a C++ implementation, we need to import the C++ MessageMeta and use that - # 100% of the time - # pylint: disable=morpheus-incorrect-lib-from-import - from morpheus._lib.messages import MessageMeta as MessageMetaCpp - message = _check_slicable_index(message, ensure_sliceable_index) # Now break it up by batches @@ -147,7 +141,7 @@ def _process_dataframe_to_control_message(message: MessageMeta, ctrl_msg = ControlMessage() - ctrl_msg.payload(MessageMetaCpp(df=df.iloc[i:i + batch_size])) + ctrl_msg.payload(MessageMeta(df=df.iloc[i:i + batch_size])) if (task_tuple is not None): ctrl_msg.add_task(task_type=task_tuple[0], task=task_tuple[1]) @@ -156,7 +150,7 @@ def _process_dataframe_to_control_message(message: MessageMeta, else: ctrl_msg = ControlMessage() - ctrl_msg.payload(MessageMetaCpp(message.df)) + ctrl_msg.payload(MessageMeta(message.df)) if (task_tuple is not None): ctrl_msg.add_task(task_type=task_tuple[0], task=task_tuple[1]) diff --git a/morpheus/stages/preprocess/deserialize_stage.py b/morpheus/stages/preprocess/deserialize_stage.py index dd031e2952..10518f2887 100644 --- a/morpheus/stages/preprocess/deserialize_stage.py +++ b/morpheus/stages/preprocess/deserialize_stage.py @@ -46,7 +46,17 @@ class DeserializeStage(MultiMessageStage): ---------- c : `morpheus.config.Config` Pipeline configuration instance. - + ensure_sliceable_index : bool, default = True + Whether or not to call `ensure_sliceable_index()` on all incoming `MessageMeta`, which will replace the index + of the underlying dataframe if the existing one is not unique and monotonic. + message_type : typing.Literal[MultiMessage, ControlMessage], default = MultiMessage + Sets the type of message to be emitted from this stage. + task_type : str, default = None + If specified, adds the specified task to the `ControlMessage`. This parameter is only valid when `message_type` + is set to `ControlMessage`. If not `None`, `task_payload` must also be specified. + task_payload : dict, default = None + If specified, adds the specified task to the `ControlMessage`. This parameter is only valid when `message_type` + is set to `ControlMessage`. If not `None`, `task_type` must also be specified. 
""" def __init__(self, @@ -110,7 +120,6 @@ def compute_schema(self, schema: StageSchema): def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: if (self.supports_cpp_node()): - # TODO(Devin): Skip this for now we get conflicting types for cpp and python message metas out_node = _stages.DeserializeStage(builder, self.unique_name, self._batch_size) builder.make_edge(input_node, out_node) else: diff --git a/tests/controllers/test_rss_controller.py b/tests/controllers/test_rss_controller.py index 670f76c1cc..dad981ad07 100644 --- a/tests/controllers/test_rss_controller.py +++ b/tests/controllers/test_rss_controller.py @@ -19,9 +19,10 @@ from unittest.mock import patch import feedparser -import pandas as pd import pytest +import cudf + from _utils import TEST_DIRS from morpheus.controllers.rss_controller import FeedStats from morpheus.controllers.rss_controller import RSSController @@ -99,7 +100,7 @@ def test_skip_duplicates_feed_inputs(feed_input: str, expected_count: int): controller = RSSController(feed_input=[feed_input, feed_input]) # Pass duplicate feed inputs dataframes_generator = controller.fetch_dataframes() dataframe = next(dataframes_generator, None) - assert isinstance(dataframe, pd.DataFrame) + assert isinstance(dataframe, cudf.DataFrame) assert len(dataframe) == expected_count @@ -124,7 +125,7 @@ def test_fetch_dataframes_url(feed_input: str | list[str], with patch("requests.Session.get", return_value=mock_get_response): dataframes_generator = controller.fetch_dataframes() dataframe = next(dataframes_generator, None) - assert isinstance(dataframe, pd.DataFrame) + assert isinstance(dataframe, cudf.DataFrame) assert "link" in dataframe.columns assert len(dataframe) > 0 @@ -134,7 +135,7 @@ def test_fetch_dataframes_filepath(feed_input: str | list[str]): controller = RSSController(feed_input=feed_input) dataframes_generator = controller.fetch_dataframes() dataframe = next(dataframes_generator, None) - assert isinstance(dataframe, pd.DataFrame) + assert isinstance(dataframe, cudf.DataFrame) assert "link" in dataframe.columns assert len(dataframe) > 0 @@ -143,7 +144,7 @@ def test_fetch_dataframes_filepath(feed_input: str | list[str]): def test_batch_size(feed_input: list[str], batch_size: int): controller = RSSController(feed_input=feed_input, batch_size=batch_size) for df in controller.fetch_dataframes(): - assert isinstance(df, pd.DataFrame) + assert isinstance(df, cudf.DataFrame) assert len(df) <= batch_size diff --git a/tests/llm/nodes/test_extractor_node.py b/tests/llm/nodes/test_extractor_node.py index 1854e32e81..196d99403a 100644 --- a/tests/llm/nodes/test_extractor_node.py +++ b/tests/llm/nodes/test_extractor_node.py @@ -16,12 +16,10 @@ import cudf from _utils.llm import execute_node -# pylint: disable=morpheus-incorrect-lib-from-import -from morpheus._lib.messages import MessageMeta as MessageMetaCpp -# pylint: enable=morpheus-incorrect-lib-from-import from morpheus.llm import LLMNodeBase from morpheus.llm.nodes.extracter_node import ExtracterNode from morpheus.messages import ControlMessage +from morpheus.messages import MessageMeta def test_constructor(): @@ -40,7 +38,7 @@ def test_execute(): reptiles = ['lizards', 'snakes', 'turtles', 'frogs', 'toads'] df = cudf.DataFrame({"insects": insects.copy(), "mammals": mammals.copy(), "reptiles": reptiles.copy()}) message = ControlMessage() - message.payload(MessageMetaCpp(df)) + message.payload(MessageMeta(df)) task_dict = {"input_keys": ["mammals", "reptiles"]} node = ExtracterNode() 
diff --git a/tests/llm/nodes/test_langchain_agent_node_pipe.py b/tests/llm/nodes/test_langchain_agent_node_pipe.py index aa2266313a..376d218673 100644 --- a/tests/llm/nodes/test_langchain_agent_node_pipe.py +++ b/tests/llm/nodes/test_langchain_agent_node_pipe.py @@ -15,8 +15,6 @@ from unittest import mock -import pytest - from _utils import assert_results from _utils.dataset_manager import DatasetManager from morpheus.config import Config @@ -41,7 +39,6 @@ def _build_engine(mock_agent_executor: mock.MagicMock) -> LLMEngine: return engine -@pytest.mark.use_python def test_pipeline(config: Config, dataset_cudf: DatasetManager, mock_agent_executor: mock.MagicMock): input_df = dataset_cudf["filter_probs.csv"] expected_df = input_df.copy(deep=True) diff --git a/tests/llm/nodes/test_llm_generate_node_pipe.py b/tests/llm/nodes/test_llm_generate_node_pipe.py index c0748ccca0..19534dc31d 100644 --- a/tests/llm/nodes/test_llm_generate_node_pipe.py +++ b/tests/llm/nodes/test_llm_generate_node_pipe.py @@ -15,8 +15,6 @@ from unittest import mock -import pytest - import cudf from _utils import assert_results @@ -42,7 +40,6 @@ def _build_engine(mock_llm_client: mock.MagicMock) -> LLMEngine: return engine -@pytest.mark.use_python def test_pipeline(config: Config, mock_llm_client: mock.MagicMock): expected_output = ["response1", "response2"] mock_llm_client.generate_batch_async.return_value = expected_output.copy() diff --git a/tests/llm/nodes/test_llm_retriever_node_pipe.py b/tests/llm/nodes/test_llm_retriever_node_pipe.py index 49a7de8015..2dd6dba8df 100644 --- a/tests/llm/nodes/test_llm_retriever_node_pipe.py +++ b/tests/llm/nodes/test_llm_retriever_node_pipe.py @@ -52,7 +52,6 @@ def _build_engine(vdb_service, **similarity_search_kwargs) -> LLMEngine: return engine -@pytest.mark.use_python def test_pipeline(config: Config): expected_output = [[1, 2, 3], [4, 5, 6]] @@ -83,7 +82,6 @@ def test_pipeline(config: Config): assert actual_df.to_pandas().equals(expected_df.to_pandas()) -@pytest.mark.use_python @pytest.mark.milvus def test_pipeline_with_milvus(config: Config, milvus_service: MilvusVectorDBService, diff --git a/tests/llm/nodes/test_prompt_template_node_pipe.py b/tests/llm/nodes/test_prompt_template_node_pipe.py index 2ee856c6f6..013b5de43b 100644 --- a/tests/llm/nodes/test_prompt_template_node_pipe.py +++ b/tests/llm/nodes/test_prompt_template_node_pipe.py @@ -49,7 +49,6 @@ def _build_engine(template: str, template_format: str) -> LLMEngine: return engine -@pytest.mark.use_python @pytest.mark.parametrize("template,template_format,values,expected_output", [("Hello {name}!", "f-string", { diff --git a/tests/llm/nodes/test_rag_node_pipe.py b/tests/llm/nodes/test_rag_node_pipe.py index 4f9be8d0a8..2e15b9c9fd 100644 --- a/tests/llm/nodes/test_rag_node_pipe.py +++ b/tests/llm/nodes/test_rag_node_pipe.py @@ -15,8 +15,6 @@ from unittest import mock -import pytest - import cudf from _utils import assert_results @@ -53,7 +51,6 @@ def _build_engine(mock_llm_client: mock.MagicMock) -> LLMEngine: return engine -@pytest.mark.use_python def test_pipeline(config: Config, mock_llm_client: mock.MagicMock): expected_output = ["response1", "response2"] mock_llm_client.generate_batch_async.return_value = expected_output.copy() diff --git a/tests/llm/services/test_llm_service_pipe.py b/tests/llm/services/test_llm_service_pipe.py index eb2286e1c6..a04d09a683 100644 --- a/tests/llm/services/test_llm_service_pipe.py +++ b/tests/llm/services/test_llm_service_pipe.py @@ -15,8 +15,6 @@ from unittest import mock -import pytest 
- import cudf from _utils import assert_results @@ -73,7 +71,6 @@ def _run_pipeline(config: Config, llm_service_cls: LLMService, country_prompts: assert_results(sink.get_results()) -@pytest.mark.use_python @mock.patch("asyncio.wrap_future") @mock.patch("asyncio.gather", new_callable=mock.AsyncMock) def test_completion_pipe_nemo( @@ -88,7 +85,6 @@ def test_completion_pipe_nemo( _run_pipeline(config, NeMoLLMService, country_prompts, capital_responses) -@pytest.mark.use_python def test_completion_pipe_openai(config: Config, mock_chat_completion: mock.MagicMock, country_prompts: list[str], diff --git a/tests/llm/task_handlers/test_simple_task_handler.py b/tests/llm/task_handlers/test_simple_task_handler.py index b1e20855b9..3cac5e1d0e 100644 --- a/tests/llm/task_handlers/test_simple_task_handler.py +++ b/tests/llm/task_handlers/test_simple_task_handler.py @@ -17,12 +17,10 @@ from _utils.dataset_manager import DatasetManager from _utils.llm import execute_task_handler -# pylint: disable=morpheus-incorrect-lib-from-import -from morpheus._lib.messages import MessageMeta as MessageMetaCpp -# pylint: enable=morpheus-incorrect-lib-from-import from morpheus.llm import LLMTaskHandler from morpheus.llm.task_handlers.simple_task_handler import SimpleTaskHandler from morpheus.messages import ControlMessage +from morpheus.messages import MessageMeta def test_constructor(): @@ -44,7 +42,7 @@ def test_try_handle(dataset_cudf: DatasetManager): expected_df['reptiles'] = reptiles.copy() message = ControlMessage() - message.payload(MessageMetaCpp(df)) + message.payload(MessageMeta(df)) task_handler = SimpleTaskHandler(['reptiles']) diff --git a/tests/llm/test_agents_simple_pipe.py b/tests/llm/test_agents_simple_pipe.py index bce799b9d9..8839154009 100644 --- a/tests/llm/test_agents_simple_pipe.py +++ b/tests/llm/test_agents_simple_pipe.py @@ -111,7 +111,6 @@ def _run_pipeline(config: Config, @pytest.mark.usefixtures("openai", "openai_api_key", "serpapi_api_key") -@pytest.mark.use_python def test_agents_simple_pipe_integration_openai(config: Config, questions: list[str]): sink = _run_pipeline(config, questions=questions, model_name="gpt-3.5-turbo-instruct") @@ -126,7 +125,6 @@ def test_agents_simple_pipe_integration_openai(config: Config, questions: list[s @pytest.mark.usefixtures("openai", "restore_environ") -@pytest.mark.use_python @mock.patch("langchain.utilities.serpapi.SerpAPIWrapper.aresults") @mock.patch("langchain.OpenAI._agenerate", autospec=True) # autospec is needed as langchain will inspect the function def test_agents_simple_pipe(mock_openai_agenerate: mock.AsyncMock, diff --git a/tests/llm/test_completion_pipe.py b/tests/llm/test_completion_pipe.py index c79493ee9d..db020b6ba6 100644 --- a/tests/llm/test_completion_pipe.py +++ b/tests/llm/test_completion_pipe.py @@ -89,7 +89,6 @@ def _run_pipeline(config: Config, @pytest.mark.usefixtures("nemollm") -@pytest.mark.use_python @mock.patch("asyncio.wrap_future") @mock.patch("asyncio.gather", new_callable=mock.AsyncMock) def test_completion_pipe_nemo( @@ -106,7 +105,6 @@ def test_completion_pipe_nemo( @pytest.mark.usefixtures("openai") -@pytest.mark.use_python def test_completion_pipe_openai(config: Config, mock_chat_completion: mock.MagicMock, countries: list[str], @@ -125,7 +123,6 @@ def test_completion_pipe_openai(config: Config, @pytest.mark.usefixtures("nemollm") @pytest.mark.usefixtures("ngc_api_key") -@pytest.mark.use_python def test_completion_pipe_integration_nemo(config: Config, countries: list[str], capital_responses: list[str]): results = 
_run_pipeline(config, NeMoLLMService, @@ -139,7 +136,6 @@ def test_completion_pipe_integration_nemo(config: Config, countries: list[str], @pytest.mark.usefixtures("openai") @pytest.mark.usefixtures("openai_api_key") -@pytest.mark.use_python def test_completion_pipe_integration_openai(config: Config, countries: list[str], capital_responses: list[str]): results = _run_pipeline(config, OpenAIChatService, diff --git a/tests/llm/test_extractor_simple_task_handler_pipe.py b/tests/llm/test_extractor_simple_task_handler_pipe.py index a358b33f8c..d9caee7b16 100644 --- a/tests/llm/test_extractor_simple_task_handler_pipe.py +++ b/tests/llm/test_extractor_simple_task_handler_pipe.py @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest - from _utils import assert_results from _utils.dataset_manager import DatasetManager from morpheus.config import Config @@ -37,7 +35,6 @@ def _build_engine() -> LLMEngine: return engine -@pytest.mark.use_python def test_extractor_simple_task_handler_pipeline(config: Config, dataset_cudf: DatasetManager): input_df = dataset_cudf["filter_probs.csv"] expected_df = input_df.copy(deep=True) diff --git a/tests/llm/test_rag_standalone_pipe.py b/tests/llm/test_rag_standalone_pipe.py index ec99752052..2ddca7ef11 100644 --- a/tests/llm/test_rag_standalone_pipe.py +++ b/tests/llm/test_rag_standalone_pipe.py @@ -126,7 +126,6 @@ def _run_pipeline(config: Config, @pytest.mark.usefixtures("nemollm") @pytest.mark.milvus -@pytest.mark.use_python @pytest.mark.use_cudf @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) @@ -163,7 +162,6 @@ def test_rag_standalone_pipe_nemo( @pytest.mark.usefixtures("openai") @pytest.mark.milvus -@pytest.mark.use_python @pytest.mark.use_cudf @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) @@ -203,7 +201,6 @@ def test_rag_standalone_pipe_openai(config: Config, @pytest.mark.usefixtures("nemollm") @pytest.mark.usefixtures("ngc_api_key") @pytest.mark.milvus -@pytest.mark.use_python @pytest.mark.use_cudf @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) @@ -236,7 +233,6 @@ def test_rag_standalone_pipe_integration_nemo(config: Config, @pytest.mark.usefixtures("openai") @pytest.mark.usefixtures("openai_api_key") @pytest.mark.milvus -@pytest.mark.use_python @pytest.mark.use_cudf @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) diff --git a/tests/messages/test_control_message.py b/tests/messages/test_control_message.py index f7c8c4c4e8..4e913be066 100644 --- a/tests/messages/test_control_message.py +++ b/tests/messages/test_control_message.py @@ -20,6 +20,9 @@ from morpheus import messages +# pylint: disable=unsupported-membership-test +# pylint: disable=unsubscriptable-object + @pytest.mark.usefixtures("config_only_cpp") def test_control_message_init(): @@ -65,7 +68,7 @@ def test_control_message_tasks(): # Ensure the underlying tasks cannot are not modified message = messages.ControlMessage() tasks = message.get_tasks() - tasks["type_a"] = [{"key_x", "value_x"}] + tasks["type_a"] = [{"key_x", "value_x"}] # pylint: disable=unsupported-assignment-operation assert len(message.get_tasks()) == 0 message = messages.ControlMessage() @@ -98,7 +101,7 @@ def 
test_control_message_metadata():
 
     assert message.get_metadata()["key_y"] == "value_yy"
 
-    message.get_metadata()["not_mutable"] = 5
+    message.get_metadata()["not_mutable"] = 5  # pylint: disable=unsupported-assignment-operation
 
     assert "not_mutable" not in message.get_metadata()
 
@@ -148,7 +151,6 @@ def test_control_message_set():
     assert (control_message.has_task("load"))
 
-@pytest.mark.usefixtures("config_only_cpp")
 def test_control_message_set_and_get_payload():
     df = cudf.DataFrame({
         'col1': [1, 2, 3, 4, 5],
diff --git a/tests/test_message_meta.py b/tests/messages/test_message_meta.py
similarity index 68%
rename from tests/test_message_meta.py
rename to tests/messages/test_message_meta.py
index 1a45e963e8..2ee8dd3c40 100644
--- a/tests/test_message_meta.py
+++ b/tests/messages/test_message_meta.py
@@ -25,7 +25,9 @@
 from _utils.dataset_manager import DatasetManager
 # pylint: disable=morpheus-incorrect-lib-from-import
 from morpheus._lib.messages import MessageMeta as MessageMetaCpp
+from morpheus.config import Config
 from morpheus.messages.message_meta import MessageMeta
+from morpheus.utils.type_aliases import DataFrameType
 
 
 @pytest.fixture(name="index_type", scope="function", params=["normal", "skip", "dup", "down", "updown"])
@@ -83,20 +85,20 @@ def fixture_is_sliceable(index_type: typing.Literal['normal', 'skip', 'dup', 'do
     return index_type not in ("dup", "updown")
 
 
-def test_count(df: cudf.DataFrame):
+def test_count(df: DataFrameType):
 
     meta = MessageMeta(df)
     assert meta.count == len(df)
 
 
-def test_has_sliceable_index(df: cudf.DataFrame, is_sliceable: bool):
+def test_has_sliceable_index(df: DataFrameType, is_sliceable: bool):
 
     meta = MessageMeta(df)
     assert meta.has_sliceable_index() == is_sliceable
 
 
-def test_ensure_sliceable_index(df: cudf.DataFrame, is_sliceable: bool):
+def test_ensure_sliceable_index(df: DataFrameType, is_sliceable: bool):
 
     meta = MessageMeta(df)
 
@@ -106,7 +108,7 @@ def test_ensure_sliceable_index(df: cudf.DataFrame, is_sliceable: bool):
     assert old_index_name == (None if is_sliceable else "_index_")
 
 
-def test_mutable_dataframe(df: cudf.DataFrame):
+def test_mutable_dataframe(df: DataFrameType):
 
     meta = MessageMeta(df)
 
@@ -116,7 +118,7 @@ def test_mutable_dataframe(df: cudf.DataFrame):
     assert meta.copy_dataframe()['v2'].iloc[3] == 47
 
 
-def test_using_ctx_outside_with_block(df: cudf.DataFrame):
+def test_using_ctx_outside_with_block(df: DataFrameType):
 
     meta = MessageMeta(df)
 
@@ -132,7 +134,7 @@ def test_using_ctx_outside_with_block(df: cudf.DataFrame):
     pytest.raises(AttributeError, operator.setitem, ctx, 'col', 5)
 
 
-def test_copy_dataframe(df: cudf.DataFrame):
+def test_copy_dataframe(df: DataFrameType):
 
     meta = MessageMeta(df)
 
@@ -159,3 +161,61 @@ def test_pandas_df_cpp(dataset_pandas: DatasetManager):
     assert isinstance(meta, MessageMetaCpp)
     assert isinstance(meta.df, cudf.DataFrame)
     DatasetManager.assert_compare_df(meta.df, df)
+
+
+def test_cast(config: Config, dataset: DatasetManager):  # pylint: disable=unused-argument
+    """
+    Test the copy constructor
+    """
+    df = dataset["filter_probs.csv"]
+    meta1 = MessageMeta(df)
+
+    meta2 = MessageMeta(meta1)
+    assert isinstance(meta2, MessageMeta)
+
+    DatasetManager.assert_compare_df(meta2.copy_dataframe(), df)
+
+
+@pytest.mark.use_pandas
+@pytest.mark.use_python
+def test_cast_python_to_cpp(dataset: DatasetManager):
+    """
+    Test that we can cast a python MessageMeta to a C++ MessageMeta
+    """
+    df = dataset["filter_probs.csv"]
+
+    py_meta = MessageMeta(df)
+    assert isinstance(py_meta, MessageMeta)
+    assert not isinstance(py_meta, MessageMetaCpp)
+
+    cpp_meta = MessageMetaCpp(py_meta)
+    assert isinstance(cpp_meta, MessageMeta)
+    assert isinstance(cpp_meta, MessageMetaCpp)
+
+    DatasetManager.assert_compare_df(cpp_meta.copy_dataframe(), df)
+
+
+@pytest.mark.use_pandas
+@pytest.mark.use_python
+def test_cast_cpp_to_python(dataset: DatasetManager):
+    """
+    Test that we can cast a C++ MessageMeta to a python MessageMeta
+    """
+    df = dataset["filter_probs.csv"]
+    cpp_meta = MessageMetaCpp(df)
+
+    py_meta = MessageMeta(cpp_meta)
+    assert isinstance(py_meta, MessageMeta)
+    assert not isinstance(py_meta, MessageMetaCpp)
+
+    DatasetManager.assert_compare_df(py_meta.copy_dataframe(), df)
+
+
+def test_get_column_names(df: DataFrameType):
+    """
+    Test that we can get the column names from a MessageMeta
+    """
+    expected_columns = sorted(df.columns.to_list())
+    meta = MessageMeta(df)
+
+    assert sorted(meta.get_column_names()) == expected_columns
diff --git a/tests/test_deserialize_stage_pipe.py b/tests/stages/test_deserialize_stage_pipe.py
similarity index 74%
rename from tests/test_deserialize_stage_pipe.py
rename to tests/stages/test_deserialize_stage_pipe.py
index f3023f4085..5231002a20 100755
--- a/tests/test_deserialize_stage_pipe.py
+++ b/tests/stages/test_deserialize_stage_pipe.py
@@ -18,13 +18,16 @@
 from _utils import assert_results
 from _utils.dataset_manager import DatasetManager
+# pylint: disable=morpheus-incorrect-lib-from-import
+from morpheus._lib.messages import MultiMessage as MultiMessageCpp
 from morpheus.config import Config
+from morpheus.messages import ControlMessage
 from morpheus.messages import MessageMeta
+from morpheus.messages import MultiMessage
 from morpheus.modules.preprocess.deserialize import _process_dataframe_to_multi_message
 from morpheus.pipeline import LinearPipeline
 from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage
 from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage
-from morpheus.stages.postprocess.serialize_stage import SerializeStage
 from morpheus.stages.preprocess.deserialize_stage import DeserializeStage
 
 
@@ -53,9 +56,20 @@ def test_fixing_non_unique_indexes(dataset: DatasetManager):
     assert "_index_" in meta.df.columns
 
 
+def _assert_received_types(sink: CompareDataFrameStage, message_type: type):
+    if message_type is MultiMessage:
+        expected_types = (MultiMessage, MultiMessageCpp)
+    else:
+        expected_types = (ControlMessage, )
+
+    for msg in sink.get_messages():
+        assert isinstance(msg, expected_types)
+
+
 @pytest.mark.use_cudf
 @pytest.mark.parametrize("dup_index", [False, True])
-def test_deserialize_pipe(config: Config, dataset: DatasetManager, dup_index: bool):
+@pytest.mark.parametrize("message_type", [MultiMessage, ControlMessage])
+def test_deserialize_pipe(config: Config, dataset: DatasetManager, dup_index: bool, message_type: type):
     """
     End-to-end test for DeserializeStage
     """
@@ -66,17 +80,18 @@ def test_deserialize_pipe(config: Config, dataset: DatasetManager, dup_index: bo
 
     pipe = LinearPipeline(config)
     pipe.set_source(InMemorySourceStage(config, [filter_probs_df]))
-    pipe.add_stage(DeserializeStage(config))
-    pipe.add_stage(SerializeStage(config, include=[r'^v\d+$']))
-    comp_stage = pipe.add_stage(CompareDataFrameStage(config, dataset.pandas["filter_probs.csv"]))
+    pipe.add_stage(DeserializeStage(config, message_type=message_type))
+    comp_stage = pipe.add_stage(CompareDataFrameStage(config, dataset.pandas["filter_probs.csv"], exclude=["_index_"]))
     pipe.run()
 
+    _assert_received_types(comp_stage, message_type) 
assert_results(comp_stage.get_results()) @pytest.mark.use_cudf @pytest.mark.parametrize("dup_index", [False, True]) -def test_deserialize_multi_segment_pipe(config: Config, dataset: DatasetManager, dup_index: bool): +@pytest.mark.parametrize("message_type", [MultiMessage, ControlMessage]) +def test_deserialize_multi_segment_pipe(config: Config, dataset: DatasetManager, dup_index: bool, message_type: type): """ End-to-end test across multiple segments """ @@ -88,9 +103,9 @@ def test_deserialize_multi_segment_pipe(config: Config, dataset: DatasetManager, pipe = LinearPipeline(config) pipe.set_source(InMemorySourceStage(config, [filter_probs_df])) pipe.add_segment_boundary(MessageMeta) - pipe.add_stage(DeserializeStage(config)) - pipe.add_stage(SerializeStage(config, include=[r'^v\d+$'])) - comp_stage = pipe.add_stage(CompareDataFrameStage(config, dataset.pandas["filter_probs.csv"])) + pipe.add_stage(DeserializeStage(config, message_type=message_type)) + comp_stage = pipe.add_stage(CompareDataFrameStage(config, dataset.pandas["filter_probs.csv"], exclude=["_index_"])) pipe.run() + _assert_received_types(comp_stage, message_type) assert_results(comp_stage.get_results()) From ddd10d17060353bd019b96bbced6a1fd7f2c2fec Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:09:18 -0800 Subject: [PATCH 05/18] Fix or silence warnings emitted during tests (#1501) * Reduce the number of warnings emitted while running tests * Replace usage of deprecated methods/attributes where possible * When no other option is available, silence known warnings; a minimal sketch of the suppression pattern appears below. * Remove out-of-date warning filters ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
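For reference, these changes scope each filter with `warnings.catch_warnings()` so a silenced warning cannot leak into unrelated tests. A minimal sketch of the suppression pattern, assuming a known, benign pandas warning (the `compare_frames` helper is illustrative, not part of this PR):

```python
import warnings

import pandas as pd


def compare_frames(dfa: pd.DataFrame, dfb: pd.DataFrame) -> bool:
    """Compare two DataFrames while silencing a known, benign warning."""
    with warnings.catch_warnings():
        # Filters registered inside this block are undone on exit, so the
        # suppression cannot mask warnings elsewhere in the test suite.
        warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
        return dfa.equals(dfb)
```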
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1501 --- .../module/content_extractor_module.py | 4 +- examples/llm/vdb_upload/run.py | 2 +- morpheus/_lib/src/objects/table_info.cpp | 4 +- morpheus/controllers/rss_controller.py | 2 +- .../service/vdb/milvus_vector_db_service.py | 2 +- morpheus/stages/general/monitor_stage.py | 2 +- .../stages/output/http_server_sink_stage.py | 2 +- morpheus/utils/column_info.py | 9 +++- morpheus/utils/downloader.py | 9 +++- pyproject.toml | 12 +++-- tests/_utils/dataset_manager.py | 6 ++- tests/_utils/kafka.py | 14 ++++-- tests/dfencoder/test_autoencoder.py | 6 ++- tests/dfencoder/test_scalers.py | 38 ++++++++------- .../test_dfp_postprocessing_stage.py | 2 +- .../test_dfp_split_users_stage.py | 8 +++- tests/examples/llm/common/conftest.py | 6 +-- .../test_preprocessing.py | 6 +-- tests/llm/conftest.py | 23 +++++++++ tests/llm/test_rag_standalone_pipe.py | 18 +++---- tests/pipeline/test_pipeline.py | 5 +- tests/test_column_info.py | 48 ++++++++++++------- tests/test_monitor_stage.py | 11 +++-- .../utils/nvt/test_json_flatten_transform.py | 8 +++- tests/utils/nvt/test_mutate_op.py | 14 ++++-- tests/utils/nvt/test_schema_converters.py | 7 ++- tests/utils/nvt/test_transforms.py | 8 +++- 27 files changed, 188 insertions(+), 88 deletions(-) diff --git a/examples/llm/vdb_upload/module/content_extractor_module.py b/examples/llm/vdb_upload/module/content_extractor_module.py index 5e0c2963f5..5b2ed2ce0f 100755 --- a/examples/llm/vdb_upload/module/content_extractor_module.py +++ b/examples/llm/vdb_upload/module/content_extractor_module.py @@ -29,7 +29,7 @@ import pandas as pd from docx import Document from langchain.text_splitter import RecursiveCharacterTextSplitter -from pydantic import BaseModel +from pydantic import BaseModel # pylint: disable=no-name-in-module from pydantic import Field from pydantic import ValidationError from pydantic import validator @@ -197,7 +197,7 @@ def _csv_to_text_converter(input_info: ConverterInputInfo) -> list[str]: raise ValueError("The CSV file must either include a 'content' column or have a " "columns specified in the meta configuration with key 'text_column_names'.") df.fillna(value='', inplace=True) - text_arr = df[text_column_names].apply(lambda x: ' '.join(map(str, x)), axis=1).tolist() + text_arr = df[sorted(text_column_names)].apply(lambda x: ' '.join(map(str, x)), axis=1).tolist() return text_arr diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py index f1b177062e..04627f8359 100644 --- a/examples/llm/vdb_upload/run.py +++ b/examples/llm/vdb_upload/run.py @@ -67,7 +67,7 @@ def run(): ) @click.option( "--model_max_batch_size", - default=64, + default=256, type=click.IntRange(min=1), help="Max batch size to use for the model", ) diff --git a/morpheus/_lib/src/objects/table_info.cpp b/morpheus/_lib/src/objects/table_info.cpp index a454b56222..b781ebac16 100644 --- a/morpheus/_lib/src/objects/table_info.cpp +++ b/morpheus/_lib/src/objects/table_info.cpp @@ -29,7 +29,6 @@ #include // IWYU pragma: keep #include // for find, transform -#include // needed for pybind11::make_tuple #include // for size_t #include // for back_insert_iterator, back_inserter #include @@ -306,7 +305,8 @@ std::optional MutableTableInfo::ensure_sliceable_index() auto df_index = py_df.attr("index"); // Check to see if we actually need the change - if (df_index.attr("is_unique").cast() && 
df_index.attr("is_monotonic").cast()) + if (df_index.attr("is_unique").cast() && (df_index.attr("is_monotonic_increasing").cast() || + df_index.attr("is_monotonic_decreasing").cast())) { // Set the outputname to nullopt old_index_col_name = std::nullopt; diff --git a/morpheus/controllers/rss_controller.py b/morpheus/controllers/rss_controller.py index 88bba4ca0b..5b9c36f369 100644 --- a/morpheus/controllers/rss_controller.py +++ b/morpheus/controllers/rss_controller.py @@ -157,7 +157,7 @@ def _read_file_content(self, file_path: str) -> str: def _try_parse_feed_with_beautiful_soup(self, feed_input: str) -> "feedparser.FeedParserDict": - soup = BeautifulSoup(feed_input, 'lxml') + soup = BeautifulSoup(feed_input, features='xml') # Verify whether the given feed has 'item' or 'entry' tags. if soup.find('item'): diff --git a/morpheus/service/vdb/milvus_vector_db_service.py b/morpheus/service/vdb/milvus_vector_db_service.py index 8855180c6f..37cd82d1ba 100644 --- a/morpheus/service/vdb/milvus_vector_db_service.py +++ b/morpheus/service/vdb/milvus_vector_db_service.py @@ -698,7 +698,7 @@ def _build_schema_conf(self, df: typing.Union[cudf.DataFrame, pd.DataFrame]) -> df = df.to_pandas() # Loop over all of the columns of the first row and build the schema - for col_name, col_val in df.iloc[0].iteritems(): + for col_name, col_val in df.iloc[0].items(): field_dict = { "name": col_name, diff --git a/morpheus/stages/general/monitor_stage.py b/morpheus/stages/general/monitor_stage.py index 3b228f783f..8d709d7d92 100644 --- a/morpheus/stages/general/monitor_stage.py +++ b/morpheus/stages/general/monitor_stage.py @@ -99,7 +99,7 @@ def accepted_types(self) -> typing.Tuple: def supports_cpp_node(self): return False - def on_start(self): + async def start_async(self): """ Starts the pipeline stage's progress bar. """ diff --git a/morpheus/stages/output/http_server_sink_stage.py b/morpheus/stages/output/http_server_sink_stage.py index ae3eb7508a..2a0be0a298 100644 --- a/morpheus/stages/output/http_server_sink_stage.py +++ b/morpheus/stages/output/http_server_sink_stage.py @@ -142,7 +142,7 @@ def supports_cpp_node(self): """Indicates whether or not this stage supports a C++ node.""" return False - def on_start(self): + async def start_async(self): """Starts the HTTP server.""" from morpheus.common import HttpServer self._server = HttpServer(parse_fn=self._request_handler, diff --git a/morpheus/utils/column_info.py b/morpheus/utils/column_info.py index 783bbb88c6..59ce19a6ba 100644 --- a/morpheus/utils/column_info.py +++ b/morpheus/utils/column_info.py @@ -387,7 +387,14 @@ def _process_column(self, df: pd.DataFrame) -> pd.Series: The processed column as a datetime Series. 
""" - return pd.to_datetime(df[self.input_name], infer_datetime_format=True, utc=True).astype(self.get_pandas_dtype()) + dt_series = pd.to_datetime(df[self.input_name], infer_datetime_format=True, utc=True) + + dtype = self.get_pandas_dtype() + if dtype == 'datetime64[ns]': + # avoid deprecation warning about using .astype to convert from a tz-aware type to a tz-naive type + return dt_series.dt.tz_localize(None) + + return dt_series.astype(dtype) @dataclasses.dataclass diff --git a/morpheus/utils/downloader.py b/morpheus/utils/downloader.py index a5eeb4f737..0a68ae6e14 100644 --- a/morpheus/utils/downloader.py +++ b/morpheus/utils/downloader.py @@ -21,6 +21,7 @@ import os import threading import typing +import warnings from enum import Enum import fsspec @@ -131,7 +132,13 @@ def get_dask_client(self): dask.config.set({"distributed.client.heartbeat": self._dask_heartbeat_interval}) if (self._merlin_distributed is None): - self._merlin_distributed = Distributed(client=dask.distributed.Client(self.get_dask_cluster())) + with warnings.catch_warnings(): + # Merlin.Distributed will warn if a client already exists, the client in question is the one created + # and are explicitly passing to it in the constructor. + warnings.filterwarnings("ignore", + message="Existing Dask-client object detected in the current context.*", + category=UserWarning) + self._merlin_distributed = Distributed(client=dask.distributed.Client(self.get_dask_cluster())) return self._merlin_distributed diff --git a/pyproject.toml b/pyproject.toml index 29262c933c..48a811e56d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,16 +25,18 @@ markers = [ ] filterwarnings = [ - # Warning coming from mlflow's usage of numpy - 'ignore:`np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe', + # Ignore our own warning about the df property since we still have to test it 'ignore:Warning the df property returns a copy, please use the copy_dataframe method or the mutable_dataframe context manager to modify the DataFrame in-place instead.', - 'ignore:`np.MachAr` is deprecated \(NumPy 1.22\):DeprecationWarning', - 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning', # Deprecation warning from any project using distutils, currently known sources of this are: # GPUtils https://github.com/anderskm/gputil/issues/48 - # PySpark https://issues.apache.org/jira/browse/SPARK-45390 + # PySpark https://issues.apache.org/jira/browse/SPARK-45390 & https://issues.apache.org/jira/browse/SPARK-38660 'ignore:The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives', + 'ignore:distutils Version classes are deprecated. 
Use packaging.version instead.', + + # Ignore cudf warnings about Pandas being used under the hood for processing json + 'ignore:Using CPU via Pandas to write JSON dataset', + 'ignore:Using CPU via Pandas to read JSON dataset', ] testpaths = ["tests"] diff --git a/tests/_utils/dataset_manager.py b/tests/_utils/dataset_manager.py index a7986418c6..72a277cf21 100644 --- a/tests/_utils/dataset_manager.py +++ b/tests/_utils/dataset_manager.py @@ -18,6 +18,7 @@ import os import random import typing +import warnings import cupy as cp import pandas as pd @@ -235,7 +236,10 @@ def compare_df(cls, dfb: typing.Union[pd.DataFrame, cdf.DataFrame], **compare_args): """Wrapper for `morpheus.utils.compare_df.compare_df`.""" - return compare_df.compare_df(cls._value_as_pandas(dfa), cls._value_as_pandas(dfb), **compare_args) + with warnings.catch_warnings(): + # Ignore performance warnings from pandas triggered by the comparison + warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning) + return compare_df.compare_df(cls._value_as_pandas(dfa), cls._value_as_pandas(dfb), **compare_args) @classmethod def assert_compare_df(cls, diff --git a/tests/_utils/kafka.py b/tests/_utils/kafka.py index db9c055615..21e8dee721 100644 --- a/tests/_utils/kafka.py +++ b/tests/_utils/kafka.py @@ -18,6 +18,7 @@ import subprocess import time import typing +import warnings from collections import namedtuple from functools import partial @@ -73,10 +74,15 @@ def seek_to_beginning(kafka_consumer: "KafkaConsumer", timeout: int = PARTITION_ @pytest.fixture(name='kafka_consumer', scope='function') def kafka_consumer_fixture(kafka_topics: KafkaTopics, _kafka_consumer: "KafkaConsumer"): - _kafka_consumer.subscribe([kafka_topics.output_topic]) - seek_to_beginning(_kafka_consumer) - - yield _kafka_consumer + with warnings.catch_warnings(): + # Ignore warnings specific to the test fixture and not the actual morpheus code + warnings.filterwarnings("ignore", + message=r"Exception ignored in:.*ConsumerCoordinator\.__del__", + category=pytest.PytestUnraisableExceptionWarning) + _kafka_consumer.subscribe([kafka_topics.output_topic]) + seek_to_beginning(_kafka_consumer) + + yield _kafka_consumer def _init_pytest_kafka() -> (bool, Exception): diff --git a/tests/dfencoder/test_autoencoder.py b/tests/dfencoder/test_autoencoder.py index 70a85ec781..43a1b7574b 100755 --- a/tests/dfencoder/test_autoencoder.py +++ b/tests/dfencoder/test_autoencoder.py @@ -16,6 +16,7 @@ import os import typing +import warnings from unittest.mock import patch import numpy as np @@ -493,7 +494,10 @@ def test_auto_encoder_num_only_convergence(train_ae: autoencoder.AutoEncoder): 'num_feat_2': [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1], }) - train_ae.fit(num_df, epochs=50) + with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message="Initializing zero-element tensors is a no-op", category=UserWarning) + train_ae.fit(num_df, epochs=50) avg_loss = np.sum([np.array(loss[1]) for loss in train_ae.logger.train_fts.values()], axis=0) / len(train_ae.logger.train_fts) diff --git a/tests/dfencoder/test_scalers.py b/tests/dfencoder/test_scalers.py index 35e0272958..820169df6e 100644 --- a/tests/dfencoder/test_scalers.py +++ b/tests/dfencoder/test_scalers.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import warnings + import numpy as np import pytest import torch @@ -25,35 +27,41 @@ # pylint: disable=redefined-outer-name -@pytest.fixture(scope="function") -def fit_tensor(): +@pytest.fixture(name="fit_tensor", scope="function") +def fit_tensor_fixture(): yield torch.tensor([4.4, 5.3, 6.5], dtype=torch.float32) -@pytest.fixture(scope="function") -def tensor(): +@pytest.fixture(name="tensor", scope="function") +def tensor_fixture(): yield torch.tensor([7.4, 8.3, 9.5], dtype=torch.float32) -@pytest.fixture(scope="function") -def standard_scaler(fit_tensor): +@pytest.fixture(name="standard_scaler", scope="function") +def standard_scaler_fixture(fit_tensor): scaler = scalers.StandardScaler() scaler.fit(fit_tensor) yield scaler -@pytest.fixture(scope="function") -def modified_scaler(fit_tensor): +@pytest.fixture(name="modified_scaler", scope="function") +def modified_scaler_fixture(fit_tensor): scaler = scalers.ModifiedScaler() scaler.fit(fit_tensor) yield scaler -@pytest.fixture(scope="function") -def gauss_rank_scaler(fit_tensor): +@pytest.fixture(name="gauss_rank_scaler", scope="function") +def gauss_rank_scaler_fixture(fit_tensor): scaler = scalers.GaussRankScaler() - scaler.fit(fit_tensor) - yield scaler + + with warnings.catch_warnings(): + # This warning is triggered by the abnormally small tensor size used in this test + warnings.filterwarnings("ignore", + message=r"n_quantiles \(1000\) is greater than the total number of samples \(3\).*", + category=UserWarning) + scaler.fit(fit_tensor) + yield scaler def test_ensure_float_type(): @@ -111,8 +119,7 @@ def test_modified_scaler_transform(modified_scaler, tensor): assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" # Test alternate path where median absolute deviation is 1 - test = torch.tensor([3.0, 4.0, 4.0, 5.0]) - modified_scaler.fit(test) + modified_scaler.fit(torch.tensor([3.0, 4.0, 4.0, 5.0])) results = modified_scaler.transform(tensor) expected = torch.tensor([5.43, 6.86, 8.78]) assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" @@ -124,8 +131,7 @@ def test_modified_scaler_inverse_transform(modified_scaler, tensor): assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" # Test alternate path where median absolute deviation is 1 - test = torch.tensor([3.0, 4.0, 4.0, 5.0]) - modified_scaler.fit(test) + modified_scaler.fit(torch.tensor([3.0, 4.0, 4.0, 5.0])) results = modified_scaler.inverse_transform(tensor) expected = torch.tensor([8.64, 9.2, 9.95]) assert torch.equal(torch.round(results, decimals=2), expected), f"{results} != {expected}" diff --git a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py index d3f169f4dc..4b13bacde5 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py @@ -51,7 +51,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, # post-process should replace nans, lets add a nan to the DF with dfp_multi_ae_message.meta.mutable_dataframe() as df: - df['v2'][10] = np.nan + df.loc[10, 'v2'] = np.nan df['event_time'] = '' set_log_level(morpheus_log_level) diff --git a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py index 635a6dbc78..8189df73fe 100644 --- 
a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py @@ -16,6 +16,7 @@ import json import os import typing +import warnings import pytest @@ -99,7 +100,12 @@ def test_extract_users(config: Config, skip_users=skip_users, only_users=only_users) - results = stage.extract_users(df) + with warnings.catch_warnings(): + # Ignore warning about the log message not being set. This happens whenever there aren't any output_messages + warnings.filterwarnings("ignore", + message="Must set log msg before end of context! Skipping log", + category=UserWarning) + results = stage.extract_users(df) if not include_generic and not include_individual: # Extra check for weird combination diff --git a/tests/examples/llm/common/conftest.py b/tests/examples/llm/common/conftest.py index 2a1e58ba22..c0bd0ad9ba 100644 --- a/tests/examples/llm/common/conftest.py +++ b/tests/examples/llm/common/conftest.py @@ -52,10 +52,10 @@ def import_content_extractor_module(restore_sys_path): # pylint: disable=unused return content_extractor_module -@pytest.fixture(name="nemollm", autouse=True, scope='session') -def nemollm_fixture(fail_missing: bool): +@pytest.fixture(name="langchain", autouse=True, scope='session') +def langchain_fixture(fail_missing: bool): """ - All the tests in this subdir require nemollm + All the tests in this subdir require langchain """ skip_reason = ("Tests for the WebScraperStage require the langchain package to be installed, to install this run:\n" diff --git a/tests/examples/ransomware_detection/test_preprocessing.py b/tests/examples/ransomware_detection/test_preprocessing.py index 9faf81f999..a72225edbf 100644 --- a/tests/examples/ransomware_detection/test_preprocessing.py +++ b/tests/examples/ransomware_detection/test_preprocessing.py @@ -139,10 +139,10 @@ def test_merge_curr_and_prev_snapshots(self, config: Config, rwd_conf: dict, dat } expected_df = dataset_pandas['examples/ransomware_detection/dask_results.csv'].fillna('') - expected_df['pid_process'][1] = 'test_val1' - expected_df['pid_process'][3] = 'test_val2' + expected_df.loc[1, 'pid_process'] = 'test_val1' + expected_df.loc[3, 'pid_process'] = 'test_val2' - expected_df['snapshot_id'] = snapshot_ids + expected_df.loc[:, 'snapshot_id'] = snapshot_ids expected_df.index = expected_df.snapshot_id stage._merge_curr_and_prev_snapshots(df, source_pid_process) diff --git a/tests/llm/conftest.py b/tests/llm/conftest.py index 26ca672b1f..f92a16d148 100644 --- a/tests/llm/conftest.py +++ b/tests/llm/conftest.py @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio +import typing +from unittest import mock + import pytest from _utils import require_env_variable @@ -94,3 +98,22 @@ def serpapi_api_key_fixture(): yield require_env_variable( varname="SERPAPI_API_KEY", reason="serpapi integration tests require the `SERPAPI_API_KEY` environment variable to be defined.") + + +@pytest.mark.usefixtures("nemollm") +@pytest.fixture(name="mock_nemollm") +def mock_nemollm_fixture(mock_nemollm: mock.MagicMock): + + # The generate function is a blocking call that returns a future when return_type="async" + async def mock_task(fut: asyncio.Future, value: typing.Any = mock.DEFAULT): + fut.set_result(value) + + def create_future(*args, **kwargs) -> asyncio.Future: # pylint: disable=unused-argument + event_loop = asyncio.get_event_loop() + fut = event_loop.create_future() + event_loop.create_task(mock_task(fut, mock.DEFAULT)) + return fut + + mock_nemollm.generate.side_effect = create_future + + yield mock_nemollm diff --git a/tests/llm/test_rag_standalone_pipe.py b/tests/llm/test_rag_standalone_pipe.py index 2ddca7ef11..b9577a89ef 100644 --- a/tests/llm/test_rag_standalone_pipe.py +++ b/tests/llm/test_rag_standalone_pipe.py @@ -129,24 +129,18 @@ def _run_pipeline(config: Config, @pytest.mark.use_cudf @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) -@mock.patch("asyncio.wrap_future") -@mock.patch("asyncio.gather", new_callable=mock.AsyncMock) -def test_rag_standalone_pipe_nemo( - mock_asyncio_gather: mock.AsyncMock, - mock_asyncio_wrap_future: mock.MagicMock, # pylint: disable=unused-argument - config: Config, - mock_nemollm: mock.MagicMock, - dataset: DatasetManager, - milvus_server_uri: str, - repeat_count: int, - import_mod: types.ModuleType): +def test_rag_standalone_pipe_nemo(config: Config, + mock_nemollm: mock.MagicMock, + dataset: DatasetManager, + milvus_server_uri: str, + repeat_count: int, + import_mod: types.ModuleType): collection_name = "test_rag_standalone_pipe_nemo" populate_milvus(milvus_server_uri=milvus_server_uri, collection_name=collection_name, resource_kwargs=import_mod.build_default_milvus_config(embedding_size=EMBEDDING_SIZE), df=dataset["service/milvus_rss_data.json"], overwrite=True) - mock_asyncio_gather.return_value = [mock.MagicMock() for _ in range(repeat_count)] mock_nemollm.post_process_generate_response.side_effect = [{"text": EXPECTED_RESPONSE} for _ in range(repeat_count)] results = _run_pipeline( config=config, diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 1d93dc22c0..e763ca95c9 100755 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -98,7 +98,10 @@ def _run_pipeline(filter_probs_df: DataFrameType, pipe = LinearPipeline(config) pipe.set_source(SourceTestStage(config, [filter_probs_df], **source_callbacks)) pipe.add_stage(SinkTestStage(config, **sink_callbacks)) - pipe.run() + + with pytest.deprecated_call(match="The on_start method is deprecated and may be removed in the future.*"): + # The sink stage ensures that the on_start callback method still works, even though it is deprecated. 
+ pipe.run() @pytest.mark.use_cudf diff --git a/tests/test_column_info.py b/tests/test_column_info.py index 6a3f2e97eb..4cd71a9804 100644 --- a/tests/test_column_info.py +++ b/tests/test_column_info.py @@ -27,6 +27,7 @@ import cudf from _utils import TEST_DIRS +from morpheus.io.deserializers import read_file_to_df from morpheus.utils.column_info import ColumnInfo from morpheus.utils.column_info import CustomColumn from morpheus.utils.column_info import DataFrameInputSchema @@ -38,12 +39,29 @@ from morpheus.utils.schema_transforms import process_dataframe -@pytest.mark.use_python -def test_dataframe_input_schema_with_json_cols(): +@pytest.fixture(name="_azure_ad_logs_pdf", scope="module") +def _azure_ad_logs_pdf_fixture(): + # Explicitly reading this in to ensure that lines=False. + # Using pandas since the C++ impl for read_file_to_df doesn't support parser_kwargs, this also avoids a warning + # that cudf.read_json uses pandas.read_json under the hood. src_file = os.path.join(TEST_DIRS.tests_data_dir, "azure_ad_logs.json") + yield read_file_to_df(src_file, df_type='pandas', parser_kwargs={'lines': False}) + + +@pytest.fixture(name="azure_ad_logs_pdf", scope="function") +def azure_ad_logs_pdf_fixture(_azure_ad_logs_pdf: pd.DataFrame): + yield _azure_ad_logs_pdf.copy(deep=True) + + +@pytest.fixture(name="azure_ad_logs_cdf", scope="function") +def azure_ad_logs_cdf_fixture(_azure_ad_logs_pdf: pd.DataFrame): + # cudf.from_pandas essentially does a deep copy, so we can use this to ensure that the source pandas df is not + # modified + yield cudf.from_pandas(_azure_ad_logs_pdf) - input_df = cudf.read_json(src_file) +@pytest.mark.use_python +def test_dataframe_input_schema_with_json_cols(azure_ad_logs_cdf: cudf.DataFrame): raw_data_columns = [ 'time', 'resourceId', @@ -63,8 +81,8 @@ def test_dataframe_input_schema_with_json_cols(): 'properties' ] - assert len(input_df.columns) == 16 - assert list(input_df.columns) == raw_data_columns + assert len(azure_ad_logs_cdf.columns) == 16 + assert list(azure_ad_logs_cdf.columns) == raw_data_columns column_info = [ DateTimeColumn(name="timestamp", dtype='datetime64[ns]', input_name="time"), @@ -89,10 +107,10 @@ def test_dataframe_input_schema_with_json_cols(): schema = DataFrameInputSchema(json_columns=["properties"], column_info=column_info) - df_processed_schema = process_dataframe(input_df, schema) + df_processed_schema = process_dataframe(azure_ad_logs_cdf, schema) processed_df_cols = df_processed_schema.columns - assert len(input_df) == len(df_processed_schema) + assert len(azure_ad_logs_cdf) == len(df_processed_schema) assert len(processed_df_cols) == len(column_info) assert "timestamp" in processed_df_cols assert "userId" in processed_df_cols @@ -100,17 +118,13 @@ def test_dataframe_input_schema_with_json_cols(): assert "properties.userPrincipalName" not in processed_df_cols nvt_workflow = create_and_attach_nvt_workflow(schema) - df_processed_workflow = process_dataframe(input_df, nvt_workflow) + df_processed_workflow = process_dataframe(azure_ad_logs_cdf, nvt_workflow) assert df_processed_schema.equals(df_processed_workflow) @pytest.mark.use_python -def test_dataframe_input_schema_without_json_cols(): - src_file = os.path.join(TEST_DIRS.tests_data_dir, "azure_ad_logs.json") - - input_df = pd.read_json(src_file) - - assert len(input_df.columns) == 16 # pylint:disable=no-member +def test_dataframe_input_schema_without_json_cols(azure_ad_logs_pdf: pd.DataFrame): + assert len(azure_ad_logs_pdf.columns) == 16 column_info = [ 
DateTimeColumn(name="timestamp", dtype='datetime64[ns]', input_name="time"), @@ -119,10 +133,10 @@ def test_dataframe_input_schema_without_json_cols(): schema = DataFrameInputSchema(column_info=column_info) - df_processed = process_dataframe(input_df, schema) + df_processed = process_dataframe(azure_ad_logs_pdf, schema) processed_df_cols = df_processed.columns - assert len(input_df) == len(df_processed) + assert len(azure_ad_logs_pdf) == len(df_processed) assert len(processed_df_cols) == len(column_info) assert "timestamp" in processed_df_cols assert "time" not in processed_df_cols @@ -152,7 +166,7 @@ def test_dataframe_input_schema_without_json_cols(): # When trying to concat columns that don't exist in the dataframe, an exception is raised. with pytest.raises(Exception): - process_dataframe(input_df, schema2) + process_dataframe(azure_ad_logs_pdf, schema2) @pytest.mark.use_python diff --git a/tests/test_monitor_stage.py b/tests/test_monitor_stage.py index ad026bda1c..91bc936878 100755 --- a/tests/test_monitor_stage.py +++ b/tests/test_monitor_stage.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import inspect import logging import os @@ -59,13 +60,13 @@ def two_x(x): @mock.patch('morpheus.controllers.monitor_controller.MorpheusTqdm') -def test_on_start(mock_morph_tqdm: mock.MagicMock, config: Config): +def test_start_async(mock_morph_tqdm: mock.MagicMock, config: Config): mock_morph_tqdm.return_value = mock_morph_tqdm stage = MonitorStage(config, log_level=logging.WARNING) assert stage._mc._progress is None - stage.on_start() + asyncio.run(stage.start_async()) mock_morph_tqdm.assert_called_once() mock_morph_tqdm.reset.assert_called_once() assert stage._mc._progress is mock_morph_tqdm @@ -82,7 +83,7 @@ def test_stop(mock_morph_tqdm: mock.MagicMock, config: Config): stage.stop() mock_morph_tqdm.assert_not_called() - stage.on_start() + asyncio.run(stage.start_async()) stage.stop() mock_morph_tqdm.close.assert_called_once() @@ -94,7 +95,7 @@ def test_refresh(mock_morph_tqdm: mock.MagicMock, config: Config): stage = MonitorStage(config, log_level=logging.WARNING) assert stage._mc._progress is None - stage.on_start() + asyncio.run(stage.start_async()) stage._mc.refresh_progress(None) mock_morph_tqdm.refresh.assert_called_once() @@ -138,7 +139,7 @@ def test_progress_sink(mock_morph_tqdm: mock.MagicMock, config: Config): mock_morph_tqdm.return_value = mock_morph_tqdm stage = MonitorStage(config, log_level=logging.WARNING) - stage.on_start() + asyncio.run(stage.start_async()) stage._mc.progress_sink(None) assert stage._mc._determine_count_fn is None diff --git a/tests/utils/nvt/test_json_flatten_transform.py b/tests/utils/nvt/test_json_flatten_transform.py index bce4f9ad28..e0657925f5 100644 --- a/tests/utils/nvt/test_json_flatten_transform.py +++ b/tests/utils/nvt/test_json_flatten_transform.py @@ -12,9 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import warnings + import pandas as pd import pytest -from nvtabular.ops.operator import ColumnSelector + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + from nvtabular.ops.operator import ColumnSelector import cudf diff --git a/tests/utils/nvt/test_mutate_op.py b/tests/utils/nvt/test_mutate_op.py index db2029d503..3023d9701e 100644 --- a/tests/utils/nvt/test_mutate_op.py +++ b/tests/utils/nvt/test_mutate_op.py @@ -12,13 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import numpy as np import pandas as pd import pytest -from merlin.core.dispatch import DataFrameType -from merlin.schema import ColumnSchema -from merlin.schema import Schema -from nvtabular.ops.operator import ColumnSelector + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + from merlin.core.dispatch import DataFrameType + from merlin.schema import ColumnSchema + from merlin.schema import Schema + from nvtabular.ops.operator import ColumnSelector from morpheus.utils.nvt.mutate import MutateOp diff --git a/tests/utils/nvt/test_schema_converters.py b/tests/utils/nvt/test_schema_converters.py index 1efe4109f1..9b00440d1a 100644 --- a/tests/utils/nvt/test_schema_converters.py +++ b/tests/utils/nvt/test_schema_converters.py @@ -13,8 +13,13 @@ # limitations under the License. import json +import warnings + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + import nvtabular as nvt -import nvtabular as nvt import pandas as pd import pytest diff --git a/tests/utils/nvt/test_transforms.py b/tests/utils/nvt/test_transforms.py index 95607c5c60..96df15447c 100644 --- a/tests/utils/nvt/test_transforms.py +++ b/tests/utils/nvt/test_transforms.py @@ -12,9 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + import pandas as pd import pytest -from nvtabular.ops.operator import ColumnSelector + +with warnings.catch_warnings(): + # Ignore warning regarding tensorflow not being installed + warnings.filterwarnings("ignore", message=".*No module named 'tensorflow'", category=UserWarning) + from nvtabular.ops.operator import ColumnSelector from _utils.dataset_manager import DatasetManager from morpheus.utils.nvt.transforms import json_flatten From e30fe76d492c90a26deed51c3292bf4c9768c3be Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:36:34 -0800 Subject: [PATCH 06/18] Fix intermittent segfault on interpreter shutdown (#1513) * Remove static reference to the `cupy` python module in `CupyUtil` which was causing a segfault on interpreter shutdown. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Devin Robison (https://github.com/drobison00) URL: https://github.com/nv-morpheus/Morpheus/pull/1513 --- .../include/morpheus/utilities/cupy_util.hpp | 2 -- morpheus/_lib/src/utilities/cupy_util.cpp | 16 ++-------------- tests/test_tensor_memory.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp index 3d035f3d12..e197d9e9e4 100644 --- a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp @@ -43,8 +43,6 @@ struct CupyUtil using tensor_map_t = std::map<std::string, TensorObject>; using py_tensor_map_t = std::map<std::string, pybind11::object>; - static pybind11::object cp_module; // handle to cupy module - /** * @brief Import and return the cupy module. Requires GIL to have already been acquired. * diff --git a/morpheus/_lib/src/utilities/cupy_util.cpp b/morpheus/_lib/src/utilities/cupy_util.cpp index 2a2248f2a6..ebc40e022a 100644 --- a/morpheus/_lib/src/utilities/cupy_util.cpp +++ b/morpheus/_lib/src/utilities/cupy_util.cpp @@ -23,8 +23,7 @@ #include "morpheus/utilities/tensor_util.hpp" #include -#include // for COMPACT_GOOGLE_LOG_FATAL, DCHECK, LogMessageFatal -#include +#include // for COMPACT_GOOGLE_LOG_FATAL, DCHECK, LogMessageFatal #include // IWYU pragma: keep #include // IWYU pragma: keep #include @@ -33,7 +32,6 @@ #include // for cuda_stream_per_thread #include // for device_buffer -#include // for array #include // for uintptr_t #include // for make_shared #include @@ -46,20 +44,10 @@ namespace morpheus { namespace py = pybind11; -pybind11::object CupyUtil::cp_module = pybind11::none(); - pybind11::module_ CupyUtil::get_cp() { DCHECK(PyGILState_Check() != 0); - - if (cp_module.is_none()) - { - cp_module = pybind11::module_::import("cupy"); - } - - auto m = pybind11::cast(cp_module); - - return m; + return pybind11::cast(pybind11::module_::import("cupy")); } bool CupyUtil::is_cupy_array(pybind11::object test_obj) diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index 1efc58031c..e3f072277c 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -220,3 +220,21 @@ def test_tensorindex_bug(config: Config, tensor_cls: type, shape: typing.Tuple[i tensor_a = mem.get_tensor('a') assert tensor_a.shape == shape assert tensor_a.nbytes == shape[0] * shape[1] * 4 + + +def test_tensor_update(config: Config): + tensor_data = { + "input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1]), "segment_ids": cp.array([0, 0, 1]) + } + tensor_memory = TensorMemory(count=3, tensors=tensor_data) + + # Update tensors with new data + new_tensors = { + "input_ids": cp.array([4, 5, 6]), "input_mask": cp.array([1, 0, 1]), "segment_ids": cp.array([1, 1, 0]) + } + + tensor_memory.set_tensors(new_tensors) + + for (key, cp_arr) in new_tensors.items(): + tensor = tensor_memory.get_tensor(key) + assert cp.allclose(tensor, cp_arr) From c5bc7dab0e760f86581254ede77e9e89647e1d79 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 15 Feb 2024 09:22:25 -0600 Subject: [PATCH 07/18] Update CI Containers (#1521) ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. 
- When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1521 --- .github/workflows/pr.yaml | 4 ++-- ci/scripts/run_ci_local.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e2ccaa59ea..7921fb430a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -94,7 +94,7 @@ jobs: # Disable conda upload for now, once we have morpheus packages in conda forge set the value to # !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' conda_upload_label: "" - container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240209 - test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240209 + container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240214 + test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240214 secrets: NGC_API_KEY: ${{ secrets.NGC_API_KEY }} diff --git a/ci/scripts/run_ci_local.sh b/ci/scripts/run_ci_local.sh index 8da5630043..1575555ad3 100755 --- a/ci/scripts/run_ci_local.sh +++ b/ci/scripts/run_ci_local.sh @@ -51,7 +51,7 @@ GIT_BRANCH=$(git branch --show-current) GIT_COMMIT=$(git log -n 1 --pretty=format:%H) LOCAL_CI_TMP=${LOCAL_CI_TMP:-${MORPHEUS_ROOT}/.tmp/local_ci_tmp} -CONTAINER_VER=${CONTAINER_VER:-240209} +CONTAINER_VER=${CONTAINER_VER:-240214} CUDA_VER=${CUDA_VER:-12.1} DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""} From 9cd886378c8f27567e582ffa43debbaf7cdd3856 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Thu, 15 Feb 2024 15:11:20 -0500 Subject: [PATCH 08/18] Grafana log monitoring and error alerting example (#1463) + Add Loki service to production DFP docker-compose + Add Loki data source to Grafana + Add `DFP Logs` dashboard to Grafana + Pipeline run script that uses Loki logging handler + Update README for Grafana DFP Example + Add instructions for setting up error alerting in Grafana + Update Morpheus logger to accept additional handlers so that we can add the Loki handler. Closes #1336 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
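The logger change can be pictured with a small sketch built only on the standard library; the `configure_logging_sketch` function and the `"morpheus"` logger name are assumptions for illustration, not the actual implementation in `morpheus/utils/logger.py`:

```python
import logging
import logging.handlers
import queue


def configure_logging_sketch(*extra_handlers: logging.Handler, log_level: int = logging.INFO) -> None:
    """Rough sketch: route records through a queue so that added handlers
    (such as a Loki handler) do not block the pipeline threads."""
    logger = logging.getLogger("morpheus")
    logger.setLevel(log_level)

    # Emitting a record only enqueues it, which is cheap and non-blocking.
    log_queue = queue.Queue()
    logger.addHandler(logging.handlers.QueueHandler(log_queue))

    # The listener drains the queue on its own thread and fans each record
    # out to the console handler plus any caller-supplied handlers.
    listener = logging.handlers.QueueListener(log_queue, logging.StreamHandler(), *extra_handlers)
    listener.start()
```

Under this shape, a call such as `configure_logging_sketch(loki_handler, log_level=logging.DEBUG)` mirrors the `configure_logging(loki_handler, log_level=log_level)` usage shown in the Grafana README below.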
Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1463 --- .../guides/1_simple_python_stage.md | 12 + .../production/conda_env.yml | 4 + .../production/docker-compose.yml | 19 +- .../production/grafana/README.md | 128 +++-- .../production/grafana/config/grafana.ini | 2 +- .../production/grafana/config/loki-config.yml | 65 +++ ...DFP_Dashboard.json => dfp_detections.json} | 2 +- .../grafana/dashboards/dfp_logs.json | 78 +++ .../grafana/datasources/datasources.yaml | 6 + ...nshot.png => dfp_detections_dashboard.png} | 0 .../grafana/img/dfp_error_alert_setup.png | 3 + .../grafana/img/dfp_logs_dashboard.png | 3 + .../production/grafana/run.py | 476 ++++++++++++++++++ morpheus/stages/general/buffer_stage.py | 5 +- morpheus/stages/general/delay_stage.py | 5 +- morpheus/utils/logger.py | 20 +- tests/test_logger.py | 159 ++++++ tests/tests_data/logging.json | 3 + 18 files changed, 945 insertions(+), 45 deletions(-) create mode 100644 examples/digital_fingerprinting/production/grafana/config/loki-config.yml rename examples/digital_fingerprinting/production/grafana/dashboards/{DFP_Dashboard.json => dfp_detections.json} (99%) create mode 100644 examples/digital_fingerprinting/production/grafana/dashboards/dfp_logs.json rename examples/digital_fingerprinting/production/grafana/img/{screenshot.png => dfp_detections_dashboard.png} (100%) create mode 100644 examples/digital_fingerprinting/production/grafana/img/dfp_error_alert_setup.png create mode 100644 examples/digital_fingerprinting/production/grafana/img/dfp_logs_dashboard.png create mode 100644 examples/digital_fingerprinting/production/grafana/run.py create mode 100644 tests/test_logger.py create mode 100644 tests/tests_data/logging.json diff --git a/docs/source/developer_guide/guides/1_simple_python_stage.md b/docs/source/developer_guide/guides/1_simple_python_stage.md index 95a71012e9..adb813a8b2 100644 --- a/docs/source/developer_guide/guides/1_simple_python_stage.md +++ b/docs/source/developer_guide/guides/1_simple_python_stage.md @@ -230,6 +230,18 @@ Before constructing the pipeline, we need to do a bit of environment configurati ```python configure_logging(log_level=logging.DEBUG) ``` +We use the default configuration with the `DEBUG` logging level. The logger will output to both the console and a file. The logging handlers are non-blocking since they utilize a queue to send the log messages on a separate thread. + +We can also use `configure_logging` to add one or more logging handlers to the default configuration. The added handlers will also be non-blocking. The following is from the Grafana example (`examples/digital_fingerprinting/production/grafana/README.md`) where we add a [Loki](https://grafana.com/oss/loki/) logging handler to also publish Morpheus logs to a Loki log aggregation server. +```python +loki_handler = logging_loki.LokiHandler( + url=f"{loki_url}/loki/api/v1/push", + tags={"app": "morpheus"}, + version="1", +) + +configure_logging(loki_handler, log_level=log_level) +``` Next, we will build a Morpheus `Config` object. We will cover setting some common configuration parameters in the next guide. 
For now, it is important to know that we will always need to build a `Config` object: ```python diff --git a/examples/digital_fingerprinting/production/conda_env.yml b/examples/digital_fingerprinting/production/conda_env.yml index 1b6117cf80..80e40b9f88 100644 --- a/examples/digital_fingerprinting/production/conda_env.yml +++ b/examples/digital_fingerprinting/production/conda_env.yml @@ -32,3 +32,7 @@ dependencies: - nvtabular=23.06 - papermill - s3fs>=2023.6 + + ##### Pip Dependencies (keep sorted!) ####### + - pip: + - python-logging-loki diff --git a/examples/digital_fingerprinting/production/docker-compose.yml b/examples/digital_fingerprinting/production/docker-compose.yml index 47881c3e58..7cf1c636b1 100644 --- a/examples/digital_fingerprinting/production/docker-compose.yml +++ b/examples/digital_fingerprinting/production/docker-compose.yml @@ -145,9 +145,26 @@ services: - ./grafana/config/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml - ./grafana/dashboards/:/var/lib/grafana/dashboards/ - ./grafana/datasources/:/etc/grafana/provisioning/datasources/ - - ./morpheus:/workspace + - ./grafana:/workspace ports: - "3000:3000" + networks: + - frontend + - backend + depends_on: + - loki + + loki: + image: grafana/loki:2.9.3 + volumes: + - ./grafana/config/loki-config.yml:/etc/loki/loki-config.yml + ports: + - "3100:3100" + networks: + - frontend + - backend + restart: unless-stopped + command: -config.file=/etc/loki/loki-config.yml networks: frontend: diff --git a/examples/digital_fingerprinting/production/grafana/README.md b/examples/digital_fingerprinting/production/grafana/README.md index 8cdf47d2b3..f79fe6e92b 100644 --- a/examples/digital_fingerprinting/production/grafana/README.md +++ b/examples/digital_fingerprinting/production/grafana/README.md @@ -14,24 +14,31 @@ # limitations under the License. --> -# Grafana DFP Dashboard Example +# Using Grafana with Morpheus DFP Pipeline -This example demonstrates how to use [Grafana](https://grafana.com/grafana/) to visualize the inference results from the [Azure DFP pipeline example](../production/README.md). +This example builds on the [Azure DFP pipeline example](../production/README.md) to demonstrate how [Grafana](https://grafana.com/grafana/) can be used for log monitoring, error alerting, and inference results visualization. ## Grafana Configuration -### CSV data source plugin +The data sources and dashboards in this example are managed using config files. [Grafana's provisioning system](https://grafana.com/docs/grafana/latest/administration/provisioning/) then uses these files to add the data sources and dashboards to Grafana upon startup. -The [CSV data source plugin](https://grafana.com/grafana/plugins/marcusolsson-csv-datasource/) is installed to Grafana to read the Azure inference results CSV file. This example assumes we are using the CSV file generated from running the Python script for [Azure DFP pipeline example](../production/README.md). +### Data Sources -If using the [notebook version](../production/morpheus/notebooks/dfp_azure_inference.ipynb) to run inference, you'll need to update the `url` in [datasources.yaml](./datasources/datasources.yaml) as follows: -``` -url: /workspace/notebooks/dfp_detections_azure.csv -``` +Grafana includes built-in support for many data sources. There are also several data sources available that can be installed as plugins. More information about how to manage Grafana data sources can be found [here](https://grafana.com/docs/grafana/latest/datasources/). 
+ +The following data sources for this example are configured in [datasources.yaml](./datasources/datasources.yaml): + +#### Loki data source + +[Loki](https://grafana.com/docs/loki/latest/) is Grafana's log aggregation system. The Loki service is started automatically when the Grafana service starts up. The [Python script for running the DFP pipeline](./run.py) has been updated to configure a logging handler that sends the Morpheus logs to the Loki service. + +#### CSV data source plugin + +The [CSV data source plugin](https://grafana.com/grafana/plugins/marcusolsson-csv-datasource/) is installed to Grafana to read the Azure inference results CSV file. This example assumes we are using the CSV file generated from running the Python script for [Azure DFP pipeline example](../production/README.md). Please note that the use of the CSV plugin is for demonstration purposes only. Grafana includes support for many data sources more suitable for production deployments. See [here](https://grafana.com/docs/grafana/latest/datasources/) for more information. -### Updates to grafana.ini +#### Updates to grafana.ini The following is added to the default `grafana.ini` to enable local mode for CSV data source plugin. This allows the CSV data source plugin to access files on local file system. @@ -40,14 +47,24 @@ The following is added to the default `grafana.ini` to enable local mode for CSV allow_local_mode = true ``` -## Run Azure Production DFP Training and Inference Examples +## Add Loki logging handler to DFP pipeline -### Start Morpheus DFP pipeline container +The [pipeline run script](./run.py) for the Azure DFP example has been updated with the following to add the Loki logging handler which will publish the Morpheus logs to our Loki service: -The following steps are taken from [Azure DFP pipeline example](../production/README.md). Run the followng commands to start the Morpheus container: +``` +loki_handler = logging_loki.LokiHandler( + url=f"{loki_url}/loki/api/v1/push", + tags={"app": "morpheus"}, + version="1", +) -Build the Morpheus container: +configure_logging(loki_handler, log_level=log_level) +``` + +More information about Loki Python logging can be found [here](https://pypi.org/project/python-logging-loki/). +## Build the Morpheus container: +From the root of the Morpheus repo: ```bash ./docker/build_container_release.sh ``` @@ -60,45 +77,96 @@ export MORPHEUS_CONTAINER_VERSION="$(git describe --tags --abbrev=0)-runtime" docker compose build ``` -Create `bash` shell in `morpheus_pipeline` container: +## Start Grafana and Loki services: +To start Grafana and Loki, run the following command on host in `examples/digital_fingerprinting/production`: ```bash -docker compose run morpheus_pipeline bash +docker compose up grafana ``` -### Run Azure training pipeline +## Run Azure DFP Training -Run the following in the container to train Azure models. +Create `bash` shell in `morpheus_pipeline` container: ```bash -python dfp_azure_pipeline.py --log_level INFO --train_users generic --start_time "2022-08-01" --input_file="../../../data/dfp/azure-training-data/AZUREAD_2022*.json" +docker compose run --rm morpheus_pipeline bash ``` -### Run Azure inference pipeline: - -Run the inference pipeline with `filter_threshold=0.0`. This will disable the filtering of the inference results. 
+Set the `PYTHONPATH` environment variable to allow import of production DFP Morpheus stages: ``` export PYTHONPATH=/workspace/examples/digital_fingerprinting/production/morpheus ``` Run the following in the container to train the Azure models. ```bash cd /workspace/examples/digital_fingerprinting/production/grafana python run.py --log_level DEBUG --train_users generic --start_time "2022-08-01" --input_file="../../../data/dfp/azure-training-data/AZUREAD_2022*.json" ``` ## View DFP Logs Dashboard in Grafana While the training pipeline is running, you can view Morpheus logs live in a Grafana dashboard at http://localhost:3000/dashboards. Click on `DFP Logs` in the `General` folder. You may need to expand the `General` folder to see the link. <img src="./img/dfp_logs_dashboard.png"> This dashboard was provisioned using config files but can also be manually created with the following steps: 1. Click `Dashboards` in the left-side menu. 2. Click `New` and select `New Dashboard`. 3. On the empty dashboard, click `+ Add visualization`. 4. In the dialog box that opens, select the `Loki` data source. 5. In the `Edit Panel` view, change from `Time Series` visualization to `Logs`. 6. Add label filter: `app = morpheus`. 7. Change Order to `Oldest first`. 8. Click `Apply` to see your changes applied to the dashboard. Then click the save icon in the dashboard header. ## Set up Error Alerting Here we use a simple example to demonstrate how Grafana Alerting can notify us of a pipeline error moments after it occurs. This is especially useful with long-running pipelines. 1. Click `Alert Rules` under `Alerting` in the left-side menu. 2. Click `New Alert Rule`. 3. Enter the alert rule name: `DFP Error Alert Rule`. 4. In the `Define query and alert condition` section, select the `Loki` data source. 5. Switch to `Code` view by clicking the `Code` button on the right. 6. Enter the following Loki query, which counts the number of log lines in the last minute that have an error label (`severity=error`): ```
This is indicated by the `Firing` label in the `Threshold` section. + + + +12. Finally, click `Save rule and exit` at top right of the page. + +By default, all alerts will be sent through the `grafana-default-email` contact point. You can add email addresses to this contact point by clicking on `Contact points` under `Alerting` in the left-side menu. You would also have to configure SMTP in the `[smtp]` section of your `grafana.ini`. More information about about Grafana Alerting contact points can found [here](https://grafana.com/docs/grafana/latest/alerting/fundamentals/contact-points/). + +## Run Azure DFP Inference: + +Run the inference pipeline with `filter_threshold=0.0`. This will disable the filtering of the inference results. + +```bash +python run.py --log_level DEBUG --train_users none --start_time "2022-08-30" --input_file="../../../data/dfp/azure-inference-data/*.json" --filter_threshold=0.0 +``` + +The inference results will be saved to `dfp_detection_azure.csv` in the directory where script was run. -## View DFP Dashboard +## View DFP Detections Dashboard in Grafana -Our Grafana DFP dashboard can now be accessed via web browser at http://localhost:3000/dashboards. +When the inference pipeline completes, you can view visualizations of the inference results at http://localhost:3000/dashboards. -Click on `DFP_Dashboard` in the `General` folder. You may need to expand the `General` folder to see the link. +Click on `DFP Detections` in the `General` folder. You may need to expand the `General` folder to see the link. - + The dashboard has the following visualization panels: diff --git a/examples/digital_fingerprinting/production/grafana/config/grafana.ini b/examples/digital_fingerprinting/production/grafana/config/grafana.ini index 6b30172ff5..97df7a8bba 100644 --- a/examples/digital_fingerprinting/production/grafana/config/grafana.ini +++ b/examples/digital_fingerprinting/production/grafana/config/grafana.ini @@ -379,7 +379,7 @@ ;token_rotation_interval_minutes = 10 # Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false -disable_login_form = true +;disable_login_form = true # Set to true to disable the sign out link in the side menu. Useful if you use auth.proxy or auth.jwt, defaults to false ;disable_signout_menu = false diff --git a/examples/digital_fingerprinting/production/grafana/config/loki-config.yml b/examples/digital_fingerprinting/production/grafana/config/loki-config.yml new file mode 100644 index 0000000000..77cfa39956 --- /dev/null +++ b/examples/digital_fingerprinting/production/grafana/config/loki-config.yml @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: /tmp/loki + storage: + filesystem: + chunks_directory: /tmp/loki/chunks + rules_directory: /tmp/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: http://localhost:9093 + +# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration +# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/ +# +# Statistics help us better understand how Loki is used, and they show us performance +# levels for most users. This helps us prioritize features and documentation. +# For more information on what's sent, look at +# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go +# Refer to the buildReport method to see what goes into a report. +# +# If you would like to disable reporting, uncomment the following lines: +#analytics: +# reporting_enabled: false diff --git a/examples/digital_fingerprinting/production/grafana/dashboards/DFP_Dashboard.json b/examples/digital_fingerprinting/production/grafana/dashboards/dfp_detections.json similarity index 99% rename from examples/digital_fingerprinting/production/grafana/dashboards/DFP_Dashboard.json rename to examples/digital_fingerprinting/production/grafana/dashboards/dfp_detections.json index f80780a381..9167ecaca5 100644 --- a/examples/digital_fingerprinting/production/grafana/dashboards/DFP_Dashboard.json +++ b/examples/digital_fingerprinting/production/grafana/dashboards/dfp_detections.json @@ -557,7 +557,7 @@ }, "timepicker": {}, "timezone": "", - "title": "DFP_Dashboard", + "title": "DFP Detections", "uid": "f810d98f-bf31-42d4-98aa-9eb3fa187184", "version": 1, "weekStart": "" diff --git a/examples/digital_fingerprinting/production/grafana/dashboards/dfp_logs.json b/examples/digital_fingerprinting/production/grafana/dashboards/dfp_logs.json new file mode 100644 index 0000000000..c4ed0448c9 --- /dev/null +++ b/examples/digital_fingerprinting/production/grafana/dashboards/dfp_logs.json @@ -0,0 +1,78 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "P8E80F9AEF21F6940" + }, + "gridPos": { + "h": 18, + "w": 23, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Ascending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P8E80F9AEF21F6940" + }, + "editorMode": "builder", + "expr": "{app=\"morpheus\"} |= ``", + "queryType": "range", + "refId": "A" + } + ], + "type": "logs" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + 
"timezone": "", + "title": "DFP Logs", + "uid": "dfb4fe34-daae-4894-9ff0-b8f89b7d256e", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/examples/digital_fingerprinting/production/grafana/datasources/datasources.yaml b/examples/digital_fingerprinting/production/grafana/datasources/datasources.yaml index 8d5182a3ea..edcebabb8a 100644 --- a/examples/digital_fingerprinting/production/grafana/datasources/datasources.yaml +++ b/examples/digital_fingerprinting/production/grafana/datasources/datasources.yaml @@ -16,6 +16,12 @@ apiVersion: 1 datasources: + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + jsonData: + maxLines: 1000 - name: csv-datasource uid: 1257c93b-f998-438c-a784-7e90fb94fb36 type: marcusolsson-csv-datasource diff --git a/examples/digital_fingerprinting/production/grafana/img/screenshot.png b/examples/digital_fingerprinting/production/grafana/img/dfp_detections_dashboard.png similarity index 100% rename from examples/digital_fingerprinting/production/grafana/img/screenshot.png rename to examples/digital_fingerprinting/production/grafana/img/dfp_detections_dashboard.png diff --git a/examples/digital_fingerprinting/production/grafana/img/dfp_error_alert_setup.png b/examples/digital_fingerprinting/production/grafana/img/dfp_error_alert_setup.png new file mode 100644 index 0000000000..516249f41a --- /dev/null +++ b/examples/digital_fingerprinting/production/grafana/img/dfp_error_alert_setup.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ada0a466cfeb34363623cf78d478e17737a1d439b43bec989c7b026749a0fe2 +size 474978 diff --git a/examples/digital_fingerprinting/production/grafana/img/dfp_logs_dashboard.png b/examples/digital_fingerprinting/production/grafana/img/dfp_logs_dashboard.png new file mode 100644 index 0000000000..8cec30b668 --- /dev/null +++ b/examples/digital_fingerprinting/production/grafana/img/dfp_logs_dashboard.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753f199658371c462e6cbdbc324ee08acdafcf5453d0dae9b4042d133dfbabe0 +size 581211 diff --git a/examples/digital_fingerprinting/production/grafana/run.py b/examples/digital_fingerprinting/production/grafana/run.py new file mode 100644 index 0000000000..1f10cd4f67 --- /dev/null +++ b/examples/digital_fingerprinting/production/grafana/run.py @@ -0,0 +1,476 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""DFP training & inference pipelines for Azure Active Directory logs.""" + +import functools +import logging +import logging.handlers +import os +import typing +from datetime import datetime +from datetime import timedelta +from datetime import timezone + +import click +import logging_loki +import mlflow +import pandas as pd +from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from dfp.stages.dfp_inference_stage import DFPInferenceStage +from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from dfp.stages.dfp_training import DFPTraining +from dfp.stages.multi_file_source import MultiFileSource +from dfp.utils.regex_utils import iso_date_regex + +from morpheus.cli.utils import get_log_levels +from morpheus.cli.utils import get_package_relative_file +from morpheus.cli.utils import load_labels_file +from morpheus.cli.utils import parse_log_level +from morpheus.common import FileTypes +from morpheus.common import FilterSource +from morpheus.config import Config +from morpheus.config import ConfigAutoEncoder +from morpheus.config import CppConfig +from morpheus.pipeline import LinearPipeline +from morpheus.stages.general.monitor_stage import MonitorStage +from morpheus.stages.output.write_to_file_stage import WriteToFileStage +from morpheus.stages.postprocess.filter_detections_stage import FilterDetectionsStage +from morpheus.stages.postprocess.serialize_stage import SerializeStage +from morpheus.utils.column_info import ColumnInfo +from morpheus.utils.column_info import DataFrameInputSchema +from morpheus.utils.column_info import DateTimeColumn +from morpheus.utils.column_info import DistinctIncrementColumn +from morpheus.utils.column_info import IncrementColumn +from morpheus.utils.column_info import RenameColumn +from morpheus.utils.column_info import StringCatColumn +from morpheus.utils.file_utils import date_extractor +from morpheus.utils.logger import configure_logging + + +def _file_type_name_to_enum(file_type: str) -> FileTypes: + """Converts a file type name to a FileTypes enum.""" + if (file_type == "JSON"): + return FileTypes.JSON + if (file_type == "CSV"): + return FileTypes.CSV + if (file_type == "PARQUET"): + return FileTypes.PARQUET + + return FileTypes.Auto + + +@click.command() +@click.option( + "--train_users", + type=click.Choice(["all", "generic", "individual", "none"], case_sensitive=False), + help=("Indicates whether or not to train per user or a generic model for all users. " + "Selecting none runs the inference pipeline."), +) +@click.option( + "--skip_user", + multiple=True, + type=str, + help="User IDs to skip. Mutually exclusive with only_user", +) +@click.option( + "--only_user", + multiple=True, + type=str, + help="Only users specified by this option will be included. 
Mutually exclusive with skip_user",
+)
+@click.option(
+    "--start_time",
+    type=click.DateTime(
+        formats=['%Y-%m-%d', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%d %H:%M:%S%z']),
+    default=None,
+    help="The start of the time window; if undefined, start_time will be `now()-duration`",
+)
+@click.option(
+    "--duration",
+    type=str,
+    default="60d",
+    help="The duration to run starting from start_time",
+)
+@click.option(
+    "--cache_dir",
+    type=str,
+    default="./.cache/dfp",
+    show_envvar=True,
+    help="The location to cache data such as S3 downloads and pre-processed data",
+)
+@click.option("--log_level",
+              default=logging.getLevelName(Config().log_level),
+              type=click.Choice(get_log_levels(), case_sensitive=False),
+              callback=parse_log_level,
+              help="Specify the logging level to use.")
+@click.option("--sample_rate_s",
+              type=int,
+              default=0,
+              show_envvar=True,
+              help="Minimum time step, in milliseconds, between object logs.")
+@click.option("--filter_threshold",
+              type=float,
+              default=2.0,
+              show_envvar=True,
+              help="Filter out inference results below this threshold")
+@click.option(
+    "--input_file",
+    "-f",
+    type=str,
+    multiple=True,
+    help=("List of files to process. Can specify multiple arguments for multiple files. "
+          "Also accepts glob (*) wildcards and schema prefixes such as `s3://`. "
+          "For example, to make a local cache of an s3 bucket, use `filecache::s3://mybucket/*`. "
+          "Refer to the fsspec documentation for a list of possible options."),
+)
+@click.option("--file_type_override",
+              "-t",
+              type=click.Choice(["AUTO", "JSON", "CSV", "PARQUET"], case_sensitive=False),
+              default="JSON",
+              help="Override the detected file type. Values can be 'AUTO', 'JSON', 'CSV', or 'PARQUET'.",
+              callback=lambda _,
+              __,
+              value: None if value is None else _file_type_name_to_enum(value))
+@click.option('--watch_inputs',
+              type=bool,
+              is_flag=True,
+              default=False,
+              help=("Instructs the pipeline to continuously check the paths specified by `--input_file` for new files. "
+                    "This assumes that at least one path contains a wildcard."))
+@click.option("--watch_interval",
+              type=float,
+              default=1.0,
+              help=("Amount of time, in seconds, to wait between checks for new files. "
+                    "Only used if --watch_inputs is set."))
+@click.option('--tracking_uri',
+              type=str,
+              default="http://mlflow:5000",
+              help=("The MLflow tracking URI to connect to the tracking backend."))
+@click.option('--mlflow_experiment_name_template',
+              type=str,
+              default="dfp/azure/training/{reg_model_name}",
+              help="The MLflow experiment name template to use when logging experiments. ")
+@click.option('--mlflow_model_name_template',
+              type=str,
+              default="DFP-azure-{user_id}",
+              help="The MLflow model name template to use when logging models. 
") +@click.option('--use_postproc_schema', is_flag=True, help='Assume that input data has already been preprocessed.') +@click.option('--inference_detection_file_name', type=str, default="dfp_detections_azure.csv") +@click.option('--loki_url', + type=str, + default="http://loki:3100", + help=("Loki URL for error logging and alerting in Grafana.")) +def run_pipeline(train_users, + skip_user: typing.Tuple[str], + only_user: typing.Tuple[str], + start_time: datetime, + duration, + cache_dir, + log_level, + sample_rate_s, + filter_threshold, + mlflow_experiment_name_template, + mlflow_model_name_template, + file_type_override, + use_postproc_schema, + inference_detection_file_name, + loki_url, + **kwargs): + """Runs the DFP pipeline.""" + # To include the generic, we must be training all or generic + include_generic = train_users in ("all", "generic") + + # To include individual, we must be either training or inferring + include_individual = train_users != "generic" + + # None indicates we aren't training anything + is_training = train_users != "none" + + skip_users = list(skip_user) + only_users = list(only_user) + + duration = timedelta(seconds=pd.Timedelta(duration).total_seconds()) + if start_time is None: + end_time = datetime.now(tz=timezone.utc) + start_time = end_time - duration + else: + if start_time.tzinfo is None: + start_time = start_time.replace(tzinfo=timezone.utc) + + end_time = start_time + duration + + # Enable the Morpheus logger + loki_handler = logging_loki.LokiHandler( + url=f"{loki_url}/loki/api/v1/push", + tags={"app": "morpheus"}, + version="1", + ) + configure_logging(loki_handler, log_level=log_level) + logging.getLogger("mlflow").setLevel(log_level) + + if (len(skip_users) > 0 and len(only_users) > 0): + logging.error("Option --skip_user and --only_user are mutually exclusive. 
Exiting") + + logger = logging.getLogger(f"morpheus.{__name__}") + + logger.info("Running training pipeline with the following options: ") + logger.info("Train generic_user: %s", include_generic) + logger.info("Skipping users: %s", skip_users) + logger.info("Start Time: %s", start_time) + logger.info("Duration: %s", duration) + logger.info("Cache Dir: %s", cache_dir) + + if ("tracking_uri" in kwargs): + # Initialize ML Flow + mlflow.set_tracking_uri(kwargs["tracking_uri"]) + logger.info("Tracking URI: %s", mlflow.get_tracking_uri()) + + config = Config() + + CppConfig.set_should_use_cpp(False) + + config.num_threads = os.cpu_count() + + config.ae = ConfigAutoEncoder() + + config.ae.feature_columns = load_labels_file(get_package_relative_file("data/columns_ae_azure.txt")) + config.ae.userid_column_name = "username" + config.ae.timestamp_column_name = "timestamp" + + # Specify the column names to ensure all data is uniform + if (use_postproc_schema): + + source_column_info = [ + ColumnInfo(name="autonomousSystemNumber", dtype=str), + ColumnInfo(name="location_geoCoordinates_latitude", dtype=float), + ColumnInfo(name="location_geoCoordinates_longitude", dtype=float), + ColumnInfo(name="resourceDisplayName", dtype=str), + ColumnInfo(name="travel_speed_kmph", dtype=float), + DateTimeColumn(name=config.ae.timestamp_column_name, dtype=datetime, input_name="time"), + ColumnInfo(name="appDisplayName", dtype=str), + ColumnInfo(name="clientAppUsed", dtype=str), + RenameColumn(name=config.ae.userid_column_name, dtype=str, input_name="userPrincipalName"), + RenameColumn(name="deviceDetailbrowser", dtype=str, input_name="deviceDetail_browser"), + RenameColumn(name="deviceDetaildisplayName", dtype=str, input_name="deviceDetail_displayName"), + RenameColumn(name="deviceDetailoperatingSystem", dtype=str, input_name="deviceDetail_operatingSystem"), + + # RenameColumn(name="location_country", dtype=str, input_name="location_countryOrRegion"), + ColumnInfo(name="location_city_state_country", dtype=str), + ColumnInfo(name="location_state_country", dtype=str), + ColumnInfo(name="location_country", dtype=str), + + # Non-features + ColumnInfo(name="is_corp_vpn", dtype=bool), + ColumnInfo(name="distance_km", dtype=float), + ColumnInfo(name="ts_delta_hour", dtype=float), + ] + source_schema = DataFrameInputSchema(column_info=source_column_info) + + preprocess_column_info = [ + ColumnInfo(name=config.ae.timestamp_column_name, dtype=datetime), + ColumnInfo(name=config.ae.userid_column_name, dtype=str), + + # Resource access + ColumnInfo(name="appDisplayName", dtype=str), + ColumnInfo(name="resourceDisplayName", dtype=str), + ColumnInfo(name="clientAppUsed", dtype=str), + + # Device detail + ColumnInfo(name="deviceDetailbrowser", dtype=str), + ColumnInfo(name="deviceDetaildisplayName", dtype=str), + ColumnInfo(name="deviceDetailoperatingSystem", dtype=str), + + # Location information + ColumnInfo(name="autonomousSystemNumber", dtype=str), + ColumnInfo(name="location_geoCoordinates_latitude", dtype=float), + ColumnInfo(name="location_geoCoordinates_longitude", dtype=float), + ColumnInfo(name="location_city_state_country", dtype=str), + ColumnInfo(name="location_state_country", dtype=str), + ColumnInfo(name="location_country", dtype=str), + + # Derived information + ColumnInfo(name="travel_speed_kmph", dtype=float), + + # Non-features + ColumnInfo(name="is_corp_vpn", dtype=bool), + ColumnInfo(name="distance_km", dtype=float), + ColumnInfo(name="ts_delta_hour", dtype=float), + ] + + preprocess_schema = 
DataFrameInputSchema(column_info=preprocess_column_info, preserve_columns=["_batch_id"]) + + exclude_from_training = [ + config.ae.userid_column_name, + config.ae.timestamp_column_name, + "is_corp_vpn", + "distance_km", + "ts_delta_hour", + ] + + config.ae.feature_columns = [ + name for (name, dtype) in preprocess_schema.output_columns if name not in exclude_from_training + ] + else: + source_column_info = [ + DateTimeColumn(name=config.ae.timestamp_column_name, dtype=datetime, input_name="time"), + RenameColumn(name=config.ae.userid_column_name, dtype=str, input_name="properties.userPrincipalName"), + RenameColumn(name="appDisplayName", dtype=str, input_name="properties.appDisplayName"), + ColumnInfo(name="category", dtype=str), + RenameColumn(name="clientAppUsed", dtype=str, input_name="properties.clientAppUsed"), + RenameColumn(name="deviceDetailbrowser", dtype=str, input_name="properties.deviceDetail.browser"), + RenameColumn(name="deviceDetaildisplayName", dtype=str, input_name="properties.deviceDetail.displayName"), + RenameColumn(name="deviceDetailoperatingSystem", + dtype=str, + input_name="properties.deviceDetail.operatingSystem"), + StringCatColumn(name="location", + dtype=str, + input_columns=[ + "properties.location.city", + "properties.location.countryOrRegion", + ], + sep=", "), + RenameColumn(name="statusfailureReason", dtype=str, input_name="properties.status.failureReason"), + ] + + source_schema = DataFrameInputSchema(json_columns=["properties"], column_info=source_column_info) + + # Preprocessing schema + preprocess_column_info = [ + ColumnInfo(name=config.ae.timestamp_column_name, dtype=datetime), + ColumnInfo(name=config.ae.userid_column_name, dtype=str), + ColumnInfo(name="appDisplayName", dtype=str), + ColumnInfo(name="clientAppUsed", dtype=str), + ColumnInfo(name="deviceDetailbrowser", dtype=str), + ColumnInfo(name="deviceDetaildisplayName", dtype=str), + ColumnInfo(name="deviceDetailoperatingSystem", dtype=str), + ColumnInfo(name="statusfailureReason", dtype=str), + + # Derived columns + IncrementColumn(name="logcount", + dtype=int, + input_name=config.ae.timestamp_column_name, + groupby_column=config.ae.userid_column_name), + DistinctIncrementColumn(name="locincrement", + dtype=int, + input_name="location", + groupby_column=config.ae.userid_column_name, + timestamp_column=config.ae.timestamp_column_name), + DistinctIncrementColumn(name="appincrement", + dtype=int, + input_name="appDisplayName", + groupby_column=config.ae.userid_column_name, + timestamp_column=config.ae.timestamp_column_name) + ] + + preprocess_schema = DataFrameInputSchema(column_info=preprocess_column_info, preserve_columns=["_batch_id"]) + + # Create a linear pipeline object + pipeline = LinearPipeline(config) + + pipeline.set_source( + MultiFileSource(config, + filenames=list(kwargs["input_file"]), + watch=kwargs["watch_inputs"], + watch_interval=kwargs["watch_interval"])) + + # Batch files into batches by time. Use the default ISO date extractor from the filename + pipeline.add_stage( + DFPFileBatcherStage(config, + period="D", + sampling_rate_s=sample_rate_s, + date_conversion_func=functools.partial(date_extractor, filename_regex=iso_date_regex), + start_time=start_time, + end_time=end_time)) + + parser_kwargs = None + if (file_type_override == FileTypes.JSON): + parser_kwargs = {"lines": False, "orient": "records"} + # Output is a list of fsspec files. Convert to DataFrames. 
This caches downloaded data
+    pipeline.add_stage(
+        DFPFileToDataFrameStage(
+            config,
+            schema=source_schema,
+            file_type=file_type_override,
+            parser_kwargs=parser_kwargs,  # TODO(Devin) probably should be configurable too
+            cache_dir=cache_dir))
+
+    pipeline.add_stage(MonitorStage(config, description="Input data rate"))
+
+    # This will split users or just use one single user
+    pipeline.add_stage(
+        DFPSplitUsersStage(config,
+                           include_generic=include_generic,
+                           include_individual=include_individual,
+                           skip_users=skip_users,
+                           only_users=only_users))
+
+    # Next, have a stage that will create rolling windows
+    pipeline.add_stage(
+        DFPRollingWindowStage(
+            config,
+            min_history=300 if is_training else 1,
+            min_increment=300 if is_training else 0,
+            # For inference, we only ever want 1 day max
+            max_history="60d" if is_training else "1d",
+            cache_dir=cache_dir))
+
+    # Output is UserMessageMeta -- Cached frame set
+    pipeline.add_stage(DFPPreprocessingStage(config, input_schema=preprocess_schema))
+
+    model_name_formatter = mlflow_model_name_template
+    experiment_name_formatter = mlflow_experiment_name_template
+
+    if (is_training):
+        # Finally, perform training which will output a model
+        pipeline.add_stage(DFPTraining(config, epochs=100, validation_size=0.15))
+
+        pipeline.add_stage(MonitorStage(config, description="Training rate", smoothing=0.001))
+
+        # Write that model to MLFlow
+        pipeline.add_stage(
+            DFPMLFlowModelWriterStage(config,
+                                      model_name_formatter=model_name_formatter,
+                                      experiment_name_formatter=experiment_name_formatter))
+    else:
+        # Perform inference on the preprocessed data
+        pipeline.add_stage(DFPInferenceStage(config, model_name_formatter=model_name_formatter))
+
+        pipeline.add_stage(MonitorStage(config, description="Inference rate", smoothing=0.001))
+
+        # Filter for only the anomalous logs
+        pipeline.add_stage(
+            FilterDetectionsStage(config,
+                                  threshold=filter_threshold,
+                                  filter_source=FilterSource.DATAFRAME,
+                                  field_name='mean_abs_z'))
+        pipeline.add_stage(DFPPostprocessingStage(config))
+
+        # Exclude the columns we don't want in our output
+        pipeline.add_stage(SerializeStage(config, exclude=['batch_count', 'origin_hash', '_row_hash', '_batch_id']))
+
+        # Write all anomalies to a CSV file
+        pipeline.add_stage(WriteToFileStage(config, filename=inference_detection_file_name, overwrite=True))
+
+    # Run the pipeline
+    pipeline.run()
+
+
+if __name__ == "__main__":
+    # pylint: disable=no-value-for-parameter
+    run_pipeline(obj={}, auto_envvar_prefix='DFP', show_default=True, prog_name="dfp")
diff --git a/morpheus/stages/general/buffer_stage.py b/morpheus/stages/general/buffer_stage.py
index 6e529adbb5..261b4cec82 100644
--- a/morpheus/stages/general/buffer_stage.py
+++ b/morpheus/stages/general/buffer_stage.py
@@ -66,7 +66,8 @@ def supports_cpp_node(self):
         return False
 
     def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject:
-        # This stage is no longer needed and is just a pass thru stage
-        deprecated_stage_warning(logger, type(self), self.unique_name)
+        reason = "The stage is no longer required to manage backpressure and has been deprecated. It has no" \
+                 " effect and acts as a pass through stage."
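+        # Emit the deprecation warning with the explanatory reason; the stage simply forwards its input.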
+        deprecated_stage_warning(logger, type(self), self.unique_name, reason=reason)
 
         return input_node
diff --git a/morpheus/stages/general/delay_stage.py b/morpheus/stages/general/delay_stage.py
index f83e13b450..49768c32a2 100644
--- a/morpheus/stages/general/delay_stage.py
+++ b/morpheus/stages/general/delay_stage.py
@@ -66,7 +66,8 @@ def supports_cpp_node(self):
         return False
 
     def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject:
-        # This stage is no longer needed and is just a pass thru stage
-        deprecated_stage_warning(logger, type(self), self.unique_name)
+        reason = "The stage is no longer required to manage backpressure and has been deprecated. It has no" \
+                 " effect and acts as a pass through stage."
+        deprecated_stage_warning(logger, type(self), self.unique_name, reason=reason)
 
         return input_node
diff --git a/morpheus/utils/logger.py b/morpheus/utils/logger.py
index 7ef44a8897..693e6574b7 100644
--- a/morpheus/utils/logger.py
+++ b/morpheus/utils/logger.py
@@ -95,13 +95,14 @@ def _configure_from_log_file(log_config_file: str):
     logging.config.fileConfig(log_config_file)
 
 
-def _configure_from_log_level(log_level: int):
+def _configure_from_log_level(*extra_handlers: logging.Handler, log_level: int):
     """
     Default config with only option being the logging level. Outputs to both the console and a file. Sets up a logging
     producer/consumer that works well in multi-thread/process environments.
 
     Parameters
     ----------
+    *extra_handlers: List of additional handlers which will handle entries placed on the queue
     log_level : int
         Log level and above to report
     """
@@ -143,6 +144,7 @@ def _configure_from_log_level(log_level: int):
     queue_listener = logging.handlers.QueueListener(morpheus_logging_queue,
                                                     console_handler,
                                                     file_handler,
+                                                    *extra_handlers,
                                                     respect_handler_level=True)
     queue_listener.start()
     queue_listener._thread.name = "Logging Thread"
@@ -155,7 +157,7 @@ def stop_queue_listener():
 
     atexit.register(stop_queue_listener)
 
 
-def configure_logging(log_level: int, log_config_file: str = None):
+def configure_logging(*extra_handlers: logging.Handler, log_level: int = None, log_config_file: str = None):
     """
     Configures Morpheus logging in one of two ways. Either specifying a logging config file to load or a logging level
     which will use a default configuration. The default configuration outputs to both the console and a file. Sets up a
@@ -163,6 +165,7 @@ def configure_logging(log_level: int, log_config_file: str = None):
 
     Parameters
     ----------
+    *extra_handlers: List of handlers to add to existing default console and file handlers.
     log_level: int
         Specifies the log level and above to output. Must be one of the available levels in the `logging` module.
     log_config_file: str, optional (default = None):
@@ -180,7 +183,8 @@
         # Configure using log file
         _configure_from_log_file(log_config_file=log_config_file)
     else:
-        _configure_from_log_level(log_level=log_level)
+        assert log_level is not None, "log_level must be specified"
+        _configure_from_log_level(*extra_handlers, log_level=log_level)
 
 
 def set_log_level(log_level: int):
@@ -211,12 +215,12 @@ def set_log_level(log_level: int):
     return old_level
 
 
-def deprecated_stage_warning(logger, cls, name):
+def deprecated_stage_warning(logger, cls, name, reason: str = None):
     """Log a warning about a deprecated stage."""
-    logger.warning(("The '%s' stage ('%s') is no longer required to manage backpressure and has been deprecated. 
" - "It has no effect and acts as a pass through stage."), - cls.__name__, - name) + message = f"The '{cls.__name__}' stage ('{name}') has been deprecated and will be removed in a future version." + if reason is not None: + message = " ".join((message, reason)) + logger.warning(message) def deprecated_message_warning(logger, cls, new_cls): diff --git a/tests/test_logger.py b/tests/test_logger.py new file mode 100644 index 0000000000..dddf05d345 --- /dev/null +++ b/tests/test_logger.py @@ -0,0 +1,159 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import multiprocessing +import os +from unittest.mock import patch + +import pytest + +from _utils import TEST_DIRS +from morpheus.utils.logger import TqdmLoggingHandler +from morpheus.utils.logger import configure_logging +from morpheus.utils.logger import deprecated_message_warning +from morpheus.utils.logger import deprecated_stage_warning +from morpheus.utils.logger import set_log_level + + +@patch('logging.handlers.QueueListener') +@patch('logging.handlers.QueueHandler.emit') +def test_configure_logging_from_level_default_handlers(queue_handler, queue_listener): + configure_logging(log_level=logging.DEBUG) + morpheus_logger = logging.getLogger("morpheus") + assert morpheus_logger.level == logging.DEBUG + assert morpheus_logger.propagate is False + pos_args = queue_listener.call_args[0] + assert len(pos_args) == 3 + assert isinstance(pos_args[0], multiprocessing.queues.Queue) + assert isinstance(pos_args[1], TqdmLoggingHandler) + assert isinstance(pos_args[2], logging.handlers.RotatingFileHandler) + assert pos_args[2].baseFilename.endswith("morpheus.log") + morpheus_logger.debug("test") + queue_handler.assert_called() + + +def test_configure_logging__no_args(): + with pytest.raises(Exception) as excinfo: + configure_logging() + assert "log_level must be specified" in str(excinfo.value) + + +@patch('logging.handlers.RotatingFileHandler.emit') +@patch('morpheus.utils.logger.TqdmLoggingHandler.emit') +def test_configure_logging_from_file(console_handler, file_handler): + log_config_file = os.path.join(TEST_DIRS.tests_data_dir, "logging.json") + configure_logging(log_config_file=log_config_file) + morpheus_logger = logging.getLogger("morpheus") + assert morpheus_logger.level == logging.DEBUG + assert morpheus_logger.propagate is False + morpheus_logger.debug("test") + console_handler.assert_called_once() + file_handler.assert_called_once() + + +def test_configure_logging_from_file_filenotfound(): + with pytest.raises(FileNotFoundError): + configure_logging(log_config_file="does_not_exist.json") + + +@patch('logging.handlers.QueueListener') +@patch('logging.handlers.QueueHandler.emit') +def test_configure_logging_add_one_handler(queue_handler, queue_listener): + new_handler = logging.StreamHandler() + configure_logging(new_handler, log_level=logging.DEBUG) + morpheus_logger = 
logging.getLogger("morpheus") + assert morpheus_logger.level == logging.DEBUG + assert morpheus_logger.propagate is False + pos_args = queue_listener.call_args[0] + assert len(pos_args) == 4 + assert isinstance(pos_args[0], multiprocessing.queues.Queue) + assert isinstance(pos_args[1], TqdmLoggingHandler) + assert isinstance(pos_args[2], logging.handlers.RotatingFileHandler) + assert isinstance(pos_args[3], logging.StreamHandler) + morpheus_logger.debug("test") + queue_handler.assert_called() + + +@patch('logging.handlers.QueueListener') +@patch('logging.handlers.QueueHandler.emit') +def test_configure_logging_add_two_handlers(queue_handler, queue_listener): + new_handler_1 = logging.StreamHandler() + new_handler_2 = logging.StreamHandler() + configure_logging(new_handler_1, new_handler_2, log_level=logging.DEBUG) + morpheus_logger = logging.getLogger("morpheus") + assert morpheus_logger.level == logging.DEBUG + assert morpheus_logger.propagate is False + pos_args = queue_listener.call_args[0] + assert len(pos_args) == 5 + assert isinstance(pos_args[0], multiprocessing.queues.Queue) + assert isinstance(pos_args[1], TqdmLoggingHandler) + assert isinstance(pos_args[2], logging.handlers.RotatingFileHandler) + assert isinstance(pos_args[3], logging.StreamHandler) + assert isinstance(pos_args[4], logging.StreamHandler) + morpheus_logger.debug("test") + queue_handler.assert_called() + + +def test_set_log_level(): + configure_logging(log_level=logging.INFO) + morpheus_logger = logging.getLogger("morpheus") + assert morpheus_logger.level == logging.INFO + set_log_level(logging.DEBUG) + assert morpheus_logger.level == logging.DEBUG + + +def test_deprecated_stage_warning(caplog): + + class DummyStage(): + pass + + logger = logging.getLogger() + caplog.set_level(logging.WARNING) + deprecated_stage_warning(logger, DummyStage, "dummy_stage") + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == "WARNING" + assert "The 'DummyStage' stage ('dummy_stage') has been deprecated" in caplog.text + + +def test_deprecated_stage_warning_with_reason(caplog): + + class DummyStage(): + pass + + logger = logging.getLogger() + caplog.set_level(logging.WARNING) + deprecated_stage_warning(logger, DummyStage, "dummy_stage", reason="This is the reason.") + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == "WARNING" + assert "The 'DummyStage' stage ('dummy_stage') has been deprecated and will be removed in a future version. " \ + "This is the reason." in caplog.text + + +def test_deprecated_message_warning(caplog): + + class OldMessage(): + pass + + class NewMessage(): + pass + + logger = logging.getLogger() + caplog.set_level(logging.WARNING) + deprecated_message_warning(logger, OldMessage, NewMessage) + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == "WARNING" + assert "The 'OldMessage' message has been deprecated and will be removed in a future version. " \ + "Please use 'NewMessage' instead." 
in caplog.text diff --git a/tests/tests_data/logging.json b/tests/tests_data/logging.json new file mode 100644 index 0000000000..bb681247b7 --- /dev/null +++ b/tests/tests_data/logging.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9bffb074de716dc573421a31fa8ab75a3476676db1522fb839c2e75807faba +size 752 From 5fd661ba979a35d17e195c097b9cdcc1773370ef Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Thu, 15 Feb 2024 15:11:40 -0500 Subject: [PATCH 09/18] Improve pipeline stop logic to ensure join is called exactly once for all stages (#1479) 1. Removes the `_is_built`, `_is_started` and `_is_stopped` flags and replaces with single member which holds onto the state enum for: INITIALIZED, BUILT, STARTED, STOPPED, COMPLETED 1. Changes the meaning of `stop()` and the meaning of `join()` for stages 1. `stop()` called 0 or 1 times. Only way it can get called is if `pipeline.stop()` was called indicating the pipeline should try to shut down gracefully. 1. Users should only implement this method if they have a source stage (or sources in their stage) 1. `join()` called exactly 1 time. Only called when the pipeline is complete and all stages are shut down. This is where users should implement any cleanup code 1. Tests for handling all of these scenarios with the pipeline. Closes #1477 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1479 --- morpheus/pipeline/pipeline.py | 124 +++---- morpheus/stages/general/monitor_stage.py | 2 +- .../postprocess/generate_viz_frames_stage.py | 2 +- tests/pipeline/test_pipe_viz.py | 3 +- tests/pipeline/test_pipeline_state.py | 312 ++++++++++++++++++ tests/test_http_server_source_stage_pipe.py | 5 +- tests/test_monitor_stage.py | 10 +- 7 files changed, 389 insertions(+), 69 deletions(-) create mode 100755 tests/pipeline/test_pipeline_state.py diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 4859244c10..0604971d7b 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -17,9 +17,11 @@ import os import signal import sys +import threading import typing from collections import OrderedDict from collections import defaultdict +from enum import Enum from functools import partial import mrc @@ -41,6 +43,14 @@ StageT = typing.TypeVar("StageT", bound=StageBase) +class PipelineState(Enum): + INITIALIZED = "initialized" + BUILT = "built" + STARTED = "started" + STOPPED = "stopped" + COMPLETED = "completed" + + class Pipeline(): """ Class for building your pipeline. A pipeline for your use case can be constructed by first adding a @@ -56,16 +66,19 @@ class Pipeline(): """ def __init__(self, config: Config): + + self._mutex = threading.RLock() + self._source_count: int = None # Maximum number of iterations for progress reporting. 
None = Unknown/Unlimited self._id_counter = 0 self._num_threads = config.num_threads # Complete set of nodes across segments in this pipeline - self._stages: typing.Set[Stage] = set() + self._stages: typing.List[Stage] = [] # Complete set of sources across segments in this pipeline - self._sources: typing.Set[SourceStage] = set() + self._sources: typing.List[SourceStage] = [] # Dictionary containing segment information for this pipeline self._segments: typing.Dict = defaultdict(lambda: {"nodes": set(), "ingress_ports": [], "egress_ports": []}) @@ -75,19 +88,21 @@ def __init__(self, config: Config): self._segment_graphs = defaultdict(lambda: networkx.DiGraph()) - self._is_built = False - self._is_started = False + self._state = PipelineState.INITIALIZED self._mrc_executor: mrc.Executor = None self._loop: asyncio.AbstractEventLoop = None + # Future that allows post_start to propagate exceptions back to pipeline + self._post_start_future: asyncio.Future = None + @property - def is_built(self) -> bool: - return self._is_built + def state(self) -> PipelineState: + return self._state def _assert_not_built(self): - assert not self.is_built, "Pipeline has already been built. Cannot modify pipeline." + assert self._state == PipelineState.INITIALIZED, "Pipeline has already been built. Cannot modify pipeline." def add_stage(self, stage: StageT, segment_id: str = "main") -> StageT: """ @@ -110,10 +125,10 @@ def add_stage(self, stage: StageT, segment_id: str = "main") -> StageT: # Add to list of stages if it's a stage, not a source if (isinstance(stage, Stage)): segment_nodes.add(stage) - self._stages.add(stage) + self._stages.append(stage) elif (isinstance(stage, SourceStage)): segment_nodes.add(stage) - self._sources.add(stage) + self._sources.append(stage) else: raise NotImplementedError(f"add_stage() failed. Unknown node type: {type(stage)}") @@ -279,7 +294,7 @@ def build(self): Once the pipeline has been constructed, this will start the pipeline by calling `Source.start` on the source object. """ - assert not self._is_built, "Pipeline can only be built once!" + assert self._state == PipelineState.INITIALIZED, "Pipeline can only be built once!" assert len(self._sources) > 0, "Pipeline must have a source stage" self._pre_build() @@ -341,19 +356,16 @@ def inner_build(builder: mrc.Builder, segment_id: str): self._mrc_executor.register_pipeline(mrc_pipeline) - self._is_built = True + with self._mutex: + self._state = PipelineState.BUILT logger.info("====Registering Pipeline Complete!====") async def _start(self): - assert self._is_built, "Pipeline must be built before starting" - - # Only execute this once - if (self._is_started): - return + assert self._state == PipelineState.BUILT, "Pipeline must be built before starting" - # Stop from running this twice - self._is_started = True + with self._mutex: + self._state = PipelineState.STARTED # Save off the current loop so we can use it in async_start self._loop = asyncio.get_running_loop() @@ -392,10 +404,35 @@ def term_signal(): logger.info("====Pipeline Started====") + async def post_start(executor): + + try: + # Make a local reference so the object doesn't go out of scope from a call to stop() + await executor.join_async() + except Exception: + logger.exception("Exception occurred in pipeline. Rethrowing") + raise + finally: + # Call join on all sources. This only occurs after all messages have been processed fully. 
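+                # Note: join() is awaited exactly once for every source and stage,
+                # while stop() only runs if pipeline.stop() was explicitly called.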
+ for source in list(self._sources): + await source.join() + + # Now call join on all stages + for stage in list(self._stages): + await stage.join() + + self._on_stop() + + with self._mutex: + self._state = PipelineState.COMPLETED + + self._post_start_future = asyncio.create_task(post_start(self._mrc_executor)) + def stop(self): """ Stops all running stages and the underlying MRC pipeline. """ + assert self._state == PipelineState.STARTED, "Pipeline must be running to stop it" logger.info("====Stopping Pipeline====") for stage in list(self._sources) + list(self._stages): @@ -403,52 +440,26 @@ def stop(self): self._mrc_executor.stop() + with self._mutex: + self._state = PipelineState.STOPPED + logger.info("====Pipeline Stopped====") self._on_stop() async def join(self): """ - Suspend execution all currently running stages and the MRC pipeline. - Typically called after `stop`. + Wait until pipeline completes upon which join methods of sources and stages will be called. """ - try: - # If the pipeline failed any pre-flight checks self._mrc_executor will be None - if self._mrc_executor is None: - raise RuntimeError("Pipeline failed pre-flight checks.") - - # Make a local reference so the object doesnt go out of scope from a call to stop() - executor = self._mrc_executor - - await executor.join_async() - except Exception: - logger.exception("Exception occurred in pipeline. Rethrowing") - raise - finally: - # Make sure these are always shut down even if there was an error - for source in list(self._sources): - source.stop() + assert self._post_start_future is not None, "Pipeline must be started before joining" - # First wait for all sources to stop. This only occurs after all messages have been processed fully - for source in list(self._sources): - await source.join() - - # Now that there is no more data, call stop on all stages to ensure shutdown (i.e., for stages that have - # their own worker loop thread) - for stage in list(self._stages): - stage.stop() - - # Now call join on all stages - for stage in list(self._stages): - await stage.join() - - self._on_stop() + await self._post_start_future def _on_stop(self): self._mrc_executor = None - async def _build_and_start(self): + async def build_and_start(self): - if (not self.is_built): + if (self._state == PipelineState.INITIALIZED): try: self.build() except Exception: @@ -470,7 +481,7 @@ def visualize(self, filename: str = None, **graph_kwargs): exists it will be overwritten. Requires the graphviz library. """ - if not self._is_built: + if self._state == PipelineState.INITIALIZED: raise RuntimeError("Pipeline.visualize() requires that the Pipeline has been started before generating " "the visualization. Please call Pipeline.build() or Pipeline.run() before calling " "Pipeline.visualize().") @@ -624,9 +635,7 @@ async def run_async(self): This function sets up the current asyncio loop, builds the pipeline, and awaits on it to complete. 
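+        On KeyboardInterrupt, the pipeline is stopped gracefully rather than being joined a second time.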
""" try: - await self._build_and_start() - - # Wait for completion + await self.build_and_start() await self.join() except KeyboardInterrupt: @@ -635,9 +644,6 @@ async def run_async(self): # Stop the pipeline self.stop() - # Wait again for nice completion - await self.join() - finally: # Shutdown the async generator sources and exit logger.info("====Pipeline Complete====") diff --git a/morpheus/stages/general/monitor_stage.py b/morpheus/stages/general/monitor_stage.py index 8d709d7d92..cc3a96f33f 100644 --- a/morpheus/stages/general/monitor_stage.py +++ b/morpheus/stages/general/monitor_stage.py @@ -111,7 +111,7 @@ async def start_async(self): if (not self._mc.delayed_start): self._mc.ensure_progress_bar() - def stop(self): + async def join(self): """ Clean up and close the progress bar. """ diff --git a/morpheus/stages/postprocess/generate_viz_frames_stage.py b/morpheus/stages/postprocess/generate_viz_frames_stage.py index 93f385cf4d..cf60d638b8 100644 --- a/morpheus/stages/postprocess/generate_viz_frames_stage.py +++ b/morpheus/stages/postprocess/generate_viz_frames_stage.py @@ -235,7 +235,7 @@ async def run_server(): return await super().start_async() - def stop(self): + async def join(self): """ Stages can implement this to perform cleanup steps when pipeline is stopped. """ diff --git a/tests/pipeline/test_pipe_viz.py b/tests/pipeline/test_pipe_viz.py index 0515824e8c..da2b245886 100755 --- a/tests/pipeline/test_pipe_viz.py +++ b/tests/pipeline/test_pipe_viz.py @@ -27,6 +27,7 @@ from morpheus.cli.commands import RANKDIR_CHOICES from morpheus.pipeline import LinearPipeline from morpheus.pipeline.pipeline import Pipeline +from morpheus.pipeline.pipeline import PipelineState from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage from morpheus.stages.postprocess.add_classifications_stage import AddClassificationsStage @@ -76,7 +77,7 @@ def test_viz_without_run(viz_pipeline: Pipeline, tmp_path: str): # Verify that the output file exists and is a valid png file assert_path_exists(viz_file) assert imghdr.what(viz_file) == 'png' - assert viz_pipeline.is_built + assert viz_pipeline.state != PipelineState.INITIALIZED @pytest.mark.slow diff --git a/tests/pipeline/test_pipeline_state.py b/tests/pipeline/test_pipeline_state.py new file mode 100755 index 0000000000..1dbaefccea --- /dev/null +++ b/tests/pipeline/test_pipeline_state.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest import mock + +import pytest + +from morpheus.config import Config +from morpheus.pipeline import LinearPipeline +from morpheus.pipeline.pipeline import PipelineState +from morpheus.pipeline.stage_decorator import source +from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage +from morpheus.stages.preprocess.deserialize_stage import DeserializeStage +from morpheus.utils.type_aliases import DataFrameType + + +@source +def source_test_stage() -> int: + for i in range(10): + yield i + + +# pylint: disable=too-many-function-args + + +def test_build(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + pipeline.build() + assert pipeline.state == PipelineState.BUILT + + +def test_build_after_build(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + pipeline.build() + assert pipeline.state == PipelineState.BUILT + with pytest.raises(Exception) as excinfo: + pipeline.build() + assert "can only be built once" in str(excinfo.value) + + +def test_build_without_source(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + with pytest.raises(Exception) as excinfo: + pipeline.build() + assert "must have a source stage" in str(excinfo.value) + + +def test_normal_run(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + pipeline.run() + assert pipeline.state == PipelineState.COMPLETED + + +async def test_normal_build_and_start(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + await pipeline.build_and_start() + assert pipeline.state == PipelineState.STARTED + await pipeline.join() + assert pipeline.state == PipelineState.COMPLETED + + +async def test_stop_after_start(config: Config): + + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + await pipeline.build_and_start() + assert pipeline.state == PipelineState.STARTED + pipeline.stop() + assert pipeline.state == PipelineState.STOPPED + await pipeline.join() + assert pipeline.state == PipelineState.COMPLETED + + +def test_stop_after_run(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + pipeline.run() + assert pipeline.state == PipelineState.COMPLETED + with pytest.raises(Exception) as excinfo: + pipeline.stop() + assert "must be running" in str(excinfo.value) + + +def test_stop_without_start(config: Config): + + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + with pytest.raises(Exception) as excinfo: + pipeline.stop() + assert "must be running" in str(excinfo.value) + + +async def test_stop_after_stop(config: Config): + + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + await pipeline.build_and_start() + assert pipeline.state == PipelineState.STARTED + pipeline.stop() + assert pipeline.state == PipelineState.STOPPED + with pytest.raises(Exception) as excinfo: + pipeline.stop() + assert "must 
be running" in str(excinfo.value) + await pipeline.join() + + +async def test_join_without_start(config: Config): + + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + with pytest.raises(Exception) as excinfo: + await pipeline.join() + assert "must be started" in str(excinfo.value) + + +async def test_join_after_join(config: Config): + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(source_test_stage(config)) + await pipeline.build_and_start() + assert pipeline.state == PipelineState.STARTED + await pipeline.join() + assert pipeline.state == PipelineState.COMPLETED + await pipeline.join() + assert pipeline.state == PipelineState.COMPLETED + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +def test_stage_methods_called_normal_run(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + pipeline.run() + mock_source_stage_stop.assert_not_called() + mock_source_stage_join.assert_called_once() + mock_deserialize_stage_stop.assert_not_called() + mock_deserialize_stage_join.assert_called_once() + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +async def test_stage_methods_called_stop_after_start(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + await pipeline.build_and_start() + pipeline.stop() + await pipeline.join() + mock_source_stage_stop.assert_called_once() + mock_source_stage_join.assert_called_once() + mock_deserialize_stage_stop.assert_called_once() + mock_deserialize_stage_join.assert_called_once() + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +def test_stage_methods_called_stop_after_run(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + pipeline.run() + with pytest.raises(Exception) as excinfo: + pipeline.stop() + assert "must be running" in str(excinfo.value) + 
mock_source_stage_stop.assert_not_called() + mock_source_stage_join.assert_called_once() + mock_deserialize_stage_stop.assert_not_called() + mock_deserialize_stage_join.assert_called_once() + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +def test_stage_methods_called_stop_without_start(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + + pipeline = LinearPipeline(config) + assert pipeline.state == PipelineState.INITIALIZED + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + with pytest.raises(Exception) as excinfo: + pipeline.stop() + assert "must be running" in str(excinfo.value) + mock_source_stage_stop.assert_not_called() + mock_source_stage_join.assert_not_called() + mock_deserialize_stage_stop.assert_not_called() + mock_deserialize_stage_join.assert_not_called() + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +async def test_stage_methods_called_stop_after_stop(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + await pipeline.build_and_start() + pipeline.stop() + with pytest.raises(Exception) as excinfo: + pipeline.stop() + assert "must be running" in str(excinfo.value) + await pipeline.join() + mock_source_stage_stop.assert_called_once() + mock_source_stage_join.assert_called_once() + mock_deserialize_stage_stop.assert_called_once() + mock_deserialize_stage_join.assert_called_once() + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +async def test_stage_methods_called_join_without_start(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + with pytest.raises(Exception) as excinfo: + await pipeline.join() + assert "must be started" in str(excinfo.value) + mock_source_stage_stop.assert_not_called() + mock_source_stage_join.assert_not_called() + mock_deserialize_stage_stop.assert_not_called() + mock_deserialize_stage_join.assert_not_called() + + +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.join') +@mock.patch('morpheus.stages.preprocess.deserialize_stage.DeserializeStage.stop') 
+@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.join') +@mock.patch('morpheus.stages.input.in_memory_source_stage.InMemorySourceStage.stop') +async def test_stage_methods_called_join_after_join(mock_source_stage_stop, + mock_source_stage_join, + mock_deserialize_stage_stop, + mock_deserialize_stage_join, + config: Config, + filter_probs_df: DataFrameType): + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipeline.add_stage(DeserializeStage(config)) + await pipeline.build_and_start() + await pipeline.join() + assert pipeline.state == PipelineState.COMPLETED + await pipeline.join() + assert pipeline.state == PipelineState.COMPLETED + mock_source_stage_stop.assert_not_called() + mock_source_stage_join.assert_called_once() + mock_deserialize_stage_stop.assert_not_called() + mock_deserialize_stage_join.assert_called_once() diff --git a/tests/test_http_server_source_stage_pipe.py b/tests/test_http_server_source_stage_pipe.py index 35e7fdd200..d00297a7dd 100644 --- a/tests/test_http_server_source_stage_pipe.py +++ b/tests/test_http_server_source_stage_pipe.py @@ -26,6 +26,7 @@ from morpheus.config import Config from morpheus.io.serializers import df_to_stream_json from morpheus.pipeline import LinearPipeline +from morpheus.pipeline.pipeline import PipelineState from morpheus.stages.input.http_server_source_stage import HttpServerSourceStage from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage from morpheus.utils.http_utils import HTTPMethod @@ -41,11 +42,11 @@ async def make_request(pipe: LinearPipeline, payload: typing.Any, content_type: str): attempt = 0 - while not pipe._is_started and attempt < 2: + while pipe.state != PipelineState.STARTED and attempt < 2: await asyncio.sleep(1) attempt += 1 - if not pipe._is_started: + if pipe.state != PipelineState.STARTED: raise RuntimeError("HttpServerSourceStage did not start") # Not strictly needed, but we don't have a good way of knowing when the server is ready to accept requests diff --git a/tests/test_monitor_stage.py b/tests/test_monitor_stage.py index 91bc936878..e023f159b3 100755 --- a/tests/test_monitor_stage.py +++ b/tests/test_monitor_stage.py @@ -73,18 +73,18 @@ def test_start_async(mock_morph_tqdm: mock.MagicMock, config: Config): @mock.patch('morpheus.controllers.monitor_controller.MorpheusTqdm') -def test_stop(mock_morph_tqdm: mock.MagicMock, config: Config): +async def test_join(mock_morph_tqdm: mock.MagicMock, config: Config): mock_morph_tqdm.return_value = mock_morph_tqdm stage = MonitorStage(config, log_level=logging.WARNING) assert stage._mc._progress is None - # Calling on_stop is a noop if we are stopped - stage.stop() + # Calling join is a noop if we are stopped + await stage.join() mock_morph_tqdm.assert_not_called() - asyncio.run(stage.start_async()) - stage.stop() + await stage.start_async() + await stage.join() mock_morph_tqdm.close.assert_called_once() From aa8d42e79936bc7b2558682ca1197cedca8c7041 Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Thu, 15 Feb 2024 20:33:47 -0700 Subject: [PATCH 10/18] ControlMessage improvements (#1511) Resolves #1502 Adds the ability to attach TensorMemory to ControlMessages Improves get/set/list metadata functions Adds the ability to attach grouped/keyed timestamps Authors: - Devin Robison (https://github.com/drobison00) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1511 --- examples/llm/cli.py | 2 +- 
.../include/morpheus/messages/control.hpp | 241 +++++++++++++++-- morpheus/_lib/messages/__init__.pyi | 20 +- morpheus/_lib/messages/module.cpp | 25 +- morpheus/_lib/src/messages/control.cpp | 162 +++++++++-- .../tests/messages/test_control_message.cpp | 199 ++++++++++++++ morpheus/stages/inference/inference_stage.py | 12 +- .../stages/preprocess/preprocess_nlp_stage.py | 20 +- tests/messages/test_control_message.py | 251 +++++++++++++++++- tests/utils/test_control_message_utils.py | 8 +- 10 files changed, 858 insertions(+), 82 deletions(-) diff --git a/examples/llm/cli.py b/examples/llm/cli.py index c8aea20320..1ea9198dc1 100644 --- a/examples/llm/cli.py +++ b/examples/llm/cli.py @@ -32,7 +32,7 @@ callback=parse_log_level, help="Specify the logging level to use.") @click.option('--use_cpp', - default=True, + default=False, type=bool, help=("Whether or not to use C++ node and message types or to prefer python. " "Only use as a last resort if bugs are encountered")) diff --git a/morpheus/_lib/include/morpheus/messages/control.hpp b/morpheus/_lib/include/morpheus/messages/control.hpp index 9adb568f90..8ee020c76d 100644 --- a/morpheus/_lib/include/morpheus/messages/control.hpp +++ b/morpheus/_lib/include/morpheus/messages/control.hpp @@ -22,10 +22,12 @@ #include #include +#include #include #include #include #include +#include namespace morpheus { @@ -159,6 +161,11 @@ enum class ControlMessageType // std::shared_ptr m_tensors; // }; +class TensorMemory; + +// System-clock for better compatibility with pybind11/chrono +using time_point_t = std::chrono::time_point; + /** * @brief Class representing a control message for coordinating data processing tasks. * @@ -170,7 +177,8 @@ class ControlMessage { public: ControlMessage(); - ControlMessage(const nlohmann::json& config); + explicit ControlMessage(const nlohmann::json& config); + ControlMessage(const ControlMessage& other); // Copies config and metadata, but not payload /** @@ -183,7 +191,7 @@ class ControlMessage * @brief Get the configuration object for the control message. * @return A const reference to the json object containing configuration information. */ - const nlohmann::json& config() const; + [[nodiscard]] const nlohmann::json& config() const; /** * @brief Add a task of the given type to the control message. @@ -197,19 +205,19 @@ class ControlMessage * @param task_type A string indicating the type of the task. * @return True if a task of the given type exists, false otherwise. */ - bool has_task(const std::string& task_type) const; + [[nodiscard]] bool has_task(const std::string& task_type) const; /** * @brief Remove and return a task of the given type from the control message. * @param task_type A string indicating the type of the task. * @return A json object describing the task. */ - const nlohmann::json remove_task(const std::string& task_type); + nlohmann::json remove_task(const std::string& task_type); /** * @brief Get the tasks for the control message. */ - const nlohmann::json& get_tasks() const; + [[nodiscard]] const nlohmann::json& get_tasks() const; /** * @brief Add a key-value pair to the metadata for the control message. @@ -223,27 +231,47 @@ class ControlMessage * @param key A string indicating the metadata key. * @return True if the metadata key exists, false otherwise. */ - bool has_metadata(const std::string& key) const; + [[nodiscard]] bool has_metadata(const std::string& key) const; /** * @brief Get the metadata for the control message. 
*/ - const nlohmann::json& get_metadata() const; + [[nodiscard]] nlohmann::json get_metadata() const; /** * @brief Get the metadata value for the given key from the control message. + * If the key does not exist, the behavior depends on the fail_on_nonexist parameter. + * * @param key A string indicating the metadata key. - * @return A json object describing the metadata value. + * @param fail_on_nonexist If true, throws an exception when the key does not exist. + * If false, returns std::nullopt for non-existing keys. + * @return An optional json object describing the metadata value if it exists. */ - const nlohmann::json get_metadata(const std::string& key) const; + [[nodiscard]] nlohmann::json get_metadata(const std::string& key, bool fail_on_nonexist = false) const; /** - * @brief Get all metadata keys for the control message. - * @return A json object containing all metadata keys and values. + * @brief Lists all metadata keys currently stored in the control message. + * + * This method retrieves a list of all metadata keys present in the control message. + * Metadata within a control message typically includes supplementary information + * such as configuration settings, operational parameters, or annotations that + * are not directly part of the message payload but are crucial for processing + * or understanding the message. + * + * @return A std::vector containing the keys of all metadata entries + * in the control message. If no metadata has been set, the returned vector + * will be empty. */ - const nlohmann::json list_metadata() const; + [[nodiscard]] std::vector list_metadata() const; /** + * @brief Retrieves the current payload object of the control message. + * + * This method returns a shared pointer to the current payload object associated + * with this control message. The payload object encapsulates metadata or data + * specific to this message instance. + * + * @return A shared pointer to the MessageMeta instance representing the message payload. * @brief Get the payload object for the control message. * @param payload * A shared pointer to the message payload. @@ -251,11 +279,42 @@ class ControlMessage std::shared_ptr payload(); /** - * @brief Set the payload object - * @param payload + * @brief Assigns a new payload object to the control message. + * + * Sets the payload of the control message to the provided MessageMeta instance. + * The payload contains data or metadata pertinent to the message. Using a shared + * pointer ensures that the payload is managed efficiently with automatic reference + * counting. + * + * @param payload A shared pointer to the MessageMeta instance to be set as the new payload. */ void payload(const std::shared_ptr& payload); + /** + * @brief Retrieves the tensor memory associated with the control message. + * + * This method returns a shared pointer to the TensorMemory object linked with + * the control message, if any. TensorMemory typically contains or references + * tensors or other large data blobs relevant to the message's purpose. + * + * @return A shared pointer to the TensorMemory instance associated with the message, + * or nullptr if no tensor memory is set. + */ + std::shared_ptr tensors(); + + /** + * @brief Associates tensor memory with the control message. + * + * Sets the tensor memory for the control message to the provided TensorMemory instance. + * This tensor memory can contain tensors or large data blobs pertinent to the message. 
+ * Utilizing a shared pointer facilitates efficient memory management through automatic + * reference counting. + * + * @param tensor_memory A shared pointer to the TensorMemory instance to be associated + * with the control message. + */ + void tensors(const std::shared_ptr& tensor_memory); + /** * @brief Get the type of task associated with the control message. * @return An enum value indicating the task type. @@ -269,49 +328,189 @@ class ControlMessage */ void task_type(ControlMessageType task_type); + /** + * @brief Sets a timestamp for a specific key. + * + * This method stores a timestamp associated with a unique identifier, + * If the key already exists, its timestamp will be updated to the new value. + * + * @param key The specific key for which the timestamp is to be set. + * @param timestamp The timestamp to be associated with the key. + */ + void set_timestamp(const std::string& key, time_point_t timestamp_ns); + + /** + * @brief Retrieves the timestamp for a specific key. + * + * Attempts to find and return the timestamp associated with the specified key. + * If the key does not exist, the method's behavior is determined by the fail_if_nonexist flag. + * + * @param key The specific key for which the timestamp is requested. + * @param fail_if_nonexist If true, the method throws an exception if the timestamp doesn't exist. + * If false, returns std::nullopt for non-existing timestamps. + * @return An optional containing the timestamp if found, or std::nullopt + * otherwise. + */ + std::optional get_timestamp(const std::string& key, bool fail_if_nonexist = false); + + /** + * @brief Retrieves timestamps for all keys that match a regex pattern. + * + * Searches for the specified for keys that match the provided regex filter and returns + * a map of these keys and their associated timestamps. + * + * @param regex_filter A regular expression pattern that keys must match to be included in the result. + * @return A map containing the matching key and their timestamps. The map will be empty if no matches are found. + */ + std::map filter_timestamp(const std::string& regex_filter); + private: static const std::string s_config_schema; // NOLINT static std::map s_task_type_map; // NOLINT ControlMessageType m_cm_type{ControlMessageType::NONE}; std::shared_ptr m_payload{nullptr}; + std::shared_ptr m_tensors{nullptr}; nlohmann::json m_tasks{}; nlohmann::json m_config{}; + + std::map m_timestamps{}; }; struct ControlMessageProxy { + /** + * @brief Creates a new ControlMessage instance from a configuration dictionary. + * @param config A pybind11::dict representing the configuration for the ControlMessage. + * @return A shared_ptr to a newly created ControlMessage instance. + */ static std::shared_ptr create(pybind11::dict& config); + + /** + * @brief Creates a new ControlMessage instance as a copy of an existing one. + * @param other A shared_ptr to another ControlMessage instance to copy. + * @return A shared_ptr to the newly copied ControlMessage instance. + */ static std::shared_ptr create(std::shared_ptr other); + /** + * @brief Creates a deep copy of the ControlMessage instance. + * @param self Reference to the underlying ControlMessage object. + * @return A shared_ptr to the copied ControlMessage instance. + */ static std::shared_ptr copy(ControlMessage& self); + /** + * @brief Retrieves the configuration of the ControlMessage as a dictionary. + * @param self Reference to the underlying ControlMessage object. + * @return A pybind11::dict representing the ControlMessage's configuration. 
+ */ static pybind11::dict config(ControlMessage& self); - // Required for proxy conversion of json -> dict in python + /** + * @brief Updates the configuration of the ControlMessage from a dictionary. + * @param self Reference to the underlying ControlMessage object. + * @param config A pybind11::dict representing the new configuration. + */ static void config(ControlMessage& self, pybind11::dict& config); + /** + * @brief Adds a task to the ControlMessage. + * @param self Reference to the underlying ControlMessage object. + * @param type The type of the task to be added. + * @param task A pybind11::dict representing the task to be added. + */ static void add_task(ControlMessage& self, const std::string& type, pybind11::dict& task); + + /** + * @brief Removes and returns a task of the given type from the ControlMessage. + * @param self Reference to the underlying ControlMessage object. + * @param type The type of the task to be removed. + * @return A pybind11::dict representing the removed task. + */ static pybind11::dict remove_task(ControlMessage& self, const std::string& type); + + /** + * @brief Retrieves all tasks from the ControlMessage. + * @param self Reference to the underlying ControlMessage object. + * @return A pybind11::dict containing all tasks. + */ static pybind11::dict get_tasks(ControlMessage& self); /** - * @brief Set a metadata key-value pair -- value must be json serializable - * @param self - * @param key - * @param value + * @brief Sets a metadata key-value pair. + * @param self Reference to the underlying ControlMessage object. + * @param key The key for the metadata entry. + * @param value The value for the metadata entry, must be JSON serializable. */ static void set_metadata(ControlMessage& self, const std::string& key, pybind11::object& value); - static pybind11::object get_metadata(ControlMessage& self, std::optional const& key); - static pybind11::dict list_metadata(ControlMessage& self); + /** + * @brief Retrieves a metadata value by key, with an optional default value. + * + * @param self Reference to the underlying ControlMessage object. + * @param key The key for the metadata entry. If not provided, retrieves all metadata. + * @param default_value An optional default value to return if the key does not exist. + * @return The value associated with the key, the default value if the key is not found, or all metadata if the key + * is not provided. + */ + static pybind11::object get_metadata(ControlMessage& self, + const pybind11::object& key, + pybind11::object default_value); + + /** + * @brief Lists all metadata keys of the ControlMessage. + * @param self Reference to the underlying ControlMessage object. + * @return A pybind11::list containing all metadata keys. + */ + static pybind11::list list_metadata(ControlMessage& self); /** * @brief Set the payload object given a Python instance of MessageMeta * @param meta */ static void payload_from_python_meta(ControlMessage& self, const pybind11::object& meta); + + /** + * @brief Sets a timestamp for a given key. + * @param self Reference to the underlying ControlMessage object. + * @param key The key associated with the timestamp. + * @param timestamp A datetime.datetime object representing the timestamp. + * + * This method directly takes a datetime.datetime object from Python and sets the corresponding + * std::chrono::system_clock::time_point for the specified key in the ControlMessage object. 
+ */ + static void set_timestamp(ControlMessage& self, const std::string& key, pybind11::object timestamp); + + /** + * @brief Retrieves the timestamp for a specific key from the ControlMessage object. + * + * @param self Reference to the underlying ControlMessage object. + * @param key The specific key for which the timestamp is requested. + * @param fail_if_nonexist Determines the behavior when the requested timestamp does not exist. + * If true, an exception is thrown. If false, py::none is returned. + * @return A datetime.datetime object representing the timestamp if found, or py::none if not found + * and fail_if_nonexist is false. + * + * This method fetches the timestamp associated with the specified key and returns it as a + * datetime.datetime object in Python. If the timestamp does not exist and fail_if_nonexist is true, + * an exception is raised. + */ + static pybind11::object get_timestamp(ControlMessage& self, const std::string& key, bool fail_if_nonexist = false); + + /** + * @brief Retrieves timestamps for all keys that match a regex pattern from the ControlMessage object. + * + * @param self Reference to the underlying ControlMessage object. + * @param regex_filter The regex pattern that keys must match to be included in the result. + * @return A Python dictionary of matching keys and their timestamps as datetime.datetime objects. + * + * This method retrieves all timestamps within the ControlMessage object that match a specified + * regex pattern. Each key and its associated timestamp are returned in a Python dictionary, with + * timestamps represented as datetime.datetime objects. + */ + static pybind11::dict filter_timestamp(ControlMessage& self, const std::string& regex_filter); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/messages/__init__.pyi b/morpheus/_lib/messages/__init__.pyi index 4f7137a60a..937e0a6084 100644 --- a/morpheus/_lib/messages/__init__.pyi +++ b/morpheus/_lib/messages/__init__.pyi @@ -49,11 +49,19 @@ class ControlMessage(): @typing.overload def config(self, config: dict) -> None: ... def copy(self) -> ControlMessage: ... - def get_metadata(self, key: typing.Optional[str] = None) -> object: ... + def filter_timestamp(self, regex_filter: str) -> dict: + """ + Retrieve timestamps matching a regex filter within a given group. + """ + def get_metadata(self, key: object = None, default_value: object = None) -> object: ... def get_tasks(self) -> dict: ... + def get_timestamp(self, key: str, fail_if_nonexist: bool = False) -> object: + """ + Retrieve the timestamp for a given group and key. Returns None if the timestamp does not exist and fail_if_nonexist is False. + """ def has_metadata(self, key: str) -> bool: ... def has_task(self, task_type: str) -> bool: ... - def list_metadata(self) -> dict: ... + def list_metadata(self) -> list: ... @typing.overload def payload(self) -> MessageMeta: ... @typing.overload @@ -62,10 +70,18 @@ class ControlMessage(): def payload(self, meta: object) -> None: ... def remove_task(self, task_type: str) -> dict: ... def set_metadata(self, key: str, value: object) -> None: ... + def set_timestamp(self, key: str, timestamp: object) -> None: + """ + Set a timestamp for a given key and group. + """ @typing.overload def task_type(self) -> ControlMessageType: ... @typing.overload def task_type(self, task_type: ControlMessageType) -> None: ... + @typing.overload + def tensors(self) -> TensorMemory: ... + @typing.overload + def tensors(self, arg0: TensorMemory) -> None: ... 
pass class ControlMessageType(): """ diff --git a/morpheus/_lib/messages/module.cpp b/morpheus/_lib/messages/module.cpp index 7aa21f24d1..b5b84ee071 100644 --- a/morpheus/_lib/messages/module.cpp +++ b/morpheus/_lib/messages/module.cpp @@ -358,7 +358,6 @@ PYBIND11_MODULE(messages, _module) .value("NONE", ControlMessageType::INFERENCE) .value("TRAINING", ControlMessageType::TRAINING); - // TODO(Devin): Circle back on return value policy choices py::class_>(_module, "ControlMessage") .def(py::init<>()) .def(py::init(py::overload_cast(&ControlMessageProxy::create))) @@ -369,17 +368,37 @@ PYBIND11_MODULE(messages, _module) py::arg("config")) .def("config", pybind11::overload_cast(&ControlMessageProxy::config)) .def("copy", &ControlMessageProxy::copy) - .def("get_metadata", &ControlMessageProxy::get_metadata, py::arg("key") = py::none()) + .def("get_metadata", + &ControlMessageProxy::get_metadata, + py::arg("key") = py::none(), + py::arg("default_value") = py::none()) .def("get_tasks", &ControlMessageProxy::get_tasks) + .def("filter_timestamp", + py::overload_cast(&ControlMessageProxy::filter_timestamp), + "Retrieve timestamps matching a regex filter within a given group.", + py::arg("regex_filter")) + .def("get_timestamp", + py::overload_cast(&ControlMessageProxy::get_timestamp), + "Retrieve the timestamp for a given group and key. Returns None if the timestamp does not exist and " + "fail_if_nonexist is False.", + py::arg("key"), + py::arg("fail_if_nonexist") = false) + .def("set_timestamp", + &ControlMessageProxy::set_timestamp, + "Set a timestamp for a given key and group.", + py::arg("key"), + py::arg("timestamp")) .def("has_metadata", &ControlMessage::has_metadata, py::arg("key")) .def("has_task", &ControlMessage::has_task, py::arg("task_type")) .def("list_metadata", &ControlMessageProxy::list_metadata) - .def("payload", pybind11::overload_cast<>(&ControlMessage::payload), py::return_value_policy::move) + .def("payload", pybind11::overload_cast<>(&ControlMessage::payload)) .def("payload", pybind11::overload_cast&>(&ControlMessage::payload)) .def( "payload", pybind11::overload_cast(&ControlMessageProxy::payload_from_python_meta), py::arg("meta")) + .def("tensors", pybind11::overload_cast<>(&ControlMessage::tensors)) + .def("tensors", pybind11::overload_cast&>(&ControlMessage::tensors)) .def("remove_task", &ControlMessageProxy::remove_task, py::arg("task_type")) .def("set_metadata", &ControlMessageProxy::set_metadata, py::arg("key"), py::arg("value")) .def("task_type", pybind11::overload_cast<>(&ControlMessage::task_type)) diff --git a/morpheus/_lib/src/messages/control.cpp b/morpheus/_lib/src/messages/control.cpp index f1413c2650..dd54b80a43 100644 --- a/morpheus/_lib/src/messages/control.cpp +++ b/morpheus/_lib/src/messages/control.cpp @@ -20,12 +20,17 @@ #include "morpheus/messages/meta.hpp" #include +#include // IWYU pragma: keep +#include #include #include +#include #include #include +#include #include +#include namespace py = pybind11; @@ -58,7 +63,6 @@ const nlohmann::json& ControlMessage::config() const void ControlMessage::add_task(const std::string& task_type, const nlohmann::json& task) { - // TODO(Devin) Schema check VLOG(20) << "Adding task of type " << task_type << " to control message" << task.dump(4); auto _task_type = s_task_type_map.contains(task_type) ? 
s_task_type_map[task_type] : ControlMessageType::NONE; @@ -85,9 +89,9 @@ const nlohmann::json& ControlMessage::get_tasks() const return m_tasks; } -const nlohmann::json ControlMessage::list_metadata() const +std::vector ControlMessage::list_metadata() const { - nlohmann::json key_list = nlohmann::json::array(); + std::vector key_list{}; for (auto it = m_config["metadata"].begin(); it != m_config["metadata"].end(); ++it) { @@ -112,17 +116,31 @@ bool ControlMessage::has_metadata(const std::string& key) const return m_config["metadata"].contains(key); } -const nlohmann::json& ControlMessage::get_metadata() const +nlohmann::json ControlMessage::get_metadata() const { - return m_config["metadata"]; + auto metadata = m_config["metadata"]; + + return metadata; } -const nlohmann::json ControlMessage::get_metadata(const std::string& key) const +nlohmann::json ControlMessage::get_metadata(const std::string& key, bool fail_on_nonexist) const { - return m_config["metadata"].at(key); + // Assuming m_metadata is a std::map storing metadata + auto metadata = m_config["metadata"]; + auto it = metadata.find(key); + if (it != metadata.end()) + { + return metadata.at(key); + } + else if (fail_on_nonexist) + { + throw std::runtime_error("Metadata key does not exist: " + key); + } + + return {}; } -const nlohmann::json ControlMessage::remove_task(const std::string& task_type) +nlohmann::json ControlMessage::remove_task(const std::string& task_type) { auto& task_set = m_tasks.at(task_type); auto iter_task = task_set.begin(); @@ -138,6 +156,43 @@ const nlohmann::json ControlMessage::remove_task(const std::string& task_type) throw std::runtime_error("No tasks of type " + task_type + " found"); } +void ControlMessage::set_timestamp(const std::string& key, time_point_t timestamp_ns) +{ + // Insert or update the timestamp in the map + m_timestamps[key] = timestamp_ns; +} + +std::map ControlMessage::filter_timestamp(const std::string& regex_filter) +{ + std::map matching_timestamps; + std::regex filter(regex_filter); + + for (const auto& [key, timestamp] : m_timestamps) + { + // Check if the key matches the regex + if (std::regex_search(key, filter)) + { + matching_timestamps[key] = timestamp; + } + } + + return matching_timestamps; +} + +std::optional ControlMessage::get_timestamp(const std::string& key, bool fail_if_nonexist) +{ + auto it = m_timestamps.find(key); + if (it != m_timestamps.end()) + { + return it->second; // Return the found timestamp + } + else if (fail_if_nonexist) + { + throw std::runtime_error("Timestamp for the specified key does not exist."); + } + return std::nullopt; +} + void ControlMessage::config(const nlohmann::json& config) { if (config.contains("type")) @@ -173,10 +228,6 @@ void ControlMessage::config(const nlohmann::json& config) std::shared_ptr ControlMessage::payload() { - // auto temp = std::move(m_payload); - // TODO(Devin): Decide if we copy or steal the payload - // m_payload = nullptr; - return m_payload; } @@ -185,6 +236,16 @@ void ControlMessage::payload(const std::shared_ptr& payload) m_payload = payload; } +std::shared_ptr ControlMessage::tensors() +{ + return m_tensors; +} + +void ControlMessage::tensors(const std::shared_ptr& tensors) +{ + m_tensors = tensors; +} + ControlMessageType ControlMessage::task_type() { return m_cm_type; @@ -236,14 +297,23 @@ py::dict ControlMessageProxy::config(ControlMessage& self) return dict; } -py::object ControlMessageProxy::get_metadata(ControlMessage& self, std::optional const& key) +py::object 
ControlMessageProxy::get_metadata(ControlMessage& self, + const py::object& key, + pybind11::object default_value) { - if (key == std::nullopt) + if (key.is_none()) + { + auto metadata = self.get_metadata(); + return mrc::pymrc::cast_from_json(metadata); + } + + auto value = self.get_metadata(py::cast(key), false); + if (value.empty()) { - return mrc::pymrc::cast_from_json(self.get_metadata()); + return default_value; } - return mrc::pymrc::cast_from_json(self.get_metadata(key.value())); + return mrc::pymrc::cast_from_json(value); } void ControlMessageProxy::set_metadata(ControlMessage& self, const std::string& key, pybind11::object& value) @@ -251,11 +321,65 @@ void ControlMessageProxy::set_metadata(ControlMessage& self, const std::string& self.set_metadata(key, mrc::pymrc::cast_from_pyobject(value)); } -py::dict ControlMessageProxy::list_metadata(ControlMessage& self) +py::list ControlMessageProxy::list_metadata(ControlMessage& self) { - auto dict = mrc::pymrc::cast_from_json(self.list_metadata()); + auto keys = self.list_metadata(); + py::list py_keys; + for (const auto& key : keys) + { + py_keys.append(py::str(key)); + } + return py_keys; +} - return dict; +py::dict ControlMessageProxy::filter_timestamp(ControlMessage& self, const std::string& regex_filter) +{ + auto cpp_map = self.filter_timestamp(regex_filter); + py::dict py_dict; + for (const auto& [key, timestamp] : cpp_map) + { + // Directly use the timestamp as datetime.datetime in Python + py_dict[py::str(key)] = timestamp; + } + return py_dict; +} + +// Get a specific timestamp and return it as datetime.datetime or None +py::object ControlMessageProxy::get_timestamp(ControlMessage& self, const std::string& key, bool fail_if_nonexist) +{ + try + { + auto timestamp_opt = self.get_timestamp(key, fail_if_nonexist); + if (timestamp_opt) + { + // Directly return the timestamp as datetime.datetime in Python + return py::cast(*timestamp_opt); + } + + return py::none(); + } catch (const std::runtime_error& e) + { + if (fail_if_nonexist) + { + throw py::value_error(e.what()); + } + return py::none(); + } +} + +// Set a timestamp using a datetime.datetime object from Python +void ControlMessageProxy::set_timestamp(ControlMessage& self, const std::string& key, py::object timestamp_ns) +{ + if (!py::isinstance(timestamp_ns)) + { + // Convert Python datetime.datetime to std::chrono::system_clock::time_point before setting + auto _timestamp_ns = timestamp_ns.cast(); + self.set_timestamp(key, _timestamp_ns); + } + else + { + throw std::runtime_error("Timestamp cannot be None"); + } } void ControlMessageProxy::config(ControlMessage& self, py::dict& config) diff --git a/morpheus/_lib/tests/messages/test_control_message.cpp b/morpheus/_lib/tests/messages/test_control_message.cpp index 61bc59b72e..7fe86afd6c 100644 --- a/morpheus/_lib/tests/messages/test_control_message.cpp +++ b/morpheus/_lib/tests/messages/test_control_message.cpp @@ -19,18 +19,25 @@ #include "test_messages.hpp" #include "morpheus/messages/control.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include #include +#include +#include +#include #include +#include #include #include using namespace morpheus; using namespace morpheus::test; +using clock_type_t = std::chrono::system_clock; + TEST_F(TestControlMessage, InitializationTest) { auto msg_one = ControlMessage(); @@ -48,6 +55,76 @@ TEST_F(TestControlMessage, InitializationTest) ASSERT_EQ(msg_two.has_task("load"), true); } +TEST_F(TestControlMessage, SetAndGetMetadata) +{ + auto 
msg = ControlMessage(); + + nlohmann::json value = {{"property", "value"}}; + std::string key = "testKey"; + + // Set metadata + msg.set_metadata(key, value); + + // Verify metadata can be retrieved and matches what was set + EXPECT_TRUE(msg.has_metadata(key)); + auto retrievedValue = msg.get_metadata(key, true); + EXPECT_EQ(value, retrievedValue); + + // Verify listing metadata includes the key + auto keys = msg.list_metadata(); + auto it = std::find(keys.begin(), keys.end(), key); + EXPECT_NE(it, keys.end()); +} + +// Test for overwriting metadata +TEST_F(TestControlMessage, OverwriteMetadata) +{ + auto msg = ControlMessage(); + + nlohmann::json value1 = {{"initial", "data"}}; + nlohmann::json value2 = {{"updated", "data"}}; + std::string key = "overwriteKey"; + + // Set initial metadata + msg.set_metadata(key, value1); + + // Overwrite metadata + msg.set_metadata(key, value2); + + // Verify metadata was overwritten + auto retrievedValue = msg.get_metadata(key, false); + EXPECT_EQ(value2, retrievedValue); +} + +// Test retrieving metadata when it does not exist +TEST_F(TestControlMessage, GetNonexistentMetadata) +{ + auto msg = ControlMessage(); + + std::string key = "nonexistentKey"; + + // Attempt to retrieve metadata that does not exist + EXPECT_FALSE(msg.has_metadata(key)); + EXPECT_THROW(auto const x = msg.get_metadata(key, true), std::runtime_error); + EXPECT_NO_THROW(auto const x = msg.get_metadata(key, false)); // Should not throw, but return empty json +} + +// Test retrieving all metadata +TEST_F(TestControlMessage, GetAllMetadata) +{ + auto msg = ControlMessage(); + + // Setup - add some metadata + msg.set_metadata("key1", {{"data", "value1"}}); + msg.set_metadata("key2", {{"data", "value2"}}); + + // Retrieve all metadata + auto metadata = msg.get_metadata(); + EXPECT_EQ(2, metadata.size()); // Assuming get_metadata() returns a json object with all metadata + EXPECT_TRUE(metadata.contains("key1")); + EXPECT_TRUE(metadata.contains("key2")); +} + TEST_F(TestControlMessage, SetMessageTest) { auto msg = ControlMessage(); @@ -131,4 +208,126 @@ TEST_F(TestControlMessage, PayloadTest) msg.payload(data_payload); ASSERT_EQ(msg.payload(), data_payload); +} + +TEST_F(TestControlMessage, SetAndGetTimestamp) +{ + auto msg = ControlMessage(); + + // Test setting a timestamp + auto start = clock_type_t::now(); + msg.set_timestamp("group1::key1", start); + + auto result = msg.get_timestamp("group1::key1", false); + ASSERT_TRUE(result.has_value()); + + // Direct comparison since we're using time points now + EXPECT_EQ(start, result.value()); +} + +TEST_F(TestControlMessage, GetTimestampWithRegex) +{ + auto start = clock_type_t::now(); + auto msg = ControlMessage(); + + // Set two timestamps slightly apart + msg.set_timestamp("group1::key1", start); + auto later = clock_type_t::now(); + msg.set_timestamp("group1::key2", later); + + auto result = msg.filter_timestamp("group1::key.*"); + ASSERT_EQ(2, result.size()); + + // Check using the actual time points + EXPECT_EQ(start, result["group1::key1"]); + EXPECT_EQ(later, result["group1::key2"]); + + auto resultSingle = msg.filter_timestamp("group1::key1"); + ASSERT_EQ(1, resultSingle.size()); + EXPECT_EQ(start, resultSingle["group1::key1"]); +} + +TEST_F(TestControlMessage, GetTimestampNonExistentKey) +{ + auto msg = ControlMessage(); + + auto result = msg.get_timestamp("group1::nonexistent", false); + EXPECT_FALSE(result.has_value()); + + EXPECT_THROW( + { + try + { + msg.get_timestamp("group1::nonexistent", true); + } catch (const 
std::runtime_error& e) + { + EXPECT_STREQ("Timestamp for the specified key does not exist.", e.what()); + throw; + } + }, + std::runtime_error); +} + +TEST_F(TestControlMessage, UpdateTimestamp) +{ + auto msg = ControlMessage(); + + auto start = clock_type_t::now(); + msg.set_timestamp("group1::key1", start); + auto later = clock_type_t::now(); + msg.set_timestamp("group1::key1", later); + + auto result = msg.get_timestamp("group1::key1", false); + ASSERT_TRUE(result.has_value()); + + // Check using the actual time points for update + EXPECT_EQ(later, result.value()); +} + +// Test setting and getting Ten:sorMemory +TEST_F(TestControlMessage, SetAndGetTensorMemory) +{ + auto msg = ControlMessage(); + + auto tensorMemory = std::make_shared(0); + // Optionally, modify tensorMemory here if it has any mutable state to test + + // Set the tensor memory + msg.tensors(tensorMemory); + + // Retrieve the tensor memory + auto retrievedTensorMemory = msg.tensors(); + + // Verify that the retrieved tensor memory matches what was set + EXPECT_EQ(tensorMemory, retrievedTensorMemory); +} + +// Test setting TensorMemory to nullptr +TEST_F(TestControlMessage, SetTensorMemoryToNull) +{ + auto msg = ControlMessage(); + + // Set tensor memory to a valid object first + msg.tensors(std::make_shared(0)); + + // Now set it to nullptr + msg.tensors(nullptr); + + // Retrieve the tensor memory + auto retrievedTensorMemory = msg.tensors(); + + // Verify that the retrieved tensor memory is nullptr + EXPECT_EQ(nullptr, retrievedTensorMemory); +} + +// Test retrieving TensorMemory when none has been set +TEST_F(TestControlMessage, GetTensorMemoryWhenNoneSet) +{ + auto msg = ControlMessage(); + + // Attempt to retrieve tensor memory without setting it first + auto retrievedTensorMemory = msg.tensors(); + + // Verify that the retrieved tensor memory is nullptr + EXPECT_EQ(nullptr, retrievedTensorMemory); } \ No newline at end of file diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index 1cc6703fc6..d601d3880d 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -37,7 +37,6 @@ from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.pipeline.multi_message_stage import MultiMessageStage from morpheus.pipeline.stage_schema import StageSchema -from morpheus.stages.preprocess.preprocess_nlp_stage import base64_to_cupyarray from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue logger = logging.getLogger(__name__) @@ -240,19 +239,12 @@ def on_next(message: typing.Union[MultiInferenceMessage, ControlMessage]): _message = None if (isinstance(message, ControlMessage)): _message = message + tensors = message.tensors() memory_params: dict = message.get_metadata("inference_memory_params") inference_type: str = memory_params["inference_type"] - count = int(memory_params["count"]) - segment_ids = base64_to_cupyarray(memory_params["segment_ids"]) - input_ids = base64_to_cupyarray(memory_params["input_ids"]) - input_mask = base64_to_cupyarray(memory_params["input_mask"]) if (inference_type == "nlp"): - memory = InferenceMemoryNLP(count=count, - input_ids=input_ids, - input_mask=input_mask, - seq_ids=segment_ids) - + memory = InferenceMemoryNLP(count=tensors.count, **tensors.get_tensors()) meta_message = MessageMeta(df=message.payload().df) multi_message = MultiMessage(meta=meta_message) diff --git a/morpheus/stages/preprocess/preprocess_nlp_stage.py 
b/morpheus/stages/preprocess/preprocess_nlp_stage.py index b5587ee90e..8b45dafe37 100644 --- a/morpheus/stages/preprocess/preprocess_nlp_stage.py +++ b/morpheus/stages/preprocess/preprocess_nlp_stage.py @@ -25,6 +25,8 @@ import cudf import morpheus._lib.stages as _stages +# pylint: disable=morpheus-incorrect-lib-from-import +from morpheus._lib.messages import TensorMemory as CppTensorMemory from morpheus.cli.register_stage import register_stage from morpheus.cli.utils import MorpheusRelativePath from morpheus.cli.utils import get_package_relative_file @@ -203,15 +205,15 @@ def process_control_message(message: ControlMessage, del text_series - message.set_metadata( - "inference_memory_params", - { - "inference_type": "nlp", - "count": tokenized.input_ids.shape[0], - "segment_ids": cupyarray_to_base64(tokenized.segment_ids), - "input_ids": cupyarray_to_base64(tokenized.input_ids), - "input_mask": cupyarray_to_base64(tokenized.input_mask), - }) + message.tensors( + CppTensorMemory(count=tokenized.input_ids.shape[0], + tensors={ + "input_ids": tokenized.input_ids, + "input_mask": tokenized.input_mask, + "seq_ids": tokenized.segment_ids + })) + + message.set_metadata("inference_memory_params", {"inference_type": "nlp"}) return message diff --git a/tests/messages/test_control_message.py b/tests/messages/test_control_message.py index 4e913be066..dc2c1a3c2b 100644 --- a/tests/messages/test_control_message.py +++ b/tests/messages/test_control_message.py @@ -14,11 +14,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime + +import cupy as cp import pytest import cudf from morpheus import messages +# pylint: disable=morpheus-incorrect-lib-from-import +from morpheus.messages import TensorMemory # pylint: disable=unsupported-membership-test # pylint: disable=unsubscriptable-object @@ -84,18 +89,20 @@ def test_control_message_tasks(): @pytest.mark.usefixtures("config_only_cpp") def test_control_message_metadata(): message = messages.ControlMessage() + message.set_metadata("key_x", "value_x") message.set_metadata("key_y", "value_y") message.set_metadata("key_z", "value_z") - assert len(message.get_metadata()) == 3 + metadata_tags = message.list_metadata() + assert len(metadata_tags) == 3 - assert "key_x" in message.get_metadata() - assert "key_y" in message.get_metadata() - assert "key_z" in message.get_metadata() - assert message.get_metadata()["key_x"] == "value_x" - assert message.get_metadata()["key_y"] == "value_y" - assert message.get_metadata()["key_z"] == "value_z" + assert "key_x" in metadata_tags + assert "key_y" in metadata_tags + assert "key_z" in metadata_tags + assert message.get_metadata("key_x") == "value_x" + assert message.get_metadata("key_y") == "value_y" + assert message.get_metadata("key_z") == "value_z" message.set_metadata("key_y", "value_yy") @@ -106,6 +113,52 @@ def test_control_message_metadata(): assert "not_mutable" not in message.get_metadata() +def test_set_and_get_metadata(): + message = messages.ControlMessage() + + # Test setting and getting metadata + message.set_metadata("test_key", "test_value") + assert message.get_metadata("test_key") == "test_value" + + # Test getting metadata with a default value when the key does not exist + default_value = "default" + assert message.get_metadata("nonexistent_key", default_value) == default_value + + # Test getting all metadata + message.set_metadata("another_key", "another_value") + all_metadata = message.get_metadata() + assert isinstance(all_metadata, 
dict) + assert all_metadata["test_key"] == "test_value" + assert all_metadata["another_key"] == "another_value" + + +def test_list_metadata(): + message = messages.ControlMessage() + + # Setting some metadata + message.set_metadata("key1", "value1") + message.set_metadata("key2", "value2") + message.set_metadata("key3", "value3") + + # Listing all metadata keys + keys = message.list_metadata() + assert isinstance(keys, list) + assert set(keys) == {"key1", "key2", "key3"} + + +def test_get_metadata_default_value(): + message = messages.ControlMessage() + + # Setting metadata to test default value retrieval + message.set_metadata("existing_key", "existing_value") + + # Getting an existing key without default value + assert message.get_metadata("existing_key") == "existing_value" + + # Getting a non-existing key with default value provided + assert message.get_metadata("non_existing_key", "default_value") == "default_value" + + @pytest.mark.usefixtures("config_only_cpp") def test_control_message_get(): raw_control_message = messages.ControlMessage({ @@ -168,8 +221,182 @@ def test_control_message_set_and_get_payload(): assert payload.df == payload2.df -if (__name__ == "__main__"): - test_control_message_init() - test_control_message_get() - test_control_message_set() - test_control_message_set_and_get_payload() +@pytest.mark.usefixtures("config_only_cpp") +def test_set_and_get_timestamp_single(): + # Create a ControlMessage instance + msg = messages.ControlMessage() + + # Define test data + key = "group1::key1" + timestamp = datetime.datetime.now() + + # Set timestamp + msg.set_timestamp(key, timestamp) + + # Get timestamp and assert it's as expected + result = msg.get_timestamp(key, True) + assert result == timestamp, "The retrieved timestamp should match the one that was set." + + +@pytest.mark.usefixtures("config_only_cpp") +def test_filter_timestamp(): + # Create a ControlMessage instance + msg = messages.ControlMessage() + + # Setup test data + group = "group1" + timestamp1 = datetime.datetime.now() + timestamp2 = timestamp1 + datetime.timedelta(seconds=1) + msg.set_timestamp(f"{group}::key1", timestamp1) + msg.set_timestamp(f"{group}::key2", timestamp2) + + # Use a regex that matches both keys + result = msg.filter_timestamp(f"{group}::key.*") + + # Assert both keys are in the result and have correct timestamps + assert len(result) == 2, "Both keys should be present in the result." + assert result[f"{group}::key1"] == timestamp1, "The timestamp for key1 should match." + assert result[f"{group}::key2"] == timestamp2, "The timestamp for key2 should match." + + +@pytest.mark.usefixtures("config_only_cpp") +def test_get_timestamp_fail_if_nonexist(): + # Create a ControlMessage instance + msg = messages.ControlMessage() + + # Setup test data + key = "nonexistent_key" + + # Attempt to get a timestamp for a non-existent key, expecting failure + with pytest.raises(ValueError) as exc_info: + msg.get_timestamp(key, True) + assert str(exc_info.value) == "Timestamp for the specified key does not exist." 
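
Taken together, the timestamp tests above exercise a small per-message tracing API. A minimal usage sketch, assuming only the behavior verified by these tests (the `trace::` key prefix and the stage names are illustrative conventions, not part of the API):

```python
import datetime

from morpheus.messages import ControlMessage

msg = ControlMessage()

# Record per-stage timestamps under a common, regex-friendly prefix.
msg.set_timestamp("trace::preprocess", datetime.datetime.now())
msg.set_timestamp("trace::inference", datetime.datetime.now())

# A missing key returns None by default; fail_if_nonexist=True raises ValueError.
assert msg.get_timestamp("trace::tokenize") is None

# Collect the whole group with a regex; values come back as datetime objects.
trace = msg.filter_timestamp("trace::.*")
elapsed = trace["trace::inference"] - trace["trace::preprocess"]
print(f"preprocess -> inference took {elapsed.total_seconds():.6f}s")
```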
+ + +# Test setting and getting tensors with cupy arrays +@pytest.mark.usefixtures("config_only_cpp") +def test_tensors_setting_and_getting(): + data = {"input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1]), "segment_ids": cp.array([0, 0, 1])} + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=data["input_ids"].shape[0]) + tensor_memory.set_tensors(data) + + message.tensors(tensor_memory) + + retrieved_tensors = message.tensors() + assert retrieved_tensors.count == data["input_ids"].shape[0], "Tensor count mismatch." + + for key, val in data.items(): + assert cp.allclose(retrieved_tensors.get_tensor(key), val), f"Mismatch in tensor data for {key}." + + +# Test retrieving tensor names and checking specific tensor existence +@pytest.mark.usefixtures("config_only_cpp") +def test_tensor_names_and_existence(): + tokenized_data = { + "input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1]), "segment_ids": cp.array([0, 0, 1]) + } + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=tokenized_data["input_ids"].shape[0], tensors=tokenized_data) + + message.tensors(tensor_memory) + retrieved_tensors = message.tensors() + + for key in tokenized_data: + assert key in retrieved_tensors.tensor_names, f"Tensor {key} should be listed in tensor names." + assert retrieved_tensors.has_tensor(key), f"Tensor {key} should exist." + + +# Test manipulating tensors after retrieval +@pytest.mark.usefixtures("config_only_cpp") +def test_tensor_manipulation_after_retrieval(): + tokenized_data = { + "input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1]), "segment_ids": cp.array([0, 0, 1]) + } + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=3, tensors=tokenized_data) + + message.tensors(tensor_memory) + + retrieved_tensors = message.tensors() + new_tensor = cp.array([4, 5, 6]) + retrieved_tensors.set_tensor("new_tensor", new_tensor) + + assert cp.allclose(retrieved_tensors.get_tensor("new_tensor"), new_tensor), "New tensor data mismatch." + + +# Assuming there's functionality to update all tensors at once +@pytest.mark.usefixtures("config_only_cpp") +def test_tensor_update(): + tokenized_data = { + "input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1]), "segment_ids": cp.array([0, 0, 1]) + } + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=3, tensors=tokenized_data) + + message.tensors(tensor_memory) + + # Update tensors with new data + new_tensors = { + "input_ids": cp.array([4, 5, 6]), "input_mask": cp.array([1, 0, 1]), "segment_ids": cp.array([1, 1, 0]) + } + + tensor_memory.set_tensors(new_tensors) + + updated_tensors = message.tensors() + + for key, val in new_tensors.items(): + assert cp.allclose(updated_tensors.get_tensor(key), val), f"Mismatch in updated tensor data for {key}." + + +@pytest.mark.usefixtures("config_only_cpp") +def test_update_individual_tensor(): + initial_data = {"input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1])} + update_data = {"input_ids": cp.array([4, 5, 6])} + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=3, tensors=initial_data) + message.tensors(tensor_memory) + + # Update one tensor and retrieve all to ensure update integrity + tensor_memory.set_tensor("input_ids", update_data["input_ids"]) + retrieved_tensors = message.tensors() + + # Check updated tensor + assert cp.allclose(retrieved_tensors.get_tensor("input_ids"), + update_data["input_ids"]), "Input IDs update mismatch." 
+ # Ensure other tensor remains unchanged + assert cp.allclose(retrieved_tensors.get_tensor("input_mask"), + initial_data["input_mask"]), "Input mask should remain unchanged after updating input_ids." + + +@pytest.mark.usefixtures("config_only_cpp") +def test_behavior_with_empty_tensors(): + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=0) + message.tensors(tensor_memory) + + retrieved_tensors = message.tensors() + assert retrieved_tensors.count == 0, "Tensor count should be 0 for empty tensor memory." + assert len(retrieved_tensors.tensor_names) == 0, "There should be no tensor names for empty tensor memory." + + +@pytest.mark.usefixtures("config_only_cpp") +def test_consistency_after_multiple_operations(): + initial_data = {"input_ids": cp.array([1, 2, 3]), "input_mask": cp.array([1, 1, 1])} + message = messages.ControlMessage() + tensor_memory = TensorMemory(count=3, tensors=initial_data) + message.tensors(tensor_memory) + + # Update a tensor + tensor_memory.set_tensor("input_ids", cp.array([4, 5, 6])) + # Remove another tensor + # Add a new tensor + new_tensor = {"new_tensor": cp.array([7, 8, 9])} + tensor_memory.set_tensor("new_tensor", new_tensor["new_tensor"]) + + retrieved_tensors = message.tensors() + assert retrieved_tensors.count == 3, "Tensor count mismatch after multiple operations." + assert cp.allclose(retrieved_tensors.get_tensor("input_ids"), + cp.array([4, 5, 6])), "Mismatch in input_ids after update." + assert cp.allclose(retrieved_tensors.get_tensor("new_tensor"), + new_tensor["new_tensor"]), "New tensor data mismatch." diff --git a/tests/utils/test_control_message_utils.py b/tests/utils/test_control_message_utils.py index 95fee92a73..71be9c7074 100644 --- a/tests/utils/test_control_message_utils.py +++ b/tests/utils/test_control_message_utils.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest - from morpheus.messages import ControlMessage from morpheus.utils.control_message_utils import CMDefaultFailureContextManager from morpheus.utils.control_message_utils import cm_set_failure @@ -37,7 +35,7 @@ def test_skip_forward_on_cm_failed(): # pylint: disable=unused-argument @cm_skip_processing_if_failed - def dummy_func(control_message, *args, **kwargs): + def dummy_func(cm, *args, **kwargs): return "Function Executed" assert dummy_func(control_message) == control_message @@ -50,8 +48,8 @@ def test_cm_default_failure_context_manager_no_exception(): control_message = ControlMessage() with CMDefaultFailureContextManager(control_message): pass - with pytest.raises(RuntimeError): - control_message.get_metadata("cm_failed") + + assert control_message.get_metadata("cm_failed") is None def test_cm_default_failure_context_manager_with_exception(): From 1c9905782b5f1ecb4c75db9e7e9b1d1be9e23760 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 20 Feb 2024 20:09:01 -0600 Subject: [PATCH 11/18] Update ops-bot.yaml (#1528) This PR enables the ForwardMerger [ops-bot](https://github.com/rapidsai/ops-bot) plugin. 
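
For reference, the resulting plugin section of `.github/ops-bot.yaml` should read roughly as follows (reconstructed from the one-line diff below; the neighboring keys are the pre-existing settings shown as context):

```yaml
auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
forward_merger: true
```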
Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1528 --- .github/ops-bot.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml index 93cc5cf22e..c22131c4ca 100644 --- a/.github/ops-bot.yaml +++ b/.github/ops-bot.yaml @@ -20,3 +20,4 @@ auto_merger: true branch_checker: true label_checker: true release_drafter: true +forward_merger: true From 65552f51fd6e5ff3186a03ece6ba5a8e4feebf87 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Thu, 22 Feb 2024 14:27:22 -0500 Subject: [PATCH 12/18] Make `start_async()` available to source stages (#1523) - Moves `start_async` from `Stage` to `StageBase` to make it available to source stages. - Adds tests for source stage `start_async` and `on_start`. Closes #1522 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1523 --- morpheus/pipeline/pipeline.py | 3 +-- morpheus/pipeline/stage.py | 13 ------------- morpheus/pipeline/stage_base.py | 15 +++++++++++++++ tests/pipeline/test_pipeline.py | 34 +++++++++++++++++++++++++++------ 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 0604971d7b..df40264bf4 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -471,8 +471,7 @@ async def build_and_start(self): async def _async_start(self, stages: networkx.classes.reportviews.NodeView): # This method is called once for each segment in the pipeline executed on this host for stage in stages: - if (isinstance(stage, Stage)): - await stage.start_async() + await stage.start_async() def visualize(self, filename: str = None, **graph_kwargs): """ diff --git a/morpheus/pipeline/stage.py b/morpheus/pipeline/stage.py index dd4054ea35..c9c03b65e4 100644 --- a/morpheus/pipeline/stage.py +++ b/morpheus/pipeline/stage.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -import warnings import mrc @@ -40,17 +39,5 @@ def _post_build(self, builder: mrc.Builder, out_ports_nodes: list[mrc.SegmentObj def _start(self): pass - async def start_async(self): - """ - This function is called along with on_start during stage initialization. Allows stages to utilize the - asyncio loop if needed. - """ - if (hasattr(self, 'on_start')): - warnings.warn( - "The on_start method is deprecated and may be removed in the future. 
" - "Please use start_async instead.", - DeprecationWarning) - self.on_start() - def _on_complete(self, node): # pylint: disable=unused-argument logger.info("Stage Complete: %s", self.name) diff --git a/morpheus/pipeline/stage_base.py b/morpheus/pipeline/stage_base.py index a7da7a6145..c71146a060 100644 --- a/morpheus/pipeline/stage_base.py +++ b/morpheus/pipeline/stage_base.py @@ -18,6 +18,7 @@ import inspect import logging import typing +import warnings from abc import ABC from abc import abstractmethod @@ -75,6 +76,8 @@ class StageBase(ABC, collections.abc.Hashable): """ + # pylint:disable=too-many-public-methods + __ID_COUNTER = AtomicInteger(0) def __init__(self, config: Config): @@ -494,3 +497,15 @@ def _pre_compute_schema(self, schema: _pipeline.StageSchema): `compute_schema` being called. """ pass + + async def start_async(self): + """ + This function is called along with on_start during stage initialization. Allows stages to utilize the + asyncio loop if needed. + """ + if (hasattr(self, 'on_start')): + warnings.warn( + "The on_start method is deprecated and may be removed in the future. " + "Please use start_async instead.", + DeprecationWarning) + self.on_start() diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index e763ca95c9..aedc003b25 100755 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -47,15 +47,28 @@ class SourceTestStage(InMemorySourceStage): def __init__(self, config, dataframes: typing.List[DataFrameType], + on_start_cb: typing.Callable[[], None] = None, + start_async_cb: typing.Callable[[], None] = None, destructor_cb: typing.Callable[[], None] = None, repeat: int = 1): super().__init__(config, dataframes, repeat) + self._on_start_cb = on_start_cb + self._start_async_cb = start_async_cb self._destructor_cb = destructor_cb @property def name(self) -> str: return "test-source" + def on_start(self): + if self._on_start_cb is not None: + self._on_start_cb() + + async def start_async(self): + await super().start_async() + if self._start_async_cb is not None: + self._start_async_cb() + def __del__(self): if self._destructor_cb is not None: self._destructor_cb() @@ -130,21 +143,30 @@ def test_startup_cb_called(filter_probs_df: DataFrameType): """ Test to ensure that the destructors of stages are called (issue #1114). 
""" - state_dict = {"on_start": False, "start_async": False} + state_dict = { + "source_on_start": False, "source_start_async": False, "sink_on_start": False, "sink_start_async": False + } def update_state_dict(key: str): nonlocal state_dict state_dict[key] = True + source_callbacks = { + 'on_start_cb': lambda: update_state_dict("source_on_start"), + 'start_async_cb': lambda: update_state_dict("source_start_async") + } + sink_callbacks = { - 'on_start_cb': lambda: update_state_dict("on_start"), - 'start_async_cb': lambda: update_state_dict("start_async") + 'on_start_cb': lambda: update_state_dict("sink_on_start"), + 'start_async_cb': lambda: update_state_dict("sink_start_async") } - _run_pipeline(filter_probs_df, source_callbacks={}, sink_callbacks=sink_callbacks) + _run_pipeline(filter_probs_df, source_callbacks=source_callbacks, sink_callbacks=sink_callbacks) - assert state_dict["on_start"] - assert state_dict["start_async"] + assert state_dict["source_on_start"] + assert state_dict["source_start_async"] + assert state_dict["sink_on_start"] + assert state_dict["sink_start_async"] @pytest.mark.use_cudf From eeb8b51e2cfc2e326bc5b909e6027b9d63f279dd Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:53:20 -0800 Subject: [PATCH 13/18] Fix memory leak in the mutable dataframe checkout/checkin code (#1534) * Switch to using a `unique_ptr` to hold the python object * Random IWYU fixes. Closes #1533 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Devin Robison (https://github.com/drobison00) URL: https://github.com/nv-morpheus/Morpheus/pull/1534 --- .../_lib/include/morpheus/objects/table_info.hpp | 6 +++--- .../include/morpheus/stages/preprocess_fil.hpp | 7 ------- morpheus/_lib/src/messages/multi.cpp | 11 ++++------- .../_lib/src/objects/mutable_table_ctx_mgr.cpp | 8 ++++++-- morpheus/_lib/src/objects/table_info.cpp | 14 +++++++++----- morpheus/_lib/src/stages/preprocess_fil.cpp | 14 ++++---------- morpheus/_lib/tests/messages/test_dev_doc_ex3.cpp | 6 ++---- 7 files changed, 28 insertions(+), 38 deletions(-) diff --git a/morpheus/_lib/include/morpheus/objects/table_info.hpp b/morpheus/_lib/include/morpheus/objects/table_info.hpp index 610b69139f..d4e719abd2 100644 --- a/morpheus/_lib/include/morpheus/objects/table_info.hpp +++ b/morpheus/_lib/include/morpheus/objects/table_info.hpp @@ -185,9 +185,9 @@ struct MORPHEUS_EXPORT MutableTableInfo : public TableInfoBase * lifetime of `MutableTableInfo`. Use this method when it is necessary to make changes to the python object using * the python API. The python object must be returned via `return_obj` before `MutableTableInfo` goes out of scope. * - * @return pybind11::object + * @return std::unique_ptr */ - pybind11::object checkout_obj(); + std::unique_ptr checkout_obj(); /** * @brief Returns the checked out python object from `checkout_obj`. 
Each call to `checkout_obj` needs a matching call to `return_obj`. * * @param obj */ - void return_obj(pybind11::object&& obj); + void return_obj(std::unique_ptr<pybind11::object>&& obj); /** * @brief Replaces the index in the underlying dataframe if the existing one is not unique and monotonic. The old diff --git a/morpheus/_lib/include/morpheus/stages/preprocess_fil.hpp b/morpheus/_lib/include/morpheus/stages/preprocess_fil.hpp index 7849509785..683badf4bb 100644 --- a/morpheus/_lib/include/morpheus/stages/preprocess_fil.hpp +++ b/morpheus/_lib/include/morpheus/stages/preprocess_fil.hpp @@ -22,19 +22,12 @@ #include "morpheus/objects/table_info.hpp" #include -#include -#include -#include -#include -#include #include #include -#include #include #include // for apply, make_subscriber, observable_member, is_on_error<>::not_void, is_on_next_of<>::not_void, from // IWYU pragma: no_include "rxcpp/sources/rx-iterate.hpp" -#include #include #include #include diff --git a/morpheus/_lib/src/messages/multi.cpp b/morpheus/_lib/src/messages/multi.cpp index 1469e05cd8..6e42e839d7 100644 --- a/morpheus/_lib/src/messages/multi.cpp +++ b/morpheus/_lib/src/messages/multi.cpp @@ -32,21 +32,17 @@ #include #include // for CHECK #include // for MRC_CHECK_CUDA -#include +#include // IWYU pragma: keep #include #include #include -#include -#include // for get_current_device_resource #include // for transform -#include // needed for pybind11::make_tuple #include // for size_t #include // for uint8_t #include #include // for runtime_error #include -#include #include // IWYU pragma: no_include @@ -377,7 +373,8 @@ void MultiMessageInterfaceProxy::set_meta(MultiMessage& self, pybind11::object c // Need the GIL for the remainder pybind11::gil_scoped_acquire gil; - auto df = mutable_info.checkout_obj(); + auto pdf = mutable_info.checkout_obj(); + auto& df = *pdf; auto [row_indexer, column_indexer] = get_indexers(self, df, columns); @@ -427,7 +424,7 @@ void MultiMessageInterfaceProxy::set_meta(MultiMessage& self, pybind11::object c } } - mutable_info.return_obj(std::move(df)); + mutable_info.return_obj(std::move(pdf)); } std::shared_ptr<MultiMessage> MultiMessageInterfaceProxy::get_slice(MultiMessage& self, diff --git a/morpheus/_lib/src/objects/mutable_table_ctx_mgr.cpp b/morpheus/_lib/src/objects/mutable_table_ctx_mgr.cpp index 10ed3a5e97..b895252767 100644 --- a/morpheus/_lib/src/objects/mutable_table_ctx_mgr.cpp +++ b/morpheus/_lib/src/objects/mutable_table_ctx_mgr.cpp @@ -22,6 +22,7 @@ #include #include +#include #include namespace morpheus { @@ -39,13 +40,16 @@ py::object MutableTableCtxMgr::enter() // Release the GIL py::gil_scoped_release no_gil; m_table = std::make_unique<MutableTableInfo>(std::move(m_meta_msg.get_mutable_info())); - m_py_table = std::make_unique<py::object>(std::move(m_table->checkout_obj())); + m_py_table = m_table->checkout_obj(); return *m_py_table; } void MutableTableCtxMgr::exit(const py::object& type, const py::object& value, const py::object& traceback) { - m_table->return_obj(std::move(*m_py_table.release())); + std::unique_ptr<py::object> ptr{nullptr}; + m_py_table.swap(ptr); + + m_table->return_obj(std::move(ptr)); m_table.reset(nullptr); } diff --git a/morpheus/_lib/src/objects/table_info.cpp b/morpheus/_lib/src/objects/table_info.cpp index b781ebac16..9661e42b97 100644 --- a/morpheus/_lib/src/objects/table_info.cpp +++ b/morpheus/_lib/src/objects/table_info.cpp @@ -281,27 +281,31 @@ void MutableTableInfo::insert_missing_columns(const std::vector<std::string>& column_names) -pybind11::object MutableTableInfo::checkout_obj() +std::unique_ptr<pybind11::object> MutableTableInfo::checkout_obj() { // Get a copy increasing
the ref count py::object checked_out_obj = this->get_parent()->get_py_object(); m_checked_out_ref_count = checked_out_obj.ref_count(); - return checked_out_obj; + auto ptr = std::make_unique<py::object>(std::move(checked_out_obj)); + + return ptr; } -void MutableTableInfo::return_obj(py::object&& obj) +void MutableTableInfo::return_obj(std::unique_ptr<py::object>&& obj) { + obj.reset(nullptr); m_checked_out_ref_count = -1; } std::optional<std::string> MutableTableInfo::ensure_sliceable_index() { std::optional<std::string> old_index_col_name{"_index_"}; - auto py_df = this->checkout_obj(); + auto ptr_df = this->checkout_obj(); { py::gil_scoped_acquire gil; + auto& py_df = *ptr_df; auto df_index = py_df.attr("index"); // Check to see if we actually need the change @@ -326,7 +330,7 @@ std::optional<std::string> MutableTableInfo::ensure_sliceable_index() } } - this->return_obj(std::move(py_df)); + this->return_obj(std::move(ptr_df)); // If we made a change, update the index and column list if (old_index_col_name.has_value()) diff --git a/morpheus/_lib/src/stages/preprocess_fil.cpp b/morpheus/_lib/src/stages/preprocess_fil.cpp index cda36739c7..293a3af70c 100644 --- a/morpheus/_lib/src/stages/preprocess_fil.cpp +++ b/morpheus/_lib/src/stages/preprocess_fil.cpp @@ -17,12 +17,7 @@ #include "morpheus/stages/preprocess_fil.hpp" -#include "mrc/node/rx_sink_base.hpp" -#include "mrc/node/rx_source_base.hpp" -#include "mrc/node/sink_properties.hpp" -#include "mrc/node/source_properties.hpp" #include "mrc/segment/object.hpp" -#include "mrc/types.hpp" #include "morpheus/messages/memory/inference_memory_fil.hpp" #include "morpheus/messages/meta.hpp" // for MessageMeta @@ -41,7 +36,6 @@ #include #include // for MRC_CHECK_CUDA #include -#include // for object_api::operator(), operator""_a #include #include // for str_attr_accessor, arg #include @@ -49,10 +43,9 @@ #include // for cuda_stream_per_thread #include // for device_buffer -#include +#include // for std::find #include #include -#include #include #include @@ -184,7 +177,8 @@ TableInfo PreprocessFILStage::fix_bad_columns(sink_type_t x) pybind11::gil_scoped_acquire gil; // pybind11::object df = x->meta->get_py_table(); - auto df = mutable_info.checkout_obj(); + auto pdf = mutable_info.checkout_obj(); + auto& df = *pdf; std::string regex = R"((\d+))"; @@ -196,7 +190,7 @@ TableInfo PreprocessFILStage::fix_bad_columns(sink_type_t x) .attr("astype")(pybind11::str("float32")); } - mutable_info.return_obj(std::move(df)); + mutable_info.return_obj(std::move(pdf)); } } diff --git a/morpheus/_lib/tests/messages/test_dev_doc_ex3.cpp b/morpheus/_lib/tests/messages/test_dev_doc_ex3.cpp index b33ec0cc81..397cd21c26 100644 --- a/morpheus/_lib/tests/messages/test_dev_doc_ex3.cpp +++ b/morpheus/_lib/tests/messages/test_dev_doc_ex3.cpp @@ -23,12 +23,10 @@ #include "morpheus/utilities/cudf_util.hpp" // for CudfHelper #include -#include // for cast #include // for gil_scoped_release, gil_scoped_acquire #include // IWYU pragma: keep #include // for object, object_api, literals -#include // for array #include // for shared_ptr #include // for move @@ -68,7 +66,7 @@ TEST_F(TestDevDocEx3, TestPyObjFromMultiMesg) auto df = mutable_info.checkout_obj(); // Make a copy of the original DataFrame - auto copied_df = df.attr("copy")("deep"_a = true); + auto copied_df = df->attr("copy")("deep"_a = true); // Now that we are done with `df` return it to the owner mutable_info.return_obj(std::move(df)); @@ -94,7 +92,7 @@ TEST_F(TestDevDocEx3, TestPyObjFromMultiMesg) auto result_df = result_mutable_info.checkout_obj(); //
orig_df.eq(result_df).all().all() - auto is_true = orig_df.attr("eq")(result_df).attr("all")().attr("all")(); + auto is_true = orig_df->attr("eq")(*result_df).attr("all")().attr("all")(); EXPECT_TRUE(is_true.cast<bool>()); orig_mutable_info.return_obj(std::move(orig_df)); From 594b10c9d52ae4c4c5d7a8793118fc7b5851e6af Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:44:50 -0500 Subject: [PATCH 14/18] Upgrade `openai` version to 1.13 and `langchain` to version 0.1.9 (#1529) This PR's main focus is to update the version of the OpenAI package to 1.13 to allow using the built-in retry logic and improved code. In addition, the following changes are also made: - Switch to using `langchain==0.1.9` installed via pip to get around a pandas dependency issue - Sorted the lists in `dependencies.yaml` to make merging easier in the future - Improved logging for the `OpenAIChatService` to log all requests, in order, to a file - New feature to add default model arguments to the service to avoid repeating arguments for each client. You can now just overwrite the default args - New feature to allow `run_ci_local.sh` to persist any changes to the code back to the host via a volume mount. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Michael Demoret (https://github.com/mdemoret-nv) - David Gardner (https://github.com/dagardner-nv) Approvers: - David Gardner (https://github.com/dagardner-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1529 --- .github/workflows/pr.yaml | 4 +- ci/scripts/bootstrap_local_ci.sh | 15 +- ci/scripts/run_ci_local.sh | 13 +- .../all_cuda-121_arch-x86_64.yaml | 5 +- .../examples_cuda-121_arch-x86_64.yaml | 5 +- dependencies.yaml | 91 +++++----- docs/source/conf.py | 1 + examples/llm/agents/kafka_pipeline.py | 2 +- examples/llm/agents/simple_pipeline.py | 4 +- examples/llm/common/utils.py | 12 +- examples/llm/completion/pipeline.py | 4 +- .../module/content_extractor_module.py | 11 +- morpheus/llm/services/llm_service.py | 3 +- morpheus/llm/services/nemo_llm_service.py | 9 +- morpheus/llm/services/openai_chat_service.py | 165 +++++++++++++++--- morpheus/stages/inference/inference_stage.py | 2 +- morpheus/stages/input/arxiv_source.py | 4 +- tests/_utils/llm.py | 18 ++ tests/benchmarks/conftest.py | 9 +- tests/conftest.py | 16 +- tests/llm/services/test_llm_service_pipe.py | 17 +- tests/llm/services/test_nemo_llm_client.py | 14 +- tests/llm/services/test_nemo_llm_service.py | 2 +- tests/llm/services/test_openai_chat_client.py | 138 +++++++-------- .../llm/services/test_openai_chat_service.py | 7 +- tests/llm/test_completion_pipe.py | 16 +- tests/llm/test_rag_standalone_pipe.py | 16 +- tests/stages/arxiv/test_arxiv_source.py | 11 +- tests/stages/arxiv/test_arxiv_source_pipe.py | 5 +- 29 files changed, 389 insertions(+), 230 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 7921fb430a..3482cc463b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -94,7 +94,7 @@ jobs: # Disable conda upload for now, once we have morpheus packages in conda forge set the value to # !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) &&
'main' || 'dev') || '' conda_upload_label: "" - container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240214 - test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240214 + container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240221 + test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240221 secrets: NGC_API_KEY: ${{ secrets.NGC_API_KEY }} diff --git a/ci/scripts/bootstrap_local_ci.sh b/ci/scripts/bootstrap_local_ci.sh index 2e59d9f1ec..3051b13af1 100755 --- a/ci/scripts/bootstrap_local_ci.sh +++ b/ci/scripts/bootstrap_local_ci.sh @@ -16,11 +16,16 @@ export WORKSPACE_TMP="$(pwd)/ws_tmp" mkdir -p ${WORKSPACE_TMP} -git clone ${GIT_URL} Morpheus -cd Morpheus/ -git checkout ${GIT_BRANCH} -git pull -git checkout ${GIT_COMMIT} + +if [[ "${USE_HOST_GIT}" == "1" ]]; then + cd Morpheus/ +else + git clone ${GIT_URL} Morpheus + cd Morpheus/ + git checkout ${GIT_BRANCH} + git pull + git checkout ${GIT_COMMIT} +fi export MORPHEUS_ROOT=$(pwd) export WORKSPACE=${MORPHEUS_ROOT} diff --git a/ci/scripts/run_ci_local.sh b/ci/scripts/run_ci_local.sh index 1575555ad3..fb29fdf139 100755 --- a/ci/scripts/run_ci_local.sh +++ b/ci/scripts/run_ci_local.sh @@ -41,6 +41,10 @@ function git_ssh_to_https() MORPHEUS_ROOT=${MORPHEUS_ROOT:-$(git rev-parse --show-toplevel)} +# Specifies whether to mount the current git repo (to allow changes to be persisted) or to use a clean clone (to closely +# match CI, the default) +USE_HOST_GIT=${USE_HOST_GIT:-0} + GIT_URL=$(git remote get-url origin) GIT_URL=$(git_ssh_to_https ${GIT_URL}) @@ -51,7 +55,7 @@ GIT_BRANCH=$(git branch --show-current) GIT_COMMIT=$(git log -n 1 --pretty=format:%H) LOCAL_CI_TMP=${LOCAL_CI_TMP:-${MORPHEUS_ROOT}/.tmp/local_ci_tmp} -CONTAINER_VER=${CONTAINER_VER:-240214} +CONTAINER_VER=${CONTAINER_VER:-240221} CUDA_VER=${CUDA_VER:-12.1} DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""} @@ -66,6 +70,7 @@ ENV_LIST="${ENV_LIST} --env GIT_COMMIT=${GIT_COMMIT}" ENV_LIST="${ENV_LIST} --env PARALLEL_LEVEL=$(nproc)" ENV_LIST="${ENV_LIST} --env CUDA_VER=${CUDA_VER}" ENV_LIST="${ENV_LIST} --env SKIP_CONDA_ENV_UPDATE=${SKIP_CONDA_ENV_UPDATE}" +ENV_LIST="${ENV_LIST} --env USE_HOST_GIT=${USE_HOST_GIT}" mkdir -p ${LOCAL_CI_TMP} cp ${MORPHEUS_ROOT}/ci/scripts/bootstrap_local_ci.sh ${LOCAL_CI_TMP} @@ -82,6 +87,10 @@ for STAGE in "${STAGES[@]}"; do DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} --runtime=runc" fi + if [[ "${USE_HOST_GIT}" == "1" ]]; then + DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} -v ${MORPHEUS_ROOT}:/Morpheus" + fi + if [[ "${STAGE}" == "bash" ]]; then DOCKER_RUN_CMD="bash --init-file /ci_tmp/bootstrap_local_ci.sh" else @@ -89,7 +98,9 @@ for STAGE in "${STAGES[@]}"; do fi echo "Running ${STAGE} stage in ${CONTAINER}" + set -x docker run ${DOCKER_RUN_ARGS} ${DOCKER_EXTRA_ARGS} ${CONTAINER} ${DOCKER_RUN_CMD} + set +x STATUS=$? 
if [[ ${STATUS} -ne 0 ]]; then diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 5c3329da82..28bdaa6fd9 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -54,7 +54,6 @@ dependencies: - isort - jsonpatch>=1.33 - kfp -- langchain=0.0.190 - librdkafka>=1.9.2,<1.10.0a0 - libtool - libwebp=1.3.2 @@ -67,10 +66,11 @@ dependencies: - ninja=1.11 - nlohmann_json=3.9 - nodejs=18.* +- numexpr - numpydoc=1.5 - nvtabular=23.08.00 - onnx -- openai=0.28 +- openai=1.13 - papermill=2.4.0 - pip - pkg-config=0.29 @@ -119,6 +119,7 @@ dependencies: - dgl - dglgo - google-search-results==2.4 + - langchain==0.1.9 - milvus==2.3.5 - nemollm - pymilvus==2.3.6 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index 04cef6f3ac..ad2315d91c 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -27,16 +27,16 @@ dependencies: - huggingface_hub=0.20.2 - jsonpatch>=1.33 - kfp -- langchain=0.0.190 - libwebp=1.3.2 - mlflow=2.9.2 - networkx=2.8.8 - newspaper3k=0.2 - nodejs=18.* +- numexpr - numpydoc=1.5 - nvtabular=23.08.00 - onnx -- openai=0.28 +- openai=1.13 - papermill=2.4.0 - pip - pypdf=3.17.4 @@ -63,6 +63,7 @@ dependencies: - dgl - dglgo - google-search-results==2.4 + - langchain==0.1.9 - milvus==2.3.5 - nemollm - pymilvus==2.3.6 diff --git a/dependencies.yaml b/dependencies.yaml index bff273259b..e3dbdf55ea 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,24 +21,24 @@ files: cuda: ["12.1"] arch: [x86_64] includes: - - data_retrieval + - benchmark_cpp - build_cpp + - checks + - cudatoolkit + - cve-mitigation + - data_retrieval - development - - benchmark_cpp - - runtime + - doca + - docs - example-dfp-prod - example-gnn - example-llm-agents - example-llm-completion - example-llm-rag - example-llm-vdb-upload - - test_python_morpheus - - docs - - cudatoolkit - python - - checks - - cve-mitigation - - doca + - runtime + - test_python_morpheus dev: output: conda @@ -46,26 +46,16 @@ files: cuda: ["12.1"] arch: [x86_64] includes: - - data_retrieval + - benchmark_cpp - build_cpp + - checks + - cudatoolkit + - data_retrieval - development - - benchmark_cpp - - runtime - - test_python_morpheus - docs - - cudatoolkit - python - - checks - - examples: - output: conda - matrix: - cuda: ["12.1"] - arch: [x86_64] - includes: - - examples - runtime - - cudatoolkit + - test_python_morpheus build: output: none @@ -73,14 +63,14 @@ files: cuda: ["12.1"] arch: [x86_64] includes: - - data_retrieval + - benchmark_cpp - build_cpp + - cudatoolkit + - data_retrieval - development - - benchmark_cpp + - python - runtime - test_python_morpheus - - cudatoolkit - - python test: output: none @@ -88,20 +78,20 @@ files: cuda: ["12.1"] arch: [x86_64] includes: - - data_retrieval + - benchmark_cpp - build_cpp + - cudatoolkit + - data_retrieval - development - - benchmark_cpp - - runtime - - test_python_morpheus - example-dfp-prod - example-gnn - example-llm-agents - example-llm-completion - example-llm-rag - example-llm-vdb-upload - - cudatoolkit - python + - runtime + - test_python_morpheus docs: output: none @@ -109,19 +99,19 @@ files: cuda: ["12.1"] arch: [x86_64] includes: - - data_retrieval - - build_cpp - benchmark_cpp + - build_cpp - cudatoolkit - - python + - data_retrieval - docs - - runtime - example-dfp-prod - example-gnn - example-llm-agents - 
example-llm-completion - example-llm-rag - example-llm-vdb-upload + - python + - runtime runtime: output: conda @@ -138,7 +128,7 @@ files: cuda: ["12.1"] arch: [x86_64] includes: - - runtime + - cve-mitigation - example-dfp-prod - example-gnn - example-llm-agents @@ -146,7 +136,7 @@ files: - example-llm-rag - example-llm-vdb-upload - python - - cve-mitigation + - runtime model-utils: output: conda @@ -231,18 +221,18 @@ dependencies: common: - output_types: [conda] packages: + - &click click >=8 + - &numpydoc numpydoc=1.5 - breathe=4.35.0 - doxygen=1.9.2 - exhale=0.3.6 - ipython - myst-parser=0.18.1 - nbsphinx + - pluggy=1.3 - python-graphviz - sphinx - sphinx_rtd_theme - - &numpydoc numpydoc=1.5 - - pluggy=1.3 - - &click click >=8 benchmark_cpp: common: @@ -287,10 +277,10 @@ dependencies: - output_types: [conda] packages: - &nodejs nodejs=18.* - - pytest=7.4.4 - pytest-asyncio - pytest-benchmark=4.0 - pytest-cov + - pytest=7.4.4 - python-docx==1.1.0 - pip - pip: @@ -328,12 +318,13 @@ dependencies: - output_types: [conda] packages: - &grpcio-status grpcio-status==1.59 - - &langchain langchain=0.0.190 - &transformers transformers=4.36.2 # newer versions are incompatible with our pinned version of huggingface_hub - huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762 - - pip + - numexpr - sentence-transformers + - pip - pip: + - &langchain langchain==0.1.9 - nemollm example-llm-completion: @@ -350,12 +341,12 @@ dependencies: - output_types: [conda] packages: - *grpcio-status - - *langchain - anyio>=3.7 - jsonpatch>=1.33 - - openai=0.28 + - openai=1.13 - pip - pip: + - *langchain - google-search-results==2.4 example-llm-vdb-upload: @@ -364,27 +355,27 @@ dependencies: packages: - *arxiv - *grpcio-status - - *langchain - *newspaper3k - *pypdf - onnx - pip - pip: - PyMuPDF==1.23.21 + - *langchain model-training-tuning: common: - output_types: [conda] packages: - *cuml + - *scikit-learn + - *transformers - jupyterlab - matplotlib - onnx - pandas - - *scikit-learn - seaborn - seqeval=1.2.2 - - *transformers - xgboost cve-mitigation: diff --git a/docs/source/conf.py b/docs/source/conf.py index d63f161384..053e8214a8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -176,6 +176,7 @@ "morpheus.cli.commands", # Dont document the CLI in Sphinx "nvtabular", "pandas", + "pydantic", "pymilvus", "tensorrt", "torch", diff --git a/examples/llm/agents/kafka_pipeline.py b/examples/llm/agents/kafka_pipeline.py index 333dd37a89..91a8ee8add 100644 --- a/examples/llm/agents/kafka_pipeline.py +++ b/examples/llm/agents/kafka_pipeline.py @@ -39,7 +39,7 @@ def _build_agent_executor(model_name: str) -> AgentExecutor: - llm = OpenAIChat(model=model_name, temperature=0) + llm = OpenAIChat(model_name=model_name, model_kwargs={"temperature": 0.0}, client=None) tools = load_tools(["serpapi", "llm-math"], llm=llm) diff --git a/examples/llm/agents/simple_pipeline.py b/examples/llm/agents/simple_pipeline.py index 0e7e1f4321..9b2b95c611 100644 --- a/examples/llm/agents/simple_pipeline.py +++ b/examples/llm/agents/simple_pipeline.py @@ -15,11 +15,11 @@ import logging import time -from langchain import OpenAI from langchain.agents import AgentType from langchain.agents import initialize_agent from langchain.agents import load_tools from langchain.agents.agent import AgentExecutor +from langchain.llms.openai import OpenAI import cudf @@ -43,7 +43,7 @@ def _build_agent_executor(model_name: str) -> AgentExecutor: - llm = OpenAI(model=model_name, temperature=0) + llm = 
OpenAI(model=model_name, temperature=0.0, client=None) tools = load_tools(["serpapi", "llm-math"], llm=llm) diff --git a/examples/llm/common/utils.py b/examples/llm/common/utils.py index 6c9984ee42..1779bb6c88 100644 --- a/examples/llm/common/utils.py +++ b/examples/llm/common/utils.py @@ -15,8 +15,9 @@ import logging import pymilvus -from langchain.embeddings import HuggingFaceEmbeddings +from langchain.embeddings import HuggingFaceEmbeddings # pylint: disable=no-name-in-module +from morpheus.llm.services.llm_service import LLMService from morpheus.llm.services.nemo_llm_service import NeMoLLMService from morpheus.llm.services.openai_chat_service import OpenAIChatService from morpheus.service.vdb.milvus_client import DATA_TYPE_MAP @@ -34,16 +35,19 @@ def build_huggingface_embeddings(model_name: str, model_kwargs: dict = None, enc def build_llm_service(model_name: str, llm_service: str, tokens_to_generate: int, **model_kwargs): lowered_llm_service = llm_service.lower() + + service: LLMService | None = None + if (lowered_llm_service == 'nemollm'): model_kwargs['tokens_to_generate'] = tokens_to_generate - llm_service = NeMoLLMService() + service = NeMoLLMService() elif (lowered_llm_service == 'openai'): model_kwargs['max_tokens'] = tokens_to_generate - llm_service = OpenAIChatService() + service = OpenAIChatService() else: raise RuntimeError(f"Unsupported LLM service name: {llm_service}") - return llm_service.get_client(model_name, **model_kwargs) + return service.get_client(model_name=model_name, **model_kwargs) def build_milvus_config(resource_schema_config: dict): diff --git a/examples/llm/completion/pipeline.py b/examples/llm/completion/pipeline.py index 00df19363a..80f5e8ea0f 100644 --- a/examples/llm/completion/pipeline.py +++ b/examples/llm/completion/pipeline.py @@ -54,8 +54,8 @@ def _build_engine(llm_service: str): else: raise ValueError(f"Invalid LLM service: {llm_service}") - llm_service = llm_service_cls() - llm_clinet = llm_service.get_client(model_name=model_name) + service = llm_service_cls() + llm_clinet = service.get_client(model_name=model_name) engine = LLMEngine() diff --git a/examples/llm/vdb_upload/module/content_extractor_module.py b/examples/llm/vdb_upload/module/content_extractor_module.py index 5b2ed2ce0f..ac5ae771e6 100755 --- a/examples/llm/vdb_upload/module/content_extractor_module.py +++ b/examples/llm/vdb_upload/module/content_extractor_module.py @@ -32,7 +32,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module from pydantic import Field from pydantic import ValidationError -from pydantic import validator +from pydantic import field_validator from morpheus.messages import MessageMeta from morpheus.utils.module_utils import ModuleLoaderFactory @@ -55,12 +55,13 @@ class ContentExtractorSchema(BaseModel): converters_meta: Dict[str, Dict] = Field(default_factory=dict) num_threads: int = 10 - @validator('converters_meta', pre=True, allow_reuse=True) - def val_converters_meta(cls, to_validate: Dict[str, Dict]) -> Dict[str, Dict]: # pylint: disable=no-self-argument + @field_validator('converters_meta', mode="before") + @classmethod + def val_converters_meta(cls, to_validate: Dict[str, Dict]) -> Dict[str, Dict]: validated_meta = {} for key, value in to_validate.items(): if key.lower() == 'csv': - validated_meta[key] = CSVConverterSchema(**value) + validated_meta[key] = CSVConverterSchema(**value).model_dump() else: validated_meta[key] = value return validated_meta @@ -319,8 +320,10 @@ def file_content_extractor(builder: mrc.Builder): chunk_params = { 
file_type: { + # pylint: disable=no-member "chunk_size": converters_meta.get(file_type, {}).get("chunk_size", chunk_size), "chunk_overlap": converters_meta.get(file_type, {}).get("chunk_overlap", chunk_overlap) + # pylint: enable=no-member } for file_type in converters } diff --git a/morpheus/llm/services/llm_service.py b/morpheus/llm/services/llm_service.py index e1cbe3c65f..1d11481345 100644 --- a/morpheus/llm/services/llm_service.py +++ b/morpheus/llm/services/llm_service.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -import typing from abc import ABC from abc import abstractmethod @@ -88,7 +87,7 @@ class LLMService(ABC): """ @abstractmethod - def get_client(self, model_name: str, **model_kwargs: dict[str, typing.Any]) -> LLMClient: + def get_client(self, *, model_name: str, **model_kwargs) -> LLMClient: """ Returns a client for interacting with a specific model. diff --git a/morpheus/llm/services/nemo_llm_service.py b/morpheus/llm/services/nemo_llm_service.py index 5dede8a240..0173354df9 100644 --- a/morpheus/llm/services/nemo_llm_service.py +++ b/morpheus/llm/services/nemo_llm_service.py @@ -50,11 +50,14 @@ class NeMoLLMClient(LLMClient): Additional keyword arguments to pass to the model when generating text. """ - def __init__(self, parent: "NeMoLLMService", model_name: str, **model_kwargs: dict[str, typing.Any]) -> None: + def __init__(self, parent: "NeMoLLMService", *, model_name: str, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() + + assert parent is not None, "Parent service cannot be None." + self._parent = parent self._model_name = model_name self._model_kwargs = model_kwargs @@ -167,7 +170,7 @@ def __init__(self, *, api_key: str = None, org_id: str = None) -> None: org_id=org_id, ) - def get_client(self, model_name: str, **model_kwargs: dict[str, typing.Any]) -> NeMoLLMClient: + def get_client(self, *, model_name: str, **model_kwargs) -> NeMoLLMClient: """ Returns a client for interacting with a specific model. This method is the preferred way to create a client. @@ -180,4 +183,4 @@ def get_client(self, model_name: str, **model_kwargs: dict[str, typing.Any]) -> Additional keyword arguments to pass to the model when generating text. """ - return NeMoLLMClient(self, model_name, **model_kwargs) + return NeMoLLMClient(self, model_name=model_name, **model_kwargs) diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index 098625c538..76da57912f 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -11,11 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import asyncio import copy import logging +import os +import time import typing +from contextlib import contextmanager +from textwrap import dedent + +import appdirs from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.llm_service import LLMService @@ -30,10 +35,36 @@ try: import openai + import openai.types.chat + import openai.types.chat.chat_completion except ImportError as import_exc: IMPORT_EXCEPTION = import_exc +class _ApiLogger: + """ + Simple class that allows passing back and forth the inputs and outputs of an API call via a context manager. 
+ """ + + log_template: typing.ClassVar[str] = dedent(""" + ============= MESSAGE %d START ============== + --- Input --- + %s + --- Output --- (%f ms) + %s + ============= MESSAGE %d END ============== + """).strip("\n") + + def __init__(self, *, message_id: int, inputs: typing.Any) -> None: + + self.message_id = message_id + self.inputs = inputs + self.outputs = None + + def set_output(self, output: typing.Any) -> None: + self.outputs = output + + class OpenAIChatClient(LLMClient): """ Client for interacting with a specific OpenAI chat model. This class should be constructed with the @@ -51,11 +82,24 @@ class OpenAIChatClient(LLMClient): Additional keyword arguments to pass to the model when generating text. """ - def __init__(self, model_name: str, set_assistant: bool = False, **model_kwargs: dict[str, typing.Any]) -> None: + _prompt_key: str = "prompt" + _assistant_key: str = "assistant" + + def __init__(self, + parent: "OpenAIChatService", + *, + model_name: str, + set_assistant: bool = False, + **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() + + assert parent is not None, "Parent service cannot be None." + + self._parent = parent + self._model_name = model_name self._set_assistant = set_assistant self._prompt_key = "prompt" @@ -63,7 +107,10 @@ def __init__(self, model_name: str, set_assistant: bool = False, **model_kwargs: # Preserve original configuration. self._model_kwargs = copy.deepcopy(model_kwargs) - self._model_kwargs['temperature'] = model_kwargs.get('temperature', 0) + + # Create the client objects for both sync and async + self._client = openai.OpenAI() + self._client_async = openai.AsyncOpenAI() def get_input_names(self) -> list[str]: input_names = [self._prompt_key] @@ -72,27 +119,42 @@ def get_input_names(self) -> list[str]: return input_names - def _create_messages(self, prompt: str, assistant: str = None) -> list[dict[str, str]]: - messages = [ - { - "role": "system", "content": "You are a helpful assistant." 
- }, - { - "role": "user", "content": prompt - }, - ] + @contextmanager + def _api_logger(self, inputs: typing.Any): - if (self._set_assistant): + message_id = self._parent._get_message_id() + start_time = time.time() + + api_logger = _ApiLogger(message_id=message_id, inputs=inputs) + + yield api_logger + + end_time = time.time() + duration_ms = (end_time - start_time) * 1000.0 + + self._parent._logger.info(_ApiLogger.log_template, + message_id, + api_logger.inputs, + duration_ms, + api_logger.outputs, + message_id) + + def _create_messages(self, + prompt: str, + assistant: str = None) -> list["openai.types.chat.ChatCompletionMessageParam"]: + messages: list[openai.types.chat.ChatCompletionMessageParam] = [{"role": "user", "content": prompt}] + + if (self._set_assistant and assistant is not None): messages.append({"role": "assistant", "content": assistant}) return messages - def _extract_completion(self, completion: "openai.openai_object.OpenAIObject") -> str: - choices = completion.get('choices', []) + def _extract_completion(self, completion: "openai.types.chat.chat_completion.ChatCompletion") -> str: + choices = completion.choices if len(choices) == 0: raise ValueError("No choices were returned from the model.") - content = choices[0].get('message', {}).get('content', None) + content = choices[0].message.content if content is None: raise ValueError("No content was returned from the model.") @@ -101,7 +163,8 @@ def _extract_completion(self, completion: "openai.openai_object.OpenAIObject") - def _generate(self, prompt: str, assistant: str = None) -> str: messages = self._create_messages(prompt, assistant) - output = openai.ChatCompletion.create(model=self._model_name, messages=messages, **self._model_kwargs) + output: openai.types.chat.chat_completion.ChatCompletion = self._client.chat.completions.create( + model=self._model_name, messages=messages, **self._model_kwargs) return self._extract_completion(output) @@ -117,9 +180,20 @@ def generate(self, input_dict: dict[str, str]) -> str: return self._generate(input_dict[self._prompt_key], input_dict.get(self._assistant_key)) async def _generate_async(self, prompt: str, assistant: str = None) -> str: + messages = self._create_messages(prompt, assistant) - output = await openai.ChatCompletion.acreate(model=self._model_name, messages=messages, **self._model_kwargs) + with self._api_logger(inputs=messages) as msg_logger: + + try: + output = await self._client_async.chat.completions.create(model=self._model_name, + messages=messages, + **self._model_kwargs) + except Exception as exc: + self._parent._logger.error("Error generating completion: %s", exc) + raise + + msg_logger.set_output(output) return self._extract_completion(output) @@ -186,16 +260,54 @@ class OpenAIChatService(LLMService): A service for interacting with OpenAI Chat models, this class should be used to create clients. """ - def __init__(self) -> None: + def __init__(self, *, default_model_kwargs: dict = None) -> None: + """ + Creates a service for interacting with OpenAI Chat models, this class should be used to create clients. + + Parameters + ---------- + default_model_kwargs : dict, optional + Default arguments to use when creating a client via the `get_client` function. Any argument specified here + will automatically be used when calling `get_client`. Arguments specified in the `get_client` function will + overwrite default values specified here. This is useful to set model arguments before creating multiple + clients. 
By default None + + Raises + ------ + ImportError + If the `openai` library is not found in the python environment. + """ if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION super().__init__() - def get_client(self, - model_name: str, - set_assistant: bool = False, - **model_kwargs: dict[str, typing.Any]) -> OpenAIChatClient: + self._default_model_kwargs = default_model_kwargs or {} + + self._logger = logging.getLogger(f"{__package__}.{OpenAIChatService.__name__}") + + # Don't propagate up to the default logger. Just log to the file + self._logger.propagate = False + + log_file = os.path.join(appdirs.user_log_dir(appauthor="NVIDIA", appname="morpheus"), "openai.log") + + # Add a file handler + file_handler = logging.FileHandler(log_file) + + self._logger.addHandler(file_handler) + self._logger.setLevel(logging.INFO) + + self._logger.info("OpenAI Chat Service started.") + + self._message_count = 0 + + def _get_message_id(self): + + self._message_count += 1 + + return self._message_count + + def get_client(self, *, model_name: str, set_assistant: bool = False, **model_kwargs) -> OpenAIChatClient: """ Returns a client for interacting with a specific model. This method is the preferred way to create a client. @@ -208,7 +320,10 @@ def get_client(self, When `True`, a second input field named `assistant` will be used to provide additional context to the model. model_kwargs : dict[str, typing.Any] - Additional keyword arguments to pass to the model when generating text. + Additional keyword arguments to pass to the model when generating text. Arguments specified here will + overwrite the `default_model_kwargs` set in the service constructor. """ - return OpenAIChatClient(model_name=model_name, set_assistant=set_assistant, **model_kwargs) + final_model_kwargs = {**self._default_model_kwargs, **model_kwargs} + + return OpenAIChatClient(self, model_name=model_name, set_assistant=set_assistant, **final_model_kwargs) diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index d601d3880d..e4111926e9 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -443,4 +443,4 @@ def _convert_one_response(output: MultiResponseMessage, inf: MultiInferenceMessa for i, idx in enumerate(mess_ids): probs[idx, :] = cp.maximum(probs[idx, :], resp_probs[i, :]) - return MultiResponseMessage.from_message(inf, memory=memory, offset=inf.offset, count=inf.mess_count) + return MultiResponseMessage.from_message(inf, memory=memory, offset=seq_offset, count=seq_count) diff --git a/morpheus/stages/input/arxiv_source.py b/morpheus/stages/input/arxiv_source.py index 1b03b299fa..c1ed77c0cb 100644 --- a/morpheus/stages/input/arxiv_source.py +++ b/morpheus/stages/input/arxiv_source.py @@ -37,7 +37,7 @@ IMPORT_ERROR_MESSAGE = ( "ArxivSource requires additional dependencies to be installed. Install them by running the following command: " "`conda env update --solver=libmamba -n morpheus" - "--file morpheus/conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") + "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") @register_stage("from-arxiv") @@ -47,7 +47,7 @@ class ArxivSource(PreallocatorMixin, SingleOutputSource): This stage requires several additional dependencies to be installed.
Install them by running the following command: `conda env update --solver=libmamba -n morpheus " - "--file morpheus/conda/environments/dev_cuda-121_arch-x86_64.yaml --prune` + "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune` Parameters ---------- diff --git a/tests/_utils/llm.py b/tests/_utils/llm.py index 6925b90df5..9c48583b7e 100644 --- a/tests/_utils/llm.py +++ b/tests/_utils/llm.py @@ -14,6 +14,7 @@ import asyncio import typing +from unittest import mock from morpheus.llm import InputMap from morpheus.llm import LLMContext @@ -73,3 +74,20 @@ def execute_task_handler(task_handler: LLMTaskHandler, message = asyncio.run(task_handler.try_handle(context)) return message + + +def _mk_mock_choice(message: str) -> mock.MagicMock: + mock_choice = mock.MagicMock() + mock_choice.message.content = message + return mock_choice + + +def mk_mock_openai_response(messages: list[str]) -> mock.MagicMock: + """ + Creates a mocked openai.types.chat.chat_completion.ChatCompletion response with the given messages. + """ + response = mock.MagicMock() + mock_choices = [_mk_mock_choice(message) for message in messages] + response.choices = mock_choices + + return response diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py index f877612bb6..f051218193 100644 --- a/tests/benchmarks/conftest.py +++ b/tests/benchmarks/conftest.py @@ -128,16 +128,19 @@ def mock_serpapi_request_time_fixture(): @pytest.mark.usefixtures("openai") @pytest.fixture(name="mock_chat_completion") -def mock_chat_completion_fixture(mock_chat_completion: mock.MagicMock, mock_openai_request_time: float): +def mock_chat_completion_fixture(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], + mock_openai_request_time: float): + (mock_client, mock_async_client) = mock_chat_completion async def sleep_first(*args, **kwargs): # Sleep time is based on average request time await asyncio.sleep(mock_openai_request_time) return mock.DEFAULT - mock_chat_completion.acreate.side_effect = sleep_first + mock_async_client.chat.completions.create.side_effect = sleep_first + mock_client.chat.completions.create.side_effect = sleep_first - yield mock_chat_completion + yield (mock_client, mock_async_client) @pytest.mark.usefixtures("nemollm") diff --git a/tests/conftest.py b/tests/conftest.py index 6599f53ce3..c422c18ea9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1051,13 +1051,15 @@ def openai_fixture(fail_missing: bool): @pytest.mark.usefixtures("openai") @pytest.fixture(name="mock_chat_completion") def mock_chat_completion_fixture(): - with mock.patch("openai.ChatCompletion") as mock_chat_completion: - mock_chat_completion.return_value = mock_chat_completion - - response = {'choices': [{'message': {'content': 'test_output'}}]} - mock_chat_completion.create.return_value = response.copy() - mock_chat_completion.acreate = mock.AsyncMock(return_value=response.copy()) - yield mock_chat_completion + from _utils.llm import mk_mock_openai_response + with (mock.patch("openai.OpenAI") as mock_client, mock.patch("openai.AsyncOpenAI") as mock_async_client): + mock_client.return_value = mock_client + mock_async_client.return_value = mock_async_client + + mock_client.chat.completions.create.return_value = mk_mock_openai_response(['test_output']) + mock_async_client.chat.completions.create = mock.AsyncMock( + return_value=mk_mock_openai_response(['test_output'])) + yield (mock_client, mock_async_client) @pytest.mark.usefixtures("nemollm") diff --git a/tests/llm/services/test_llm_service_pipe.py 
b/tests/llm/services/test_llm_service_pipe.py index a04d09a683..fa6c1ac0c7 100644 --- a/tests/llm/services/test_llm_service_pipe.py +++ b/tests/llm/services/test_llm_service_pipe.py @@ -18,6 +18,7 @@ import cudf from _utils import assert_results +from _utils.llm import mk_mock_openai_response from morpheus.config import Config from morpheus.llm import LLMEngine from morpheus.llm.nodes.extracter_node import ExtracterNode @@ -86,15 +87,15 @@ def test_completion_pipe_nemo( def test_completion_pipe_openai(config: Config, - mock_chat_completion: mock.MagicMock, + mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], country_prompts: list[str], capital_responses: list[str]): - mock_chat_completion.acreate.side_effect = [{ - "choices": [{ - 'message': { - 'content': response - } - }] - } for response in capital_responses] + (mock_client, mock_async_client) = mock_chat_completion + mock_async_client.chat.completions.create.side_effect = [ + mk_mock_openai_response([response]) for response in capital_responses + ] _run_pipeline(config, OpenAIChatService, country_prompts, capital_responses) + + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() diff --git a/tests/llm/services/test_nemo_llm_client.py b/tests/llm/services/test_nemo_llm_client.py index b9636e722a..5a7993006c 100644 --- a/tests/llm/services/test_nemo_llm_client.py +++ b/tests/llm/services/test_nemo_llm_client.py @@ -23,19 +23,19 @@ def test_constructor(mock_nemollm: mock.MagicMock, mock_nemo_service: mock.MagicMock): - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") assert isinstance(client, LLMClient) mock_nemollm.assert_not_called() def test_get_input_names(mock_nemollm: mock.MagicMock, mock_nemo_service: mock.MagicMock): - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") assert client.get_input_names() == ["prompt"] mock_nemollm.assert_not_called() def test_generate(mock_nemollm: mock.MagicMock, mock_nemo_service: mock.MagicMock): - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") assert client.generate({'prompt': "test_prompt"}) == "test_output" mock_nemollm.generate_multiple.assert_called_once_with(model="test_model", prompts=["test_prompt"], @@ -46,7 +46,7 @@ def test_generate(mock_nemollm: mock.MagicMock, mock_nemo_service: mock.MagicMoc def test_generate_batch(mock_nemollm: mock.MagicMock, mock_nemo_service: mock.MagicMock): mock_nemollm.generate_multiple.return_value = ["output1", "output2"] - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") assert client.generate_batch({'prompt': ["prompt1", "prompt2"]}) == ["output1", "output2"] mock_nemollm.generate_multiple.assert_called_once_with(model="test_model", prompts=["prompt1", "prompt2"], @@ -63,7 +63,7 @@ def test_generate_async( mock_nemo_service: mock.MagicMock): mock_asyncio_gather.return_value = [mock.MagicMock()] - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") 
results = asyncio.run(client.generate_async({'prompt': "test_prompt"})) assert results == "test_output" mock_nemollm.generate.assert_called_once_with("test_model", @@ -82,7 +82,7 @@ def test_generate_batch_async( mock_asyncio_gather.return_value = [mock.MagicMock(), mock.MagicMock()] mock_nemollm.post_process_generate_response.side_effect = [{"text": "output1"}, {"text": "output2"}] - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") results = asyncio.run(client.generate_batch_async({'prompt': ["prompt1", "prompt2"]})) assert results == ["output1", "output2"] mock_nemollm.generate.assert_has_calls([ @@ -101,7 +101,7 @@ def test_generate_batch_async_error( mock_asyncio_gather.return_value = [mock.MagicMock(), mock.MagicMock()] mock_nemollm.post_process_generate_response.return_value = {"status": "fail", "msg": "unittest"} - client = NeMoLLMClient(mock_nemo_service, "test_model", additional_arg="test_arg") + client = NeMoLLMClient(mock_nemo_service, model_name="test_model", additional_arg="test_arg") with pytest.raises(RuntimeError, match="unittest"): asyncio.run(client.generate_batch_async({'prompt': ["prompt1", "prompt2"]})) diff --git a/tests/llm/services/test_nemo_llm_service.py b/tests/llm/services/test_nemo_llm_service.py index d9ef769fca..d91a6f7351 100644 --- a/tests/llm/services/test_nemo_llm_service.py +++ b/tests/llm/services/test_nemo_llm_service.py @@ -43,6 +43,6 @@ def test_constructor(mock_nemollm: mock.MagicMock, api_key: str, org_id: str): def test_get_client(): service = NeMoLLMService(api_key="test_api_key") - client = service.get_client("test_model") + client = service.get_client(model_name="test_model") assert isinstance(client, NeMoLLMClient) diff --git a/tests/llm/services/test_openai_chat_client.py b/tests/llm/services/test_openai_chat_client.py index e6b6a450df..21013ce463 100644 --- a/tests/llm/services/test_openai_chat_client.py +++ b/tests/llm/services/test_openai_chat_client.py @@ -18,14 +18,18 @@ import pytest +from _utils.llm import mk_mock_openai_response from morpheus.llm.services.llm_service import LLMClient from morpheus.llm.services.openai_chat_service import OpenAIChatClient +from morpheus.llm.services.openai_chat_service import OpenAIChatService -def test_constructor(mock_chat_completion: mock.MagicMock): - client = OpenAIChatClient(model_name="test_model") +def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock]): + client = OpenAIChatClient(OpenAIChatService(), model_name="test_model") assert isinstance(client, LLMClient) - mock_chat_completion.assert_not_called() + + for mock_client in mock_chat_completion: + mock_client.assert_called() @pytest.mark.parametrize("use_async", [True, False]) @@ -34,87 +38,79 @@ def test_constructor(mock_chat_completion: mock.MagicMock): [({ "prompt": "test_prompt", "assistant": "assistant_response" }, - True, - [{ - "role": "system", "content": "You are a helpful assistant." - }, { + True, [{ "role": "user", "content": "test_prompt" }, { "role": "assistant", "content": "assistant_response" - }]), - ({ - "prompt": "test_prompt" - }, - False, [{ - "role": "system", "content": "You are a helpful assistant." 
- }, { + }]), ({ + "prompt": "test_prompt" + }, False, [{ "role": "user", "content": "test_prompt" }])]) @pytest.mark.parametrize("temperature", [0, 1, 2]) -def test_generate(mock_chat_completion: mock.MagicMock, +def test_generate(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], use_async: bool, input_dict: dict[str, str], set_assistant: bool, expected_messages: list[dict], temperature: int): - client = OpenAIChatClient(model_name="test_model", set_assistant=set_assistant, temperature=temperature) + (mock_client, mock_async_client) = mock_chat_completion + client = OpenAIChatClient(OpenAIChatService(), + model_name="test_model", + set_assistant=set_assistant, + temperature=temperature) if use_async: results = asyncio.run(client.generate_async(input_dict)) - mock_chat_completion.acreate.assert_called_once_with(model="test_model", - messages=expected_messages, - temperature=temperature) + mock_async_client.chat.completions.create.assert_called_once_with(model="test_model", + messages=expected_messages, + temperature=temperature) + mock_client.chat.completions.create.assert_not_called() else: results = client.generate(input_dict) - mock_chat_completion.create.assert_called_once_with(model="test_model", - messages=expected_messages, - temperature=temperature) + mock_client.chat.completions.create.assert_called_once_with(model="test_model", + messages=expected_messages, + temperature=temperature) + mock_async_client.chat.completions.create.assert_not_called() assert results == "test_output" @pytest.mark.parametrize("use_async", [True, False]) -@pytest.mark.parametrize( - "inputs, set_assistant, expected_messages", - [({ - "prompt": ["prompt1", "prompt2"], "assistant": ["assistant1", "assistant2"] - }, - True, - [[{ - "role": "system", "content": "You are a helpful assistant." - }, { - "role": "user", "content": "prompt1" - }, { - "role": "assistant", "content": "assistant1" - }], - [{ - "role": "system", "content": "You are a helpful assistant." - }, { - "role": "user", "content": "prompt2" - }, { - "role": "assistant", "content": "assistant2" - }]]), - ({ - "prompt": ["prompt1", "prompt2"] - }, - False, - [[{ - "role": "system", "content": "You are a helpful assistant." - }, { - "role": "user", "content": "prompt1" - }], [{ - "role": "system", "content": "You are a helpful assistant." 
- }, { - "role": "user", "content": "prompt2" - }]])]) +@pytest.mark.parametrize("inputs, set_assistant, expected_messages", + [({ + "prompt": ["prompt1", "prompt2"], "assistant": ["assistant1", "assistant2"] + }, + True, + [[{ + "role": "user", "content": "prompt1" + }, { + "role": "assistant", "content": "assistant1" + }], [{ + "role": "user", "content": "prompt2" + }, { + "role": "assistant", "content": "assistant2" + }]]), + ({ + "prompt": ["prompt1", "prompt2"] + }, + False, [[{ + "role": "user", "content": "prompt1" + }], [{ + "role": "user", "content": "prompt2" + }]])]) @pytest.mark.parametrize("temperature", [0, 1, 2]) -def test_generate_batch(mock_chat_completion: mock.MagicMock, +def test_generate_batch(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], use_async: bool, inputs: dict[str, list[str]], set_assistant: bool, expected_messages: list[list[dict]], temperature: int): - client = OpenAIChatClient(model_name="test_model", set_assistant=set_assistant, temperature=temperature) + (mock_client, mock_async_client) = mock_chat_completion + client = OpenAIChatClient(OpenAIChatService(), + model_name="test_model", + set_assistant=set_assistant, + temperature=temperature) expected_results = ["test_output" for _ in range(len(inputs["prompt"]))] expected_calls = [ @@ -123,28 +119,22 @@ def test_generate_batch(mock_chat_completion: mock.MagicMock, if use_async: results = asyncio.run(client.generate_batch_async(inputs)) - mock_chat_completion.acreate.assert_has_calls(expected_calls, any_order=False) + mock_async_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) + mock_client.chat.completions.create.assert_not_called() + else: results = client.generate_batch(inputs) - mock_chat_completion.create.assert_has_calls(expected_calls, any_order=False) + mock_client.chat.completions.create.assert_has_calls(expected_calls, any_order=False) + mock_async_client.chat.completions.create.assert_not_called() assert results == expected_results -@pytest.mark.parametrize("completion", [{ - "choices": [] -}, { - "choices": [{}] -}, { - "choices": [{ - "message": {} - }] -}], - ids=["no_choices", "no_message", "no_content"]) -def test_generate_invalid_completions(mock_chat_completion: mock.MagicMock, completion: dict): - mock_chat_completion.create.return_value = completion - - client = OpenAIChatClient(model_name="test_model") +@pytest.mark.parametrize("completion", [[], [None]], ids=["no_choices", "no_content"]) +@pytest.mark.usefixtures("mock_chat_completion") +def test_extract_completion_errors(completion: list): + client = OpenAIChatClient(OpenAIChatService(), model_name="test_model") + mock_completion = mk_mock_openai_response(completion) with pytest.raises(ValueError): - client.generate({"prompt": "test_prompt"}) + client._extract_completion(mock_completion) diff --git a/tests/llm/services/test_openai_chat_service.py b/tests/llm/services/test_openai_chat_service.py index 20cb454c7d..fc05d64543 100644 --- a/tests/llm/services/test_openai_chat_service.py +++ b/tests/llm/services/test_openai_chat_service.py @@ -29,7 +29,7 @@ def test_constructor(): def test_get_client(): service = OpenAIChatService() - client = service.get_client("test_model") + client = service.get_client(model_name="test_model") assert isinstance(client, OpenAIChatClient) @@ -39,10 +39,11 @@ def test_get_client(): @mock.patch("morpheus.llm.services.openai_chat_service.OpenAIChatClient") def test_get_client_passed_args(mock_client: mock.MagicMock, set_assistant: bool, temperature: int): service 
= OpenAIChatService() - service.get_client("test_model", set_assistant=set_assistant, temperature=temperature, test='this') + service.get_client(model_name="test_model", set_assistant=set_assistant, temperature=temperature, test='this') # Ensure the get_client method passed on the set_assistant and model kwargs - mock_client.assert_called_once_with(model_name="test_model", + mock_client.assert_called_once_with(service, + model_name="test_model", set_assistant=set_assistant, temperature=temperature, test='this') diff --git a/tests/llm/test_completion_pipe.py b/tests/llm/test_completion_pipe.py index db020b6ba6..39c16d7e3b 100644 --- a/tests/llm/test_completion_pipe.py +++ b/tests/llm/test_completion_pipe.py @@ -21,6 +21,7 @@ import cudf from _utils import assert_results +from _utils.llm import mk_mock_openai_response from morpheus.config import Config from morpheus.llm import LLMEngine from morpheus.llm.nodes.extracter_node import ExtracterNode @@ -106,19 +107,18 @@ def test_completion_pipe_nemo( @pytest.mark.usefixtures("openai") def test_completion_pipe_openai(config: Config, - mock_chat_completion: mock.MagicMock, + mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], countries: list[str], capital_responses: list[str]): - mock_chat_completion.acreate.side_effect = [{ - "choices": [{ - 'message': { - 'content': response - } - }] - } for response in capital_responses] + (mock_client, mock_async_client) = mock_chat_completion + mock_async_client.chat.completions.create.side_effect = [ + mk_mock_openai_response([response]) for response in capital_responses + ] results = _run_pipeline(config, OpenAIChatService, countries=countries, capital_responses=capital_responses) assert_results(results) + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() @pytest.mark.usefixtures("nemollm") diff --git a/tests/llm/test_rag_standalone_pipe.py b/tests/llm/test_rag_standalone_pipe.py index b9577a89ef..e394420845 100644 --- a/tests/llm/test_rag_standalone_pipe.py +++ b/tests/llm/test_rag_standalone_pipe.py @@ -25,6 +25,7 @@ from _utils import TEST_DIRS from _utils import assert_results from _utils.dataset_manager import DatasetManager +from _utils.llm import mk_mock_openai_response from _utils.milvus import populate_milvus from morpheus.config import Config from morpheus.config import PipelineModes @@ -160,18 +161,15 @@ def test_rag_standalone_pipe_nemo(config: Config, @pytest.mark.parametrize("repeat_count", [5]) @pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'llm/common/utils.py')) def test_rag_standalone_pipe_openai(config: Config, - mock_chat_completion: mock.MagicMock, + mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], dataset: DatasetManager, milvus_server_uri: str, repeat_count: int, import_mod: types.ModuleType): - mock_chat_completion.acreate.side_effect = [{ - "choices": [{ - 'message': { - 'content': EXPECTED_RESPONSE - } - }] - } for _ in range(repeat_count)] + (mock_client, mock_async_client) = mock_chat_completion + mock_async_client.chat.completions.create.side_effect = [ + mk_mock_openai_response([EXPECTED_RESPONSE]) for _ in range(repeat_count) + ] collection_name = "test_rag_standalone_pipe_openai" populate_milvus(milvus_server_uri=milvus_server_uri, @@ -190,6 +188,8 @@ def test_rag_standalone_pipe_openai(config: Config, utils_mod=import_mod, ) assert_results(results) + mock_client.chat.completions.create.assert_not_called() + mock_async_client.chat.completions.create.assert_called() 
@pytest.mark.usefixtures("nemollm") diff --git a/tests/stages/arxiv/test_arxiv_source.py b/tests/stages/arxiv/test_arxiv_source.py index 30d9730064..6daa0ebd71 100644 --- a/tests/stages/arxiv/test_arxiv_source.py +++ b/tests/stages/arxiv/test_arxiv_source.py @@ -162,11 +162,15 @@ def test_splitting_pages(config: Config, num_expected_chunks = len(page_content_col) source_col = [] page_col = [] + type_col = [] for _ in range(num_expected_chunks): source_col.append(pdf_file) page_col.append(0) + type_col.append("Document") - expected_df = cudf.DataFrame({"page_content": page_content_col, "source": source_col, "page": page_col}) + expected_df = cudf.DataFrame({ + "page_content": page_content_col, "source": source_col, "page": page_col, "type": type_col + }) loader = langchain.document_loaders.PyPDFLoader(pdf_file) documents = loader.load() @@ -189,8 +193,11 @@ def test_splitting_pages_no_chunks(config: Config, page_content_col = [content] source_col = [pdf_file] page_col = [0] + type_col = ["Document"] - expected_df = cudf.DataFrame({"page_content": page_content_col, "source": source_col, "page": page_col}) + expected_df = cudf.DataFrame({ + "page_content": page_content_col, "source": source_col, "page": page_col, "type": type_col + }) loader = langchain.document_loaders.PyPDFLoader(pdf_file) documents = loader.load() diff --git a/tests/stages/arxiv/test_arxiv_source_pipe.py b/tests/stages/arxiv/test_arxiv_source_pipe.py index 4d8b33bc32..bf903de717 100644 --- a/tests/stages/arxiv/test_arxiv_source_pipe.py +++ b/tests/stages/arxiv/test_arxiv_source_pipe.py @@ -39,8 +39,11 @@ def test_arxiv_source_pipeline(mock_arxiv_search: mock.MagicMock, config: Config page_content_col = [content] source_col = [cached_pdf] page_col = [0] + type_col = ["Document"] - expected_df = cudf.DataFrame({"page_content": page_content_col, "source": source_col, "page": page_col}) + expected_df = cudf.DataFrame({ + "page_content": page_content_col, "source": source_col, "page": page_col, "type": type_col + }) # The ArxivSource sets a pe_count of 6 for the process_pages node, and we need at least that number of threads # in the config to run the pipeline From 5ae24d91f09fe10bf82ed2695e978439364ae926 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 28 Feb 2024 12:44:52 -0800 Subject: [PATCH 15/18] Expose max_retries parameter to OpenAIChatService & OpenAIChatClient (#1536) * Avoids momentary rate limit errors ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
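The new `max_retries` keyword is accepted by both the `OpenAIChatClient` constructor and `OpenAIChatService.get_client`, and is forwarded to `openai.OpenAI` / `openai.AsyncOpenAI`, which retry rate-limited requests internally. A usage sketch (the model name is a placeholder):

```python
from morpheus.llm.services.openai_chat_service import OpenAIChatService

service = OpenAIChatService()

# max_retries is passed through to openai.OpenAI and openai.AsyncOpenAI, so
# transient 429 responses are retried inside the OpenAI client rather than
# failing the pipeline
client = service.get_client(model_name="test_model", max_retries=5)
```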
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1536 --- morpheus/llm/services/openai_chat_service.py | 24 +++++++++++++++---- tests/llm/services/test_openai_chat_client.py | 7 +++--- .../llm/services/test_openai_chat_service.py | 12 +++++++--- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/morpheus/llm/services/openai_chat_service.py b/morpheus/llm/services/openai_chat_service.py index 76da57912f..446b9a0ee9 100644 --- a/morpheus/llm/services/openai_chat_service.py +++ b/morpheus/llm/services/openai_chat_service.py @@ -78,6 +78,9 @@ class OpenAIChatClient(LLMClient): set_assistant: bool, optional default=False When `True`, a second input field named `assistant` will be used to provide additional context to the model. + max_retries: int, optional default=10 + The maximum number of retries to attempt when making a request to the OpenAI API. + model_kwargs : dict[str, typing.Any] Additional keyword arguments to pass to the model when generating text. """ @@ -90,6 +93,7 @@ def __init__(self, *, model_name: str, set_assistant: bool = False, + max_retries: int = 10, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION @@ -109,8 +113,8 @@ def __init__(self, self._model_kwargs = copy.deepcopy(model_kwargs) # Create the client objects for both sync and async - self._client = openai.OpenAI() - self._client_async = openai.AsyncOpenAI() + self._client = openai.OpenAI(max_retries=max_retries) + self._client_async = openai.AsyncOpenAI(max_retries=max_retries) def get_input_names(self) -> list[str]: input_names = [self._prompt_key] @@ -307,7 +311,12 @@ def _get_message_id(self): return self._message_count - def get_client(self, *, model_name: str, set_assistant: bool = False, **model_kwargs) -> OpenAIChatClient: + def get_client(self, + *, + model_name: str, + set_assistant: bool = False, + max_retries: int = 10, + **model_kwargs) -> OpenAIChatClient: """ Returns a client for interacting with a specific model. This method is the preferred way to create a client. @@ -319,6 +328,9 @@ def get_client(self, *, model_name: str, set_assistant: bool = False, **model_kw set_assistant: bool, optional default=False When `True`, a second input field named `assistant` will be used to provide additional context to the model. + max_retries: int, optional default=10 + The maximum number of retries to attempt when making a request to the OpenAI API. + model_kwargs : dict[str, typing.Any] Additional keyword arguments to pass to the model when generating text. 
Arguments specified here will overwrite the `default_model_kwargs` set in the service constructor @@ -326,4 +338,8 @@ def get_client(self, *, model_name: str, set_assistant: bool = False, **model_kw final_model_kwargs = {**self._default_model_kwargs, **model_kwargs} - return OpenAIChatClient(self, model_name=model_name, set_assistant=set_assistant, **final_model_kwargs) + return OpenAIChatClient(self, + model_name=model_name, + set_assistant=set_assistant, + max_retries=max_retries, + **final_model_kwargs) diff --git a/tests/llm/services/test_openai_chat_client.py b/tests/llm/services/test_openai_chat_client.py index 21013ce463..b4f5529edd 100644 --- a/tests/llm/services/test_openai_chat_client.py +++ b/tests/llm/services/test_openai_chat_client.py @@ -24,12 +24,13 @@ from morpheus.llm.services.openai_chat_service import OpenAIChatService -def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock]): - client = OpenAIChatClient(OpenAIChatService(), model_name="test_model") +@pytest.mark.parametrize("max_retries", [5, 10]) +def test_constructor(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], max_retries: int): + client = OpenAIChatClient(OpenAIChatService(), model_name="test_model", max_retries=max_retries) assert isinstance(client, LLMClient) for mock_client in mock_chat_completion: - mock_client.assert_called() + mock_client.assert_called_once_with(max_retries=max_retries) @pytest.mark.parametrize("use_async", [True, False]) diff --git a/tests/llm/services/test_openai_chat_service.py b/tests/llm/services/test_openai_chat_service.py index fc05d64543..8746d52535 100644 --- a/tests/llm/services/test_openai_chat_service.py +++ b/tests/llm/services/test_openai_chat_service.py @@ -36,14 +36,20 @@ def test_get_client(): @pytest.mark.parametrize("set_assistant", [True, False]) @pytest.mark.parametrize("temperature", [0, 1, 2]) +@pytest.mark.parametrize("max_retries", [5, 10]) @mock.patch("morpheus.llm.services.openai_chat_service.OpenAIChatClient") -def test_get_client_passed_args(mock_client: mock.MagicMock, set_assistant: bool, temperature: int): +def test_get_client_passed_args(mock_client: mock.MagicMock, set_assistant: bool, temperature: int, max_retries: int): service = OpenAIChatService() - service.get_client(model_name="test_model", set_assistant=set_assistant, temperature=temperature, test='this') + service.get_client(model_name="test_model", + set_assistant=set_assistant, + temperature=temperature, + test='this', + max_retries=max_retries) # Ensure the get_client method passed on the set_assistant and model kwargs mock_client.assert_called_once_with(service, model_name="test_model", set_assistant=set_assistant, temperature=temperature, - test='this') + test='this', + max_retries=max_retries) From a07f82fba797e2b79eef6595a809171aa3c159d6 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 29 Feb 2024 09:18:06 -0800 Subject: [PATCH 16/18] Fix pathlib.Path support for FileSourceStage (#1531) * The type hint for the `filename` argument to the `FileSourceStage` is `pathlib.Path`, however this is incompatible with the C++ impl and the `determine_file_type` method. Closes #1530 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. 
- When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1531 --- morpheus/_lib/common/__init__.pyi | 5 +++ morpheus/_lib/common/module.cpp | 37 ++++++++++++++++--- .../include/morpheus/objects/file_types.hpp | 10 +++++ .../include/morpheus/stages/file_source.hpp | 13 +++---- morpheus/_lib/src/objects/file_types.cpp | 5 +++ morpheus/_lib/src/stages/file_source.cpp | 18 ++++++--- morpheus/_lib/stages/__init__.pyi | 4 ++ morpheus/_lib/stages/module.cpp | 18 ++++++++- tests/common/test_determine_file_type.py | 32 ++++++++++++++++ tests/test_file_in_out.py | 30 +++++++++++---- 10 files changed, 143 insertions(+), 29 deletions(-) create mode 100644 tests/common/test_determine_file_type.py diff --git a/morpheus/_lib/common/__init__.pyi b/morpheus/_lib/common/__init__.pyi index f6e94c7468..3a27d05002 100644 --- a/morpheus/_lib/common/__init__.pyi +++ b/morpheus/_lib/common/__init__.pyi @@ -7,6 +7,7 @@ from __future__ import annotations import morpheus._lib.common import typing +import os __all__ = [ "FiberQueue", @@ -190,6 +191,10 @@ class TypeId(): UINT8: morpheus._lib.common.TypeId # value = __members__: dict # value = {'EMPTY': , 'INT8': , 'INT16': , 'INT32': , 'INT64': , 'UINT8': , 'UINT16': , 'UINT32': , 'UINT64': , 'FLOAT32': , 'FLOAT64': , 'BOOL8': , 'STRING': } pass +@typing.overload +def determine_file_type(filename: os.PathLike) -> FileTypes: + pass +@typing.overload def determine_file_type(filename: str) -> FileTypes: pass def read_file_to_df(filename: str, file_type: FileTypes = FileTypes.Auto) -> object: diff --git a/morpheus/_lib/common/module.cpp b/morpheus/_lib/common/module.cpp index 397fec2341..0c2ae40914 100644 --- a/morpheus/_lib/common/module.cpp +++ b/morpheus/_lib/common/module.cpp @@ -37,7 +37,10 @@ #include #include #include // for return_value_policy::reference +// for pathlib.Path -> std::filesystem::path conversions +#include // IWYU pragma: keep +#include // for std::filesystem::path #include #include #include @@ -58,13 +61,29 @@ PYBIND11_MODULE(common, _module) CudfHelper::load(); LoaderRegistry::register_factory_fn( - "file", [](nlohmann::json config) { return std::make_unique(config); }, false); + "file", + [](nlohmann::json config) { + return std::make_unique(config); + }, + false); LoaderRegistry::register_factory_fn( - "grpc", [](nlohmann::json config) { return std::make_unique(config); }, false); + "grpc", + [](nlohmann::json config) { + return std::make_unique(config); + }, + false); LoaderRegistry::register_factory_fn( - "payload", [](nlohmann::json config) { return std::make_unique(config); }, false); + "payload", + [](nlohmann::json config) { + return std::make_unique(config); + }, + false); LoaderRegistry::register_factory_fn( - "rest", [](nlohmann::json config) { return std::make_unique(config); }, false); + "rest", + [](nlohmann::json config) { + return std::make_unique(config); + }, + false); py::class_(_module, "Tensor") .def_property_readonly("__cuda_array_interface__", &TensorObjectInterfaceProxy::cuda_array_interface) @@ -106,9 +125,15 @@ PYBIND11_MODULE(common, _module) .value("CSV", FileTypes::CSV) .value("PARQUET", FileTypes::PARQUET); - _module.def("typeid_to_numpy_str", [](TypeId tid) { return DType(tid).type_str(); }); + _module.def("typeid_to_numpy_str", [](TypeId tid) { + return DType(tid).type_str(); + }); - _module.def("determine_file_type", 
&determine_file_type, py::arg("filename")); + _module.def( + "determine_file_type", py::overload_cast(&determine_file_type), py::arg("filename")); + _module.def("determine_file_type", + py::overload_cast(&determine_file_type), + py::arg("filename")); _module.def("read_file_to_df", &read_file_to_df, py::arg("filename"), py::arg("file_type") = FileTypes::Auto); _module.def("write_df_to_file", &SerializersProxy::write_df_to_file, diff --git a/morpheus/_lib/include/morpheus/objects/file_types.hpp b/morpheus/_lib/include/morpheus/objects/file_types.hpp index 54b59a5b35..329ade91ef 100644 --- a/morpheus/_lib/include/morpheus/objects/file_types.hpp +++ b/morpheus/_lib/include/morpheus/objects/file_types.hpp @@ -18,6 +18,7 @@ #pragma once #include +#include // for path #include #include #include @@ -84,6 +85,15 @@ static inline std::ostream& operator<<(std::ostream& os, const FileTypes& f) */ FileTypes determine_file_type(const std::string& filename); +/** + * @brief Determines the file type from a filename based on extension. For example, my_file.json would return + * `FileTypes::JSON`. + * + * @param filename path to a file. Does not need to exist + * @return FileTypes + */ +FileTypes determine_file_type(const std::filesystem::path& filename); + #pragma GCC visibility pop /** @} */ // end of group diff --git a/morpheus/_lib/include/morpheus/stages/file_source.hpp b/morpheus/_lib/include/morpheus/stages/file_source.hpp index 4818f2e0ef..6ed1ea4852 100644 --- a/morpheus/_lib/include/morpheus/stages/file_source.hpp +++ b/morpheus/_lib/include/morpheus/stages/file_source.hpp @@ -20,23 +20,17 @@ #include "morpheus/messages/meta.hpp" #include -#include -#include -#include -#include #include #include -#include #include #include #include // for apply, make_subscriber, observable_member, is_on_error<>::not_void, is_on_next_of<>::not_void, trace_activity -#include +#include // for path #include #include #include #include -#include // for vector namespace morpheus { /****** Component public implementations *******************/ @@ -98,6 +92,11 @@ struct FileSourceStageInterfaceProxy std::string filename, int repeat = 1, pybind11::dict parser_kwargs = pybind11::dict()); + static std::shared_ptr> init(mrc::segment::Builder& builder, + const std::string& name, + std::filesystem::path filename, + int repeat = 1, + pybind11::dict parser_kwargs = pybind11::dict()); }; #pragma GCC visibility pop /** @} */ // end of group diff --git a/morpheus/_lib/src/objects/file_types.cpp b/morpheus/_lib/src/objects/file_types.cpp index b573842e62..20f8ca956e 100644 --- a/morpheus/_lib/src/objects/file_types.cpp +++ b/morpheus/_lib/src/objects/file_types.cpp @@ -49,4 +49,9 @@ FileTypes determine_file_type(const std::string& filename) } } +FileTypes determine_file_type(const std::filesystem::path& filename) +{ + return determine_file_type(filename.string()); +} + } // namespace morpheus diff --git a/morpheus/_lib/src/stages/file_source.cpp b/morpheus/_lib/src/stages/file_source.cpp index bae39fc37c..84a59f5f12 100644 --- a/morpheus/_lib/src/stages/file_source.cpp +++ b/morpheus/_lib/src/stages/file_source.cpp @@ -17,11 +17,7 @@ #include "morpheus/stages/file_source.hpp" -#include "mrc/node/rx_sink_base.hpp" -#include "mrc/node/rx_source_base.hpp" -#include "mrc/node/source_properties.hpp" #include "mrc/segment/object.hpp" -#include "mrc/types.hpp" #include "pymrc/node.hpp" #include "morpheus/io/deserializers.hpp" @@ -32,12 +28,12 @@ #include #include #include -#include +#include // IWYU pragma: keep #include #include // for 
str_attr_accessor #include // for pybind11::int_ -#include +#include #include #include #include @@ -133,4 +129,14 @@ std::shared_ptr> FileSourceStageInterfaceP return stage; } + +std::shared_ptr> FileSourceStageInterfaceProxy::init( + mrc::segment::Builder& builder, + const std::string& name, + std::filesystem::path filename, + int repeat, + pybind11::dict parser_kwargs) +{ + return init(builder, name, filename.string(), repeat, std::move(parser_kwargs)); +} } // namespace morpheus diff --git a/morpheus/_lib/stages/__init__.pyi b/morpheus/_lib/stages/__init__.pyi index 580a7a8357..2b40565087 100644 --- a/morpheus/_lib/stages/__init__.pyi +++ b/morpheus/_lib/stages/__init__.pyi @@ -11,6 +11,7 @@ import typing from morpheus._lib.common import FilterSource import morpheus._lib.common import mrc.core.segment +import os __all__ = [ "AddClassificationsStage", @@ -45,6 +46,9 @@ class DeserializeMultiMessageStage(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, batch_size: int, ensure_sliceable_index: bool = True) -> None: ... pass class FileSourceStage(mrc.core.segment.SegmentObject): + @typing.overload + def __init__(self, builder: mrc.core.segment.Builder, name: str, filename: os.PathLike, repeat: int, parser_kwargs: dict) -> None: ... + @typing.overload def __init__(self, builder: mrc.core.segment.Builder, name: str, filename: str, repeat: int, parser_kwargs: dict) -> None: ... pass class FilterDetectionsStage(mrc.core.segment.SegmentObject): diff --git a/morpheus/_lib/stages/module.cpp b/morpheus/_lib/stages/module.cpp index d5f8b6bad0..0fc47034d6 100644 --- a/morpheus/_lib/stages/module.cpp +++ b/morpheus/_lib/stages/module.cpp @@ -36,16 +36,21 @@ #include "morpheus/utilities/http_server.hpp" // for DefaultMaxPayloadSize #include "morpheus/version.hpp" +#include // for Builder #include #include #include // for multiple_inheritance #include // for arg, init, class_, module_, str_attr_accessor, PYBIND11_MODULE, pybind11 #include // for dict, sequence -#include // for pymrc::import +// for pathlib.Path -> std::filesystem::path conversions +#include // IWYU pragma: keep +#include // for pymrc::import #include +#include // for std::filesystem::path #include #include +#include namespace morpheus { namespace py = pybind11; @@ -111,7 +116,16 @@ PYBIND11_MODULE(stages, _module) mrc::segment::ObjectProperties, std::shared_ptr>>( _module, "FileSourceStage", py::multiple_inheritance()) - .def(py::init<>(&FileSourceStageInterfaceProxy::init), + .def(py::init(py::overload_cast( + &FileSourceStageInterfaceProxy::init)), + py::arg("builder"), + py::arg("name"), + py::arg("filename"), + py::arg("repeat"), + py::arg("parser_kwargs")) + .def(py::init( + py::overload_cast( + &FileSourceStageInterfaceProxy::init)), py::arg("builder"), py::arg("name"), py::arg("filename"), diff --git a/tests/common/test_determine_file_type.py b/tests/common/test_determine_file_type.py new file mode 100644 index 0000000000..d3e71af72b --- /dev/null +++ b/tests/common/test_determine_file_type.py @@ -0,0 +1,32 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + +import pytest + +from morpheus.common import FileTypes +from morpheus.common import determine_file_type + + +@pytest.mark.parametrize("use_pathlib", [False, True]) +@pytest.mark.parametrize("ext, expected_result", + [("csv", FileTypes.CSV), ("json", FileTypes.JSON), ("jsonlines", FileTypes.JSON), + ("parquet", FileTypes.PARQUET)]) +def test_determine_file_type(ext: str, expected_result: FileTypes, use_pathlib: bool): + file_path = f"test.{ext}" + if use_pathlib: + file_path = pathlib.Path(file_path) + + assert determine_file_type(file_path) == expected_result diff --git a/tests/test_file_in_out.py b/tests/test_file_in_out.py index 7100aea514..bd0d06d6c2 100755 --- a/tests/test_file_in_out.py +++ b/tests/test_file_in_out.py @@ -16,6 +16,8 @@ import filecmp import os +import pathlib +import typing import numpy as np import pytest @@ -24,6 +26,7 @@ from _utils import assert_path_exists from _utils.dataset_manager import DatasetManager from morpheus.common import FileTypes +from morpheus.config import Config from morpheus.config import CppConfig from morpheus.io.deserializers import read_file_to_df from morpheus.io.serializers import write_df_to_file @@ -39,11 +42,22 @@ @pytest.mark.slow @pytest.mark.parametrize("input_type", ["csv", "jsonlines", "parquet"]) +@pytest.mark.parametrize("use_pathlib", [False, True]) @pytest.mark.parametrize("output_type", ["csv", "json", "jsonlines"]) @pytest.mark.parametrize("flush", [False, True], ids=["no_flush", "flush"]) @pytest.mark.parametrize("repeat", [1, 2, 5], ids=["repeat1", "repeat2", "repeat5"]) -def test_file_rw_pipe(tmp_path, config, input_type, output_type, flush, repeat: int): +def test_file_rw_pipe(tmp_path: pathlib.Path, + config: Config, + input_type: str, + use_pathlib: bool, + output_type: str, + flush: bool, + repeat: int): input_file = os.path.join(TEST_DIRS.tests_data_dir, f'filter_probs.{input_type}') + + if use_pathlib: + input_file = pathlib.Path(input_file) + validation_file = os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv") out_file = os.path.join(tmp_path, f'results.{output_type}') @@ -77,7 +91,7 @@ def test_file_rw_pipe(tmp_path, config, input_type, output_type, flush, repeat: assert output_data.tolist() == validation_data.tolist() -def test_file_read_json(config): +def test_file_read_json(config: Config): src_file = os.path.join(TEST_DIRS.tests_data_dir, "simple.json") pipe = LinearPipeline(config) @@ -98,7 +112,7 @@ def test_file_read_json(config): @pytest.mark.slow @pytest.mark.use_python @pytest.mark.usefixtures("chdir_tmpdir") -def test_to_file_no_path(tmp_path, config): +def test_to_file_no_path(tmp_path: pathlib.Path, config: Config): """ Test to ensure issue #48 is fixed """ @@ -119,7 +133,7 @@ def test_to_file_no_path(tmp_path, config): @pytest.mark.slow @pytest.mark.parametrize("input_type", ["csv", "jsonlines", "parquet"]) @pytest.mark.parametrize("output_type", ["csv", "json", "jsonlines"]) -def test_file_rw_multi_segment_pipe(tmp_path, config, input_type, output_type): +def test_file_rw_multi_segment_pipe(tmp_path: pathlib.Path, config: Config, input_type: str, output_type: str): 
input_file = os.path.join(TEST_DIRS.tests_data_dir, f'filter_probs.{input_type}') validation_file = os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv") out_file = os.path.join(tmp_path, f'results.{output_type}') @@ -156,7 +170,7 @@ def test_file_rw_multi_segment_pipe(tmp_path, config, input_type, output_type): os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv"), os.path.join(TEST_DIRS.tests_data_dir, "filter_probs_w_id_col.csv") ]) -def test_file_rw_index_pipe(tmp_path, config, input_file): +def test_file_rw_index_pipe(tmp_path: pathlib.Path, config: Config, input_file: str): out_file = os.path.join(tmp_path, 'results.csv') pipe = LinearPipeline(config) @@ -183,7 +197,7 @@ def test_file_rw_index_pipe(tmp_path, config, input_file): }), (os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.jsonlines"), {})], ids=["CSV", "CSV_ID", "JSON"]) @pytest.mark.usefixtures("use_cpp") -def test_file_roundtrip(tmp_path, input_file, extra_kwargs): +def test_file_roundtrip(tmp_path: pathlib.Path, input_file: str, extra_kwargs: dict[str, typing.Any]): # Output file should be same type as input out_file = os.path.join(tmp_path, f'results{os.path.splitext(input_file)[1]}') @@ -222,7 +236,7 @@ def test_read_cpp_compare(input_file: str): @pytest.mark.slow @pytest.mark.parametrize("output_type", ["csv", "json", "jsonlines"]) -def test_file_rw_serialize_deserialize_pipe(tmp_path, config, output_type): +def test_file_rw_serialize_deserialize_pipe(tmp_path: pathlib.Path, config: Config, output_type: str): input_file = os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv") out_file = os.path.join(tmp_path, f'results.{output_type}') @@ -252,7 +266,7 @@ def test_file_rw_serialize_deserialize_pipe(tmp_path, config, output_type): @pytest.mark.slow @pytest.mark.parametrize("output_type", ["csv", "json", "jsonlines"]) -def test_file_rw_serialize_deserialize_multi_segment_pipe(tmp_path, config, output_type): +def test_file_rw_serialize_deserialize_multi_segment_pipe(tmp_path: pathlib.Path, config: Config, output_type: str): input_file = os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv") out_file = os.path.join(tmp_path, f'results.{output_type}') From 9e9d6d809f6a5c20b9473c8478c5e586df0f6adb Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Fri, 1 Mar 2024 08:45:50 -0800 Subject: [PATCH 17/18] Update to match new MRC function sig for AsyncioRunnable::on_data (#1541) Closes ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Christopher Harris (https://github.com/cwharris) URL: https://github.com/nv-morpheus/Morpheus/pull/1541 --- morpheus/_lib/llm/include/py_llm_engine_stage.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/morpheus/_lib/llm/include/py_llm_engine_stage.hpp b/morpheus/_lib/llm/include/py_llm_engine_stage.hpp index 1afeac8025..447b868a35 100644 --- a/morpheus/_lib/llm/include/py_llm_engine_stage.hpp +++ b/morpheus/_lib/llm/include/py_llm_engine_stage.hpp @@ -94,7 +94,7 @@ class MORPHEUS_EXPORT PyLLMEngineStage private: mrc::coroutines::AsyncGenerator> on_data( - std::shared_ptr&& data) override + std::shared_ptr&& data, std::shared_ptr on) override { auto result = co_await m_engine->run(std::move(data)); From 02ced6e7a58253f50876187efbcf05e3b91d2e9a Mon Sep 17 00:00:00 2001 From: yuchenz427 <134643420+yuchenz427@users.noreply.github.com> Date: Fri, 1 Mar 2024 12:01:32 -0800 Subject: [PATCH 18/18] Adding missing dependency on `pydantic` (#1535) Adding missing dependency on pydantic ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - https://github.com/yuchenz427 Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1535 --- conda/environments/all_cuda-121_arch-x86_64.yaml | 1 + conda/environments/dev_cuda-121_arch-x86_64.yaml | 1 + conda/environments/examples_cuda-121_arch-x86_64.yaml | 1 + conda/environments/runtime_cuda-121_arch-x86_64.yaml | 1 + dependencies.yaml | 1 + 5 files changed, 5 insertions(+) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 28bdaa6fd9..4d18afbac2 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -78,6 +78,7 @@ dependencies: - pre-commit - protobuf=4.24 - pybind11-stubgen=0.10.5 +- pydantic - pylint=3.0.3 - pypdf=3.17.4 - pytest-asyncio diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index f88cc79f93..124fc59e17 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -60,6 +60,7 @@ dependencies: - pre-commit - protobuf=4.24 - pybind11-stubgen=0.10.5 +- pydantic - pylint=3.0.3 - pytest-asyncio - pytest-benchmark=4.0 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index ad2315d91c..ba479b45f9 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -39,6 +39,7 @@ dependencies: - openai=1.13 - papermill=2.4.0 - pip +- pydantic - pypdf=3.17.4 - python-confluent-kafka>=1.9.2,<1.10.0a0 - python-graphviz diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-121_arch-x86_64.yaml index fb388052a6..7593e4a951 100644 --- a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-121_arch-x86_64.yaml @@ -22,6 +22,7 @@ dependencies: - numpydoc=1.5 - nvtabular=23.08.00 - pip +- pydantic - python-confluent-kafka>=1.9.2,<1.10.0a0 - python-graphviz 
- python=3.10 diff --git a/dependencies.yaml b/dependencies.yaml index e3dbdf55ea..2d243c4e47 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -256,6 +256,7 @@ dependencies: - mlflow=2.9.2 - networkx=2.8.8 - nvtabular=23.08.00 + - pydantic - python-confluent-kafka>=1.9.2,<1.10.0a0 - python-graphviz - pytorch-cuda