feat: add more logging in chatCompletion
CameronNg committed Jun 19, 2024
1 parent 9a2f4a9 commit a3ed58c
Showing 2 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions cpp/Makefile
@@ -48,9 +48,9 @@ else
 	cp /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/libnvinfer.so.9 cortex.tensorrt-llm && \
 	cp /usr/lib/x86_64-linux-gnu/libcudnn.so.8 cortex.tensorrt-llm && \
 	cp /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8 cortex.tensorrt-llm && \
-	cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so cortex.tensorrt-llm && \
+	cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so.9 cortex.tensorrt-llm && \
 	cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/libtensorrt_llm.so cortex.tensorrt-llm && \
-	cp /opt/hpcx/ompi/lib/libmpi.so cortex.tensorrt-llm && \
+	cp /opt/hpcx/ompi/lib/libmpi.so.40 cortex.tensorrt-llm && \
 	cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm
 endif
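Note on the versioned filenames in the hunk above: at run time the dynamic loader resolves a library by the SONAME recorded in the dependent binary (e.g. libmpi.so.40), not by the unversioned development symlink, so the versioned file is the one that must be bundled. A minimal C++ sketch to verify the bundled library actually loads, assuming libmpi.so.40 is on the loader's search path (compile with -ldl):

    #include <dlfcn.h>   // dlopen, dlerror, dlclose
    #include <cstdio>

    int main() {
        // Ask the loader for the versioned SONAME the diff now copies.
        void* handle = dlopen("libmpi.so.40", RTLD_NOW);
        if (handle == nullptr) {
            std::fprintf(stderr, "load failed: %s\n", dlerror());
            return 1;
        }
        std::puts("libmpi.so.40 resolved and loaded");
        dlclose(handle);
        return 0;
    }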

6 changes: 3 additions & 3 deletions
@@ -106,6 +106,7 @@ void InferenceThread(
     // Define the callback to stream each generated token
     generation_output.onTokenGenerated = [&infer_state, input_len, outputLen, self, &generation_output](
             GenerationOutput::TensorPtr const& output_ids, SizeType step, bool finished) {
+        LOG_INFO << "Inference thread started";
         // Assuming the shape of output_ids tensor is (1, 1, 160), where 160 is the number of tokens
         int output_length = output_ids->getShape().d[2]; // Get the length of output IDs based on the tensor shape
         // Copy output IDs from GPU to host for printing
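For context, onTokenGenerated fires once per decoding step, so a callback like the one above runs for every generated token. A hedged sketch of how such a callback can pull the newest token ID out of a (1, 1, seq_len) tensor; copy_to_host and decode_token are hypothetical stand-ins for the engine's helpers, which are outside this hunk:

    // Sketch only: copy_to_host and decode_token are illustrative names,
    // not the engine's actual helpers.
    auto on_token = [&](GenerationOutput::TensorPtr const& output_ids,
                        SizeType step, bool finished) {
        // Shape is (1, 1, seq_len); d[2] is how many tokens exist so far.
        int output_length = output_ids->getShape().d[2];
        std::vector<int32_t> host_ids(output_length);
        copy_to_host(output_ids, host_ids.data(), output_length); // GPU -> host
        std::string piece = decode_token(host_ids.back());        // newest token
        if (finished) {
            // push an end-of-stream marker to the consumer queue
        }
    };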
@@ -240,6 +241,7 @@ void TensorrtllmEngine::HandleChatCompletion(std::shared_ptr<Json::Value> json_b
     inference_thread.detach(); // Detach the thread to allow it to run independently
 
     q_->runTaskInQueue([cb = std::move(callback), infer_state]() {
+        LOG_INFO << "Preparing to run inference task queue...";
         while (true) { // Continuously check if the queue is not empty
             std::unique_lock<std::mutex> lock(infer_state->queue_mutex); // Lock the queue for exclusive access
             if (!infer_state->texts_to_stream.empty()) {
@@ -280,9 +282,7 @@
                 status["is_stream"] = true;
                 status["status_code"] = k200OK;
                 cb(std::move(status), std::move(resp_data));
-                continue;;
-            }
-            else {
+            } else {
                 // If the queue is empty, release the lock and wait before trying again
                 lock.unlock();
             }
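The hunk above is the consumer half of a producer/consumer stream: the detached inference thread pushes decoded text into texts_to_stream, and this queued task pops entries and forwards them through the callback. A self-contained sketch of the same pattern, substituting a condition_variable for the unlock-and-retry spin; the [DONE] sentinel is an assumption for illustration, not taken from this diff:

    #include <condition_variable>
    #include <mutex>
    #include <queue>
    #include <string>

    struct InferState {
        std::mutex queue_mutex;
        std::condition_variable queue_cv;       // producer calls notify_one() on each push
        std::queue<std::string> texts_to_stream;
    };

    void ConsumeStream(InferState& state) {
        while (true) {
            std::unique_lock<std::mutex> lock(state.queue_mutex);
            // Sleep until the producer pushes, instead of spinning on empty().
            state.queue_cv.wait(lock, [&] { return !state.texts_to_stream.empty(); });
            std::string text = std::move(state.texts_to_stream.front());
            state.texts_to_stream.pop();
            lock.unlock();
            if (text == "[DONE]") break;        // assumed end-of-stream sentinel
            // ... wrap `text` as a streamed chunk and invoke the callback ...
        }
    }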
