Skip to content

Commit

Permalink
latest
Browse files — browse the repository at this point in the history
  • Loading branch information
automaticcat authored and jan-service-account committed Mar 13, 2024
1 parent 764b41b commit 8e33246
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ bool handleMatch(const std::string& rawText, std::shared_ptr<inferenceState> inf
inferState->reset();
return false; // Reset to start if sequence breaks
}
return false;
}

// Only support single token stopping point now
Expand Down Expand Up @@ -202,6 +203,7 @@ void inferenceThread(std::shared_ptr<inferenceState> inferState, std::vector<int
inferState->textsToStream.push("[DONE]");
return;
}
return;
};
// The rest of the logic inside the `chat_completion` remains unchanged...
// After finishing the setup, call the inference logic
Expand Down Expand Up @@ -279,11 +281,12 @@ void tensorrtllm::chat_completion(
std::thread infThread(inferenceThread, inferState, inputIdsHost, callback, this);
infThread.detach(); // Detach the thread to allow it to run independently

auto chunked_content_provider = [inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
auto chunked_content_provider = [this,inferState](char* pBuffer, std::size_t nBuffSize) -> std::size_t
{
if (!pBuffer)
{
LOG_INFO << "Connection closed or buffer is null. Reset context";
inferState->isFinished = true;
return 0; // Indicate no more data to send
}

Expand Down

0 comments on commit 8e33246

Please sign in to comment.