Skip to content

Commit 548b5b7

Browse files
Shixiaowei02, niukuo, pei0033, lkm2835, and kaiyux
authored
Update TensorRT-LLM (#2532)
* blossom-ci.yml: run vulnerability scan on blossom
* open source efb18c1256f8c9c3d47b7d0c740b83e5d5ebe0ec

---------

Co-authored-by: niukuo <[email protected]>
Co-authored-by: pei0033 <[email protected]>
Co-authored-by: Kyungmin Lee <[email protected]>
Co-authored-by: Kaiyu Xie <[email protected]>
1 parent 4420547 commit 548b5b7

File tree

762 files changed

+1673620
-1550597
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

762 files changed

+1673620
-1550597
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ docs/source/llm-api-examples/llm_*.rst
4444
# Testing
4545
.coverage.*
4646
results_trt/
47+
llm-test-workspace/
4748

4849
# build/debug
4950
*.safetensors

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,3 +17,6 @@
1717
[submodule "3rdparty/pybind11"]
1818
path = 3rdparty/pybind11
1919
url = https://github.com/pybind/pybind11.git
20+
[submodule "3rdparty/xgrammar"]
21+
path = 3rdparty/xgrammar
22+
url = https://github.com/mlc-ai/xgrammar.git

3rdparty/xgrammar

Submodule xgrammar added at b9a16de

benchmarks/cpp/disaggServerBenchmark.cpp

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -812,8 +812,9 @@ class DisaggExecutorServer
812812
}
813813
if (mEnableCollectIterStats)
814814
{
815-
for (auto const& iterStats : contextStats)
815+
for (std::size_t i = 0; i < contextStats.size(); i++)
816816
{
817+
auto const& iterStats = contextStats.at(i);
817818
for (auto const& stat : iterStats)
818819
{
819820
SizeType32 numNewActiveRequests = stat.numNewActiveRequests;
@@ -826,13 +827,15 @@ class DisaggExecutorServer
826827
}
827828
if (mLogIterationData)
828829
{
829-
TLLM_LOG_INFO(texec::JsonSerialization::toJsonStr(stat));
830+
TLLM_LOG_INFO(
831+
"ctx_id %d, ctx_stat: %s", i, texec::JsonSerialization::toJsonStr(stat).c_str());
830832
}
831833
}
832834
}
833835

834-
for (auto const& iterStats : generationStats)
836+
for (std::size_t i = 0; i < generationStats.size(); i++)
835837
{
838+
auto const& iterStats = generationStats.at(i);
836839
for (auto const& stat : iterStats)
837840
{
838841
SizeType32 numNewActiveRequests = stat.numNewActiveRequests;
@@ -845,7 +848,8 @@ class DisaggExecutorServer
845848
}
846849
if (mLogIterationData)
847850
{
848-
TLLM_LOG_INFO(texec::JsonSerialization::toJsonStr(stat));
851+
TLLM_LOG_INFO(
852+
"gen_id %d, gen_stat: %s", i, texec::JsonSerialization::toJsonStr(stat).c_str());
849853
}
850854
}
851855
}
@@ -854,9 +858,9 @@ class DisaggExecutorServer
854858
{
855859
continue;
856860
}
857-
for (auto const& stats : generationRequestStatsPerIteration)
861+
for (std::size_t i = 0; i < generationRequestStatsPerIteration.size(); i++)
858862
{
859-
863+
auto const& stats = generationRequestStatsPerIteration.at(i);
860864
for (auto const& stat : stats)
861865
{
862866
std::vector<float> kvCacheTransferMs;
@@ -874,7 +878,8 @@ class DisaggExecutorServer
874878
}
875879
if (mLogIterationData)
876880
{
877-
TLLM_LOG_INFO(texec::JsonSerialization::toJsonStr(stat));
881+
TLLM_LOG_INFO(
882+
"gen_id %d, gen_req_stat: %s", i, texec::JsonSerialization::toJsonStr(stat).c_str());
878883
}
879884
}
880885
}
@@ -973,6 +978,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
973978
if (worldRank == 0)
974979
{
975980
{ // warmup
981+
TLLM_LOG_INFO("Warmup start");
976982
std::vector<tensorrt_llm::executor::Request> contextRequests;
977983
contextRequests.reserve(warmUp);
978984
for (int i = 0; i < warmUp; ++i)
@@ -989,6 +995,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
989995
disaggExecutor->waitForGenResponse(warmUp, true);
990996
auto const warmUpWaitSleep = std::chrono::milliseconds(50);
991997
std::this_thread::sleep_for(warmUpWaitSleep);
998+
TLLM_LOG_INFO("Warmup done");
992999
}
9931000

9941001
{

0 commit comments

Comments
 (0)