Skip to content

Commit 772584b

Browse files
committed
Add "Processing: %" status update for pre-processing
Fixes: #7
1 parent ff04815 commit 772584b

3 files changed

Lines changed: 62 additions & 17 deletions

File tree

llamachateditor.cpp

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,10 @@ ChatEditor::ChatEditor()
152152
&ChatManager::followUpQuestionsReceived,
153153
this,
154154
&ChatEditor::createFollowUpWidget);
155-
connect(&chatManager, &ChatManager::messageExtraUpdated, this, &ChatEditor::onMessageExtraUpdated);
155+
connect(&chatManager,
156+
&ChatManager::messageExtraUpdated,
157+
this,
158+
&ChatEditor::onMessageExtraUpdated);
156159

157160
connect(m_input, &ChatInput::sendRequested, this, &ChatEditor::onSendRequested);
158161
connect(m_input, &ChatInput::stopRequested, this, &ChatEditor::onStopRequested);
// Refreshes m_speedLabel (text and tooltip) for `msg`. Nothing is updated unless
// the showTokensPerSecond setting is enabled.
// This is a rendered diff: a line following an "NNN-" gutter belongs to the removed
// version, a line following an "NNN+" gutter belongs to the added version.
@@ -812,22 +815,42 @@ void ChatEditor::updateSpeedLabel(const Message &msg)
812815
{
813816
// Update the speed label using the latest timings
814817
if (settings().showTokensPerSecond.value()) {
815-
const auto &t = msg.timings;
816-
if (t.predicted_ms > 0 && t.prompt_ms > 0) {
817-
qreal tokensPerSec = (t.predicted_n + t.prompt_n) * 1000.0
818-
/ (t.predicted_ms + t.prompt_ms);
819-
m_speedLabel->setText(Tr::tr("Speed: %1 t/s").arg(tokensPerSec, 0, 'f', 1));
820-
821-
QString labelTooltip(
822-
Tr::tr("<b>Prompt:</b><br>Tokens: %1<br>Time: %2 ms<br>Speed: %3 t/s<br><br>"
823-
"<b>Generation:</b><br>Tokens: %4<br>Time: %5 ms<br>Speed: %6 t/s")
824-
.arg(t.prompt_n)
825-
.arg(t.prompt_ms)
826-
.arg(t.prompt_n * 1000.0 / t.prompt_ms, 0, 'f', 1)
827-
.arg(t.predicted_n)
828-
.arg(t.predicted_ms)
829-
.arg(t.predicted_n * 1000.0 / t.predicted_ms, 0, 'f', 1));
// Added version: while the message has no content yet and a prompt total is known,
// show a "Processing: N%" label instead of a speed. If content is empty and total
// is 0, neither branch below runs and the label is left untouched.
818+
if (msg.content.isEmpty() && msg.promptProgress.total > 0) {
// NOTE(review): this sums processed + cache. The llama.cpp server documentation
// appears to define overall progress as processed/total — confirm whether
// `processed` already includes cached tokens; if it does, this double-counts
// them and only the qBound below hides the overshoot.
819+
double processed = msg.promptProgress.processed + msg.promptProgress.cache;
820+
double percent = (processed / msg.promptProgress.total) * 100.0; // total > 0 is guaranteed by the branch condition
821+
822+
percent = qBound(0.0, percent, 100.0); // clamp to the range [0, 100]
823+
824+
m_speedLabel->setText(Tr::tr("Processing: %1%").arg(percent, 0, 'f', 0)); // zero decimals
825+
826+
QString labelTooltip = Tr::tr("<b>Prompt Processing:</b><br>"
827+
"Total Tokens: %1<br>"
828+
"Processed: %2<br>"
829+
"Cached: %3<br>"
830+
"Time: %4 ms")
831+
.arg(msg.promptProgress.total)
// NOTE(review): the value passed for "Processed" is the local double computed
// above (processed + cache), not promptProgress.processed, and no explicit
// number format is given — confirm this is the intended figure and rendering.
832+
.arg(processed)
833+
.arg(msg.promptProgress.cache)
// The time shown comes from timings.prompt_ms; promptProgress.time_ms is not
// read anywhere in this function.
834+
.arg(msg.timings.prompt_ms);
830835
m_speedLabel->setToolTip(labelTooltip);
// Once the message has content, show the tokens-per-second summary.
836+
} else if (!msg.content.isEmpty()) {
837+
const auto &t = msg.timings;
// Both durations must be positive; this also keeps every division below
// away from a zero denominator.
838+
if (t.predicted_ms > 0 && t.prompt_ms > 0) {
// Combined rate: (prompt + predicted tokens) over (prompt + predicted time),
// scaled by 1000.
839+
qreal tokensPerSec = (t.predicted_n + t.prompt_n) * 1000.0
840+
/ (t.predicted_ms + t.prompt_ms);
841+
m_speedLabel->setText(Tr::tr("Speed: %1 t/s").arg(tokensPerSec, 0, 'f', 1));
842+
843+
QString labelTooltip(
844+
Tr::tr("<b>Prompt:</b><br>Tokens: %1<br>Time: %2 ms<br>Speed: %3 t/s<br><br>"
845+
"<b>Generation:</b><br>Tokens: %4<br>Time: %5 ms<br>Speed: %6 t/s")
846+
.arg(t.prompt_n)
847+
.arg(t.prompt_ms)
848+
.arg(t.prompt_n * 1000.0 / t.prompt_ms, 0, 'f', 1)
849+
.arg(t.predicted_n)
850+
.arg(t.predicted_ms)
851+
.arg(t.predicted_n * 1000.0 / t.predicted_ms, 0, 'f', 1));
852+
m_speedLabel->setToolTip(labelTooltip);
853+
}
831854
}
832855
}
833856
}

llamachatmanager.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static void addCommonPayloadParams(QJsonObject &payload)
4747
payload["dry_penalty_last_n"] = settings().dry_penalty_last_n.value();
4848
payload["max_tokens"] = settings().max_tokens.value();
4949
payload["timings_per_token"] = settings().showTokensPerSecond.value();
50+
payload["return_progress"] = settings().showTokensPerSecond.value();
5051
}
5152

5253
static void addToolsToPayload(QJsonObject &payload)
@@ -659,6 +660,18 @@ void ChatManager::sendChatRequest(const QString &convId,
659660
return;
660661
}
661662

663+
if (chunk.contains("prompt_progress")) {
664+
QJsonObject progressObj = chunk["prompt_progress"].toObject();
665+
Message &pm = m_pendingMessages[convId];
666+
667+
pm.promptProgress.total = progressObj["total"].toInt();
668+
pm.promptProgress.cache = progressObj["cache"].toInt();
669+
pm.promptProgress.processed = progressObj["processed"].toInt();
670+
pm.promptProgress.time_ms = progressObj["time_ms"].toInteger();
671+
672+
emit pendingMessageChanged(pm);
673+
}
674+
662675
if (settings().showTokensPerSecond.value() && chunk.contains("timings")) {
663676
QJsonObject t = chunk["timings"].toObject();
664677
TimingReport tr;
@@ -670,7 +683,7 @@ void ChatManager::sendChatRequest(const QString &convId,
670683
}
671684

672685
QJsonArray choices = chunk["choices"].toArray();
673-
if (!choices.isEmpty()) {
686+
if (!choices.isEmpty()) {
674687
const QJsonObject &delta = choices[0].toObject()["delta"].toObject();
675688

676689
if (delta.contains("reasoning_content")) {

llamatypes.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ struct TimingReport
1717
double predicted_ms{0};
1818
};
1919

20+
// Prompt-processing progress attached to a Message. ChatManager::sendChatRequest
// fills these fields from the "prompt_progress" object of a response chunk, and
// ChatEditor::updateSpeedLabel reads them to render the "Processing: N%" label.
struct PromptProgress
21+
{
22+
int total{0}; // from key "total"; shown as "Total Tokens" in the tooltip and used as the percentage denominator
23+
int cache{0}; // from key "cache"; shown as "Cached" in the tooltip
24+
int processed{0}; // from key "processed"; NOTE(review): confirm whether the server already counts cached tokens in this value
25+
qint64 time_ms{0}; // from key "time_ms"; presumably elapsed milliseconds — TODO confirm
26+
};
27+
2028
/**
2129
* What is conversation "branching"? It is a feature that allows the user to edit an old message
2230
* in the history, while still keeping the conversation flow.
@@ -56,6 +64,7 @@ struct Message
5664
QString role; // "user" | "assistant" | "system" | "tool"
5765
QString content;
5866
TimingReport timings;
67+
PromptProgress promptProgress;
5968
QList<QVariantMap> extra; // array of MessageExtra
6069

6170
// Node relations – stored in the DB, not serialised directly

0 commit comments

Comments
 (0)