Skip to content

Commit e0c8b86

Browse files
feat: knowledge file download API and file citations (#1836)
Co-authored-by: Ryan Hopper-Lowe <[email protected]>
1 parent f115e4a commit e0c8b86

File tree

12 files changed

+251
-102
lines changed

12 files changed

+251
-102
lines changed

pkg/api/handlers/agent.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,15 @@ func (a *AgentHandler) ListKnowledgeFiles(req api.Context) error {
442442
return listKnowledgeFiles(req, agentName, "", knowledgeSetNames[0], knowledgeSource)
443443
}
444444

445+
func (a *AgentHandler) GetKnowledgeFile(req api.Context) error {
446+
knowledgeSetNames, _, err := a.getKnowledgeSetsAndName(req, req.PathValue("agent_id"))
447+
if err != nil {
448+
return err
449+
}
450+
451+
return getKnowledgeFileFromAllowedSets(req, a.gptscript, knowledgeSetNames, req.PathValue("file"))
452+
}
453+
445454
func (a *AgentHandler) UploadKnowledgeFile(req api.Context) error {
446455
knowledgeSetNames, agentName, err := a.getKnowledgeSetsAndName(req, req.PathValue("id"))
447456
if err != nil {

pkg/api/handlers/assistants.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,14 @@ func (a *AssistantHandler) Knowledge(req api.Context) error {
363363
return listKnowledgeFiles(req, "", thread.Name, thread.Status.KnowledgeSetNames[0], nil)
364364
}
365365

366+
func (a *AssistantHandler) GetKnowledgeFile(req api.Context) error {
367+
thread, err := getThreadForScope(req)
368+
if err != nil {
369+
return err
370+
}
371+
return getKnowledgeFile(req, a.gptScript, thread, nil, req.PathValue("file"))
372+
}
373+
366374
func (a *AssistantHandler) UploadKnowledge(req api.Context) error {
367375
thread, err := getThreadForScope(req)
368376
if err != nil {

pkg/api/handlers/files.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ import (
44
"context"
55
"fmt"
66
"net/http"
7+
"net/url"
78
"path/filepath"
9+
"slices"
810
"strings"
911

1012
"github.com/gptscript-ai/go-gptscript"
@@ -52,6 +54,63 @@ func getWorkspaceFromKnowledgeSet(req api.Context, knowledgeSetName string) (*v1
5254
return &ws, req.Get(&ws, knowledgeSet.Status.WorkspaceName)
5355
}
5456

57+
// getKnowledgeFile retrieves a knowledge file from the workspace associated with the knowledge set.
58+
// It works for both thread and agent knowledge sets. If the knowledge set is not found in the thread, it will be looked up in the agent.
59+
func getKnowledgeFile(req api.Context, gClient *gptscript.GPTScript, thread *v1.Thread, agent *v1.Agent, fileRef string) error {
60+
var err error
61+
62+
// make sure that the selected knowledge set belongs either to the thread or to the agent
63+
var knowledgeSetNames []string
64+
if thread != nil {
65+
knowledgeSetNames = thread.Status.KnowledgeSetNames
66+
if agent == nil {
67+
agent, err = getAssistant(req, thread.Spec.AgentName)
68+
if err != nil {
69+
return err
70+
}
71+
}
72+
}
73+
74+
if agent != nil {
75+
knowledgeSetNames = append(knowledgeSetNames, agent.Status.KnowledgeSetNames...)
76+
}
77+
78+
return getKnowledgeFileFromAllowedSets(req, gClient, knowledgeSetNames, fileRef)
79+
}
80+
81+
// getKnowledgeFileFromAllowedSets retrieves a knowledge file from the workspace associated with the knowledge set, if the knowledge set is in the list of allowed knowledge sets.
82+
// The fileRef is expected to be in the URL-encoded format [<knowledgeSet.Namespace>/]<knowledgeSet.Name>::<filename>.
83+
func getKnowledgeFileFromAllowedSets(req api.Context, gClient *gptscript.GPTScript, knowledgeSetNames []string, fileRef string) error {
84+
var knowledgeSetName string
85+
86+
file, err := url.PathUnescape(fileRef)
87+
if err != nil {
88+
return types.NewErrBadRequest("invalid knowledgeFile reference")
89+
}
90+
91+
parts := strings.Split(file, "::")
92+
if len(parts) != 2 {
93+
return types.NewErrBadRequest("invalid knowledgeFile path")
94+
}
95+
knowledgeSetName, file = parts[0], parts[1]
96+
97+
if parts := strings.Split(knowledgeSetName, "/"); len(parts) > 1 {
98+
knowledgeSetName = parts[1] // may come in as <namespace>/<knowledgeset>, we don't care about the namespace right now
99+
}
100+
101+
if !slices.Contains(knowledgeSetNames, knowledgeSetName) {
102+
return types.NewErrNotFound("knowledge set %q not accessible", knowledgeSetName)
103+
}
104+
105+
ws, err := getWorkspaceFromKnowledgeSet(req, knowledgeSetName)
106+
if err != nil {
107+
return err
108+
}
109+
110+
req.SetPathValue("file", file)
111+
return getFileInWorkspace(req.Context(), req, gClient, ws.Status.WorkspaceID, "") // knowledge files are stored in the root of the workspace (we have one workspace per knowledge set)
112+
}
113+
55114
func listKnowledgeFiles(req api.Context, agentName, threadName, knowledgeSetName string, knowledgeSource *v1.KnowledgeSource) error {
56115
var (
57116
files v1.KnowledgeFileList
@@ -177,6 +236,7 @@ func getFileInWorkspace(ctx context.Context, req api.Context, gClient *gptscript
177236
}
178237

179238
req.ResponseWriter.Header().Set("Content-Type", "application/octet-stream")
239+
req.ResponseWriter.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", file)) // make sure the file is downloaded with only the filename, not e.g. the dataset prefix
180240
_, err = req.ResponseWriter.Write(data)
181241
return err
182242
}

pkg/api/handlers/threads.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,18 @@ func (a *ThreadHandler) Knowledge(req api.Context) error {
339339
return listKnowledgeFiles(req, "", thread.Name, thread.Status.KnowledgeSetNames[0], nil)
340340
}
341341

342+
func (a *ThreadHandler) GetKnowledgeFile(req api.Context) error {
343+
var (
344+
threadID = req.PathValue("id")
345+
)
346+
347+
var thread v1.Thread
348+
if err := req.Get(&thread, threadID); err != nil {
349+
return err
350+
}
351+
return getKnowledgeFile(req, a.gptscript, &thread, nil, req.PathValue("file"))
352+
}
353+
342354
func (a *ThreadHandler) UploadKnowledge(req api.Context) error {
343355
var (
344356
threadID = req.PathValue("id")

pkg/api/router/router.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func Router(services *services.Services) (http.Handler, error) {
9292
mux.HandleFunc("DELETE /api/assistants/{assistant_id}/projects/{project_id}/files/{file...}", assistants.DeleteFile)
9393
// Assistant knowledge files
9494
mux.HandleFunc("GET /api/assistants/{assistant_id}/projects/{project_id}/knowledge", assistants.Knowledge)
95+
mux.HandleFunc("GET /api/assistants/{assistant_id}/projects/{project_id}/knowledge/{file}", assistants.GetKnowledgeFile)
9596
mux.HandleFunc("POST /api/assistants/{assistant_id}/projects/{project_id}/knowledge/{file}", assistants.UploadKnowledge)
9697
mux.HandleFunc("DELETE /api/assistants/{assistant_id}/projects/{project_id}/knowledge/{file...}", assistants.DeleteKnowledge)
9798
// Env
@@ -198,6 +199,7 @@ func Router(services *services.Services) (http.Handler, error) {
198199

199200
// Agent knowledge files
200201
mux.HandleFunc("GET /api/agents/{agent_id}/knowledge-files", agents.ListKnowledgeFiles)
202+
mux.HandleFunc("GET /api/agents/{agent_id}/knowledge-files/{file}", agents.GetKnowledgeFile)
201203
mux.HandleFunc("POST /api/agents/{id}/knowledge-files/{file...}", agents.UploadKnowledgeFile)
202204
mux.HandleFunc("DELETE /api/agents/{id}/knowledge-files/{file...}", agents.DeleteKnowledgeFile)
203205
mux.HandleFunc("POST /api/agents/{agent_id}/knowledge-files/{file_id}/ingest", agents.ReIngestKnowledgeFile)
@@ -239,6 +241,7 @@ func Router(services *services.Services) (http.Handler, error) {
239241

240242
// Thread knowledge files
241243
mux.HandleFunc("GET /api/threads/{id}/knowledge-files", threads.Knowledge)
244+
mux.HandleFunc("GET /api/threads/{id}/knowledge-files/{file}", threads.GetKnowledgeFile)
242245
mux.HandleFunc("POST /api/threads/{id}/knowledge-files/{file}", threads.UploadKnowledge)
243246
mux.HandleFunc("DELETE /api/threads/{id}/knowledge-files/{file...}", threads.DeleteKnowledge)
244247

ui/admin/app/components/chat/Message.tsx

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import "@radix-ui/react-tooltip";
2-
import { AlertCircleIcon, WrenchIcon } from "lucide-react";
2+
import { AlertCircleIcon, BrainIcon, WrenchIcon } from "lucide-react";
33
import React, { useDeferredValue, useMemo, useState } from "react";
44
import { useForm } from "react-hook-form";
55

66
import { AgentIcons } from "~/lib/model/agents";
77
import { AuthPrompt } from "~/lib/model/chatEvents";
8+
import { KnowledgeFileNamespace } from "~/lib/model/knowledge";
89
import { Message as MessageType } from "~/lib/model/messages";
10+
import { ApiRoutes } from "~/lib/routers/apiRoutes";
911
import { PromptApiService } from "~/lib/service/api/PromptApi";
1012
import { cn, formatTime } from "~/lib/utils";
1113

@@ -14,7 +16,7 @@ import { ToolCallInfo } from "~/components/chat/ToolCallInfo";
1416
import { ControlledInput } from "~/components/form/controlledInputs";
1517
import { ToolIcon } from "~/components/tools/ToolIcon";
1618
import { Avatar, AvatarFallback, AvatarImage } from "~/components/ui/avatar";
17-
import { Button } from "~/components/ui/button";
19+
import { Button, buttonVariants } from "~/components/ui/button";
1820
import {
1921
Dialog,
2022
DialogContent,
@@ -26,6 +28,11 @@ import { Form } from "~/components/ui/form";
2628
import { Link } from "~/components/ui/link";
2729
import { Markdown } from "~/components/ui/markdown";
2830
import { ScrollArea } from "~/components/ui/scroll-area";
31+
import {
32+
Tooltip,
33+
TooltipContent,
34+
TooltipTrigger,
35+
} from "~/components/ui/tooltip";
2936
import { useAnimatedText } from "~/hooks/messages/useAnimatedText";
3037
import { useAsync } from "~/hooks/useAsync";
3138

@@ -369,44 +376,80 @@ export function SourceCitations({
369376
if (!message.knowledgeSources || !show) return null;
370377

371378
const formatUrl = (url: string) => {
372-
return url.replace(/(https?:\/\/)?(www\.)?/, "");
379+
return url.replace(/^(https?:\/\/)?(knowledge:\/\/)?/, "");
373380
};
374381

375-
const citations = new Map(
376-
message.knowledgeSources
377-
.filter((s) => !!s.url)
378-
.map((s) => {
379-
return [
380-
formatUrl(s.url as string),
381-
{ ...s, url: new URL(s.url as string) },
382-
];
383-
})
384-
);
385-
386382
return (
387383
<div className="flex flex-col gap-2 pt-4">
388384
<h4>Sources</h4>
389385
<div className="flex flex-wrap gap-2 pb-3">
390-
{Array.from(citations.entries()).map(([key, { url }]) => (
386+
{message.knowledgeSources.map((s) => {
387+
if (!s.url) return null;
388+
389+
if (s.url.startsWith("knowledge://"))
390+
return renderKnowledgeSource(s.url);
391+
392+
return renderLink(s.url);
393+
})}
394+
</div>
395+
</div>
396+
);
397+
398+
// Renders a citation for a "knowledge://<knowledgeset>::<filename>" source as a
// download link to the thread's knowledge-file endpoint. Returns null when the
// message carries no threadId, because the download route is built from it.
function renderKnowledgeSource(url: string) {
    if (!message.threadId) return null;

    // decodeURIComponent throws a URIError on malformed escape sequences;
    // fall back to the raw text instead of breaking the whole render.
    const safeDecode = (value: string) => {
        try {
            return decodeURIComponent(value);
        } catch {
            return value;
        }
    };

    // The still-encoded path is what gets placed into the request URL.
    const filePath = url.replace("knowledge://", "");

    // Everything after the first "::" is the file name; if the separator is
    // missing, show the whole decoded path rather than an empty label.
    const decodedPath = safeDecode(filePath);
    const separatorAt = decodedPath.indexOf("::");
    const fileName =
        separatorAt >= 0 ? decodedPath.slice(separatorAt + 2) : decodedPath;

    return (
        // The key belongs on the outermost element returned into the list.
        <Tooltip key={url}>
            <TooltipContent>{safeDecode(url)}</TooltipContent>

            <TooltipTrigger asChild>
                <a
                    className={buttonVariants({ variant: "secondary", size: "sm" })}
                    href={
                        ApiRoutes.knowledgeFiles.getKnowledgeFileById(
                            KnowledgeFileNamespace.Threads,
                            message.threadId,
                            filePath
                        ).url
                    }
                    download={fileName}
                >
                    <BrainIcon /> {fileName}
                </a>
            </TooltipTrigger>
        </Tooltip>
    );
}
427+
428+
// Renders a citation for a web source as a button-styled link that opens in a
// new tab, with the full URL in a tooltip and a shortened label on the button.
function renderLink(url: string) {
    // new URL() throws a TypeError for strings that are not absolute URLs.
    // Use the app's own favicon in that case instead of failing the render.
    let faviconSrc = "/favicon.ico";
    try {
        faviconSrc = `${new URL(url).origin}/favicon.ico`;
    } catch {
        // keep the local fallback
    }

    const formatted = formatUrl(url);
    const label =
        formatted.length > 25 ? formatted.slice(0, 25) + "..." : formatted;

    return (
        // The key belongs on the outermost element returned into the list.
        <Tooltip key={url}>
            <TooltipContent>{url}</TooltipContent>
            <TooltipTrigger asChild>
                <Link
                    as="button"
                    variant="secondary"
                    size="sm"
                    to={url}
                    target="_blank"
                    rel="noreferrer"
                >
                    <img
                        src={faviconSrc}
                        alt="Favicon"
                        onError={(e) => (e.currentTarget.src = "/favicon.ico")}
                        className="size-4"
                    />
                    {label}
                </Link>
            </TooltipTrigger>
        </Tooltip>
    );
}
412455
}

ui/admin/app/lib/model/messages.ts

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import {
22
AuthPrompt,
3-
ChatEvent,
43
KnowledgeToolOutput,
54
ToolCall,
65
} from "~/lib/model/chatEvents";
@@ -20,6 +19,7 @@ export interface Message {
2019
contentID?: string;
2120
time?: Date | string;
2221
knowledgeSources?: KnowledgeToolOutput;
22+
threadId?: string;
2323
}
2424

2525
export const runsToMessages = (runs: Run[]) => {
@@ -48,56 +48,8 @@ export const toolCallMessage = (toolCall: ToolCall): Message => ({
4848
tools: [toolCall],
4949
});
5050

51-
export const promptMessage = (prompt: AuthPrompt, runID: string): Message => ({
51+
export const promptMessage = (prompt: AuthPrompt): Message => ({
5252
sender: "agent",
5353
text: prompt.message,
5454
prompt,
55-
runId: runID,
5655
});
57-
58-
export const chatEventsToMessages = (events: ChatEvent[]) => {
59-
const messages: Message[] = [];
60-
61-
for (const event of events) {
62-
const { content, input, toolCall, runID, error, prompt } = event;
63-
64-
if (error) {
65-
messages.push({
66-
sender: "agent",
67-
text: `Error: ${error}`,
68-
runId: runID,
69-
error: true,
70-
});
71-
continue;
72-
}
73-
74-
if (input) {
75-
messages.push({
76-
sender: "user",
77-
text: input,
78-
runId: runID,
79-
});
80-
continue;
81-
}
82-
83-
if (toolCall) {
84-
messages.push(toolCallMessage(toolCall));
85-
continue;
86-
}
87-
88-
if (prompt) {
89-
messages.push(promptMessage(prompt, runID));
90-
continue;
91-
}
92-
93-
if (content) {
94-
messages.push({
95-
sender: "agent",
96-
text: content,
97-
runId: runID,
98-
});
99-
}
100-
}
101-
102-
return messages;
103-
};

ui/admin/app/lib/routers/apiRoutes.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ export const ApiRoutes = {
103103
knowledgeFiles: {
104104
getKnowledgeFiles: (namespace: KnowledgeFileNamespace, entityId: string) =>
105105
buildUrl(`/${namespace}/${entityId}/knowledge-files`),
106+
// Route for fetching one knowledge file of the given entity; fileName is
// inserted into the path exactly as passed in.
getKnowledgeFileById: (
    namespace: KnowledgeFileNamespace,
    entityId: string,
    fileName: string
) => {
    const route = `/${namespace}/${entityId}/knowledge-files/${fileName}`;
    return buildUrl(route);
},
106111
addKnowledgeFile: (
107112
namespace: KnowledgeFileNamespace,
108113
entityId: string,

0 commit comments

Comments
 (0)