Skip to content

Commit 92700a0

Browse files
committed
Improving example text: summarization and streaming
1 parent cb4a442 commit 92700a0

File tree

4 files changed

+266
-0
lines changed

4 files changed

+266
-0
lines changed

examples/ExampleStreaming.m

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
%% Process Generated Text in Real Time by Using ChatGPT in Streaming Mode
2+
% This example shows how to process generated text in real time by using ChatGPT
3+
% in streaming mode.
4+
%
5+
% By default, when you pass a prompt to ChatGPT, it generates a response internally
6+
% and then outputs it in full at the end. To print out and format generated text
7+
% as the model is generating it, use the |StreamFun| name-value argument of the
8+
% |openAIChat| class. The streaming function is a custom function handle that
9+
% tells the model what to do with the output.
10+
%
11+
% The example includes two parts:
12+
%%
13+
% * First, define and use a custom streaming function to print out generated
14+
% text directly as the model generates it.
15+
% * Then, create an HTML UI Component and define and use a custom streaming
16+
% function to update the UI Component in real time as the model generates text.
17+
%%
18+
% To run this example, you need a valid API key from a paid OpenAI API account.
19+
20+
% Load the environment variables stored in the local .env file (this example
% needs an OpenAI API key) and add the parent folder to the MATLAB path.
loadenv(".env");
addpath("..");
22+
%% Print Stream Directly to Screen
23+
% In this example, the streamed output is printed directly to the screen.
24+
%
25+
% Define the function to print the returned tokens.
26+
27+
function printToken(token)
%PRINTTOKEN Print one streamed token to the Command Window.
%   The token is passed as data for a "%s" format specifier rather than as
%   the format itself, so "%" and "\" characters in it are printed as-is.
%   No line break is added after the token.
fprintf(1, "%s", token);
end
30+
%%
31+
% Create the chat object with the defined function as a handle.
32+
33+
chat = openAIChat(StreamFun = @printToken);
%%
% Generate a response to a prompt in streaming mode. The streaming function
% set above receives the generated text while the model is producing it.

prompt = "What is Model-Based Design?";
generate(chat, prompt, MaxNumTokens = 500);
39+
%% Print Stream to HTML UI Component
40+
% In this example, the streamed output is printed to the HTML component.
41+
%
42+
% Create the HTML UI component.
43+
44+
fig = uifigure;
h = uihtml(fig, "Position", [50 10 450 400]);
%%
% Initialize the content of the HTML UI component.

resetTable(h);
%%
% Create the chat object. The streaming function is an anonymous function
% that forwards each token to |printStream| together with the |uihtml| object
% created earlier.

chat = openAIChat(StreamFun = @(token) printStream(h, token));
%%
% Add the user prompt to the table in the HTML UI component.

userPrompt = "Tell me 5 jokes.";
addChat(h, "user", userPrompt, "new")
%%
% Generate a response to the prompt in streaming mode.

[txt, message, response] = generate(chat, userPrompt);
%%
% Update the last row with the final output. This step is needed when the
% complete text requires further processing, for example to apply additional
% HTML formatting.

addChat(h, "assistant", txt, "current")
69+
%% Helper functions
70+
% |resetTable|:
71+
%%
72+
% # Adds the basic HTML structure and the JavaScript that processes the data change
73+
% in MATLAB.
74+
% # The JavaScript gets a reference to the table and changed data and if the
75+
% 3rd element in the data is "new", adds a new row.
76+
% # It populates the new row with two cells and updates the cells from the first
77+
% two elements of the data.
78+
% # The new row is then appended to the table.
79+
% # Otherwise, the JavaScript gets a reference to the last cell of the last row
80+
% of the table, and updates it with the 2nd element of the data.
81+
82+
function resetTable(obj)
%RESETTABLE Initialize the HTML UI component passed as the input argument.
%   obj must be a matlab.ui.control.HTML object. Its HTMLSource is set to a
%   page containing a table with the header row "Role | Content" and a script
%   that listens for the component's "DataChanged" event. The script reads
%   htmlComponent.Data: if the third element is "new", it appends a row whose
%   two cells are filled from the first two elements; otherwise it overwrites
%   the last cell of the last row with the second element.
%   Data is then cleared and the figure is refreshed.
mustBeA(obj,'matlab.ui.control.HTML')

% Static markup: the table and its header row, followed by the opening
% script tag.
pageHead = '<html><body><table><tr><th>Role</th><th>Content</th></tr></table><script>';

% JavaScript and closing tags. The fragments are concatenated without any
% separator, so each one carries its own punctuation.
scriptParts = { ...
    'function setup(htmlComponent) {'
    'htmlComponent.addEventListener("DataChanged", function(event) {'
    'var table = document.querySelector("table");'
    'var changedData = htmlComponent.Data;'
    'if (changedData[2] == "new") {'
    'var newRow = document.createElement("tr");'
    'var cell1 = document.createElement("td");'
    'var cell2 = document.createElement("td");'
    'cell1.innerHTML = changedData[0];'
    'cell2.innerHTML = changedData[1];'
    'newRow.appendChild(cell1);'
    'newRow.appendChild(cell2);'
    'table.appendChild(newRow);'
    '} else { '
    'var lastRow = table.rows[table.rows.length - 1];'
    'var lastCell = lastRow.cells[lastRow.cells.length - 1];'
    'lastCell.innerHTML = changedData[1];'
    '}});}</script></body></html>'
    };

obj.HTMLSource = [pageHead, scriptParts{:}];
obj.Data = [];
drawnow
end
108+
%%
109+
% |addChat| adds a new row to, or updates the last row of, the table in the HTML UI component
110+
111+
function addChat(obj,role,content,row)
%ADDCHAT Add a new row to the table or update its last row.
%   obj must be a matlab.ui.control.HTML object. Line breaks in content are
%   converted to "<br>", then {role,content,row} is assigned to obj.Data and
%   the figure is refreshed. row is passed through unchanged; the script
%   installed by resetTable appends a row when it is "new" and otherwise
%   rewrites the last cell of the last row.
mustBeA(obj,'matlab.ui.control.HTML')
htmlContent = replace(content,newline,"<br>");
obj.Data = {role,htmlContent,row};
drawnow
end
118+
%%
119+
% |printStream| is the streaming function and prints the stream in the table
120+
% in the HTML UI component
121+
122+
function printStream(h,x)
%PRINTSTREAM Print the streamed tokens in the last row of the HTML table.
%   h is the HTML UI component whose Data property drives the table and x is
%   the token received from the model.
%
%   A new "assistant" row is started when x has zero length (this example
%   expects the stream to open with an empty token) or when h.Data does not
%   currently hold an assistant row, for instance right after resetTable or
%   after the user prompt was added. In every other case x is appended to
%   the text already stored in h.Data and the existing row is updated.

% Decide on the raw input whether this token is the empty stream opener.
isEmptyToken = strlength(x) == 0;

% Work with a string so that "+" concatenates, and turn line breaks into
% <br> because the table cell is filled through innerHTML.
token = replace(string(x),newline,"<br>");

% Only append when Data already describes an assistant row. Otherwise
% indexing Data{2} would fail (Data is []) or the new text would be glued
% onto the user prompt and overwrite the user row.
current = h.Data;
hasAssistantRow = iscell(current) && numel(current) >= 2 && ...
    isequal(strcmp(current{1},"assistant"),true);

if isEmptyToken || ~hasAssistantRow
    % start a new row for the assistant
    h.Data = {"assistant",token,"new"};
else
    % append the new token to the previous tokens and update the existing row
    h.Data = {"assistant",string(current{2}) + token,"current"};
end
drawnow
end
141+
%%
142+
% _Copyright 2024 The MathWorks, Inc._

examples/ExampleStreaming.mlx

90 Bytes
Binary file not shown.

examples/ExampleSummarization.m

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
%% Summarize Large Documents Using ChatGPT and MATLAB
2+
% This example shows how to use ChatGPT to summarize documents that are too
3+
% large to be summarized at once.
4+
%
5+
% To summarize short documents using ChatGPT, you can pass the documents directly
6+
% as a prompt together with an instruction to summarize them. However, ChatGPT
7+
% can only process prompts of limited size.
8+
%
9+
% To summarize documents that are larger than this limit, split the documents
10+
% up into smaller documents. Summarize the smaller document chunks, then pass
11+
% all of the summaries to ChatGPT to generate one overall summary.
12+
%%
13+
% This example includes four steps:
14+
% * Download the complete text of "Alice in Wonderland" by Lewis Carroll from
15+
% Project Gutenberg.
16+
% * Split the documents up into chunks of less than 3000 words. (Section title:
17+
% "Split Document Into Chunks")
18+
% * Use ChatGPT to create summaries of each chunk. ("Summarize Chunks")
19+
% * Then use ChatGPT to create a summary of all of the summaries. ("Summarize
20+
% Document")
21+
%%
22+
% To run this example, you need Text Analytics Toolbox™.
23+
%
24+
% To run this example, you need a valid API key from a paid OpenAI™ API account.
25+
26+
% Load the environment variables stored in the local .env file (this example
% needs an OpenAI API key) and add the parent folder to the MATLAB path.
loadenv(".env");
addpath("..");
28+
%% Download Text Data
29+
% Download and read the content from Alice's Adventures in Wonderland by Lewis
30+
% Carroll from Project Gutenberg.
31+
%
32+
% First read the contents of the webpage.
33+
34+
% Allow up to 30 seconds for the request, read the page, and extract its text
% from the HTML.
options = weboptions("Timeout", 30);
code = webread("https://www.gutenberg.org/files/11/11-h/11-h.htm", options);
longText = extractHTMLText(string(code));
37+
%% Split Document Into Chunks
38+
% Large language models have a limit in terms of how much text they can accept
39+
% as input, so if you try to summarize the complete book, you will likely get
40+
% an error. A workaround is to split the book into chunks and summarize each
41+
% chunk individually. The chunk size is defined in |limitChunkWords|, which restricts
42+
% the number of words in a chunk.
43+
44+
% The first round of chunking works on the complete text.
limitChunkWords = 3000;
incrementalSummary = longText;
chunks = createChunks(incrementalSummary, limitChunkWords);
47+
%% Summarize Chunks
48+
% Initialize a ChatGPT session with the role of summarizing text
49+
50+
summarizer = openAIChat("You are a professional summarizer.");
%%
% Summarize the text chunk by chunk. The summaries are joined and chunked
% again, and the process repeats until |createChunks| returns at most one
% chunk.

numCalls = 0;
while numel(chunks) > 1
    summarizedChunks = strings(size(chunks));

    % Count the calls this round is about to make and stop before making
    % them if the total exceeds the limit. Change the limit to match what is
    % needed for your application.
    numCalls = numCalls + numel(chunks);
    if numCalls > 20
        error("Document is too long to be summarized.")
    end

    % Summarize every chunk on its own.
    for i = 1:numel(chunks)
        summarizedChunks(i) = generate(summarizer, "Summarize this content:" + newline + chunks(i));
    end

    % Merge the summarized chunks to serve as the base for the next
    % iteration, then form new chunks from the merged text.
    incrementalSummary = join(summarizedChunks);
    chunks = createChunks(incrementalSummary, limitChunkWords);
end
80+
%% Summarize Document
81+
% Compile the final summary by combining the summaries from all the chunks.
82+
83+
% Ask the model to merge the partial summaries into one overall summary.
fullSummary = generate(summarizer, "The following text is a combination of summaries. " + ...
    "Provide a cohesive and coherent summary combining these smaller summaries, preserving as much information as possible:" + newline + incrementalSummary);

% No trailing semicolon: the wrapped summary is displayed as the result.
wrapText(fullSummary)
86+
%% |createChunks| function
87+
% This function segments a long text into smaller parts of a predefined size
88+
% to facilitate easier summarization. It preserves the structure of sentences.
89+
% The |chunkSize| should be large enough to fit at least one sentence.
90+
91+
function chunks = createChunks(text, chunkSize)
%CREATECHUNKS Split text into chunks of fewer than chunkSize tokens.
%   chunks = createChunks(text, chunkSize) tokenizes text, splits it into
%   sentences, and groups consecutive sentences into chunks whose token
%   count (as reported by doclength) stays below chunkSize.
%
%   Sentences are never split, so a sentence with chunkSize or more tokens
%   becomes a chunk of its own that exceeds the limit.
%
%   chunks is a column vector of strings. It contains every sentence of the
%   input, including those collected after the last full chunk, and it is
%   empty when text contains no tokens.

% Tokenize the input, then split it into one document per sentence.
sentences = splitSentences(tokenizedDocument(text));

chunks = strings(0,1);
currentChunk = "";
currentChunkSize = 0;

% Aggregate sentences until adding the next one would reach the predefined
% size, then close the chunk and start a new one with that sentence.
for i = 1:numel(sentences)
    sentenceText = joinWords(sentences(i));
    sentenceSize = doclength(sentences(i));

    if currentChunkSize + sentenceSize < chunkSize
        if currentChunkSize == 0
            % First sentence of a chunk: no separator needed.
            currentChunk = sentenceText;
        else
            currentChunk = currentChunk + " " + sentenceText;
        end
        currentChunkSize = currentChunkSize + sentenceSize;
    else
        % Skip the append when nothing was collected yet, which happens when
        % the very first sentence is already too long.
        if currentChunkSize > 0
            chunks = [chunks; currentChunk]; %#ok<AGROW>
        end
        currentChunk = sentenceText;
        currentChunkSize = sentenceSize;
    end
end

% Append the chunk that was still being built when the loop ended.
% Without this step the end of the text would be dropped.
if currentChunkSize > 0
    chunks = [chunks; currentChunk];
end
end
115+
%% |wrapText| function
116+
% This function splits text into sentences and then concatenates them again
117+
% using |newline| to make it easier to visualize text in this example
118+
119+
function wrappedText = wrapText(text)
%WRAPTEXT Put every sentence of text on its own line.
%   The text is split into sentences with splitSentences and the sentences
%   are joined again with newline as the delimiter.
sentences = splitSentences(text);
wrappedText = join(sentences,newline);
end
123+
%%
124+
% _Copyright 2023 The MathWorks, Inc._

examples/ExampleSummarization.mlx

-908 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)