-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_5_chatGPT_topic_labelling.py
36 lines (28 loc) · 1.93 KB
/
_5_chatGPT_topic_labelling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
""" Beyond offering a list of keywords, the definitive labelling of a topic cluster is
a difficult problem, at least without access to partially-labelled training data.
An unsupervised approach to this task is to integrate powerful pre-trained
models such as ChatGPT, or large knowledge graphs such as ConceptNet, through their web APIs. """
import openai
from num2words import num2words
from api_keys import secret
def chatGPT_cluster_label(articles):
    """Ask ChatGPT for a short name label describing a cluster of articles.

    Each article is wrapped in ``<article>`` tags, the tagged articles are
    concatenated into a single user message, and one chat completion is
    requested from the ``gpt-3.5-turbo`` model. The system prompt instructs
    the model to answer with a label of at most three words.

    Side effect: sets ``openai.api_key`` from ``secret.key`` on every call.

    Args:
        articles: A sized collection of strings (titles and abstracts) taken
            from one topic cluster. Must support ``len()`` and iteration.

    Returns:
        The message content of the first choice in the API response.
    """
    openai.api_key = secret.key

    # Spell out the number of articles actually supplied. The prompt used to
    # announce len(articles) + 1, i.e. one more article than was sent.
    count_words = num2words(len(articles))

    # Delimit every article with XML tags so the model can tell them apart.
    tagged_articles = "".join(
        "<article>" + article + " </article> \n" for article in articles
    )

    system_prompt = (
        "You are an intelligent but laconic robot that responds by using the fewest words possible, at all times.\n"
        "Your task is to assign name labels to groups of short encyclopedia articles that are clustered by topic.\n"
        f"You will be provided with {count_words} articles (delimited with XML tags) that are selected at random from the topic cluster.\n"
        "Your cluster label must name the group, or set, of which all the articles are a member.\n"
        "Your cluster label must be a maximum of three words long, and ideally only one or two words, "
        "but also capture as much fine detail about the cluster's topic as possible."
    )
    user_prompt = (
        f"Hello robot, here are the {count_words} articles: \n"
        f"{tagged_articles}\n What is your cluster label?"
    )
    query = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': user_prompt},
    ]

    # `create` is a classmethod in the legacy openai API, so no instance of
    # ChatCompletion is needed.
    response = openai.ChatCompletion.create(model='gpt-3.5-turbo', messages=query)
    return response.choices[0]['message']['content']