From 96065e3a86625e884a412479be942a309d05b7de Mon Sep 17 00:00:00 2001 From: ANDREY KHASANOV Date: Thu, 28 Dec 2023 21:21:30 +0200 Subject: [PATCH] Add Russian translation for long story summaries A new function, `translateLongStory`, has been implemented to provide translations of long story summaries into Russian. In addition, a new sender and corresponding channel for the Russian version have been established. The changes also include an increase in queue length from 10 to 100, and new test cases to ensure the proper functioning of Russian translation. This change also fixes `longStorySend` so that the long story conversation history is appended to `longStoryMessages` instead of `existMessages`. --- cmd/channel_contentmaker/main.go | 5 +- internal/tweetseditor/editor.go | 82 ++++++++++++++++++++++------ internal/tweetseditor/editor_test.go | 28 ++++++++++ 3 files changed, 96 insertions(+), 19 deletions(-) diff --git a/cmd/channel_contentmaker/main.go b/cmd/channel_contentmaker/main.go index ecb704b..e117fc4 100644 --- a/cmd/channel_contentmaker/main.go +++ b/cmd/channel_contentmaker/main.go @@ -35,6 +35,7 @@ type config struct { BotToken string `envconfig:"BOT_TOKEN" required:"true"` ChatID int64 `envconfig:"CHAT_ID" required:"true"` LongChatID int64 `envconfig:"LONG_CHAT_ID" required:"true"` + RusLongChatID int64 `envconfig:"RUS_LONG_CHAT_ID" required:"true"` ChatGPTToken string `envconfig:"CHAT_GPT_TOKEN" required:"true"` // OpenAI token EditorSendInterval time.Duration `envconfig:"EDITOR_SEND_INTERVAL" default:"30m"` // Interval to send edited tweets to telegram EditorCleanContextInterval time.Duration `envconfig:"EDITOR_CLEAN_CONTEXT_INTERVAL" default:"12h"` // Interval to clean chatgpt context @@ -92,7 +93,8 @@ func main() { } s := sender.NewSender(api, &telebot.Chat{ID: cfg.ChatID}, logger.WithField(pkgKey, "sender")) - ls := sender.NewSender(api, &telebot.Chat{ID: cfg.LongChatID}, logger.WithField(pkgKey, "sender")) + ls := sender.NewSender(api, &telebot.Chat{ID: cfg.LongChatID}, logger.WithField(pkgKey, "long sender")) + rls := sender.NewSender(api, &telebot.Chat{ID: cfg.RusLongChatID}, 
logger.WithField(pkgKey, "rus sender")) editor := tweetseditor.NewEditor( openai.NewClient(cfg.ChatGPTToken), @@ -104,6 +106,7 @@ func main() { editor.Edit(ctx) ctx = s.Send(ctx, editor.SubscribeEdited()) ctx = ls.Send(ctx, editor.SubscribeLongStoryMessages()) + ctx = rls.Send(ctx, editor.SubscribeRusStoryMessages()) logger.Info("service started") <-ctx.Done() diff --git a/internal/tweetseditor/editor.go b/internal/tweetseditor/editor.go index 8ebbe6f..852a87c 100644 --- a/internal/tweetseditor/editor.go +++ b/internal/tweetseditor/editor.go @@ -22,8 +22,9 @@ const ( longStoryPrompt = "Analyze several recent, popular tweets related to cryptocurrency. Extract key insights relevant for cryptocurrency investment, excluding non-investment related content like airdrops or giveaways. Summarize the useful information from each tweet. Format the summaries in json with fields: 'telegram_message' for the summary, 'useful_information' to indicate if the information is investment-relevant (true/false), and 'duplicate_information' to indicate if the information is repetitive (true/false), for example {\"telegram_message\":\"summarized message by tweet\", \"useful_information\":true, \"duplicate_information\": false}. \nTweets: %s." //nolint:lll longStoryNextPrompt = "Additional tweets, create new message only for new information: %s." 
+ russianPrompt = "Translate to russian with same json format" - queueLen = 10 + queueLen = 100 ) type Tweet struct { @@ -48,6 +49,7 @@ type Editor interface { Edit(ctx context.Context) context.Context SubscribeEdited() <-chan string SubscribeLongStoryMessages() <-chan string + SubscribeRusStoryMessages() <-chan string } type editor struct { @@ -62,6 +64,8 @@ type editor struct { longStoryIndex uint8 longStoryMessages []openai.ChatCompletionMessage + russianLongStoryEditedCh chan string + repo log log.Logger @@ -73,13 +77,11 @@ func (e *editor) Edit(ctx context.Context) context.Context { return ctx } -func (e *editor) SubscribeEdited() <-chan string { - return e.editedCh -} +func (e *editor) SubscribeEdited() <-chan string { return e.editedCh } -func (e *editor) SubscribeLongStoryMessages() <-chan string { - return e.longStoryEditedCh -} +func (e *editor) SubscribeLongStoryMessages() <-chan string { return e.longStoryEditedCh } + +func (e *editor) SubscribeRusStoryMessages() <-chan string { return e.russianLongStoryEditedCh } func (e *editor) editLoop(ctx context.Context) { ticker := time.NewTicker(e.sendInterval) @@ -321,25 +323,69 @@ func (e *editor) longStorySend(ctx context.Context) error { e.longStoryEditedCh <- utils.Escape(res.Content) - e.existMessages = append(e.existMessages, requestMessage, openai.ChatCompletionMessage{ + e.longStoryMessages = append(e.longStoryMessages, requestMessage, openai.ChatCompletionMessage{ Role: openai.ChatMessageRoleAssistant, Content: resp.Choices[0].Message.Content, }) + go e.translateLongStory(ctx) + return nil } +func (e *editor) translateLongStory(ctx context.Context) { + requestMessage := openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleUser, + Content: russianPrompt, + } + + resp, err := e.client.CreateChatCompletion( + ctx, + openai.ChatCompletionRequest{ + ResponseFormat: &openai.ChatCompletionResponseFormat{ + Type: openai.ChatCompletionResponseFormatTypeJSONObject, + }, + Model: openai.GPT4TurboPreview, + 
Messages: append(e.longStoryMessages, requestMessage), + }, + ) + + if err != nil { + e.log.WithError(err).Error("rus long story summary generation error") + return + } + + e.log.WithField("response", resp).Debug("rus long story summary generation result") + + res := LongStoryMessage{} + + if err = jsoniter.UnmarshalFromString(resp.Choices[0].Message.Content, &res); err != nil { + // TODO: try to search correct json in string + e.log.WithError(err).Error("rus long story summary unmarshal error") + e.russianLongStoryEditedCh <- utils.Escape(resp.Choices[0].Message.Content) + + return + } + + if !res.Useful || res.Duplicate { + return + } + + e.russianLongStoryEditedCh <- utils.Escape(res.Content) +} + func NewEditor(client *openai.Client, db repo, sendInterval, cleanInterval time.Duration, log log.Logger) Editor { return &editor{ - editedCh: make(chan string, queueLen), - client: client, - sendInterval: sendInterval, - cleanInterval: cleanInterval, - existMessages: make([]openai.ChatCompletionMessage, 0), - longStoryEditedCh: make(chan string, queueLen), - longStoryBuffer: [20]common.Tweet{}, - longStoryMessages: make([]openai.ChatCompletionMessage, 0), - repo: db, - log: log, + editedCh: make(chan string, queueLen), + client: client, + sendInterval: sendInterval, + cleanInterval: cleanInterval, + existMessages: make([]openai.ChatCompletionMessage, 0), + longStoryEditedCh: make(chan string, queueLen), + longStoryBuffer: [20]common.Tweet{}, + longStoryMessages: make([]openai.ChatCompletionMessage, 0), + russianLongStoryEditedCh: make(chan string, queueLen), + repo: db, + log: log, } } diff --git a/internal/tweetseditor/editor_test.go b/internal/tweetseditor/editor_test.go index f7e12a9..cfd89c4 100644 --- a/internal/tweetseditor/editor_test.go +++ b/internal/tweetseditor/editor_test.go @@ -202,3 +202,31 @@ func TestLongStory(t *testing.T) { cancel() }) } + +func TestRusLongStory(t *testing.T) { + t.Run("some tweets request", func(t *testing.T) { + ctx, cancel := 
context.WithCancel(context.Background()) + client := openai.NewClient(os.Getenv("CHAT_GPT_TOKEN")) + logrusLogger := logrus.New() + logrusLogger.SetLevel(logrus.TraceLevel) + logger := log.NewLogger(logrusLogger) + r := &testRepo{data: testTweets} + ed := NewEditor(client, r, time.Second, time.Hour*24, logger) + ctx = ed.Edit(ctx) + output := ed.SubscribeRusStoryMessages() + + chatID, err := strconv.ParseInt(os.Getenv("CHAT_ID"), 10, 64) + require.NoError(t, err) + + api, err := telebot.NewBot( + telebot.Settings{Token: os.Getenv("BOT_TOKEN"), Poller: &telebot.LongPoller{Timeout: 10 * time.Second}}, + ) + require.NoError(t, err) + s := sender.NewSender(api, &telebot.Chat{ID: chatID}, logger) + s.Send(ctx, output) + time.Sleep(time.Second) + r.data = moreTestTweets + time.Sleep(10 * time.Minute) + cancel() + }) +}