Skip to content

Commit

Permalink
Add Russian translation for long story summaries
Browse files Browse the repository at this point in the history
A new function, `translateLongStory`, has been implemented to provide translations of long story summaries into Russian. In addition, a new sender and corresponding channel for the Russian version have been established. The changes also include an increase in queue length from 10 to 100, and new test cases to ensure the proper functioning of Russian translation.
  • Loading branch information
lueurxax committed Dec 28, 2023
1 parent 96c7ef3 commit 96065e3
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 19 deletions.
5 changes: 4 additions & 1 deletion cmd/channel_contentmaker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type config struct {
BotToken string `envconfig:"BOT_TOKEN" required:"true"`
ChatID int64 `envconfig:"CHAT_ID" required:"true"`
LongChatID int64 `envconfig:"LONG_CHAT_ID" required:"true"`
RusLongChatID int64 `envconfig:"RUS_LONG_CHAT_ID" required:"true"`
ChatGPTToken string `envconfig:"CHAT_GPT_TOKEN" required:"true"` // OpenAI token
EditorSendInterval time.Duration `envconfig:"EDITOR_SEND_INTERVAL" default:"30m"` // Interval to send edited tweets to telegram
EditorCleanContextInterval time.Duration `envconfig:"EDITOR_CLEAN_CONTEXT_INTERVAL" default:"12h"` // Interval to clean chatgpt context
Expand Down Expand Up @@ -92,7 +93,8 @@ func main() {
}

s := sender.NewSender(api, &telebot.Chat{ID: cfg.ChatID}, logger.WithField(pkgKey, "sender"))
ls := sender.NewSender(api, &telebot.Chat{ID: cfg.LongChatID}, logger.WithField(pkgKey, "sender"))
ls := sender.NewSender(api, &telebot.Chat{ID: cfg.LongChatID}, logger.WithField(pkgKey, "long sender"))
rls := sender.NewSender(api, &telebot.Chat{ID: cfg.RusLongChatID}, logger.WithField(pkgKey, "rus sender"))

editor := tweetseditor.NewEditor(
openai.NewClient(cfg.ChatGPTToken),
Expand All @@ -104,6 +106,7 @@ func main() {
editor.Edit(ctx)
ctx = s.Send(ctx, editor.SubscribeEdited())
ctx = ls.Send(ctx, editor.SubscribeLongStoryMessages())
ctx = rls.Send(ctx, editor.SubscribeRusStoryMessages())

logger.Info("service started")
<-ctx.Done()
Expand Down
82 changes: 64 additions & 18 deletions internal/tweetseditor/editor.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ const (

longStoryPrompt = "Analyze several recent, popular tweets related to cryptocurrency. Extract key insights relevant for cryptocurrency investment, excluding non-investment related content like airdrops or giveaways. Summarize the useful information from each tweet. Format the summaries in json with fields: 'telegram_message' for the summary, 'useful_information' to indicate if the information is investment-relevant (true/false), and 'duplicate_information' to indicate if the information is repetitive (true/false), for example {\"telegram_message\":\"summarized message by tweet\", \"useful_information\":true, \"duplicate_information\": false}. \nTweets: %s." //nolint:lll
longStoryNextPrompt = "Additional tweets, create new message only for new information: %s."
russianPrompt = "Translate to russian with same json format"

queueLen = 10
queueLen = 100
)

type Tweet struct {
Expand All @@ -48,6 +49,7 @@ type Editor interface {
Edit(ctx context.Context) context.Context
SubscribeEdited() <-chan string
SubscribeLongStoryMessages() <-chan string
SubscribeRusStoryMessages() <-chan string
}

type editor struct {
Expand All @@ -62,6 +64,8 @@ type editor struct {
longStoryIndex uint8
longStoryMessages []openai.ChatCompletionMessage

russianLongStoryEditedCh chan string

repo

log log.Logger
Expand All @@ -73,13 +77,11 @@ func (e *editor) Edit(ctx context.Context) context.Context {
return ctx
}

func (e *editor) SubscribeEdited() <-chan string {
return e.editedCh
}
// SubscribeEdited exposes editedCh to consumers as a receive-only channel.
func (e *editor) SubscribeEdited() <-chan string { return e.editedCh }

func (e *editor) SubscribeLongStoryMessages() <-chan string {
return e.longStoryEditedCh
}
// SubscribeLongStoryMessages exposes longStoryEditedCh to consumers as a
// receive-only channel.
func (e *editor) SubscribeLongStoryMessages() <-chan string { return e.longStoryEditedCh }

// SubscribeRusStoryMessages exposes russianLongStoryEditedCh to consumers as a
// receive-only channel.
func (e *editor) SubscribeRusStoryMessages() <-chan string { return e.russianLongStoryEditedCh }

func (e *editor) editLoop(ctx context.Context) {
ticker := time.NewTicker(e.sendInterval)
Expand Down Expand Up @@ -321,25 +323,69 @@ func (e *editor) longStorySend(ctx context.Context) error {

e.longStoryEditedCh <- utils.Escape(res.Content)

e.existMessages = append(e.existMessages, requestMessage, openai.ChatCompletionMessage{
e.longStoryMessages = append(e.longStoryMessages, requestMessage, openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleAssistant,
Content: resp.Choices[0].Message.Content,
})

go e.translateLongStory(ctx)

return nil
}

// translateLongStory asks the model to translate the latest long story
// summary into Russian and publishes the result on russianLongStoryEditedCh.
//
// The request replays the accumulated longStoryMessages conversation followed
// by russianPrompt and requires a JSON object response. When the response
// parses into a LongStoryMessage, its content is published only if it is
// flagged useful and not duplicate. When it does not parse, the raw response
// text is published instead. Request errors and empty responses are logged and
// nothing is published.
//
// longStorySend starts this method in its own goroutine, so it must not write
// into state shared with that caller and must not block once ctx is cancelled.
func (e *editor) translateLongStory(ctx context.Context) {
	// Build the request in a fresh slice: append(e.longStoryMessages, ...)
	// could write into spare capacity of the shared backing array, which
	// longStorySend appends to as well.
	// NOTE(review): reading e.longStoryMessages here is still unsynchronised
	// with writers on other goroutines — confirm whether a mutex or a snapshot
	// passed in by the caller is needed.
	messages := make([]openai.ChatCompletionMessage, 0, len(e.longStoryMessages)+1)
	messages = append(messages, e.longStoryMessages...)
	messages = append(messages, openai.ChatCompletionMessage{
		Role:    openai.ChatMessageRoleUser,
		Content: russianPrompt,
	})

	resp, err := e.client.CreateChatCompletion(
		ctx,
		openai.ChatCompletionRequest{
			ResponseFormat: &openai.ChatCompletionResponseFormat{
				Type: openai.ChatCompletionResponseFormatTypeJSONObject,
			},
			Model:    openai.GPT4TurboPreview,
			Messages: messages,
		},
	)
	if err != nil {
		e.log.WithError(err).Error("rus long story summary generation error")
		return
	}

	e.log.WithField("response", resp).Debug("rus long story summary generation result")

	// Guard the index below: a response without choices would otherwise panic
	// and take the whole process down from this goroutine.
	if len(resp.Choices) == 0 {
		e.log.Error("rus long story summary generation returned no choices")
		return
	}

	// Default to the raw model output; it is what gets published when the
	// response is not valid LongStoryMessage JSON.
	out := resp.Choices[0].Message.Content

	res := LongStoryMessage{}
	if err = jsoniter.UnmarshalFromString(out, &res); err != nil {
		// TODO: try to search correct json in string
		e.log.WithError(err).Error("rus long story summary unmarshal error")
	} else {
		if !res.Useful || res.Duplicate {
			return
		}

		out = res.Content
	}

	// Do not block forever on a full queue after shutdown has been requested.
	select {
	case e.russianLongStoryEditedCh <- utils.Escape(out):
	case <-ctx.Done():
	}
}

// NewEditor returns an Editor backed by *editor.
//
// It stores client, db, sendInterval, cleanInterval and log on the new value,
// starts existMessages and longStoryMessages as empty slices, and creates the
// three output channels (editedCh, longStoryEditedCh and
// russianLongStoryEditedCh) with a buffer of queueLen, so up to queueLen
// messages can be queued on each before a send blocks.
func NewEditor(client *openai.Client, db repo, sendInterval, cleanInterval time.Duration, log log.Logger) Editor {
return &editor{
editedCh: make(chan string, queueLen),
client: client,
sendInterval: sendInterval,
cleanInterval: cleanInterval,
existMessages: make([]openai.ChatCompletionMessage, 0),
longStoryEditedCh: make(chan string, queueLen),
longStoryBuffer: [20]common.Tweet{},
longStoryMessages: make([]openai.ChatCompletionMessage, 0),
repo: db,
log: log,
editedCh: make(chan string, queueLen),
client: client,
sendInterval: sendInterval,
cleanInterval: cleanInterval,
existMessages: make([]openai.ChatCompletionMessage, 0),
longStoryEditedCh: make(chan string, queueLen),
longStoryBuffer: [20]common.Tweet{},
longStoryMessages: make([]openai.ChatCompletionMessage, 0),
russianLongStoryEditedCh: make(chan string, queueLen),
repo: db,
log: log,
}
}
28 changes: 28 additions & 0 deletions internal/tweetseditor/editor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,31 @@ func TestLongStory(t *testing.T) {
cancel()
})
}

func TestRusLongStory(t *testing.T) {
t.Run("some tweets request", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
client := openai.NewClient(os.Getenv("CHAT_GPT_TOKEN"))
logrusLogger := logrus.New()
logrusLogger.SetLevel(logrus.TraceLevel)
logger := log.NewLogger(logrusLogger)
r := &testRepo{data: testTweets}
ed := NewEditor(client, r, time.Second, time.Hour*24, logger)
ctx = ed.Edit(ctx)
output := ed.SubscribeRusStoryMessages()

chatID, err := strconv.ParseInt(os.Getenv("CHAT_ID"), 10, 64)
require.NoError(t, err)

api, err := telebot.NewBot(
telebot.Settings{Token: os.Getenv("BOT_TOKEN"), Poller: &telebot.LongPoller{Timeout: 10 * time.Second}},
)
require.NoError(t, err)
s := sender.NewSender(api, &telebot.Chat{ID: chatID}, logger)
s.Send(ctx, output)
time.Sleep(time.Second)
r.data = moreTestTweets
time.Sleep(10 * time.Minute)
cancel()
})
}

0 comments on commit 96065e3

Please sign in to comment.