Skip to content

Commit 0fa3988

Browse files
authored
Merge pull request #37 from y-pakorn/feat/get-article
Add `getArticle` function to scraper.
2 parents 8c07898 + d71542a commit 0fa3988

File tree

6 files changed

+173
-20
lines changed

6 files changed

+173
-20
lines changed

src/api-data.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ const endpoints = {
1515
'https://twitter.com/i/api/graphql/eSSNbhECHHWWALkkQq-YTA/Likes?variables=%7B%22userId%22%3A%222244196397%22%2C%22count%22%3A20%2C%22includePromotedContent%22%3Afalse%2C%22withClientEventToken%22%3Afalse%2C%22withBirdwatchNotes%22%3Afalse%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D',
1616
TweetDetail:
1717
'https://twitter.com/i/api/graphql/xOhkmRac04YFZmOzU9PJHg/TweetDetail?variables=%7B%22focalTweetId%22%3A%221237110546383724547%22%2C%22with_rux_injections%22%3Afalse%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Afalse%7D',
18+
TweetDetailArticle:
19+
'https://twitter.com/i/api/graphql/GtcBtFhtQymrpxAs5MALVA/TweetDetail?variables=%7B%22focalTweetId%22%3A%221765884209527394325%22%2C%22with_rux_injections%22%3Atrue%2C%22rankingMode%22%3A%22Relevance%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%7D&features=%7B%22profile_label_improvements_pcf_label_in_post_enabled%22%3Afalse%2C%22rweb_tipjar_consumption_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22premium_content_api_read_enabled%22%3Afalse%2C%22communities_web_enable_tweet_community_results_fetch%22%3Atrue%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22responsive_web_grok_analyze_button_fetch_trends_enabled%22%3Atrue%2C%22responsive_web_grok_analyze_post_followups_enabled%22%3Afalse%2C%22responsive_web_grok_share_attachment_enabled%22%3Atrue%2C%22articles_preview_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22creator_subscriptions_quote_tweet_preview_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Atrue%2C%22withArticlePlainText%22%3Afalse%2C%22withGrokAnalyze%22%3Afalse%2C%22withDisallowedReplyControls%22%3Afalse%7D',
1820
TweetResultByRestId:
1921
'https://twitter.com/i/api/graphql/DJS3BdhUhcaEpZ7B7irJDg/TweetResultByRestId?variables=%7B%22tweetId%22%3A%221237110546383724547%22%2C%22withCommunity%22%3Afalse%2C%22includePromotedContent%22%3Afalse%2C%22withVoice%22%3Afalse%7D&features=%7B%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D',
2022
ListTweets:

src/scraper.ts

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,13 @@ import {
5353
retweet,
5454
createCreateNoteTweetRequest,
5555
createCreateLongTweetRequest,
56+
getArticle,
5657
} from './tweets';
57-
import { parseTimelineTweetsV2, TimelineV2 } from './timeline-v2';
58+
import {
59+
parseTimelineTweetsV2,
60+
TimelineArticle,
61+
TimelineV2,
62+
} from './timeline-v2';
5863
import { fetchHomeTimeline } from './timeline-home';
5964
import { fetchFollowingTimeline } from './timeline-following';
6065
import {
@@ -75,9 +80,17 @@ import {
7580
fetchAudioSpaceById,
7681
fetchAuthenticatePeriscope,
7782
fetchBrowseSpaceTopics,
78-
fetchCommunitySelectQuery, fetchLiveVideoStreamStatus, fetchLoginTwitterToken
83+
fetchCommunitySelectQuery,
84+
fetchLiveVideoStreamStatus,
85+
fetchLoginTwitterToken,
7986
} from './spaces';
80-
import {AudioSpace, Community, LiveVideoStreamStatus, LoginTwitterTokenResponse, Subtopic} from './types/spaces';
87+
import {
88+
AudioSpace,
89+
Community,
90+
LiveVideoStreamStatus,
91+
LoginTwitterTokenResponse,
92+
Subtopic,
93+
} from './types/spaces';
8194

8295
const twUrl = 'https://twitter.com';
8396
const UserTweetsUrl =
@@ -945,7 +958,7 @@ export class Scraper {
945958
* @returns The status of the Audio Space stream.
946959
*/
947960
public async getAudioSpaceStreamStatus(
948-
mediaKey: string,
961+
mediaKey: string,
949962
): Promise<LiveVideoStreamStatus> {
950963
return await fetchLiveVideoStreamStatus(mediaKey, this.auth);
951964
}
@@ -958,7 +971,7 @@ export class Scraper {
958971
* @returns The status of the Audio Space stream.
959972
*/
960973
public async getAudioSpaceStatus(
961-
audioSpaceId: string,
974+
audioSpaceId: string,
962975
): Promise<LiveVideoStreamStatus> {
963976
const audioSpace = await this.getAudioSpaceById(audioSpaceId);
964977

@@ -984,7 +997,7 @@ export class Scraper {
984997
* @returns The response containing the cookie and user information.
985998
*/
986999
public async loginTwitterToken(
987-
jwt: string,
1000+
jwt: string,
9881001
): Promise<LoginTwitterTokenResponse> {
9891002
return await fetchLoginTwitterToken(jwt, this.auth);
9901003
}
@@ -999,4 +1012,13 @@ export class Scraper {
9991012

10001013
return loginResponse.cookie;
10011014
}
1015+
1016+
/**
1017+
* Fetches an article (long form tweet) by its ID.
1018+
* @param id The ID of the tweet that contains the article (the tweet's own ID, not the ID that appears in the article URL http://x.com/i/article/id).
1019+
* @returns The {@link TimelineArticle} object, or `null` if it couldn't be fetched.
1020+
*/
1021+
public getArticle(id: string): Promise<TimelineArticle | null> {
1022+
return getArticle(id, this.auth);
1023+
}
10021024
}

src/timeline-v1.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,50 @@ export interface SearchResultRaw {
7676
legacy?: LegacyTweetRaw;
7777
}
7878

79+
export interface TimelineArticleResultRaw {
80+
id?: string;
81+
rest_id?: string;
82+
title?: string;
83+
preview_text?: string;
84+
cover_media?: {
85+
media_id?: string;
86+
media_info?: {
87+
original_img_url?: string;
88+
original_img_height?: number;
89+
original_img_width?: number;
90+
};
91+
};
92+
content_state?: {
93+
blocks?: {
94+
key?: string;
95+
data?: string;
96+
text?: string;
97+
entityRanges?: {
98+
key?: number;
99+
length?: number;
100+
offset?: number;
101+
}[];
102+
}[];
103+
};
104+
entityMap?: {
105+
key?: string;
106+
value?: {
107+
type?: string; // LINK, MEDIA, TWEET
108+
mutability?: string;
109+
data?: {
110+
entityKey?: string;
111+
url?: string;
112+
tweetId?: string;
113+
mediaItems?: {
114+
localMediaId?: string;
115+
mediaCategory?: string;
116+
mediaId?: string;
117+
}[];
118+
};
119+
};
120+
}[];
121+
}
122+
79123
export interface TimelineResultRaw {
80124
rest_id?: string;
81125
__typename?: string;
@@ -97,6 +141,11 @@ export interface TimelineResultRaw {
97141
};
98142
};
99143
};
144+
article?: {
145+
article_results?: {
146+
result?: TimelineArticleResultRaw;
147+
};
148+
};
100149
quoted_status_result?: {
101150
result?: TimelineResultRaw;
102151
};

src/timeline-v2.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,3 +423,41 @@ export function parseThreadedConversation(
423423

424424
return tweets;
425425
}
426+
427+
export interface TimelineArticle {
428+
id: string;
429+
articleId: string;
430+
title: string;
431+
previewText: string;
432+
coverMediaUrl?: string;
433+
text: string;
434+
}
435+
436+
export function parseArticle(
437+
conversation: ThreadedConversation,
438+
): TimelineArticle[] {
439+
const articles: TimelineArticle[] = [];
440+
for (const instruction of conversation.data
441+
?.threaded_conversation_with_injections_v2?.instructions ?? []) {
442+
for (const entry of instruction.entries ?? []) {
443+
const id = entry.content?.itemContent?.tweet_results?.result?.rest_id;
444+
const article =
445+
entry.content?.itemContent?.tweet_results?.result?.article
446+
?.article_results?.result;
447+
if (!id || !article) continue;
448+
const text =
449+
article.content_state?.blocks
450+
?.map((block) => block.text)
451+
.join('\n\n') ?? '';
452+
articles.push({
453+
id,
454+
articleId: article.rest_id || '',
455+
coverMediaUrl: article.cover_media?.media_info?.original_img_url,
456+
previewText: article.preview_text || '',
457+
text,
458+
title: article.title || '',
459+
});
460+
}
461+
}
462+
return articles;
463+
}

src/tweets.test.ts

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { getScraper } from './test-utils';
22
import { QueryTweetsResponse } from './timeline-v1';
3-
import { Mention, Tweet } from './tweets';
3+
import { Mention, Tweet, getTweetAnonymous } from './tweets';
44
import fs from 'fs';
55
import path from 'path';
66

@@ -462,10 +462,10 @@ test('scraper can send a tweet with image and video', async () => {
462462

463463
// Read test image and video files from the test-assets directory
464464
const imageBuffer = fs.readFileSync(
465-
path.join(__dirname, '../test-assets/test-image.jpeg')
465+
path.join(__dirname, '../test-assets/test-image.jpeg'),
466466
);
467467
const videoBuffer = fs.readFileSync(
468-
path.join(__dirname, '../test-assets/test-video.mp4')
468+
path.join(__dirname, '../test-assets/test-video.mp4'),
469469
);
470470

471471
// Prepare media data array with both image and video
@@ -502,10 +502,10 @@ test('scraper can quote tweet with image and video', async () => {
502502

503503
// Read test image and video files from the test-assets directory
504504
const imageBuffer = fs.readFileSync(
505-
path.join(__dirname, '../test-assets/test-image.jpeg')
505+
path.join(__dirname, '../test-assets/test-image.jpeg'),
506506
);
507507
const videoBuffer = fs.readFileSync(
508-
path.join(__dirname, '../test-assets/test-video.mp4')
508+
path.join(__dirname, '../test-assets/test-video.mp4'),
509509
);
510510

511511
// Prepare media data array with both image and video
@@ -531,13 +531,11 @@ test('scraper can quote tweet with media', async () => {
531531

532532
// Read test image file
533533
const imageBuffer = fs.readFileSync(
534-
path.join(__dirname, '../test-assets/test-image.jpeg')
534+
path.join(__dirname, '../test-assets/test-image.jpeg'),
535535
);
536536

537537
// Prepare media data with the image
538-
const mediaData = [
539-
{ data: imageBuffer, mediaType: 'image/jpeg' },
540-
];
538+
const mediaData = [{ data: imageBuffer, mediaType: 'image/jpeg' }];
541539

542540
// Send a quote tweet with the image attachment
543541
const response = await scraper.sendQuoteTweet(quoteText, quotedTweetId, {
@@ -555,13 +553,11 @@ test('sendTweetWithMedia successfully sends a tweet with media', async () => {
555553

556554
// Read a test image file
557555
const imageBuffer = fs.readFileSync(
558-
path.join(__dirname, '../test-assets/test-image.jpeg')
556+
path.join(__dirname, '../test-assets/test-image.jpeg'),
559557
);
560558

561559
// Prepare media data with the image
562-
const mediaData = [
563-
{ data: imageBuffer, mediaType: 'image/jpeg' },
564-
];
560+
const mediaData = [{ data: imageBuffer, mediaType: 'image/jpeg' }];
565561

566562
// Send a tweet with the image attachment
567563
const result = await scraper.sendTweet(draftText, undefined, mediaData);
@@ -593,4 +589,23 @@ test('scraper can follow user', async () => {
593589

594590
// Test should not throw an error
595591
await expect(scraper.followUser(username)).resolves.not.toThrow();
596-
}, 30000);
592+
}, 30000);
593+
594+
test('scraper cannot get article using getTweet', async () => {
595+
const scraper = await getScraper();
596+
// Tweet announcing Articles on X; the article itself is at http://x.com/i/article/1765821414056120320
597+
const tweet = await scraper.getTweet('1765884209527394325');
598+
599+
expect(tweet).not.toBeNull();
600+
expect(tweet?.text).toMatch(/https?:\/\/t.co\//);
601+
expect(tweet?.urls[0]).toMatch(/https?:\/\/x.com\/i\/article\//);
602+
}, 30000);
603+
604+
test('scraper can get article using getArticle', async () => {
605+
const scraper = await getScraper();
606+
// Tweet announcing Articles on X; the article itself is at http://x.com/i/article/1765821414056120320 (the ID passed below is the tweet's, not the article's)
607+
const article = await scraper.getArticle('1765884209527394325');
608+
609+
expect(article).not.toBeNull();
610+
expect(article?.title).toMatch(/Introducing Articles on X/);
611+
}, 30000);

src/tweets.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import {
99
parseTimelineEntryItemContentRaw,
1010
ThreadedConversation,
1111
parseThreadedConversation,
12+
parseArticle,
13+
TimelineArticle,
1214
} from './timeline-v2';
1315
import { getTweetTimeline } from './timeline-async';
1416
import { apiRequestFactory } from './api-data';
@@ -1479,3 +1481,28 @@ export async function createCreateLongTweetRequest(
14791481

14801482
return response;
14811483
}
1484+
1485+
export async function getArticle(
1486+
id: string,
1487+
auth: TwitterAuth,
1488+
): Promise<TimelineArticle | null> {
1489+
const tweetDetailRequest =
1490+
apiRequestFactory.createTweetDetailArticleRequest();
1491+
tweetDetailRequest.variables.focalTweetId = id;
1492+
1493+
const res = await requestApi<ThreadedConversation>(
1494+
tweetDetailRequest.toRequestUrl(),
1495+
auth,
1496+
);
1497+
1498+
if (!res.success) {
1499+
throw res.err;
1500+
}
1501+
1502+
if (!res.value) {
1503+
return null;
1504+
}
1505+
1506+
const articles = parseArticle(res.value);
1507+
return articles.find((article) => article.id === id) ?? null;
1508+
}

0 commit comments

Comments
 (0)