Merge pull request #37 from y-pakorn/feat/get-article

wtfsayo · web-flow · commit 0fa3988ce139 · 2025-01-05T09:02:29.000+05:30
Add `getArticle` function to scraper.
diff --git a/src/api-data.ts b/src/api-data.ts
@@ -15,6 +15,8 @@ const endpoints = {
     'https://twitter.com/i/api/graphql/eSSNbhECHHWWALkkQq-YTA/Likes?variables=%7B%22userId%22%3A%222244196397%22%2C%22count%22%3A20%2C%22includePromotedContent%22%3Afalse%2C%22withClientEventToken%22%3Afalse%2C%22withBirdwatchNotes%22%3Afalse%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D',
   TweetDetail:
     'https://twitter.com/i/api/graphql/xOhkmRac04YFZmOzU9PJHg/TweetDetail?variables=%7B%22focalTweetId%22%3A%221237110546383724547%22%2C%22with_rux_injections%22%3Afalse%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Afalse%7D',
+  TweetDetailArticle:
+    'https://twitter.com/i/api/graphql/GtcBtFhtQymrpxAs5MALVA/TweetDetail?variables=%7B%22focalTweetId%22%3A%221765884209527394325%22%2C%22with_rux_injections%22%3Atrue%2C%22rankingMode%22%3A%22Relevance%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%7D&features=%7B%22profile_label_improvements_pcf_label_in_post_enabled%22%3Afalse%2C%22rweb_tipjar_consumption_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22premium_content_api_read_enabled%22%3Afalse%2C%22communities_web_enable_tweet_community_results_fetch%22%3Atrue%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22responsive_web_grok_analyze_button_fetch_trends_enabled%22%3Atrue%2C%22responsive_web_grok_analyze_post_followups_enabled%22%3Afalse%2C%22responsive_web_grok_share_attachment_enabled%22%3Atrue%2C%22articles_preview_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22creator_subscriptions_quote_tweet_preview_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Atrue%2C%22withArticlePlainText%22%3Afalse%2C%22withGrokAnalyze%22%3Afalse%2C%22withDisallowedReplyControls%22%3Afalse%7D',
   TweetResultByRestId:
     'https://twitter.com/i/api/graphql/DJS3BdhUhcaEpZ7B7irJDg/TweetResultByRestId?variables=%7B%22tweetId%22%3A%221237110546383724547%22%2C%22withCommunity%22%3Afalse%2C%22includePromotedContent%22%3Afalse%2C%22withVoice%22%3Afalse%7D&features=%7B%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D',
   ListTweets:
diff --git a/src/scraper.ts b/src/scraper.ts
@@ -53,8 +53,13 @@ import {
   retweet,
   createCreateNoteTweetRequest,
   createCreateLongTweetRequest,
+  getArticle,
 } from './tweets';
-import { parseTimelineTweetsV2, TimelineV2 } from './timeline-v2';
+import {
+  parseTimelineTweetsV2,
+  TimelineArticle,
+  TimelineV2,
+} from './timeline-v2';
 import { fetchHomeTimeline } from './timeline-home';
 import { fetchFollowingTimeline } from './timeline-following';
 import {
@@ -75,9 +80,17 @@ import {
   fetchAudioSpaceById,
   fetchAuthenticatePeriscope,
   fetchBrowseSpaceTopics,
-  fetchCommunitySelectQuery, fetchLiveVideoStreamStatus, fetchLoginTwitterToken
+  fetchCommunitySelectQuery,
+  fetchLiveVideoStreamStatus,
+  fetchLoginTwitterToken,
 } from './spaces';
-import {AudioSpace, Community, LiveVideoStreamStatus, LoginTwitterTokenResponse, Subtopic} from './types/spaces';
+import {
+  AudioSpace,
+  Community,
+  LiveVideoStreamStatus,
+  LoginTwitterTokenResponse,
+  Subtopic,
+} from './types/spaces';
 
 const twUrl = 'https://twitter.com';
 const UserTweetsUrl =
@@ -945,7 +958,7 @@ export class Scraper {
    * @returns The status of the Audio Space stream.
    */
   public async getAudioSpaceStreamStatus(
-      mediaKey: string,
+    mediaKey: string,
   ): Promise<LiveVideoStreamStatus> {
     return await fetchLiveVideoStreamStatus(mediaKey, this.auth);
   }
@@ -958,7 +971,7 @@ export class Scraper {
    * @returns The status of the Audio Space stream.
    */
   public async getAudioSpaceStatus(
-      audioSpaceId: string,
+    audioSpaceId: string,
   ): Promise<LiveVideoStreamStatus> {
     const audioSpace = await this.getAudioSpaceById(audioSpaceId);
 
@@ -984,7 +997,7 @@ export class Scraper {
    * @returns The response containing the cookie and user information.
    */
   public async loginTwitterToken(
-      jwt: string,
+    jwt: string,
   ): Promise<LoginTwitterTokenResponse> {
     return await fetchLoginTwitterToken(jwt, this.auth);
   }
@@ -999,4 +1012,13 @@ export class Scraper {
 
     return loginResponse.cookie;
   }
+
+  /**
+   * Fetches an article (long form tweet) through the tweet that carries it.
+   * @param id The ID of the tweet containing the article (sent as `focalTweetId`), not the ID from the article URL (http://x.com/i/article/id).
+   * @returns The {@link TimelineArticle} object, or `null` if the response has no article for that tweet; rejects if the request fails.
+   */
+  public getArticle(id: string): Promise<TimelineArticle | null> {
+    return getArticle(id, this.auth);
+  }
 }
diff --git a/src/timeline-v1.ts b/src/timeline-v1.ts
@@ -76,6 +76,50 @@ export interface SearchResultRaw {
   legacy?: LegacyTweetRaw;
 }
 
+export interface TimelineArticleResultRaw { // Raw article payload (TimelineResultRaw.article.article_results.result); every field is optional.
+  id?: string;
+  rest_id?: string; // surfaced by parseArticle as TimelineArticle.articleId
+  title?: string; // surfaced by parseArticle as TimelineArticle.title
+  preview_text?: string; // surfaced by parseArticle as TimelineArticle.previewText
+  cover_media?: {
+    media_id?: string;
+    media_info?: {
+      original_img_url?: string; // surfaced by parseArticle as TimelineArticle.coverMediaUrl
+      original_img_height?: number;
+      original_img_width?: number;
+    };
+  };
+  content_state?: {
+    blocks?: {
+      key?: string;
+      data?: string;
+      text?: string; // parseArticle joins these block texts with blank lines
+      entityRanges?: { // NOTE(review): presumably text spans tied to entityMap entries — confirm
+        key?: number;
+        length?: number;
+        offset?: number;
+      }[];
+    }[];
+  };
+  entityMap?: { // typed as a list of key/value pairs, not an object map
+    key?: string;
+    value?: {
+      type?: string; // LINK, MEDIA, TWEET
+      mutability?: string;
+      data?: {
+        entityKey?: string;
+        url?: string;
+        tweetId?: string;
+        mediaItems?: {
+          localMediaId?: string;
+          mediaCategory?: string;
+          mediaId?: string;
+        }[];
+      };
+    };
+  }[];
+}
+
 export interface TimelineResultRaw {
   rest_id?: string;
   __typename?: string;
@@ -97,6 +141,11 @@ export interface TimelineResultRaw {
       };
     };
   };
+  article?: {
+    article_results?: {
+      result?: TimelineArticleResultRaw;
+    };
+  };
   quoted_status_result?: {
     result?: TimelineResultRaw;
   };
diff --git a/src/timeline-v2.ts b/src/timeline-v2.ts
@@ -423,3 +423,41 @@ export function parseThreadedConversation(
 
   return tweets;
 }
+
+export interface TimelineArticle { // Flattened article as produced by parseArticle.
+  id: string; // rest_id of the tweet result that carries the article
+  articleId: string; // the article's own rest_id; '' when the raw payload has none
+  title: string; // raw title; '' when missing
+  previewText: string; // raw preview_text; '' when missing
+  coverMediaUrl?: string; // cover_media.media_info.original_img_url, when present
+  text: string; // content_state block texts joined with '\n\n'; '' when there are no blocks
+}
+
+export function parseArticle(
+  conversation: ThreadedConversation,
+): TimelineArticle[] { // one item per conversation entry whose tweet result carries an article
+  const articles: TimelineArticle[] = [];
+  for (const instruction of conversation.data
+    ?.threaded_conversation_with_injections_v2?.instructions ?? []) {
+    for (const entry of instruction.entries ?? []) {
+      const id = entry.content?.itemContent?.tweet_results?.result?.rest_id; // only the entry's direct itemContent tweet is inspected
+      const article =
+        entry.content?.itemContent?.tweet_results?.result?.article
+          ?.article_results?.result;
+      if (!id || !article) continue; // skip entries lacking a tweet id or an attached article
+      const text =
+        article.content_state?.blocks
+          ?.map((block) => block.text) // a block without text becomes an empty paragraph when joined
+          .join('\n\n') ?? '';
+      articles.push({
+        id, // the tweet's rest_id, not the article's
+        articleId: article.rest_id || '',
+        coverMediaUrl: article.cover_media?.media_info?.original_img_url,
+        previewText: article.preview_text || '',
+        text,
+        title: article.title || '',
+      });
+    }
+  }
+  return articles;
+}
diff --git a/src/tweets.test.ts b/src/tweets.test.ts
@@ -1,6 +1,6 @@
 import { getScraper } from './test-utils';
 import { QueryTweetsResponse } from './timeline-v1';
-import { Mention, Tweet } from './tweets';
+import { Mention, Tweet, getTweetAnonymous } from './tweets';
 import fs from 'fs';
 import path from 'path';
 
@@ -462,10 +462,10 @@ test('scraper can send a tweet with image and video', async () => {
 
   // Read test image and video files from the test-assets directory
   const imageBuffer = fs.readFileSync(
-    path.join(__dirname, '../test-assets/test-image.jpeg')
+    path.join(__dirname, '../test-assets/test-image.jpeg'),
   );
   const videoBuffer = fs.readFileSync(
-    path.join(__dirname, '../test-assets/test-video.mp4')
+    path.join(__dirname, '../test-assets/test-video.mp4'),
   );
 
   // Prepare media data array with both image and video
@@ -502,10 +502,10 @@ test('scraper can quote tweet with image and video', async () => {
 
   // Read test image and video files from the test-assets directory
   const imageBuffer = fs.readFileSync(
-    path.join(__dirname, '../test-assets/test-image.jpeg')
+    path.join(__dirname, '../test-assets/test-image.jpeg'),
   );
   const videoBuffer = fs.readFileSync(
-    path.join(__dirname, '../test-assets/test-video.mp4')
+    path.join(__dirname, '../test-assets/test-video.mp4'),
   );
 
   // Prepare media data array with both image and video
@@ -531,13 +531,11 @@ test('scraper can quote tweet with media', async () => {
 
   // Read test image file
   const imageBuffer = fs.readFileSync(
-    path.join(__dirname, '../test-assets/test-image.jpeg')
+    path.join(__dirname, '../test-assets/test-image.jpeg'),
   );
 
   // Prepare media data with the image
-  const mediaData = [
-    { data: imageBuffer, mediaType: 'image/jpeg' },
-  ];
+  const mediaData = [{ data: imageBuffer, mediaType: 'image/jpeg' }];
 
   // Send a quote tweet with the image attachment
   const response = await scraper.sendQuoteTweet(quoteText, quotedTweetId, {
@@ -555,13 +553,11 @@ test('sendTweetWithMedia successfully sends a tweet with media', async () => {
 
   // Read a test image file
   const imageBuffer = fs.readFileSync(
-    path.join(__dirname, '../test-assets/test-image.jpeg')
+    path.join(__dirname, '../test-assets/test-image.jpeg'),
   );
 
   // Prepare media data with the image
-  const mediaData = [
-    { data: imageBuffer, mediaType: 'image/jpeg' },
-  ];
+  const mediaData = [{ data: imageBuffer, mediaType: 'image/jpeg' }];
 
   // Send a tweet with the image attachment
   const result = await scraper.sendTweet(draftText, undefined, mediaData);
@@ -593,4 +589,23 @@ test('scraper can follow user', async () => {
 
   // Test should not throw an error
   await expect(scraper.followUser(username)).resolves.not.toThrow();
-}, 30000);
+}, 30000);
+
+test('scraper cannot get article using getTweet', async () => {
+  const scraper = await getScraper();
+  // Tweet 1765884209527394325 links to the X article http://x.com/i/article/1765821414056120320
+  const tweet = await scraper.getTweet('1765884209527394325');
+
+  expect(tweet).not.toBeNull();
+  expect(tweet?.text).toMatch(/https?:\/\/t.co\//); // getTweet only yields the shortened link, not the article body
+  expect(tweet?.urls[0]).toMatch(/https?:\/\/x.com\/i\/article\//);
+}, 30000);
+
+test('scraper can get article using getArticle', async () => {
+  const scraper = await getScraper();
+  // Pass the tweet ID, not the article ID from http://x.com/i/article/1765821414056120320
+  const article = await scraper.getArticle('1765884209527394325');
+
+  expect(article).not.toBeNull();
+  expect(article?.title).toMatch(/Introducing Articles on X/);
+}, 30000);
diff --git a/src/tweets.ts b/src/tweets.ts
@@ -9,6 +9,8 @@ import {
   parseTimelineEntryItemContentRaw,
   ThreadedConversation,
   parseThreadedConversation,
+  parseArticle,
+  TimelineArticle,
 } from './timeline-v2';
 import { getTweetTimeline } from './timeline-async';
 import { apiRequestFactory } from './api-data';
@@ -1479,3 +1481,28 @@ export async function createCreateLongTweetRequest(
 
   return response;
 }
+
+export async function getArticle( // Fetches the article attached to a tweet via the TweetDetailArticle request.
+  id: string, // ID of the tweet that carries the article; sent as focalTweetId
+  auth: TwitterAuth,
+): Promise<TimelineArticle | null> {
+  const tweetDetailRequest =
+    apiRequestFactory.createTweetDetailArticleRequest();
+  tweetDetailRequest.variables.focalTweetId = id;
+
+  const res = await requestApi<ThreadedConversation>(
+    tweetDetailRequest.toRequestUrl(),
+    auth,
+  );
+
+  if (!res.success) {
+    throw res.err; // a failed request rejects the returned promise instead of yielding null
+  }
+
+  if (!res.value) {
+    return null;
+  }
+
+  const articles = parseArticle(res.value);
+  return articles.find((article) => article.id === id) ?? null; // article.id is the tweet's rest_id, so this selects the focal tweet's article
+}