Skip to content

Commit

Permalink
Only remove Twitter image URLs, not 3rd party media URLs
Browse files Browse the repository at this point in the history
This is important because Flickr and Instagram images (amongst
others) may have comments and other details on the page and the URL
may be integral to the content of the tweet, whereas Twitter images
are reliably at the end of the tweet.

We also no longer load 3rd party images when Twitter images exist, because
this used to lead to double images, and we now keep the 3rd party URLs.
  • Loading branch information
IBBoard committed Aug 20, 2016
1 parent 4b8e8c7 commit 4721796
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 9 deletions.
15 changes: 15 additions & 0 deletions src/MediaDownloader.c
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,21 @@ is_media_candidate (const char *url)

}

/*
 * is_twitter_media_candidate:
 * @url: the URL to test; it is passed through canonicalize_url() first.
 *
 * Checks whether the canonicalized URL begins with one of the prefixes
 * used for Twitter's own media. "pbs.twimg.com/media/" is always checked;
 * "/photo/1/" and "video.twimg.com/ext_tw_video" are only checked when
 * VIDEO is defined at compile time.
 *
 * NOTE(review): a canonicalized URL starting with "/photo/1/" looks
 * unusual -- confirm whether a suffix match was intended here.
 *
 * Returns: non-zero if one of the prefixes matches, zero otherwise.
 */
gboolean
is_twitter_media_candidate (const char *url)
{
  const char *canonical = canonicalize_url (url);

#ifdef VIDEO
  if (g_str_has_prefix (canonical, "/photo/1/"))
    return TRUE;

  if (g_str_has_prefix (canonical, "video.twimg.com/ext_tw_video"))
    return TRUE;
#endif

  /* Twitter-hosted images. */
  return g_str_has_prefix (canonical, "pbs.twimg.com/media/");
}

static void
cb_media_downloader_init (CbMediaDownloader *downloader)
{
Expand Down
1 change: 1 addition & 0 deletions src/MediaDownloader.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ gboolean cb_media_downloader_load_finish (CbMediaDownloader *downloader,

gboolean is_media_candidate (const char *url);

/*
 * Tells whether @url, once canonicalized, starts with a prefix of
 * Twitter's own media ("pbs.twimg.com/media/", plus the video-related
 * prefixes when VIDEO is defined).
 */
gboolean is_twitter_media_candidate (const char *url);

G_END_DECLS

Expand Down
2 changes: 1 addition & 1 deletion src/TextTransform.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ is_media_url (const char *url,
const char *display_text,
gsize media_count)
{
return (is_media_candidate (url != NULL ? url : display_text) && media_count == 1) ||
return (is_twitter_media_candidate (url != NULL ? url : display_text) && media_count == 1) ||
g_str_has_prefix (display_text, "pic.twitter.com/");
}

Expand Down
25 changes: 17 additions & 8 deletions src/Types.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,14 +342,6 @@ cb_mini_tweet_parse_entities (CbMiniTweet *t,
const char *expanded_url = json_object_get_string_member (url, "expanded_url");
JsonArray *indices;

if (is_media_candidate (expanded_url))
{
t->medias[t->n_medias] = cb_media_new ();
t->medias[t->n_medias]->url = g_strdup (expanded_url);
t->medias[t->n_medias]->type = cb_media_type_from_url (expanded_url);
t->n_medias ++;
}

indices = json_object_get_array_member (url, "indices");
t->entities[url_index].from = json_array_get_int_element (indices, 0);
t->entities[url_index].to = json_array_get_int_element (indices, 1);
Expand Down Expand Up @@ -537,6 +529,23 @@ cb_mini_tweet_parse_entities (CbMiniTweet *t,
}
}

/* Fallback: only when no media entries have been added so far, scan the
 * URL entities for media candidates. Skipping this scan when media already
 * exist avoids showing the same picture twice. */
if (t->n_medias == 0)
{
for (i = 0, p = json_array_get_length (urls); i < p; i ++)
{
JsonObject *url = json_node_get_object (json_array_get_element (urls, i));
const char *expanded_url = json_object_get_string_member (url, "expanded_url");

if (is_media_candidate (expanded_url))
{
/* Append a new media entry that owns a copy of the expanded URL. */
/* NOTE(review): no capacity check on t->medias is visible here --
 * confirm the array was sized to hold every URL entity. */
t->medias[t->n_medias] = cb_media_new ();
t->medias[t->n_medias]->url = g_strdup (expanded_url);
t->medias[t->n_medias]->type = cb_media_type_from_url (expanded_url);
t->n_medias ++;
}
}
}

t->n_entities = url_index;
#if 0
g_debug ("Wasted entities: %d", max_entities - t->n_entities);
Expand Down

0 comments on commit 4721796

Please sign in to comment.