Skip to content

Commit baa1275

Browse files
committed
Ignore hashtags and mentions...
...when extracting links. Plus minor gem updates.
1 parent 4b52eda commit baa1275

File tree

4 files changed

+60
-3
lines changed

4 files changed

+60
-3
lines changed

Gemfile.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ GEM
315315
ruby-progressbar (1.13.0)
316316
rubyzip (2.4.1)
317317
securerandom (0.4.1)
318-
selenium-webdriver (4.28.0)
318+
selenium-webdriver (4.29.0)
319319
base64 (~> 0.2)
320320
logger (~> 1.4)
321321
rexml (~> 3.2, >= 3.2.5)
@@ -336,7 +336,7 @@ GEM
336336
base64
337337
stimulus-rails (1.3.4)
338338
railties (>= 6.0.0)
339-
stringio (3.1.3)
339+
stringio (3.1.5)
340340
tailwindcss-rails (3.3.1)
341341
railties (>= 7.0.0)
342342
tailwindcss-ruby (~> 3.0)

app/models/content_object.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class ContentObject < ApplicationRecord
3131
class << self
3232
def json_to_attributes(json_object)
3333
hashtags = json_object["tag"].filter_map { |t| t["name"] if t["type"] == "Hashtag" }
34-
links = Nokogiri::HTML5::DocumentFragment.parse(json_object["content"]).css("a[href]").map { |a| a["href"] }
34+
links = LinkExtractor.new(json_object).extracted_urls
3535
{
3636
object_type: json_object["type"],
3737
published_at: json_object["published"],
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Pulls link URLs out of the HTML stored under the "content" key of a parsed
# JSON object, skipping hashtag links, anchors carrying the "u-url" class, and
# any anchor whose href matches a "Mention" entry in the object's "tag" list.
class ContentObject::LinkExtractor
  # @param content_json [Hash] parsed JSON; only its "content" and "tag" keys
  #   are read by this class.
  def initialize(content_json)
    @content_json = content_json
  end

  # @return [Array<String>] href values of the anchors that survive filtering.
  #   The result is computed on first call and reused afterwards.
  def extracted_urls
    @extracted_urls ||= extract_urls
  end

  private

  # Collects the href of each candidate anchor unless it points at a mention.
  def extract_urls
    candidate_anchors.filter_map do |anchor|
      href = anchor["href"]
      href unless mention?(href)
    end
  end

  # Anchors that have an href, whose rel does not include the "tag" token
  # (hashtag links), and that do not carry the "u-url" class.
  def candidate_anchors
    parsed_fragment.css("a[href]:not([rel~=tag]):not(.u-url)")
  end

  # Parses the "content" HTML into a fragment; re-parsed on every call.
  def parsed_fragment
    html = @content_json["content"]
    Nokogiri::HTML5::DocumentFragment.parse(html)
  end

  # @return [Boolean] true when +uri+ exactly equals the href of a tag entry
  #   whose "type" is "Mention".
  def mention?(uri)
    mention_hrefs.include?(uri)
  end

  # Hrefs of all "Mention" tag entries, built lazily on first use. A missing
  # "tag" key is treated as an empty list.
  def mention_hrefs
    @mentions ||= begin
      tags = @content_json["tag"] || []
      tags.filter_map do |tag|
        next unless tag["type"] == "Mention"

        tag["href"]
      end
    end
  end
end
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
require "test_helper"
2+
3+
# Exercises ContentObject::LinkExtractor against a fixture that mixes a
# hashtag link, two mention links and one ordinary link.
class ContentObject::LinkExtractorTest < ActiveSupport::TestCase
  test "#extracted_urls returns an array of links ignoring hashtags and mentions" do
    content_object = mock_content_object(content:)
    # The @user2 anchor has no distinguishing class or rel, so it can only be
    # recognised as a mention through the "tag" list.
    content_object["tag"] << mention

    extracted_urls = ContentObject::LinkExtractor.new(content_object).extracted_urls

    # Comparing the whole array checks both the count and the value, and shows
    # every unexpected URL in the failure message.
    assert_equal ["https://example.com/home"], extracted_urls
  end

  private

  # HTML fixture containing, in order: a hashtag link (rel="tag"), a mention
  # link with the "u-url" class, an ordinary external link, and a bare anchor
  # pointing at @user2.
  def content
    <<~HTML
      <p>Test</p><p><a href="https://fedi.example.com/tags/testtag" class="mention hashtag" rel="tag">#<span>testtag</span></a> test <span class="h-card" translate="no"><a href="https://fedi.example.com/@user1" class="u-url mention">@<span>user1</span></a></span><a href="https://example.com/home" target="_blank" rel="nofollow noopener" translate="no"><span class="invisible">https://</span><span class="">example.com/home</span><span class="invisible"></span></a> test <a href="https://fedi.example.com/@user2">@user2</a>
    HTML
  end

  # Tag entry of type "Mention" whose href matches the bare @user2 anchor in
  # the fixture.
  def mention
    {
      "type" => "Mention",
      "href" => "https://fedi.example.com/@user2",
      "name" => "@user2@fedi.example.com"
    }
  end
end

0 commit comments

Comments
 (0)