Skip to content

Commit b07a637

Browse files
authored
Merge pull request #1760 from coderdojo-japan/refactor-news-rake-task
refactor: YAGNI/DRY に沿って news.rake のコードを共通化
2 parents cbe0d9e + 1670420 commit b07a637

File tree

2 files changed

+72
-118
lines changed

2 files changed

+72
-118
lines changed

db/news.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
---
2-
news:
32
- id: 13
43
url: https://news.coderdojo.jp/2025/10/04/dojoletter-vol-89-2025%e5%b9%b408%e6%9c%88%e5%8f%b7/
54
title: DojoLetter Vol.89 2025年08月号

lib/tasks/news.rake

Lines changed: 72 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,118 +1,75 @@
11
require 'rss'
2-
require 'net/http'
3-
require 'uri'
4-
require 'yaml'
5-
require 'time'
6-
require 'active_support/broadcast_logger'
7-
8-
def safe_open(url)
9-
uri = URI.parse(url)
10-
raise "不正なURLです: #{url}" unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
11-
12-
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
13-
request = Net::HTTP::Get.new(uri)
14-
response = http.request(request)
15-
response.body
16-
end
17-
end
18-
19-
def fetch_rss_items(url, logger)
20-
logger.info("Fetching RSS → #{url}")
21-
begin
22-
rss = safe_open(url)
23-
feed = RSS::Parser.parse(rss, false)
24-
feed.items.map { |item| item_to_hash(item) }
25-
rescue => e
26-
logger.warn("⚠️ Failed to fetch #{url}: #{e.message}")
27-
[]
28-
end
29-
end
302

31-
def item_to_hash(item)
32-
{
33-
'url' => item.link,
34-
'title' => item.title,
35-
'published_at' => item.pubDate.to_s
36-
}
37-
end
3+
NEWS_YAML_PATH = 'db/news.yml'.freeze
4+
NEWS_LOG_PATH = 'log/news.log'.freeze
385

396
namespace :news do
40-
desc 'RSS フィードを取得し、db/news.yml に保存'
7+
desc "RSS フィードを取得し、#{NEWS_YAML_PATH} に保存"
418
task fetch: :environment do
429
# ロガー設定(ファイル+コンソール出力)
43-
file_logger = ActiveSupport::Logger.new('log/news.log')
4410
console = ActiveSupport::Logger.new(STDOUT)
45-
logger = ActiveSupport::BroadcastLogger.new(file_logger, console)
11+
logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH)
12+
logger = ActiveSupport::BroadcastLogger.new(logger_file, console)
4613

4714
logger.info('==== START news:fetch ====')
4815

49-
# 既存の news.yml を読み込み
50-
yaml_path = Rails.root.join('db', 'news.yml')
51-
existing_news = if File.exist?(yaml_path)
52-
YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)['news'] || []
53-
else
54-
[]
55-
end
56-
57-
# テスト/ステージング環境ではサンプルファイル、本番は実サイトのフィード
58-
feed_urls = if Rails.env.test? || Rails.env.staging?
59-
[Rails.root.join('spec', 'fixtures', 'sample_news.rss').to_s]
60-
else
61-
[
62-
'https://news.coderdojo.jp/feed/'
63-
# 必要に応じて他 Dojo の RSS もここに追加可能
64-
# 'https://coderdojotokyo.org/feed',
65-
]
66-
end
67-
68-
new_items = feed_urls.flat_map { |url| fetch_rss_items(url, logger) }
69-
70-
# 既存データをハッシュに変換(URL をキーに)
71-
existing_items_hash = existing_news.index_by { |item| item['url'] }
72-
73-
# 新しいアイテムと既存アイテムを分離
74-
truly_new_items = []
16+
# テスト/ステージング環境ではテスト用フィード、それ以外(本番/開発環境など)では実フィード
17+
DOJO_NEWS_FEED = 'https://news.coderdojo.jp/feed/'
18+
TEST_NEWS_FEED = Rails.root.join('spec', 'fixtures', 'sample_news.rss')
19+
RSS_FEED_LIST = (Rails.env.test? || Rails.env.staging?) ?
20+
[TEST_NEWS_FEED] :
21+
[DOJO_NEWS_FEED]
22+
23+
# RSS のデータ構造を、News のデータ構造に変換
24+
fetched_items = RSS_FEED_LIST.flat_map do |feed|
25+
feed = RSS::Parser.parse(feed, false)
26+
feed.items.map { |item|
27+
{
28+
'url' => item.link,
29+
'title' => item.title,
30+
'published_at' => item.pubDate.to_s
31+
}
32+
}
33+
end
34+
35+
# 取得済みニュース (YAML) を読み込み、URL をキーとしたハッシュに変換
36+
existing_items = YAML.safe_load(File.read NEWS_YAML_PATH).index_by { it['url'] }
37+
existing_max_id = existing_items.flat_map { |url, item| item['id'].to_i }.max || 0
38+
39+
# 新規記事と既存記事を分離
40+
created_items = []
7541
updated_items = []
7642

77-
new_items.each do |new_item|
78-
if existing_items_hash.key?(new_item['url'])
79-
existing_item = existing_items_hash[new_item['url']]
80-
# タイトルまたは公開日が変わった場合のみ更新
81-
if existing_item['title'] != new_item['title'] || existing_item['published_at'] != new_item['published_at']
82-
updated_items << existing_item.merge(new_item)
83-
end
84-
else
85-
truly_new_items << new_item
43+
fetched_items.each do |fetched_item|
44+
existing_item = existing_items[fetched_item['url']]
45+
46+
if existing_item.nil?
47+
# 新規アイテムならそのまま追加
48+
created_items << fetched_item
49+
elsif existing_item['title'] != fetched_item['title'] || existing_item['published_at'] != fetched_item['published_at']
50+
# タイトルまたは公開日が変わっていたら更新
51+
updated_items << existing_item.merge(fetched_item)
8652
end
8753
end
8854

89-
# 既存の最大IDを取得
90-
max_existing_id = existing_news.map { |item| item['id'].to_i }.max || 0
91-
9255
# 新しいアイテムのみに ID を割り当て(古い順)
93-
truly_new_items_sorted = truly_new_items.sort_by { |item|
94-
Time.parse(item['published_at'])
95-
}
96-
97-
truly_new_items_sorted.each_with_index do |item, index|
98-
item['id'] = max_existing_id + index + 1
56+
created_items.sort_by! { Time.parse it['published_at'] }
57+
created_items.each.with_index(1) do |item, index|
58+
item['id'] = existing_max_id + index
9959
end
10060

101-
# 更新されなかった既存アイテムを取得
102-
updated_urls = updated_items.map { |item| item['url'] }
103-
unchanged_items = existing_news.reject { |item| updated_urls.include?(item['url']) }
104-
105-
# 全アイテムをマージ
106-
all_items = unchanged_items + updated_items + truly_new_items_sorted
61+
# URL をキーに、更新されなかった既存の YAML データを取得・保持
62+
updated_urls = updated_items.map { it['url'] }
63+
unchanged_items = existing_items.values.reject { updated_urls.include?(it['url']) }
10764

108-
# 日付降順ソート
109-
sorted_items = all_items.sort_by { |item|
110-
Time.parse(item['published_at'])
65+
# 新規・更新・既存の各アイテムをマージし、日付降順でソート
66+
merged_items = (unchanged_items + updated_items + created_items).sort_by {
67+
Time.parse(it['published_at'])
11168
}.reverse
11269

11370
# YAML ファイルに書き出し
114-
File.open('db/news.yml', 'w') do |f|
115-
formatted_items = sorted_items.map do |item|
71+
File.open(NEWS_YAML_PATH, 'w') do |f|
72+
formatted_items = merged_items.map do |item|
11673
{
11774
'id' => item['id'],
11875
'url' => item['url'],
@@ -121,51 +78,49 @@ namespace :news do
12178
}
12279
end
12380

124-
f.write({ 'news' => formatted_items }.to_yaml)
81+
f.write(formatted_items.to_yaml)
12582
end
12683

127-
logger.info("✅ Wrote #{sorted_items.size} items to db/news.yml (#{truly_new_items_sorted.size} new, #{updated_items.size} updated)")
128-
logger.info('==== END news:fetch ====')
84+
logger.info "✅ Wrote #{merged_items.size} items to #{NEWS_YAML_PATH} (#{created_items.size} new, #{updated_items.size} updated)"
85+
logger.info "==== END news:fetch ===="
86+
logger.info ""
12987
end
13088

131-
desc 'db/news.yml からデータベースに upsert'
89+
desc "#{NEWS_YAML_PATH} からデータベースに upsert"
13290
task upsert: :environment do
133-
file_logger = ActiveSupport::Logger.new('log/news.log')
13491
console = ActiveSupport::Logger.new(STDOUT)
135-
logger = ActiveSupport::BroadcastLogger.new(file_logger, console)
92+
logger_file = ActiveSupport::Logger.new(NEWS_LOG_PATH)
93+
logger = ActiveSupport::BroadcastLogger.new(logger_file, console)
13694

13795
logger.info "==== START news:upsert ===="
13896

139-
yaml_path = Rails.root.join('db', 'news.yml')
140-
raw = YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)
141-
142-
entries = raw['news'] || []
143-
new_count = 0
97+
news_items = YAML.safe_load File.read(NEWS_YAML_PATH)
98+
created_count = 0
14499
updated_count = 0
145100

146101
News.transaction do
147-
entries.each do |attrs|
148-
news = News.find_or_initialize_by(url: attrs['url'])
149-
is_new = news.new_record?
150-
102+
news_items.each do |item|
103+
news = News.find_or_initialize_by(url: item['url'])
151104
news.assign_attributes(
152-
title: attrs['title'],
153-
published_at: attrs['published_at']
105+
title: item['title'],
106+
published_at: item['published_at']
154107
)
155-
156-
if is_new || news.changed?
108+
109+
is_new_record = news.new_record?
110+
if is_new_record || news.changed?
157111
news.save!
158-
status = is_new ? 'new' : 'updated'
159-
new_count += 1 if is_new
160-
updated_count += 1 unless is_new
112+
113+
status = is_new_record ? 'new' : 'updated'
114+
created_count += 1 if is_new_record
115+
updated_count += 1 unless is_new_record
161116

162117
logger.info "[News] #{news.published_at.to_date} #{news.title} (#{status})"
163118
end
164119
end
165120
end
166121

167-
logger.info "Upserted #{new_count + updated_count} items (#{new_count} new, #{updated_count} updated)."
122+
logger.info "Upserted #{created_count + updated_count} items (#{created_count} new, #{updated_count} updated)."
168123
logger.info "==== END news:upsert ===="
124+
logger.info ""
169125
end
170-
171126
end

0 commit comments

Comments
 (0)