This repository was archived by the owner on Apr 1, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.rb
115 lines (108 loc) · 3.65 KB
/
parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#coding: utf-8
require 'time'
require 'cgi/util'
require 'nokogiri'
module Parser
class Colette
  # Chart difficulties, in the same order as score columns 1..3 of a song row.
  DIFFICULTY = [:basic, :medium, :hard].freeze

  # Exclusive upper bounds on the achievement value for rating codes 0..4
  # (C, B, A, AA, AAA). An achievement at or above the last bound is rated
  # 5 (AAA+).
  RATING_BOUNDS = [50.0, 70.0, 80.0, 90.0, 95.0].freeze

  # Parses a profile page.
  #
  # html: String, or File
  # Returns a Hash with the keys :id, :pseudonym, :name, :comment, :stamp,
  # :play_count, :last_play_shop, :onigiri, :last_play_date and, only when a
  # team is set, :team (a Hash with :name and :id).
  def parse_profile(html)
    _parse_profile(_load(html))
  end

  # Parses a song list page.
  #
  # html: String, or File
  # Returns an Array with one Hash per song: :id, :name and :scores, where
  # :scores maps each DIFFICULTY symbol to a Hash of :lv, :achieve, :miss
  # and :rating.
  def parse_song(html)
    _parse_song(_load(html))
  end

  private

  # Builds the Nokogiri document with network access disabled (nonet).
  def _load(html)
    Nokogiri::HTML(html) { |config| config.nonet }
  end

  # Stripped text of the `dl dd` element inside the index-th box of a node set.
  def _box_value(boxes, index)
    boxes[index].at_css('dl dd').text.strip
  end

  # Extracts the player Hash from a parsed profile document.
  def _parse_profile(doc)
    # Each node set is queried once and then indexed per field.
    left = doc.css('#profileL_box div')
    right = doc.css('#profileR_box div')

    player = {
      id: doc.at_css('#plofbox dl dd').text.strip,
      # The substitution keeps only the leading non-whitespace run of each
      # line that starts with one (`^` anchors per line).
      pseudonym: doc.at_css('#plofbox2').text.strip.gsub(/^(\S+)(.*)/, '\1'),
      name: doc.at_css('#plofbox2 .plofbox2').text.strip,
      comment: doc.at_css('#right_con_plf2 dl dd').text.strip,
      stamp: _box_value(left, 1).to_i,
      play_count: _box_value(left, 2).to_i,
      last_play_shop: _box_value(left, 3),
      onigiri: _box_value(right, 1).to_i,
      last_play_date: Time.parse(_box_value(right, 3))
    }

    # The page shows the literal text '未設定' ("not set") in place of a team
    # id; in that case no :team key is added.
    team_id = _box_value(right, 0)
    unless team_id == '未設定'
      player[:team] = { name: _box_value(left, 0), id: team_id.to_i }
    end
    player
  end

  # Extracts the list of song Hashes from a parsed song-list document.
  def _parse_song(doc)
    doc.css('#music_table1 tbody tr').each_with_object([]) do |row, songs|
      # Rows containing a <th> are skipped.
      next unless row.css('th').empty?

      cells = row.css('td')
      scores = {}
      DIFFICULTY.each_with_index do |difficulty, index|
        # Score columns start at td index 1.
        scores[difficulty] = _parse_score(cells[index + 1])
      end

      image = row.at_css('img')
      songs << {
        # The id is whatever follows "img=" in the image URL, URL-unescaped.
        id: CGI.unescape(image.attr('src').gsub(/(.+?)img=(.+)$/, '\2')),
        name: Parser.name_normalize(image.attr('alt')),
        scores: scores
      }
    end
  end

  # Parses one difficulty column of a song row.
  #
  # When the column's first child text is '-', :achieve, :miss and :rating
  # are nil; :lv is always read from the column.
  def _parse_score(col)
    achieve = miss = rating = nil
    unless col.children.first.text.strip == '-'
      values = col.css('div')
      achieve = values.at_css('.fcph').text.strip.to_f
      miss_node = values.at_css('.fch')
      # When the .fch element is an <img> the miss count is 0; otherwise
      # its text holds the count.
      miss = miss_node.matches?('img') ? 0 : miss_node.text.strip.to_i
      rating = _rating_for(achieve)
    end
    {
      lv: col.at_css('.lv').text.strip.to_i,
      achieve: achieve,
      miss: miss,
      rating: rating
    }
  end

  # Maps an achievement value to a rating code:
  # 0 = C, 1 = B, 2 = A, 3 = AA, 4 = AAA, 5 = AAA+.
  # The code equals the number of bounds the value has reached.
  def _rating_for(achieve)
    RATING_BOUNDS.count { |bound| achieve >= bound }
  end
end # end class
# Normalizes typographic variants in a name string.
#
# Steps, in order:
# 1. two consecutive single quotes -> one double quote
# 2. full-width space (U+3000) -> half-width space, then every run of
#    half-width spaces -> a single space
# 3. dash-like characters (U+2012, U+2013, U+2015, U+2212, U+FF0D) -> em
#    dash (U+2014), then every run of em dashes -> a single em dash
# 4. full-width tilde (U+FF5E) -> wave dash (U+301C)
# 5. leading and trailing whitespace removed
#
# All dash variants are unified BEFORE runs are collapsed, so a run made of
# any mix of them ends up as exactly one em dash.
#
# @param name [String] the raw name
# @return [String] a new, normalized string
def self.name_normalize(name)
  # Non-ASCII characters are written as \u escapes because several of them
  # are visually indistinguishable from ASCII in most editors.
  name.gsub("''", '"')
      .gsub("\u3000", ' ')
      .gsub(/ +/, ' ')
      .gsub(/[\u2012\u2013\u2015\u2212\uff0d]/, "\u2014")
      .gsub(/\u2014+/, "\u2014")
      .gsub("\uff5e", "\u301c")
      .strip
end
end # end module
# Command-line entry point, run only when this file is executed directly.
# Reads a profile page (first argument) and a song-list page (second
# argument), both opened as Shift_JIS, parses them, and prints the parsed
# structures with `p`.
if __FILE__ == $0
  abort "usage: ruby #{$0} PROFILE_HTML SONG_HTML" if ARGV.size < 2

  parser = Parser::Colette.new
  # File.open instead of Kernel#open: Kernel#open runs a subprocess when the
  # path starts with "|", which must not be reachable from a CLI argument.
  prof = File.open(ARGV[0], 'r:shift_jis') { |f| parser.parse_profile(f) }
  song = File.open(ARGV[1], 'r:shift_jis') { |f| parser.parse_song(f) }
  p prof
  p song
end