From 1c020472df3b2b7d8adc553a0a97cdae0c258f8c Mon Sep 17 00:00:00 2001 From: Scott Windsor Date: Wed, 1 May 2013 20:38:25 -0700 Subject: [PATCH 1/3] Adding atom feed export. * Introducing builder pattern for conversions * minor fixes to hatom for title fallback --- bin/prism | 13 ++- lib/prism.rb | 1 + lib/prism/builder.rb | 1 + lib/prism/builder/atom_builder.rb | 108 ++++++++++++++++++ lib/prism/microformat/hatom.rb | 17 ++- test/builder/atom_builder_test.rb | 33 ++++++ .../test-fixture/hatom/example1.atom.xml | 16 +++ .../fixtures/test-fixture/hatom/example1.html | 3 +- test/microformat/hatom_test.rb | 12 +- 9 files changed, 197 insertions(+), 7 deletions(-) create mode 100644 lib/prism/builder.rb create mode 100644 lib/prism/builder/atom_builder.rb create mode 100644 test/builder/atom_builder_test.rb create mode 100644 test/fixtures/test-fixture/hatom/example1.atom.xml diff --git a/bin/prism b/bin/prism index 9798703..8c4c224 100755 --- a/bin/prism +++ b/bin/prism @@ -13,6 +13,7 @@ ## If no SOURCE is given, prism will read from the Standard Input. ## ## Microformats: +## --atom, --atom hAtom => atom feed ## --vcard, --hcard hCard => vCard converter ## --xfn Get XFN URLs ## @@ -59,6 +60,7 @@ end options = {} ARGV.options do |option| option.banner = "Hi." + option.on('--atom','--atom') { options[:atom] = true } option.on('--hcard','--vcard') { options[:vcard] = true } option.on('--xfn') { options[:xfn] = true } option.on_tail('-h','--help') { puts usage ; exit } @@ -73,11 +75,12 @@ end def uformat_counts(group) uf_count = Prism::Microformat.microformats.values.collect do |uformat| - name = "#{uformat::FRIENDLY_NAME}" - found = group.select {|format| format.is_a?(uformat) }.count + klass = Prism::Microformat.format_to_class(uformat) + name = "#{klass::FRIENDLY_NAME}" + found = group.select {|format| format.is_a?(klass) }.count if found > 0 name += "s" if found > 1 - "Found #{found} #{name} in the document. Read more at: #{uformat::WIKI_URL}" + "Found #{found} #{name} in the document. Read more at: #{klass::WIKI_URL}" end end.compact if !uf_count.empty? @@ -107,7 +110,9 @@ end if options.empty? uformat_counts(Prism.find(input)).each {|count| puts count } else - if options[:vcard] + if options[:atom] + parse_microformats(input, :hatom) {|hatom| puts hatom.to_atom } + elsif options[:vcard] parse_microformats(input, :hcard) {|hcard| puts hcard.to_vcard } elsif options[:xfn] parse_microformats(input, :xfn) {|xfn| puts xfn.url } diff --git a/lib/prism.rb b/lib/prism.rb index 5eb450a..4e5c182 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -137,3 +137,4 @@ def parse_first(document) require 'prism/pattern' require 'prism/posh' require 'prism/microformat' +require 'prism/builder' diff --git a/lib/prism/builder.rb b/lib/prism/builder.rb new file mode 100644 index 0000000..a32589a --- /dev/null +++ b/lib/prism/builder.rb @@ -0,0 +1 @@ +require 'prism/builder/atom_builder' diff --git a/lib/prism/builder/atom_builder.rb b/lib/prism/builder/atom_builder.rb new file mode 100644 index 0000000..c83d703 --- /dev/null +++ b/lib/prism/builder/atom_builder.rb @@ -0,0 +1,108 @@ +require 'forwardable' + +module Prism + module Builder + class AtomBuilder + public + + def add_hatom(hatom) + self.hatom = hatom + end + + def build + self.builder = Nokogiri::XML::Builder.new(:encoding => 'utf-8') do |xml| + xml.feed('xmlns' => 'http://www.w3.org/2005/Atom') do + entries.each do |hentry| + build_entry xml, hentry + end + end + end + end + + def to_s + builder.to_xml + end + + private + + attr_accessor :builder, :hatom + + def entries + if hatom + hatom.hentry || [] + else + [] + end + end + + class EntryDelgator + extend Forwardable + + attr_reader :hentry + + def_delegator :hentry, :entry_title, :title + def_delegator :hentry, :entry_summary, :summary + + def initialize(hentry) + @hentry = hentry + end + + def fields + [:title, :updated, :published, :summary] + end + + def updated + hentry.updated.iso8601 if hentry.updated + end + + def published + hentry.published.iso8601 if hentry.published + end + end + + class AuthorDelegator + extend Forwardable + + attr_reader :author + + def_delegator :author, :fn, :name + def_delegator :author, :email + + def initialize(author) + @author = author + end + + def fields + [:name, :uri, :email] + end + + def uri + author.url.first if author.url + end + end + + def build_entry(xml, hentry) + entry = EntryDelgator.new(hentry) + xml.entry do + entry.fields.each do |field| + xml.send(field, entry.send(field)) if entry.send(field) + end + if hentry.entry_content + xml.content(:type => "html") do + xml.text hentry.entry_content + end + end + if hentry.author + author = AuthorDelegator.new(hentry.author) + xml.author do + author.fields.each do |field| + xml.send(field, author.send(field)) if author.send(field) + end + end + end + end + end + + end + end +end diff --git a/lib/prism/microformat/hatom.rb b/lib/prism/microformat/hatom.rb index 3e84c45..55978c1 100644 --- a/lib/prism/microformat/hatom.rb +++ b/lib/prism/microformat/hatom.rb @@ -27,7 +27,16 @@ class HAtom < POSH end has_many :hentry do - has_one :entry_title, :entry_summary + has_one :entry_title do + search do |node| + entry_title = node.css(".entry-title") + if entry_title.empty? + entry_title = node.css("h1,h2,h3,h4,h5,h6") + end + entry_title + end + end + has_one :entry_summary has_one :updated, :published do extract :typevalue @@ -65,6 +74,12 @@ class HAtom < POSH required! end + def to_atom + builder = Prism::Builder::AtomBuilder.new(self) + builder.build + builder.to_s + end + end end end diff --git a/test/builder/atom_builder_test.rb b/test/builder/atom_builder_test.rb new file mode 100644 index 0000000..edae9d5 --- /dev/null +++ b/test/builder/atom_builder_test.rb @@ -0,0 +1,33 @@ +require File.join(File.dirname(File.absolute_path(__FILE__)),'..','test_helper') + +class AtomBuilderTest < Test::Unit::TestCase + @@klass = Prism::Builder::AtomBuilder + + setup do + @atom = @@klass.new + @doc = test_fixture('hatom/example1.html') + @hatom = Prism::Microformat::HAtom.parse(@doc) + end + + def parse_atom + @atom.build + @xml_doc = Nokogiri::XML(@atom.to_s){ |config| config.strict } + end + + should "create an empty atom feed" do + parse_atom + assert_not_equal "", @xml_doc.xpath('/xmlns:feed').to_s + end + + should "create a feed" do + @atom.add_hatom(@hatom) + parse_atom + assert_equal "Wiki Attack", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:title').text + assert_equal "2005-10-10T14:07:00-07:00", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:published').text + assert_equal "We had a bit of trouble with ...", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:summary').text + assert_equal "Ryan King", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:author/xmlns:name').text + assert_equal "http://theryanking.com/", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:author/xmlns:uri').text + end + +end + diff --git a/test/fixtures/test-fixture/hatom/example1.atom.xml b/test/fixtures/test-fixture/hatom/example1.atom.xml new file mode 100644 index 0000000..cfd01e7 --- /dev/null +++ b/test/fixtures/test-fixture/hatom/example1.atom.xml @@ -0,0 +1,16 @@ + + + + Wiki Attack + 2005-10-10T14:07:00-07:00 + We had a bit of trouble with ... + <p class="entry-summary">We had a bit of trouble with ...</p> + <p>We've restored the wiki and ...</p> + <p>If anyone is working to combat said spammers ...</p> + + Ryan King + http://theryanking.com/ + + + + diff --git a/test/fixtures/test-fixture/hatom/example1.html b/test/fixtures/test-fixture/hatom/example1.html index a98dbd0..d638045 100644 --- a/test/fixtures/test-fixture/hatom/example1.html +++ b/test/fixtures/test-fixture/hatom/example1.html @@ -7,7 +7,7 @@

Wiki Attack

-

We had a bit of trouble with ...

+

We had a bit of trouble with ...

We've restored the wiki and ...

If anyone is working to combat said spammers ...

@@ -34,6 +34,7 @@

Technorati Tags:

+

Another title

diff --git a/test/microformat/hatom_test.rb b/test/microformat/hatom_test.rb index b2737f9..d6d5efe 100644 --- a/test/microformat/hatom_test.rb +++ b/test/microformat/hatom_test.rb @@ -18,16 +18,26 @@ def self.before_all assert_equal "Wiki Attack", hentry[0].entry_title end + test 'The title will fall back to h# tag' do + hentry = @hatom.hentry + assert_equal "Another title", hentry[1].entry_title + end + test 'The content contains html' do hentry = @hatom.hentry content = <<-EOS -

We had a bit of trouble with ...

+

We had a bit of trouble with ...

We've restored the wiki and ...

If anyone is working to combat said spammers ...

EOS assert_equal content.strip, hentry[0].entry_content end + test 'The summary is a singular value' do + hentry = @hatom.hentry + assert_equal 'We had a bit of trouble with ...', hentry[0].entry_summary + end + test 'The published is a time' do hentry = @hatom.hentry assert_equal Time.parse('2005-10-10 14:07:00 -0700'), hentry[0].published From 622282b97aa0433ef17658bc6321538f0f7dc8a8 Mon Sep 17 00:00:00 2001 From: Scott Windsor Date: Sun, 5 May 2013 11:45:35 -0700 Subject: [PATCH 2/3] Fix tests for timezone issues. Also set default TZ to be UTC for testing purposes. --- test/builder/atom_builder_test.rb | 2 +- test/test_helper.rb | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/builder/atom_builder_test.rb b/test/builder/atom_builder_test.rb index edae9d5..a6d4feb 100644 --- a/test/builder/atom_builder_test.rb +++ b/test/builder/atom_builder_test.rb @@ -23,7 +23,7 @@ def parse_atom @atom.add_hatom(@hatom) parse_atom assert_equal "Wiki Attack", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:title').text - assert_equal "2005-10-10T14:07:00-07:00", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:published').text + assert_equal "2005-10-10T21:07:00+00:00", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:published').text assert_equal "We had a bit of trouble with ...", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:summary').text assert_equal "Ryan King", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:author/xmlns:name').text assert_equal "http://theryanking.com/", @xml_doc.xpath('/xmlns:feed/xmlns:entry[1]/xmlns:author/xmlns:uri').text diff --git a/test/test_helper.rb b/test/test_helper.rb index 609dc89..28dffcf 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -36,3 +36,5 @@ def test_fixture(filename) html = get_fixture('test-fixture/' + filename) doc = Nokogiri::HTML.parse(html).css('#uf').first end + +ENV['TZ'] = 'UTC' # set UTC as timezone for tests From d799f364660b99154c9ed9b2f9cf5ec0158844c8 Mon Sep 17 00:00:00 2001 From: Scott Windsor Date: Sun, 5 May 2013 11:46:18 -0700 Subject: [PATCH 3/3] Making builder chainable --- lib/prism/builder/atom_builder.rb | 1 + lib/prism/microformat/hatom.rb | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/prism/builder/atom_builder.rb b/lib/prism/builder/atom_builder.rb index c83d703..2a75575 100644 --- a/lib/prism/builder/atom_builder.rb +++ b/lib/prism/builder/atom_builder.rb @@ -17,6 +17,7 @@ def build end end end + self end def to_s diff --git a/lib/prism/microformat/hatom.rb b/lib/prism/microformat/hatom.rb index 55978c1..075cb5b 100644 --- a/lib/prism/microformat/hatom.rb +++ b/lib/prism/microformat/hatom.rb @@ -75,9 +75,7 @@ class HAtom < POSH end def to_atom - builder = Prism::Builder::AtomBuilder.new(self) - builder.build - builder.to_s + Prism::Builder::AtomBuilder.new(self).build.to_s end end