From 0219f530d848c8368f945ee7eccaf2d00f824068 Mon Sep 17 00:00:00 2001 From: Vaclav Haisman Date: Mon, 20 Nov 2017 21:45:01 +0100 Subject: [PATCH 1/2] Allow parsing of Open Graph metas without og:type present. --- lib/Data/OpenGraph/Parser.pm | 64 +++++++++++++++++++++++------------- t/001_basic.t | 12 ++++++- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/lib/Data/OpenGraph/Parser.pm b/lib/Data/OpenGraph/Parser.pm index cb20ee8..33ffb7e 100644 --- a/lib/Data/OpenGraph/Parser.pm +++ b/lib/Data/OpenGraph/Parser.pm @@ -1,6 +1,6 @@ package Data::OpenGraph::Parser; use strict; -use Scalar::Util qw(reftype); +use Scalar::Util qw(reftype blessed); sub new { @@ -29,36 +29,28 @@ sub parse_string { my %properties = (); my $og_type = $tree->findnodes('//meta[@property="og:type" and @content]'); - if (! defined($og_type)) { - $tree->delete; - return \%properties; + if (defined($og_type)) { + $og_type = _unwrap_one_node($og_type); } - # There appears to be a slight difference in return value depending on - # whether HTML::TreeBuilder::LibXML or HTML::TreeBuilder::XPath is used. - if (reftype($og_type) eq 'HASH' && scalar keys %$og_type) { - $og_type = $og_type->get_nodelist->get_node(0); - } - elsif (reftype($og_type) eq 'ARRAY' && scalar @$og_type) { - $og_type = $og_type->[0]; - } - else { - $tree->delete; - return \%properties; - } - - my $content = $og_type->attr('content'); - if ($content =~ /^([^.]+)\..+$/) { - $properties{'_basictype'} = $1; - } else { - $properties{'_basictype'} = $content; + my $content; + if (defined($og_type) + && ($content = $og_type->attr('content'))) { + $properties{'type'} = $content; + if ($content =~ /^([^.]+)\..+$/) { + $properties{'_basictype'} = $1; + } else { + $properties{'_basictype'} = $content; + } } my $basictype = $properties{'_basictype'}; my $metas = $tree->findnodes( '//meta[' .'((starts-with(@property, "og:") and @property != "og:type")' - .' or starts-with(@property, "'.$basictype.':"))' + .(defined($basictype) + ? ' or starts-with(@property, "'.$basictype.':")' : '') + .')' .'and @content' .']'); for my $meta (@$metas) { @@ -88,6 +80,32 @@ sub parse_string { return \%properties; } +sub _unwrap_one_node { + # There appears to be a slight difference in return value depending on + # whether HTML::TreeBuilder::LibXML or HTML::TreeBuilder::XPath is used. + + my ($nodes_or_node,) = @_; + if (blessed($nodes_or_node) + && $nodes_or_node->isa('XML::XPathEngine::NodeSet')) { + if ($nodes_or_node->size()) { + return $nodes_or_node->get_node(0); + } + else { + return; + } + } + elsif (reftype($nodes_or_node) eq 'ARRAY') { + if (scalar @$nodes_or_node) { + return $nodes_or_node->[0]; + } + else { + return; + } + } + + return $nodes_or_node; +} + 1; diff --git a/t/001_basic.t b/t/001_basic.t index 63c8830..fff9dc5 100644 --- a/t/001_basic.t +++ b/t/001_basic.t @@ -25,4 +25,14 @@ EOM } }; -done_testing; \ No newline at end of file +subtest 'nested-without-prefix' => sub { + my $og = Data::OpenGraph->parse_string(< + +EOM + if (not is $og->property("audio:title"), "foo bar baz") { + diag(Data::Dumper::Dumper($og)); + } +}; + +done_testing; From 0290d8498bab43d81c63bc9761dce64b59f24cd3 Mon Sep 17 00:00:00 2001 From: Vaclav Haisman Date: Wed, 22 Nov 2017 02:33:17 +0100 Subject: [PATCH 2/2] Fix use of undef in regexp. --- lib/Data/OpenGraph/Parser.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Data/OpenGraph/Parser.pm b/lib/Data/OpenGraph/Parser.pm index 33ffb7e..60c9359 100644 --- a/lib/Data/OpenGraph/Parser.pm +++ b/lib/Data/OpenGraph/Parser.pm @@ -58,7 +58,7 @@ sub parse_string { $content = $meta->attr('content'); next unless $prop && $content; $prop =~ s/^og://; - if ($prop =~ /^$basictype:.+$/) { + if (defined($basictype) && $prop =~ /^$basictype:.+$/) { if (exists $properties{$prop}) { if ((reftype($properties{$prop}) // '') eq 'ARRAY') { push @{$properties{$prop}}, $content;