From f5a7e0f5c2d1f8b510265891151d8caa3c101aad Mon Sep 17 00:00:00 2001
From: Ralf Ertzinger
Date: Sat, 6 Dec 2008 15:15:12 +0100
Subject: [PATCH] - Add HTMLHelper.pm

---
 videosite/HTMLHelper.pm | 112 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 videosite/HTMLHelper.pm

diff --git a/videosite/HTMLHelper.pm b/videosite/HTMLHelper.pm
new file mode 100644
index 0000000..e86b193
--- /dev/null
+++ b/videosite/HTMLHelper.pm
@@ -0,0 +1,112 @@
+#
+# A helper class for getting values out of an HTML document
+#
+
+package HTMLHelper;
+
+use strict;
+use warnings;
+
+use HTML::TreeBuilder;
+our @ISA = qw(HTML::TreeBuilder);
+
+use Data::Dumper;
+# get() is deliberately not imported; it is called fully qualified below.
+use LWP::Simple qw(!get);
+
+#
+# Constructor. Arguments are handed unchanged to the parent class
+# constructor; the resulting object is blessed into the calling class.
+#
+sub new {
+    my $class = shift;
+    my $self = $class->SUPER::new(@_);
+
+    return bless($self, $class);
+}
+
+#
+# Fetch the document at $URL and parse it into this tree.
+#
+# Returns 1 on success. Returns nothing (undef in scalar context, the
+# empty list in list context) if no URL was given or the document could
+# not be retrieved.
+#
+sub load {
+    my $self = shift;
+    my $URL = shift;
+
+    return unless defined($URL);
+
+    my $c = LWP::Simple::get($URL);
+    unless (defined($c)) {
+        # Error loading URL
+        return;
+    }
+
+    # parse() only feeds content to the parser; eof() must follow so that
+    # the tree builder knows the document is complete and finalizes the
+    # tree before anybody searches it.
+    $self->parse($c);
+    $self->eof();
+
+    return 1;
+}
+
+#
+# Find the elements matching a (very restricted) path specifier.
+#
+# In list context all matching elements are returned, in scalar context
+# only the first one (undef if there is none). A path that cannot be
+# parsed yields the empty list or undef, respectively.
+#
+sub findnodes {
+    my $self = shift;
+    my $path = shift;
+    my ($tagname, $rest, $classifier);
+    my %matchtag;
+
+    #
+    # Try to make sense of the path specifier.
+    # For the moment, we just allow paths of the following two forms:
+    #
+    # a) <tagname>
+    # b) <tagname>[@<attr>="<value>" (, @<attr>="<value>" ...)]
+    #
+
+    return unless defined($path);
+
+    # A single @<attr>="<value>" pair. Attribute names may contain
+    # hyphens (http-equiv, data-foo, ...), values must not be empty.
+    my $pair = qr/\@[\w-]+="[^"]+"/;
+
+    # The tag name is the first run of word characters; anything in front
+    # of it (such as an XPath style "//") is skipped.
+    unless (($tagname, $rest) = $path =~ m/(\w+)(.*)\z/s) {
+        # bad path name
+        return;
+    }
+
+    # Whatever follows the tag name must either be a complete, well formed
+    # classifier or nothing at all. Silently dropping a classifier we do
+    # not understand would return every element with that tag name.
+    if ($rest =~ m/\A\[\s*(${pair}(?:\s*,\s*${pair})*)\s*\]\s*\z/) {
+        $classifier = $1;
+    } elsif ($rest =~ m/\S/) {
+        # bad path name
+        return;
+    }
+
+    $matchtag{'_tag'} = $tagname;
+    if (defined($classifier)) {
+        # Pick the pairs apart with a pattern instead of splitting on
+        # commas, so that values containing a comma stay in one piece.
+        while ($classifier =~ m/\@([\w-]+)="([^"]+)"/g) {
+            $matchtag{$1} = $2;
+        }
+    }
+
+    return $self->look_down(%matchtag);
+}
+
+1;
-- 
1.8.3.1