#
# A helper class for getting values out of a HTML document.
#
# It is a thin subclass of HTML::TreeBuilder that adds:
#   load($url)       - fetch a document over the network and parse it
#   findnodes($path) - look up elements using a tiny path syntax
#
package videosite::HTMLHelper;

use strict;
use warnings;

use HTML::TreeBuilder;
our @ISA = qw(HTML::TreeBuilder);

use Data::Dumper;
use LWP::Simple qw(!get);    # do not import get(); it is called fully qualified

# Matches one attribute condition of a path: @name="value".
# Capture 1 is the attribute name (hyphens allowed, e.g. data-id),
# capture 2 is the non-empty value without the surrounding quotes.
my $ATTR_CAPTURE_RE = qr/\@([\w-]+)=\x22([^\x22]+?)\x22/;

# Same condition without capture groups, for use inside the path regex.
my $ATTR_RE = qr/\@[\w-]+=\x22[^\x22]+?\x22/;

#
# Constructor. All arguments are handed to HTML::TreeBuilder's new().
#
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    return bless($self, $class);
}

#
# Fetch $URL with LWP::Simple::get and parse the result into this tree.
#
# Returns 1 on success. Returns undef (an empty list in list context)
# when the document could not be retrieved.
#
sub load {
    my $self = shift;
    my $URL  = shift;

    my $content = LWP::Simple::get($URL);
    unless (defined($content)) {
        # Error loading URL
        return;
    }

    $self->parse($content);
    # HTML::TreeBuilder needs eof() after the last parse() call so that
    # buffered input is flushed and the tree is finalized before lookups.
    $self->eof();

    return 1;
}

#
# Find elements matching a simple path specifier.
#
# For the moment, only paths of the following two forms are understood:
#
#   a) <tagname>
#   b) <tagname>[@<attr>="<value>" (, @<attr>="<value>" ...)]
#
# The result is whatever HTML::Element's look_down() returns for the
# derived criteria, evaluated in the caller's context. A path that cannot
# be understood yields undef (an empty list in list context).
#
sub findnodes {
    my $self = shift;
    my $path = shift;

    # An undefined path can never match; bail out without a warning.
    return unless defined($path);

    # The pattern is deliberately not anchored, so any leading text that
    # is not part of a tag name is skipped, exactly as before.
    my ($tagname, $classifier) = $path =~ m{
        (\w+)                                          # tag name
        (?: \[ ( $ATTR_RE (?: \s*,\s* $ATTR_RE )* ) \] )?  # optional [@a="b", ...]
    }x;

    # bad path name
    return unless defined($tagname);

    my %matchtag = ('_tag' => $tagname);

    if (defined($classifier)) {
        # Pull the pairs out with the attribute pattern itself instead of
        # splitting on commas, so that a comma inside a quoted value does
        # not tear the pair apart.
        while ($classifier =~ /$ATTR_CAPTURE_RE/g) {
            $matchtag{$1} = $2;
        }
    }

    return $self->look_down(%matchtag);
}

1;