--- /dev/null
+#
+# A helper class for getting values out of an HTML document
+#
+
+package HTMLHelper;
+
+use HTML::TreeBuilder;
+@ISA = qw(HTML::TreeBuilder);
+
+use strict;
+use Data::Dumper;
+use LWP::Simple qw(!get);
+
+sub new {
+    #
+    # Constructor. Hands all arguments on to the parent class constructor
+    # and blesses the object it returns into the requested class.
+    #
+    my ($class, @args) = @_;
+
+    my $object = $class->SUPER::new(@args);
+    bless $object, $class;
+
+    return $object;
+}
+
+sub load {
+    #
+    # Fetch the document at $URL and parse it into this tree.
+    #
+    # Returns 1 on success. If the URL cannot be retrieved, returns
+    # nothing: undef in scalar context, the empty list in list context
+    # (so the result is false either way).
+    #
+    my ($self, $URL) = @_;
+
+    my $content = LWP::Simple::get($URL);
+
+    # LWP::Simple::get() signals failure by returning undef.
+    return unless defined $content;
+
+    $self->parse($content);
+
+    # parse() only feeds a chunk of content to the parser; per the
+    # HTML::TreeBuilder documentation eof() must be called afterwards so
+    # that buffered text is flushed and the tree is finalised.
+    $self->eof();
+
+    return 1;
+}
+
+sub findnodes {
+    #
+    # Find elements in the tree that match a simple path specifier.
+    #
+    # For the moment, only paths of the following two forms are accepted:
+    #
+    #   a) <tagname>
+    #   b) <tagname>[@<attribute>="<value>" (, @<attribute>="<value>" ...)]
+    #
+    # Attribute names consist of word characters and hyphens. Values are
+    # double-quoted, non-empty, and may contain anything except a double
+    # quote (commas included). Whitespace around the whole path is ignored.
+    #
+    # Returns the result of look_down() for the derived criteria, evaluated
+    # in the caller's context. An undefined path, or one that does not
+    # match one of the forms above in its entirety, yields the empty list
+    # (undef in scalar context).
+    #
+    my ($self, $path) = @_;
+
+    return unless defined $path;
+
+    # A single @name="value" pair. The same shape is used for every pair,
+    # so hyphenated names work in any position, not just the first.
+    my $pair = qr/\@[\w-]+=\x22[^\x22]+?\x22/;
+
+    # Anchored at both ends: a malformed classifier must make the whole
+    # path invalid instead of being silently dropped, which would turn the
+    # query into "every element with this tag name".
+    my ($tagname, $classifier) = $path =~
+        m{ \A \s* (\w+) (?: \[ ( $pair (?: \s*,\s* $pair )* ) \] )? \s* \z }x
+        or return;
+
+    # Pull the name/value pairs out with a global match rather than
+    # splitting on commas, so that a comma inside a quoted value is kept.
+    my %attribute;
+    if (defined $classifier) {
+        %attribute = $classifier =~ /\@([\w-]+)=\x22([^\x22]+?)\x22/g;
+    }
+
+    my %matchtag = ('_tag' => $tagname, %attribute);
+
+    return $self->look_down(%matchtag);
+}
+
+1;