Qdb: Properly encode text into UTF8
[quotesite.git] / quotesite / QdbGrabber.pm
index 0a65255..411be01 100644 (file)
@@ -3,14 +3,15 @@
 #
 # Grabber for qdb.us
 
-package QdbGrabber;
+package quotesite::QdbGrabber;
 
-use GrabberBase;
-@ISA = qw(GrabberBase);
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
 
 use LWP::Simple qw(!get);
 use HTML::TokeParser;
 use Data::Dumper;
+use Encode;
 
 use strict;
 
@@ -54,12 +55,10 @@ sub _parse {
 
     $p = HTML::TokeParser->new(\$content);
 
-    OUTER: while ($t = $p->get_tag('table')) {
-        if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'quote')) {
-            while ($t2 = $p->get_tag('p')) {
-                $metadata->{'CONTENT'} = $p->get_text('/p');
-                last OUTER;
-            }
+    OUTER: while ($t = $p->get_tag('span')) {
+        if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'qt')) {
+            $metadata->{'CONTENT'} = encode('utf8', decode('iso8859-1', $p->get_text('/span')));
+            last OUTER;
         }
     }