From c56bfee28e0d6b033d1490f329a8c6270d4fd677 Mon Sep 17 00:00:00 2001 From: Ralf Ertzinger Date: Sat, 3 Sep 2011 23:58:07 +0200 Subject: [PATCH] Base: Add support for connectors A connector is basically a list of proxies to use for HTTP/HTTPS connections. Grabbers can request this list and try them in order until one yields a result. This is especially useful to work around silly YouTube videos, which are not available in all countries. --- videosite/Base.pm | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/videosite/Base.pm b/videosite/Base.pm index 0f2cf2d..e271253 100644 --- a/videosite/Base.pm +++ b/videosite/Base.pm @@ -10,7 +10,12 @@ use Data::Dumper; sub new { my $class = shift; - my $self = {'_DEBUG' => 0, '_OUT' => sub {printf(@_)}}; + my $self = {'_DEBUG' => 0, + '_OUT' => sub {printf(@_)}, + '_CONNECTORS' => sub { return ({ -name => 'direct', + -schemas => {} }) }, + '_CONNECTOR' => undef, + }; bless($self, $class); @@ -174,8 +179,47 @@ sub setdebug { sub ua { my $self = shift; + my $ua; + + $ua = LWP::UserAgent->new('agent' => 'Mozilla/5.0', 'cookie_jar' => HTTP::Cookies->new); + + # Remove a currently defined HTTPS proxy. See below for a longer explanation. + delete($ENV{'HTTPS_PROXY'}); + + if (defined($self->{'_CONNECTOR'})) { + my $schemas = $self->{'_CONNECTOR'}->{-schemas}; + foreach (keys(%{$schemas})) { + $self->debug("Adding schema %s with proxy %s", $_, $schemas->{$_}); + if ($_ eq 'https') { + # OK, so here's the gist. + # + # The usual way of requesting an HTTPS URL through a proxy is + # to connect to the proxy server, issue a CONNECT request to + # create a channel to the web server and start an SSL session over + # this channel, so there is an end-to-end connection between + # the client and the server. + # + # Setting a proxy for the https schema in LWP WILL NOT ACCOMPLISH + # THIS. 
+ # + # LWP will connect to the proxy server, and issue a standard GET + # request for the target URL, which most proxy servers will refuse + # to get. + # + # The way to use a proxy server is to set some environment variables + # and let the underlying Crypt::SSLeay module do the rest. + # + # This is positively appalling. + $ENV{'HTTPS_PROXY'} = $schemas->{$_}; + } else { + $ua->proxy($_, $schemas->{$_}); + } + } + } + + print Dumper($ua); - return LWP::UserAgent->new('agent' => 'Mozilla/5.0', 'cookie_jar' => HTTP::Cookies->new); + return $ua; } sub decode_hexurl { @@ -193,4 +237,22 @@ sub decode_querystring { return { map { split /=/, $_, 2; } split /&/, shift }; } +sub connectors { + my $self = shift; + + return $self->{'_CONNECTORS'}->(); +} + +sub selectconn { + my $self = shift; + + $self->{'_CONNECTOR'} = shift; +} + +sub setconn { + my $self = shift; + + $self->{'_CONNECTORS'} = shift; +} + 1; -- 1.8.3.1