#!/usr/bin/env perl
#
# scrap_proxy.pl -- scrape Google results for published HTTP proxy lists
use strict;
use warnings;

use Data::Dumper;
use Furl;
use URI::Escape;
use Web::Scraper;
|
my $ua = Furl->new(agent => "Lynx", timeout => "15");
|
|
|
|
my $url = 'https://www.google.com/search?q=http+proxy+3128+80&hl=en&newwindow=1&tbo=1&tbs=qdr:d,sbd:1&prmd=imvns&source=lnt&start=0&sa=N';
|
|
my $pages = 5; # scrap first 5 pages
|
|
my $step = 10; # 10 results per page
|
|
my $sleep = 5; # sleep 5 seconds for each request
|
|
|
|
my @result;
|
|
for my $n (1..$pages) {
|
|
warn "Checking page $n\n";
|
|
if ( $n > 0) {
|
|
my $i = ($n - 1) * $step;
|
|
my $start = "start=$i";
|
|
$url =~ s/\bstart=\d+\b/$start/;
|
|
}
|
|
|
|
my $crawler = scraper { process "a", "link[]" => '@href'; };
|
|
my $res = $crawler->scrape( $ua->get($url)->content );
|
|
for my $link (@{$res->{'link'}}) {
|
|
next unless $link =~ /^\/url\?/;
|
|
push @result, uri_unescape($1) if $link =~ /[?&]q=(http:[^\s?&]+?)(?:$|&)/;
|
|
}
|
|
|
|
sleep $sleep;
|
|
}
|
|
|
|
|
|
warn "Get ".scalar(@result)." targets\n";
|
|
|
|
my %proxies;
|
|
for my $target (@result) {
|
|
warn " -> processing $target\n";
|
|
my $content = $ua->get($target)->content;
|
|
$content =~ s/\s*<\/td>\s*<td[^<>]*?>\s*/:/gm;
|
|
while( $content =~ /\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s:]+(\d{3,5})\b/g ) {
|
|
my $ip = "$1:$2";
|
|
$proxies{$ip}++ unless $ip =~ /^(?:192\.168|127\.0|10\.)/;
|
|
}
|
|
}
|
|
|
|
print "$_\n" for keys %proxies;
|
|
|