init proxy_scraper
This commit is contained in:
commit
ba5157c21c
|
@ -0,0 +1,11 @@
|
|||
The following modules are required:
|
||||
|
||||
Furl
|
||||
AnyEvent::HTTP
|
||||
Web::Scraper
|
||||
|
||||
You can use cpanm (http://cpanmin.us) to install the above modules easily.
|
||||
|
||||
cpanm Furl AnyEvent::HTTP Web::Scraper
|
||||
|
||||
Run ./run.sh to generate the list of available proxies; you can find the output file ",proxylist" in the "data" directory.
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env perl
|
||||
# hui
|
||||
# check_proxy.pl
|
||||
|
||||
use strict;
use warnings;
use AnyEvent::HTTP;

# Unbuffer STDOUT so every verdict is written as soon as it is printed.
$| = 1;

# Upper bound on simultaneously active requests; _check() keeps starting
# new requests until $AnyEvent::HTTP::ACTIVE reaches this value.
my $MAX_PROC = 1000;

# Value handed to http_request as its "timeout" option (seconds, per the
# AnyEvent::HTTP documentation).
my $timeout = 10;

# MAX_RECURSE is the default for http_request's "recurse" option; 0 means
# redirects are not followed, so the verdict comes from the first response.
$AnyEvent::HTTP::MAX_RECURSE = 0;

# Every request goes to the same target host, so the per-host connection
# limit has to be raised to the overall limit or it would be the real cap.
$AnyEvent::HTTP::MAX_PER_HOST = $MAX_PROC;

# URL fetched through each candidate proxy.
my $target = "http://www.google.co.jp";
my $start = time;

# _check() signals this condvar once the input is exhausted and no request
# is active any more; recv blocks (running the event loop) until then.
my $cv = AnyEvent->condvar;
_check(*STDIN);
$cv->recv;

my $end = time;
print "time: ".($end - $start)."s\n";
|
||||
|
||||
# _check($fh)
#
# Reads proxy candidates ("host:port", one per line) from $fh and starts a
# GET of $target through each of them, until either the number of active
# requests ($AnyEvent::HTTP::ACTIVE) reaches $MAX_PROC or the input ends.
#
# For every response whose headers arrive, prints "<proxy> ok!" when the
# Server header is "gws" and "<proxy> failed!" otherwise.  Nothing is printed
# for a proxy whose headers never arrive (on_header is the only place that
# prints).
#
# Each request's completion callback calls _check again, which refills the
# pool from the same handle.  When no request is active and $fh is at EOF,
# the file-level condvar $cv is signalled so the main program can finish.
sub _check {
    my $fh = shift;

    while ($AnyEvent::HTTP::ACTIVE < $MAX_PROC) {
        my $proxy = <$fh>;
        defined $proxy or last;
        $proxy =~ s/\s//g;

        # A blank or malformed line would otherwise be passed on as an empty
        # or port-less proxy specification; skip it instead of issuing a
        # request that cannot work.
        my ($host, $port) = split(/:/, $proxy);
        unless (defined $host && length $host
                && defined $port && $port =~ /^\d+$/) {
            warn "skipping malformed proxy entry '$proxy'\n" if length $proxy;
            next;
        }

        http_request(
            GET       => $target,
            headers   => { "user-agent" => "Mozilla/5.0" },
            timeout   => $timeout,
            proxy     => [split(/:/, $proxy)],
            on_header => sub {
                # The Server header may be absent; treat that as a failure
                # rather than comparing an undefined value.
                my $server = $_[0]{'server'};
                if (defined $server && $server eq "gws") {
                    print "$proxy ok!\n";
                } else {
                    print "$proxy failed!\n";
                }
                # A false return cancels the request: the headers are all
                # that is needed, the body is never downloaded.
                return 0;
            },
            # Completion callback: a slot is free again, so top up the pool.
            sub { _check($fh) },
        );
    }

    # Last one out signals completion: nothing in flight and nothing to read.
    $cv->send if ($AnyEvent::HTTP::ACTIVE == 0 && eof($fh));
}
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash
|
||||
# run.sh
|
||||
|
||||
# Every path below is relative to this script, so work from its directory
# regardless of where it was invoked from.
cd "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1

PERL=$(command -v perl)
if [ -z "$PERL" ]; then
    echo "perl not found in PATH" >&2
    exit 1
fi

SCRAPER=scrap_proxy.pl
CHECKER=check_proxy.pl
SCRAP_RESULT=tmp/,scrap_result
CHECK_RESULT=tmp/,check_result
PROXYLIST=data/,proxylist

# Make sure the output directories exist before redirecting into them.
mkdir -p "$(dirname -- "$SCRAP_RESULT")" "$(dirname -- "$PROXYLIST")" || exit 1

echo "Scrapping proxy search result from Google..."
"$PERL" "$SCRAPER" > "$SCRAP_RESULT"

echo "Checking proxy connectivity..."
# Only lines containing ':' are passed to the checker.
grep : "$SCRAP_RESULT" | "$PERL" "$CHECKER" | tee "$CHECK_RESULT"

# Keep the first field (host:port) of every line reported as ok; results are
# appended, so the list accumulates across runs.
grep ok "$CHECK_RESULT" | awk '{print $1}' >> "$PROXYLIST"

echo -e "All done!\nAvailable proxies are listed in file $PROXYLIST.\n"
|
|
@ -0,0 +1,52 @@
|
|||
#!/usr/bin/env perl
|
||||
# hui
|
||||
# scrap_proxy.pl
|
||||
|
||||
use strict;
use warnings;
use URI::Escape;
use Web::Scraper;
use Furl;
use Data::Dumper;

my $ua = Furl->new(agent => "Lynx", timeout => "15");

my $url = 'https://www.google.com/search?q=http+proxy+3128+80&hl=en&newwindow=1&tbo=1&tbs=qdr:d,sbd:1&prmd=imvns&source=lnt&start=0&sa=N';
my $pages = 5;    # scrap first 5 pages
my $step  = 10;   # 10 results per page
my $sleep = 5;    # sleep 5 seconds for each request

# The link extractor is the same for every result page, so build it once:
# it collects the href attribute of every <a> element under the key "link".
my $crawler = scraper { process "a", "link[]" => '@href'; };

# Step 1: walk the search result pages and collect the target URLs.
my @result;
for my $n (1..$pages) {
    warn "Checking page $n\n";

    # Page $n starts at result offset ($n - 1) * $step.
    my $i = ($n - 1) * $step;
    $url =~ s/\bstart=\d+\b/start=$i/;

    my $page = $ua->get($url);
    if ($page->is_success) {
        my $res = $crawler->scrape( $page->content );
        for my $link (@{ $res->{'link'} || [] }) {
            # Only "/url?" redirect links are considered; the real
            # destination is carried in their "q" parameter.
            next unless $link =~ /^\/url\?/;
            push @result, uri_unescape($1) if $link =~ /[?&]q=(http:[^\s?&]+?)(?:$|&)/;
        }
    } else {
        # Do not parse an error body as if it were a result page.
        warn "  request failed: ".$page->status_line."\n";
    }

    sleep $sleep;
}

warn "Get ".scalar(@result)." targets\n";

# Step 2: fetch every target page and pull "ip:port" pairs out of it.
my %proxies;
for my $target (@result) {
    warn " -> processing $target\n";

    my $page = $ua->get($target);
    unless ($page->is_success) {
        # An error body is not a proxy listing; scanning it could only
        # produce bogus entries.
        warn "    request failed: ".$page->status_line."\n";
        next;
    }

    my $content = $page->content;
    # Collapse adjacent table cells into "a:b" so that an address and a port
    # listed in neighbouring <td> elements match the pattern below.
    $content =~ s/\s*<\/td>\s*<td[^<>]*?>\s*/:/gm;
    while( $content =~ /\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s:]+(\d{3,5})\b/g ) {
        my ($ip, $port) = ($1, $2);

        # Reject strings that only look like an address or a port.
        next if grep { $_ > 255 } split /\./, $ip;
        next if $port > 65535;

        # Reject addresses that cannot be public proxies: "this network",
        # loopback, link-local and the three RFC 1918 private ranges
        # (10/8, 172.16/12, 192.168/16).
        next if $ip =~ /^(?:0\.|10\.|127\.|169\.254\.|192\.168\.|172\.(?:1[6-9]|2\d|3[01])\.)/;

        # The hash deduplicates pairs seen on several pages.
        $proxies{"$ip:$port"}++;
    }
}

print "$_\n" for keys %proxies;
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
58.64.168.208:3128 ok!
|
||||
202.185.100.225:3128 ok!
|
||||
103.244.2.114:3128 ok!
|
||||
58.20.127.100:3128 failed!
|
||||
118.189.1.186:3128 ok!
|
||||
202.146.144.30:8080 ok!
|
||||
220.173.59.36:8080 ok!
|
||||
118.163.243.40:8080 ok!
|
||||
218.104.148.59:3128 failed!
|
||||
106.187.38.45:3128 ok!
|
||||
114.66.219.90:8080 ok!
|
||||
111.192.57.101:9000 ok!
|
||||
117.40.160.45:3128 ok!
|
||||
110.74.197.26:8080 ok!
|
||||
222.88.240.27:9999 ok!
|
||||
124.47.57.2:9000 ok!
|
||||
27.46.21.67:9999 ok!
|
||||
112.65.19.122:8080 ok!
|
||||
124.78.81.53:8080 ok!
|
||||
118.114.77.116:8080 ok!
|
||||
180.173.85.204:8080 ok!
|
||||
111.67.74.178:8000 ok!
|
||||
210.14.147.71:3128 ok!
|
||||
183.220.195.112:8123 failed!
|
||||
66.146.193.31:8118 failed!
|
||||
192.254.70.245:8080 ok!
|
||||
199.185.61.3:8081 failed!
|
||||
175.41.246.120:17403 ok!
|
||||
202.98.123.126:8080 ok!
|
||||
201.159.23.150:3128 ok!
|
||||
108.177.171.106:33948 ok!
|
||||
183.178.23.103:3128 ok!
|
||||
187.188.195.66:8080 ok!
|
||||
181.114.59.203:8080 ok!
|
||||
118.96.172.6:3128 ok!
|
||||
77.95.194.142:3128 ok!
|
||||
118.99.114.113:3128 ok!
|
||||
159.226.61.60:3128 ok!
|
||||
186.3.52.166:3128 ok!
|
||||
219.150.205.35:8080 ok!
|
||||
173.9.143.222:7004 ok!
|
||||
176.223.101.66:8080 ok!
|
||||
188.111.69.178:8080 ok!
|
||||
195.175.201.242:8080 ok!
|
||||
117.171.242.248:8123 failed!
|
||||
91.121.158.63:3128 ok!
|
||||
195.190.117.50:3128 ok!
|
||||
110.208.27.82:9000 ok!
|
||||
201.49.209.147:3128 failed!
|
||||
189.59.219.202:8080 ok!
|
||||
41.223.53.170:443 ok!
|
||||
202.52.152.210:8080 ok!
|
||||
204.84.216.200:3128 ok!
|
||||
109.70.145.125:3040 ok!
|
||||
110.208.27.114:9000 ok!
|
||||
117.164.169.181:8123 failed!
|
||||
190.128.234.130:3128 ok!
|
||||
117.121.242.8:18888 ok!
|
||||
190.79.159.101:8080 ok!
|
||||
110.208.26.35:9000 ok!
|
||||
201.208.106.200:8080 ok!
|
||||
118.174.149.118:8080 ok!
|
||||
124.81.121.238:8080 ok!
|
||||
117.169.239.163:8123 failed!
|
||||
186.226.172.91:8080 ok!
|
||||
184.107.204.85:3128 ok!
|
||||
177.38.40.11:3128 ok!
|
||||
125.124.115.193:8080 ok!
|
||||
177.69.67.253:3128 ok!
|
||||
182.253.32.100:8080 ok!
|
||||
186.228.78.177:3128 ok!
|
||||
190.42.142.22:8080 ok!
|
||||
186.228.78.130:3128 ok!
|
||||
190.39.149.34:8080 ok!
|
||||
80.193.214.231:3128 ok!
|
||||
201.211.125.90:8080 ok!
|
||||
120.84.236.37:8080 ok!
|
||||
186.228.78.169:3128 ok!
|
||||
181.48.62.75:8080 ok!
|
||||
120.84.236.153:8080 ok!
|
||||
80.193.214.233:3128 ok!
|
||||
110.208.27.75:9000 ok!
|
||||
82.207.112.44:3128 ok!
|
||||
27.44.79.8:8080 ok!
|
||||
120.84.236.168:8080 ok!
|
||||
186.93.190.202:8080 ok!
|
||||
200.84.108.16:8080 ok!
|
||||
120.84.234.103:8080 ok!
|
||||
190.7.144.75:8080 ok!
|
||||
177.19.162.52:3128 ok!
|
||||
42.121.106.82:8088 failed!
|
||||
117.171.115.106:8123 failed!
|
||||
190.204.130.231:8080 ok!
|
||||
117.163.116.154:8123 failed!
|
||||
190.204.109.105:8080 ok!
|
||||
190.36.224.223:8080 ok!
|
||||
183.218.49.115:8123 failed!
|
||||
202.116.1.149:8128 ok!
|
||||
time: 15s
|
|
@ -0,0 +1,306 @@
|
|||
183.221.190.202:8123
|
||||
182.235.175.176:8088
|
||||
114.66.219.90:8080
|
||||
190.204.130.231:8080
|
||||
37.187.97.36:3128
|
||||
129.74.74.15:3128
|
||||
110.77.228.131:3128
|
||||
190.78.166.144:8080
|
||||
186.101.23.235:3128
|
||||
124.78.81.53:8080
|
||||
41.42.241.35:8080
|
||||
192.254.70.245:8080
|
||||
193.188.95.146:8080
|
||||
120.84.236.153:8080
|
||||
202.70.136.158:3128
|
||||
198.27.97.214:7808
|
||||
173.9.143.222:7004
|
||||
209.141.46.196:8888
|
||||
218.18.29.223:9000
|
||||
111.67.74.178:8000
|
||||
175.41.246.120:17403
|
||||
115.84.242.84:8080
|
||||
81.34.161.21:8080
|
||||
120.84.234.103:8080
|
||||
186.228.78.169:3128
|
||||
93.115.46.10:8080
|
||||
64.78.169.114:3128
|
||||
166.114.6.34:3128
|
||||
108.177.171.106:33948
|
||||
31.193.118.122:8080
|
||||
223.84.10.112:8123
|
||||
207.173.172.98:8000
|
||||
183.89.42.206:3128
|
||||
200.90.77.247:8080
|
||||
65.98.100.220:3128
|
||||
41.32.136.74:808
|
||||
194.29.178.14:3127
|
||||
58.137.158.104:8080
|
||||
202.116.1.149:8128
|
||||
118.99.114.113:3128
|
||||
202.97.159.227:8080
|
||||
117.40.160.45:3128
|
||||
186.228.78.177:3128
|
||||
186.91.134.139:8080
|
||||
124.47.57.2:9000
|
||||
80.193.214.233:3128
|
||||
212.248.78.114:8080
|
||||
77.95.194.142:3128
|
||||
190.78.10.189:8080
|
||||
189.208.57.239:1080
|
||||
27.44.79.8:8080
|
||||
207.236.90.180:8080
|
||||
137.135.97.79:8080
|
||||
220.166.64.73:1080
|
||||
220.132.152.102:3128
|
||||
218.204.89.117:8123
|
||||
113.106.19.28:3128
|
||||
64.34.14.28:7808
|
||||
182.253.51.223:8080
|
||||
200.97.98.171:8080
|
||||
190.202.220.242:8080
|
||||
58.20.127.100:3128
|
||||
181.114.59.203:8080
|
||||
58.246.43.122:8080
|
||||
190.203.40.191:8080
|
||||
177.99.244.38:8080
|
||||
183.216.167.138:8123
|
||||
199.241.28.233:8080
|
||||
80.193.214.231:3128
|
||||
41.191.237.233:8080
|
||||
200.84.73.121:8080
|
||||
137.135.98.170:8080
|
||||
201.242.88.110:8080
|
||||
200.93.93.205:8080
|
||||
180.173.85.204:8080
|
||||
181.48.62.75:8080
|
||||
190.77.196.240:8080
|
||||
203.178.133.10:3124
|
||||
162.243.50.42:3128
|
||||
41.220.19.157:3128
|
||||
202.175.83.183:808
|
||||
41.223.53.170:443
|
||||
68.48.33.47:3128
|
||||
202.146.144.30:8080
|
||||
180.245.66.251:8080
|
||||
202.185.100.225:3128
|
||||
180.183.137.157:3128
|
||||
112.65.19.122:8080
|
||||
186.3.52.166:3128
|
||||
182.253.242.33:3128
|
||||
213.180.75.122:2023
|
||||
61.160.126.157:3128
|
||||
115.156.165.3:8080
|
||||
186.90.181.166:8080
|
||||
61.156.235.172:9999
|
||||
137.117.71.160:8080
|
||||
41.46.215.190:8080
|
||||
202.51.117.22:8888
|
||||
27.46.21.67:9999
|
||||
204.86.209.115:8080
|
||||
128.42.142.41:3124
|
||||
180.183.239.22:3128
|
||||
179.210.21.241:3128
|
||||
172.16.0.20:3128
|
||||
222.223.127.130:808
|
||||
190.37.163.156:8080
|
||||
188.111.69.178:8080
|
||||
120.84.236.168:8080
|
||||
111.192.57.101:9000
|
||||
118.163.243.40:8080
|
||||
190.36.49.69:8080
|
||||
125.124.115.193:8080
|
||||
195.190.117.50:3128
|
||||
201.209.103.253:8080
|
||||
200.84.60.173:8080
|
||||
202.91.13.124:8080
|
||||
95.110.196.114:3128
|
||||
201.211.125.90:8080
|
||||
125.162.149.223:8080
|
||||
117.170.220.29:8123
|
||||
117.162.152.40:8123
|
||||
83.146.70.81:3128
|
||||
201.243.204.70:8080
|
||||
194.36.10.156:3127
|
||||
110.74.197.26:8080
|
||||
117.164.169.181:8123
|
||||
180.183.154.52:3128
|
||||
95.138.163.86:8001
|
||||
203.155.205.4:3128
|
||||
77.45.132.127:3128
|
||||
201.49.209.147:3128
|
||||
216.165.109.79:3127
|
||||
190.36.224.223:8080
|
||||
66.146.193.31:8118
|
||||
190.74.182.156:8080
|
||||
186.226.172.91:8080
|
||||
201.159.23.150:3128
|
||||
82.207.112.44:3128
|
||||
183.178.23.103:3128
|
||||
195.175.201.242:8080
|
||||
208.113.228.217:5555
|
||||
103.244.2.114:3128
|
||||
175.103.42.218:3128
|
||||
220.164.108.3:1080
|
||||
177.38.40.11:3128
|
||||
189.59.219.202:8080
|
||||
65.98.100.210:3128
|
||||
108.178.200.46:8080
|
||||
182.253.73.142:8080
|
||||
177.19.162.52:3128
|
||||
202.98.123.126:8080
|
||||
117.171.242.248:8123
|
||||
190.7.144.75:8080
|
||||
219.150.205.35:8080
|
||||
62.111.208.195:123
|
||||
190.39.149.34:8080
|
||||
120.84.236.37:8080
|
||||
176.205.213.147:8118
|
||||
223.84.16.39:8123
|
||||
117.163.116.154:8123
|
||||
109.233.215.166:8080
|
||||
177.69.67.253:3128
|
||||
176.223.101.66:8080
|
||||
67.17.38.72:3128
|
||||
177.69.195.4:3128
|
||||
220.173.59.36:8080
|
||||
118.114.77.116:8080
|
||||
195.113.161.83:3124
|
||||
201.38.204.210:8080
|
||||
216.12.29.195:8080
|
||||
190.37.34.192:8080
|
||||
204.84.216.200:3128
|
||||
118.99.84.141:8080
|
||||
183.216.249.12:8123
|
||||
190.128.234.130:3128
|
||||
117.171.115.106:8123
|
||||
109.194.65.175:3128
|
||||
222.88.240.27:9999
|
||||
106.187.38.45:3128
|
||||
110.208.27.82:9000
|
||||
202.52.152.210:8080
|
||||
118.174.149.118:8080
|
||||
180.183.66.253:8080
|
||||
110.208.27.114:9000
|
||||
184.107.204.85:3128
|
||||
183.218.49.115:8123
|
||||
194.45.222.17:255
|
||||
87.236.208.153:3128
|
||||
200.84.108.16:8080
|
||||
118.189.1.186:3128
|
||||
218.58.136.14:808
|
||||
117.162.205.125:8123
|
||||
105.236.66.187:3128
|
||||
219.93.174.104:553
|
||||
220.110.137.44:8080
|
||||
190.42.142.22:8080
|
||||
187.188.195.66:8080
|
||||
159.226.61.60:3128
|
||||
186.93.190.202:8080
|
||||
190.142.106.156:8080
|
||||
133.11.240.57:3127
|
||||
61.178.178.159:9999
|
||||
186.228.78.130:3128
|
||||
117.171.57.130:8123
|
||||
182.253.32.100:8080
|
||||
190.204.109.105:8080
|
||||
128.42.142.43:3124
|
||||
195.68.114.9:8080
|
||||
117.177.195.38:8123
|
||||
183.221.160.48:8123
|
||||
137.99.11.87:3124
|
||||
42.121.106.82:8088
|
||||
172.16.0.10:3128
|
||||
37.59.81.65:443
|
||||
173.208.110.98:34061
|
||||
201.242.90.177:8080
|
||||
91.121.158.63:3128
|
||||
122.49.12.186:1080
|
||||
140.247.60.126:3127
|
||||
58.64.168.208:3128
|
||||
31.170.178.2:8080
|
||||
180.211.159.138:8080
|
||||
129.82.12.188:3124
|
||||
124.81.121.238:8080
|
||||
115.29.161.178:8000
|
||||
109.70.145.125:3040
|
||||
110.137.40.52:8080
|
||||
117.170.206.111:8123
|
||||
183.220.195.112:8123
|
||||
210.14.147.71:3128
|
||||
110.208.27.75:9000
|
||||
110.208.26.35:9000
|
||||
183.219.94.247:8123
|
||||
201.208.106.200:8080
|
||||
87.194.10.38:443
|
||||
5.178.96.125:3128
|
||||
118.96.172.6:3128
|
||||
190.73.156.135:8080
|
||||
199.185.61.3:8081
|
||||
190.235.148.246:3128
|
||||
1.2.3.4:5678
|
||||
219.61.100.24:3128
|
||||
117.121.242.8:18888
|
||||
187.95.112.243:3128
|
||||
190.198.162.171:8080
|
||||
200.82.248.86:8080
|
||||
190.75.142.153:8080
|
||||
190.79.159.101:8080
|
||||
137.135.99.5:8080
|
||||
5.135.42.105:3128
|
||||
218.104.148.59:3128
|
||||
117.169.239.163:8123
|
||||
122.129.118.186:3128
|
||||
61.153.236.30:8080
|
||||
218.249.83.87:8080
|
||||
186.5.102.162:8080
|
||||
69.147.64.31:209
|
||||
190.39.38.57:3128
|
||||
118.142.19.39:1180
|
||||
200.109.33.50:8080
|
||||
203.151.44.66:8080
|
||||
88.248.183.22:8080
|
||||
190.120.251.154:8080
|
||||
182.253.35.57:8080
|
||||
223.86.18.109:8123
|
||||
190.40.54.245:8080
|
||||
91.121.136.186:9999
|
||||
187.59.2.83:3128
|
||||
137.135.97.7:8080
|
||||
110.208.27.178:9000
|
||||
186.215.80.218:3128
|
||||
174.129.196.16:8080
|
||||
109.73.70.165:5005
|
||||
85.64.202.69:29991
|
||||
219.94.87.123:8080
|
||||
219.72.230.2:1080
|
||||
129.82.12.188:3128
|
||||
117.170.197.19:8123
|
||||
109.73.70.165:7080
|
||||
175.140.44.162:8080
|
||||
146.57.249.98:3124
|
||||
190.253.89.124:8080
|
||||
171.100.122.119:3128
|
||||
186.228.78.133:3128
|
||||
212.56.195.190:8080
|
||||
190.36.136.204:8080
|
||||
202.29.214.2:3129
|
||||
192.254.128.172:8080
|
||||
183.217.162.173:8123
|
||||
61.164.184.66:8090
|
||||
112.45.120.143:8123
|
||||
117.164.164.143:8123
|
||||
219.137.229.146:9999
|
||||
82.200.164.226:3128
|
||||
190.77.2.71:8080
|
||||
218.18.128.108:9000
|
||||
109.196.127.194:8080
|
||||
120.206.144.186:8123
|
||||
120.72.84.192:8080
|
||||
211.41.55.136:123
|
||||
180.246.216.194:3128
|
||||
117.169.231.61:8123
|
||||
222.180.173.3:8080
|
||||
192.80.153.126:8080
|
||||
117.170.223.129:8123
|
||||
77.78.116.86:3128
|
Loading…
Reference in New Issue