Excluding extensions
git-svn-id: file:///home/svn/framework3/trunk@8863 4d416f70-5f16-0410-b530-b9f4589650da
This commit is contained in:
parent
5949b91612
commit
0bb34c94be
|
@ -47,6 +47,9 @@ $dbs = false
|
||||||
# Thread number
|
# Thread number
|
||||||
$threadnum = 1
|
$threadnum = 1
|
||||||
|
|
||||||
|
# Dont crawl
|
||||||
|
$dontcrawl = ".exe,.zip,.tar,.bz2,.run,.asc,.gz,"
|
||||||
|
|
||||||
# Use proxy
|
# Use proxy
|
||||||
$useproxy = false
|
$useproxy = false
|
||||||
|
|
||||||
|
@ -151,6 +154,11 @@ class HttpCrawler
|
||||||
if !@ViewedQueue.include?(hashsig(hashreq))
|
if !@ViewedQueue.include?(hashsig(hashreq))
|
||||||
@ViewedQueue[hashsig(hashreq)] = Time.now
|
@ViewedQueue[hashsig(hashreq)] = Time.now
|
||||||
|
|
||||||
|
if !File.extname(hashreq['uri']).empty? and $dontcrawl.include? File.extname(hashreq['uri'])
|
||||||
|
puts "URI not crawled #{hashreq['uri']}"
|
||||||
|
else
|
||||||
|
|
||||||
|
|
||||||
#if i < $threadnum
|
#if i < $threadnum
|
||||||
# a.push(Thread.new {
|
# a.push(Thread.new {
|
||||||
|
|
||||||
|
@ -178,7 +186,8 @@ class HttpCrawler
|
||||||
#else
|
#else
|
||||||
# sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
|
# sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
|
||||||
# i = 0
|
# i = 0
|
||||||
#end
|
#end
|
||||||
|
end
|
||||||
else
|
else
|
||||||
#puts "#{hashreq} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
|
#puts "#{hashreq} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
|
||||||
end
|
end
|
||||||
|
@ -242,9 +251,9 @@ class HttpCrawler
|
||||||
#
|
#
|
||||||
resp.transfer_chunked = false
|
resp.transfer_chunked = false
|
||||||
if resp['Set-Cookie']
|
if resp['Set-Cookie']
|
||||||
#puts "SET COOKIE: #{resp['Set-Cookie']}"
|
#puts "Set Cookie: #{resp['Set-Cookie']}"
|
||||||
#puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}"
|
#puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}"
|
||||||
$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie']
|
#$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie']
|
||||||
end
|
end
|
||||||
#puts ("#{resp.to_s}")
|
#puts ("#{resp.to_s}")
|
||||||
|
|
||||||
|
@ -272,8 +281,10 @@ class HttpCrawler
|
||||||
puts "No response"
|
puts "No response"
|
||||||
end
|
end
|
||||||
sleep($sleeptime)
|
sleep($sleeptime)
|
||||||
rescue ::Rex::ConnectionRefused, ::Rex::HostUnreachable, ::Rex::ConnectionTimeout
|
#rescue ::Rex::ConnectionRefused, ::Rex::HostUnreachable, ::Rex::ConnectionTimeout
|
||||||
rescue ::Timeout::Error, ::Errno::EPIPE
|
#rescue ::Timeout::Error, ::Errno::EPIPE
|
||||||
|
rescue
|
||||||
|
"ERROR"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue