Excluding file extensions from crawling

git-svn-id: file:///home/svn/framework3/trunk@8863 4d416f70-5f16-0410-b530-b9f4589650da
This commit is contained in:
et 2010-03-21 03:23:27 +00:00
parent 5949b91612
commit 0bb34c94be
1 changed file with 16 additions and 5 deletions

View File

@ -47,6 +47,9 @@ $dbs = false
# Thread number # Thread number
$threadnum = 1 $threadnum = 1
# Don't crawl URIs with these file extensions (comma-separated list)
$dontcrawl = ".exe,.zip,.tar,.bz2,.run,.asc,.gz,"
# Use proxy # Use proxy
$useproxy = false $useproxy = false
@ -151,6 +154,11 @@ class HttpCrawler
if !@ViewedQueue.include?(hashsig(hashreq)) if !@ViewedQueue.include?(hashsig(hashreq))
@ViewedQueue[hashsig(hashreq)] = Time.now @ViewedQueue[hashsig(hashreq)] = Time.now
if !File.extname(hashreq['uri']).empty? and $dontcrawl.include? File.extname(hashreq['uri'])
puts "URI not crawled #{hashreq['uri']}"
else
#if i < $threadnum #if i < $threadnum
# a.push(Thread.new { # a.push(Thread.new {
@ -178,7 +186,8 @@ class HttpCrawler
#else #else
# sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty? # sleep(0.01) and a.delete_if {|x| not x.alive?} while not a.empty?
# i = 0 # i = 0
#end #end
end
else else
#puts "#{hashreq} already visited at #{@ViewedQueue[hashsig(hashreq)]}" #puts "#{hashreq} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
end end
@ -242,9 +251,9 @@ class HttpCrawler
# #
resp.transfer_chunked = false resp.transfer_chunked = false
if resp['Set-Cookie'] if resp['Set-Cookie']
#puts "SET COOKIE: #{resp['Set-Cookie']}" #puts "Set Cookie: #{resp['Set-Cookie']}"
#puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}" #puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}"
$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie'] #$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie']
end end
#puts ("#{resp.to_s}") #puts ("#{resp.to_s}")
@ -272,8 +281,10 @@ class HttpCrawler
puts "No response" puts "No response"
end end
sleep($sleeptime) sleep($sleeptime)
rescue ::Rex::ConnectionRefused, ::Rex::HostUnreachable, ::Rex::ConnectionTimeout #rescue ::Rex::ConnectionRefused, ::Rex::HostUnreachable, ::Rex::ConnectionTimeout
rescue ::Timeout::Error, ::Errno::EPIPE #rescue ::Timeout::Error, ::Errno::EPIPE
rescue
"ERROR"
end end
end end