Verbose option and start cleaning

git-svn-id: file:///home/svn/framework3/trunk@9000 4d416f70-5f16-0410-b530-b9f4589650da
This commit is contained in:
et 2010-04-03 08:11:31 +00:00
parent e968c3894e
commit 3f27572fee
1 changed files with 44 additions and 34 deletions

View File

@ -62,6 +62,9 @@ $proxyport = 8080
# Cookie Jar
$cookiejar = {}
# Verbose
$verbose = false
class HttpCrawler
attr_accessor :ctarget, :cport, :cinipath, :cssl, :proxyhost, :proxyport, :useproxy
@ -114,6 +117,8 @@ class HttpCrawler
end
def storedb(hashreq,response,dbpath)
#postgres , pg gem
db = SQLite3::Database.new(dbpath)
#db = Mysql.new("127.0.0.1", username, password, databasename)
until !db.transaction_active?
@ -177,7 +182,9 @@ class HttpCrawler
@ViewedQueue[hashsig(hashreq)] = Time.now
if !File.extname(hashreq['uri']).empty? and $dontcrawl.include? File.extname(hashreq['uri'])
puts "URI not crawled #{hashreq['uri']}"
if $verbose
puts "URI not crawled #{hashreq['uri']}"
end
else
####
@ -212,7 +219,9 @@ class HttpCrawler
####
end
else
#puts "#{hashreq} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
if $verbose
puts "#{hashreq['uri']} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
end
end
end
@ -253,20 +262,6 @@ class HttpCrawler
def sendreq(nclient,reqopts={})
puts ">> #{reqopts['uri']}"
#puts reqopts
if reqopts['query'] and !reqopts['query'].empty?
puts ">>> [Q] #{reqopts['query']}"
end
if reqopts['data']
puts ">>> [D] #{reqopts['data']}"
end
begin
r = nclient.request_raw(reqopts)
@ -286,37 +281,44 @@ class HttpCrawler
#puts "Storing in cookie jar for host:port #{reqopts['rhost']}:#{reqopts['rport']}"
#$cookiejar["#{reqopts['rhost']}:#{reqopts['rport']}"] = resp['Set-Cookie']
end
#puts ("#{resp.to_s}")
#puts "resp code #{resp.code}"
if $dbs
#store db
storedb(reqopts,resp,$dbpathmsf)
end
puts ">> [#{resp.code}] #{reqopts['uri']}"
if reqopts['query'] and !reqopts['query'].empty?
puts ">>> [Q] #{reqopts['query']}"
end
if reqopts['data']
puts ">>> [D] #{reqopts['data']}"
end
case resp.code
when 200
@crawlermodules.each_key do |k|
@crawlermodules[k].parse(reqopts,resp)
end
when 301..302
puts "(#{resp.code}) Redirection to: #{resp['Location']}"
#puts urltohash(resp['Location'])
puts "[#{resp.code}] Redirection to: #{resp['Location']}"
if $verbose
puts urltohash(resp['Location'])
end
insertnewpath(urltohash(resp['Location']))
when 404
puts "Invalid link (404) #{reqopts['uri']}"
puts "[404] Invalid link #{reqopts['uri']}"
else
puts "Unhandled #{resp.code}"
end
else
puts "No response"
end
sleep($sleeptime)
#rescue ::Rex::ConnectionRefused, ::Rex::HostUnreachable, ::Rex::ConnectionTimeout
#rescue ::Timeout::Error, ::Errno::EPIPE
sleep($sleeptime)
rescue
puts "ERROR #{$!.backtrace}"
puts "ERROR #{$!}: #{$!.backtrace}"
end
end
@ -327,14 +329,20 @@ class HttpCrawler
if hashreq['rhost'] == self.ctarget and hashreq['rport'] == self.cport
if !@ViewedQueue.include?(hashsig(hashreq))
if @NotViewedQueue.read_all(hashreq).size > 0
#puts "Already in queue to be viewed"
if $verbose
puts "Already in queue to be viewed"
end
else
#puts "Inserted: #{hashreq['uri']}"
if $verbose
puts "Inserted: #{hashreq['uri']}"
end
@NotViewedQueue.write(hashreq)
end
else
#puts "#{hashreq} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
if $verbose
puts "#{hashreq['uri']} already visited at #{@ViewedQueue[hashsig(hashreq)]}"
end
end
end
end
@ -368,7 +376,6 @@ class HttpCrawler
'data' => nil
}
#puts hashreq
return hashreq
end
@ -428,7 +435,8 @@ $args = Rex::Parser::Arguments.new(
"-u" => [ true, "Use proxy"],
"-x" => [ true, "Proxy host" ],
"-p" => [ true, "Proxy port" ],
"-h" => [ false, "Display this help information"]
"-h" => [ false, "Display this help information"],
"-v" => [ false, "Verbose" ]
)
if ARGV.length < 1
@ -445,7 +453,9 @@ $args.parse(ARGV) { |opt, idx, val|
$crun = true
turl = val
when "-u"
$useproxy = true
$useproxy = true
when "-v"
$verbose = true
when "-x"
$proxyhost = val
when "-p"