Security_Code/资产收集/多个网站出现同样的关键词/find_vlue.py

84 lines
3.8 KiB
Python

# -*- coding: utf-8 -*-
# @Time : 2018/4/23 0023 13:17
# @Author : Langzi
# @Blog : www.langzi.fun
# @File : find_vlue.py
# @Software: PyCharm
import sys
import jieba
import random
import requests
from collections import Counter
from prettytable import PrettyTable
import time
import os
# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module has to be reloaded to get it back.  Switching the default codec to
# UTF-8 affects every implicit str <-> unicode conversion later in the script.
reload(sys)
sys.setdefaultencoding('utf-8')

# Start-up banner, shown once before anything else happens.
_BANNER = '''
| __ __ __
|_, (__( | ) (__|
__/
'''
print(_BANNER)

# Keep the banner on screen for a few seconds before prompting.
time.sleep(5)
# Pool of desktop Chrome User-Agent strings.  scan() and get() pick one at
# random (random.choice) for every HTTP request they send.
headerss = [
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"]
# Global accumulator: scan() appends every token jieba produces for every
# fetched page here; the list is tallied with Counter once all URLs are done.
ll = []
def scan(url):
    """Fetch *url* and add every jieba search-mode token of its body to ``ll``.

    The page is requested with a randomly chosen User-Agent from ``headerss``
    and a 5 second timeout.  The body is decoded as UTF-8 (undecodable bytes
    are ignored) and segmented with ``jieba.cut_for_search``.

    Any error (network failure, bad URL, ...) is printed and swallowed so that
    one unreachable site does not stop the overall scan.  Nothing is returned;
    the result is the side effect on the module-level list ``ll``.
    """
    try:
        UA = random.choice(headerss)
        headers = {'User-Agent': UA}
        response = requests.get(url=url, headers=headers, timeout=5)
        # Hand jieba the unicode text directly.  Re-encoding the page to GBK
        # first dropped every character GBK cannot represent, and jieba tries
        # UTF-8 before GBK on byte strings, so GBK bytes that happen to form
        # valid UTF-8 were silently mis-decoded.
        text = response.content.decode("utf8", "ignore")
        ll.extend(jieba.cut_for_search(text, HMM=True))
    except Exception as e:
        # Best-effort scan: report the problem and carry on.
        print(e)
# Ask for the path of a text file that lists the URLs to scan, one per line.
# The prompt is encoded as GBK before being handed to raw_input.
url_input = raw_input(unicode('请输入需要扫描的网址文本:','utf-8').encode('gbk'))

# Read the list inside a context manager so the handle is always closed.
# strip() removes '\r\n' as well as '\n', and blank lines are skipped so no
# empty "URL" is ever requested.  set() removes duplicates.
with open(url_input, 'r') as url_file:
    list_url = list(set(line.strip() for line in url_file if line.strip()))

# Tokenise every page; scan() accumulates the tokens in the global list ll.
for url in list_url:
    print('Scan: ' + url)
    scan(url)

# Token -> number of occurrences across all scanned pages.
d = dict(Counter(ll))

# One table row per distinct token, ascending by count (ties broken by the
# token itself).  The rows are built from (token, count) pairs rather than a
# dict keyed by count: such a dict keeps only one token per distinct count
# and does not preserve the sorted order.
x = PrettyTable(["出现次数", "元素"])
for word, count in sorted(d.items(), key=lambda item: (item[1], item[0])):
    x.add_row([count, word])

# Append the table to the report file.  The trailing newline keeps tables
# from successive runs from being glued together on one line.
with open('result.txt', 'a+') as a:
    a.write(str(x) + '\n')
print(x)

# Give the user time to read the table before the keyword prompt appears.
time.sleep(10)
def get(data_str):
    """Record every URL in ``list_url`` whose page body contains *data_str*.

    Each URL is fetched with a random User-Agent from ``headerss`` and a
    5 second timeout.  The body is decoded as UTF-8 (ignoring bad bytes) and
    re-encoded to GBK, then searched for *data_str* as a byte substring.
    NOTE(review): this presumably relies on the keyword arriving as GBK bytes
    from a GBK console (the prompts are GBK-encoded) -- confirm.

    Matching URLs are appended, one per line, to ``<data_str>.txt``.

    Errors are handled per URL: a failure is printed and the scan moves on to
    the next URL, so a single unreachable site no longer aborts the whole
    search.  Nothing is returned.
    """
    if not data_str:
        # An empty keyword is a substring of every page; it would list every
        # URL in a file literally named ".txt".  Treat it as "nothing to do".
        return
    for xx in list_url:
        print('Scan:' + xx)
        try:
            UA = random.choice(headerss)
            headers = {'User-Agent': UA}
            r1 = requests.get(url=xx, headers=headers, timeout=5).content.decode("utf8","ignore").encode("gbk","ignore")
            if data_str in r1:
                with open(str(data_str+'.txt'),'a+') as aa:
                    aa.write(xx+'\n')
        except Exception as e:
            # Best-effort: report the failure for this URL and continue.
            print(e)
# Interactive loop: keep asking for a keyword, search every listed site for
# it via get(), announce completion, then pause before the next prompt.
# Runs until the process is interrupted.
while True:
    keyword_prompt = unicode('请输入需要寻找所在网站的关键词:','utf-8').encode('gbk')
    data_str = raw_input(keyword_prompt)
    get(data_str)
    finished_message = unicode('当前关键词扫描完毕....','utf-8')
    print(finished_message)
    time.sleep(10)