75 lines
3.7 KiB
Python
75 lines
3.7 KiB
Python
# coding:utf-8
|
|
import requests
|
|
import random
|
|
import re
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
def headers():
|
|
REFERERS = [
|
|
"https://www.baidu.com",
|
|
"http://www.baidu.com",
|
|
"https://www.google.com.hk",
|
|
"http://www.so.com",
|
|
"http://www.sogou.com",
|
|
"http://www.soso.com",
|
|
"http://www.bing.com",
|
|
]
|
|
|
|
headerss = [
|
|
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
|
|
"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
|
|
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
|
|
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
|
|
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
|
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
|
|
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
|
|
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
|
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
|
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
|
|
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"]
|
|
|
|
headers = {
|
|
'User-Agent': random.choice(headerss),
|
|
'Accept': 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
'Cache-Control': 'max-age=0',
|
|
'referer': random.choice(REFERERS),
|
|
'Accept-Charset': 'GBK,utf-8;q=0.7,*;q=0.3',
|
|
}
|
|
return headers
|
|
|
|
|
|
def run(url):
|
|
print('Check url:'+url)
|
|
try:
|
|
r = requests.get(url,headers=headers(),allow_redirects=False,verify=False,timeout=10)
|
|
print(url + ':' + str(r.status_code))
|
|
encoding = requests.utils.get_encodings_from_content(r.text)[0]
|
|
res = r.content.decode(encoding,'replace')
|
|
title_pattern = '<title>(.*?)</title>'
|
|
title = re.search(title_pattern,res,re.S|re.I)
|
|
with open('result.txt','a+',encoding='utf-8')as a:
|
|
a.write(url + ':' + str(title.group(1)).strip() + '\n')
|
|
return True
|
|
|
|
except Exception as e:
|
|
print(e)
|
|
|
|
if '__main__' == __name__:
|
|
file_name = input('导入文本:')
|
|
tasks = list(set(x.strip() for x in open(file_name).readlines()))
|
|
print('总任务量:' + str(len(tasks)))
|
|
|
|
with ThreadPoolExecutor() as p: #类似打开文件,可省去.shutdown()
|
|
future_tasks = [p.submit(run, i) for i in tasks]
|
|
print('=' * 30)
|
|
print([obj.result() for obj in future_tasks])
|
|
|
|
|