81 lines
3.5 KiB
Python
81 lines
3.5 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
__author__ = 'Langziyanqin'
|
||
__QQ__ = '982722261'
|
||
┏┓ ┏┓
|
||
┏┛┻━━━┛┻┓
|
||
┃ ☃ ┃
|
||
┃ ┳┛ ┗┳ ┃
|
||
┃ ┻ ┃
|
||
┗━┓ ┏━┛
|
||
┃ ┗━━━┓
|
||
┃ 神兽保佑 ┣┓
|
||
┃ 永无BUG! ┏┛
|
||
┗┓┓┏━┳┓┏┛
|
||
┃┫┫ ┃┫┫
|
||
┗┻┛ ┗┻┛
|
||
"""
|
||
import sys
|
||
import os
|
||
import requests
|
||
import re
|
||
import time
|
||
from bs4 import BeautifulSoup as bp
|
||
import random
|
||
# Python 2 only: reload(sys) is used so that sys.setdefaultencoding is callable
# again, and the process-wide default codec is then switched to UTF-8.  The
# rest of the script relies on this for its implicit str <-> unicode
# conversions (e.g. str(x.string) on non-ASCII quote text in start()).
reload(sys)
|
||
sys.setdefaultencoding('utf-8')
|
||
# Start-up banner, printed once when the script runs (Python 2 print statement).
# NOTE(review): the lone `|` / `||` lines throughout this file look like
# extraction residue rather than source; inside the banner literal they cannot
# be told apart from the ASCII art itself, so the text is left untouched here.
print '''
|
||
|
||
| __ __ __
|
||
|_, (__( | ) (__|
|
||
__/
|
||
|
||
'''
|
||
#time.sleep(3)
|
||
# Pool of desktop Chrome User-Agent strings.  start() picks one at random for
# every request so consecutive page fetches do not all carry the same header.
headerss = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Base listing URL handed to start(); the page number is appended directly
# after the trailing "page=".
url = 'http://www.juzimi.com/article/龙族?page='
|
||
def start(url, first_page=97, last_page=152, out_path='longzu.txt',
          proxies=None, timeout=30):
    """Crawl a range of listing pages and append every quote to a text file.

    For each page number the function fetches ``url + str(page)`` through an
    HTTP proxy with a randomly chosen User-Agent from the file-level
    ``headerss`` list, parses the response with BeautifulSoup (``lxml``
    parser), and collects the text of every ``<a class="xlistju">`` element.
    Each quote is appended to ``out_path`` as ``"<n> <text>"`` followed by a
    blank line, and echoed to stdout as ``"<n> : <text>"``.  ``n`` is a running
    counter that starts at 1 and continues across pages.

    Args:
        url: Base URL; the page number is concatenated directly onto it.
        first_page: First page number to fetch (inclusive).
        last_page: Last page number to fetch (inclusive).
        out_path: File the quotes are appended to (written as UTF-8).
        proxies: ``requests``-style proxy mapping.  ``None`` selects the
            proxy that was originally hard-coded in this script.
        timeout: Seconds to wait for the server before giving up; without it
            ``requests`` would wait indefinitely on a dead proxy.

    Returns:
        None.

    Raises:
        requests.RequestException: The request could not be completed
            (connection failure, proxy error, timeout).
        IOError: ``out_path`` could not be opened or written.
    """
    # Function-scope import keeps this block self-contained; io.open gives an
    # explicit UTF-8 text file on both Python 2 and 3, so writing no longer
    # depends on the global sys.setdefaultencoding() hack.
    import io

    if proxies is None:
        proxies = {'http': '115.237.13.176:8118'}

    count = 1
    for page in range(first_page, last_page + 1):
        page_url = url + str(page)
        print('Crawl : ' + page_url)

        response = requests.get(
            url=page_url,
            headers={'User-Agent': random.choice(headerss)},
            proxies=proxies,
            timeout=timeout,
        )
        if not response.ok:
            # An error page carries no quotes; report it instead of parsing it.
            sys.stderr.write(
                'Skip  : %s (HTTP %d)\n' % (page_url, response.status_code))
            continue

        # Hand the raw bytes to BeautifulSoup and let it detect the encoding;
        # re-encoding the byte string first was a no-op at best.
        soup = bp(response.content, 'lxml')

        # Tag.string is None when the anchor wraps more than one child node.
        # Such anchors were skipped (via a swallowed TypeError) before and are
        # still skipped, now explicitly and without consuming a number.
        quotes = []
        for anchor in soup.find_all('a', class_='xlistju'):
            text = anchor.string
            if text is not None:
                quotes.append(text)
        if not quotes:
            continue

        # One open() per page instead of one per quote.
        with io.open(out_path, 'a', encoding='utf-8') as out:
            for text in quotes:
                out.write(u'{0} {1}\n\n'.format(count, text))
                try:
                    print(u'{0} : {1}'.format(count, text))
                except UnicodeError:
                    # The console encoding cannot represent this quote; it is
                    # already safely in the file, so only the echo degrades.
                    print('%d : <not printable on this console>' % count)
                count += 1
|
||
# Script entry point: run the crawl over the default page range.
start(url)