44 lines
1.2 KiB
Python
44 lines
1.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2018/4/23 0023 13:54
|
|
# @Author : Langzi
|
|
# @Blog : www.langzi.fun
|
|
# @File : 权重.py
|
|
# @Software: PyCharm
|
|
import sys
|
|
import re
|
|
import requests
|
|
import time
|
|
reload(sys)
|
|
sys.setdefaultencoding('utf-8')
|
|
urlx = 'http://top.chinaz.com/all/index_%d.html'
|
|
def scan(urlx):
|
|
for x in range(2,200):
|
|
url = urlx%x
|
|
print url
|
|
try:
|
|
r = requests.get(url=url,timeout=8).content
|
|
try:
|
|
rr = re.findall('class="col-gray">(.*?)</span>',r)
|
|
for x in rr:
|
|
if not '-' in x:
|
|
with open('result.txt','a+')as a:
|
|
a.write(str('http://www.'+x+'\n'))
|
|
except Exception,e:
|
|
print e
|
|
except Exception,e:
|
|
print e
|
|
|
|
try:
|
|
r = requests.get(url='http://top.chinaz.com/all/',timeout=8).content
|
|
try:
|
|
rr = re.findall('class="col-gray">(.*?)</span>',r)
|
|
for x in rr:
|
|
if not '-' in x:
|
|
with open('result.txt','a+')as a:
|
|
a.write(str('http://www.'+x+'\n'))
|
|
except Exception,e:
|
|
print e
|
|
except Exception,e:
|
|
print e
|
|
|
|
scan(urlx) |