Security_Code/Spider/BuTian.NET/CODE/补天爬虫/数据可视化/test.py

24 lines
937 B
Python

# -*- coding:utf-8 -*-
import requests
from concurrent.futures import ThreadPoolExecutor
import time
import random
def run(url):
try:
time.sleep(random.randint(1,3))
r = requests.get(url,timeout=5)
data = (r.json()['data']['list'])
print('当前爬行到第{}页 数量: {}'.format(url.replace('https://www.butian.net/Loo/index/p/', ''),len(data)))
for i in data:
#print(i['title'].replace('发现了','').split('的')[0])
# 下面两行是保存数据
with open('厂商.txt','a+',encoding='utf-8')as a:
a.write(str(i['title'].replace('发现了','').split('')[0])+ '\n')
except Exception as e:
print(e)
run(url)
if __name__ == '__main__':
tasks = ['https://www.butian.net/Loo/index/p/{}'.format(id) for id in range(1,6293)]
with ThreadPoolExecutor(10) as p:
res= [p.submit(run,url) for url in tasks]