Security_Code/COMMON_CODE半成品-不再更新/获取网址中所有友链.py

108 lines
5.3 KiB
Python

#coding:utf-8
from bs4 import BeautifulSoup as bp
import requests
#requests.packages.urllib3.disable_warnings()
import random
import threading
import time
from bs4 import BeautifulSoup as bs
def log(*args):
    """Append each argument to ``log.txt`` as its own timestamped line.

    Every line has the form ``YYYY-MM-DD HH:MM:SS:<message>``. Arguments are
    converted with ``str()`` so that non-string values (exceptions, numbers,
    ...) can be logged directly instead of raising ``TypeError`` during
    string concatenation.

    Args:
        *args: Values to log; one line is written per value.
    """
    # One timestamp per call so all lines of a single call share it.
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    with open('log.txt', 'a') as log_file:
        for message in args:
            log_file.write(stamp + ':' + str(message) + '\n')
# Timeout, in seconds, passed to requests.get() for every page fetch.
timeout = 5

# Pool of desktop Chrome User-Agent strings; one is chosen at random per
# request.
headerss = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Domain suffixes a link must contain to be collected. The two-level
# ".xxx.cn" suffixes are listed before their shorter counterparts.
first_cule = [
    '.com.cn',
    '.org.cn',
    '.net.cn',
    '.com',
    '.cn',
    '.cc',
    '.net',
    '.org',
    '.info',
    '.fun',
    '.one',
    '.xyz',
    '.name',
    '.io',
    '.top',
    '.me',
    '.club',
    '.tv',
]
def _link_origin(href, suffixes):
    """Return ``scheme://host`` for *href*, or None if it should be skipped.

    A link is kept only when it is an absolute http/https URL whose host name
    ends with one of *suffixes*. The host is taken from the parsed URL, so a
    suffix that merely occurs in the middle of the host or in the path (for
    example ``.com`` inside ``www.company.cn`` or ``example.com.cn``) can no
    longer produce a truncated, non-existent host.
    """
    # Local import keeps this block self-contained (no file-level change).
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(href.strip())
        host = parts.hostname
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. broken IPv6 brackets).
        return None
    if parts.scheme not in ('http', 'https') or not host:
        return None
    if not host.endswith(tuple(suffixes)):
        return None
    # Path, query, port and credentials are dropped: only the site root is kept.
    return parts.scheme + '://' + host


def wuxiancaiji(url):
    """Fetch *url* and collect the site roots of the external links it contains.

    Only ``<a>`` elements that are direct children of ``<li>``, ``<td>``,
    ``<p>`` or ``<div>`` are inspected. An anchor is ignored when its markup
    contains ``nofollow``, ``java``, ``;``, ``?`` or ``#``, when it has no
    ``href``, or when the link is not an http(s) URL on a host ending with
    one of the suffixes in ``first_cule``.

    The function is best-effort: any error while fetching or parsing is
    printed and whatever has been collected so far is returned.

    Args:
        url: Address of the page to scan.

    Returns:
        A de-duplicated list (in no particular order) that always contains
        *url* itself plus one ``scheme://host`` entry per accepted link.
    """
    anchor_selectors = ('li > a', 'td > a', 'p > a', 'div > a')
    rejected_tokens = ('nofollow', 'java', ';', '?', '#')

    origins = {url}
    try:
        request_headers = {
            'User-Agent': random.choice(headerss),
            'Connection': 'close',
        }
        # NOTE: verify=False disables TLS certificate validation, as in the
        # original code, so pages with broken certificates are still scanned.
        response = requests.get(url=url, headers=request_headers,
                                verify=False, timeout=timeout)
        time.sleep(0.02)  # tiny pause between the fetch and further work
        soup = bs(response.content, 'html.parser')

        for selector in anchor_selectors:
            for anchor in soup.select(selector):
                markup = str(anchor)
                if any(token in markup for token in rejected_tokens):
                    continue
                href = anchor.get('href')
                if not href:
                    continue
                origin = _link_origin(href, first_cule)
                if origin is not None:
                    origins.add(origin)
    except Exception as e:
        # Deliberately broad: a failed page must not abort the caller.
        print(e)
    return list(origins)
# Demo run: scan a sample site and print the collected links. Guarded so that
# importing this module does not trigger a network request.
if __name__ == "__main__":
    print(wuxiancaiji('http://www.hntky.com/'))