Security_Code/COMMON_CODE半成品-不再更新/百度关键词搜索.py

27 lines
1.3 KiB
Python

def _resolve_site_root(link, headers):
    """Follow *link* and return the landing page's ``scheme://host``.

    Returns ``None`` when the request fails, the final status code is not
    200, or the final URL carries no ``://`` separator.
    """
    try:
        resp = requests.get(url=link, headers=headers, timeout=5)
    except requests.RequestException:
        # Best-effort crawl: an unreachable or malformed target is skipped.
        return None
    if resp.status_code != 200:
        return None
    scheme, sep, rest = resp.url.partition('://')
    if not sep:
        return None
    # Keep only the host part, dropping any path/query.
    return scheme + '://' + rest.split('/')[0]


def scan_baidu(keyword):
    """Collect site roots from Baidu search results for *keyword*.

    Requests ten result pages (``pn`` = 0, 10, ..., 90). Every
    ``<a target="_blank" href="...">`` target containing ``link``
    (presumably Baidu's redirect entries -- confirm against live markup) is
    followed, and the ``scheme://host`` of the page it lands on is printed.
    Roots containing ``gov.cn`` or ``edu.cn`` are printed but not collected.

    Relies on the names ``headerss`` (a sequence of User-Agent strings) and
    ``timeout`` (timeout for the search request), which are expected to be
    defined at module level.

    Args:
        keyword: Search term; it is URL-encoded by ``requests``.

    Returns:
        list: Collected ``scheme://host`` strings in discovery order.
        Duplicates are kept.
    """
    list_001 = []
    banner = ' [*] 关键词网址采集功能启动......'
    if isinstance(banner, bytes):
        # Python 2: a plain literal is a byte string; decode it as UTF-8.
        banner = banner.decode('utf-8')
    print(banner)

    search_url = 'https://www.baidu.com/s'
    link_pattern = re.compile(r'<a target="_blank" href="(.*?)"', re.S)

    for offset in range(0, 100, 10):
        headers = {'User-Agent': random.choice(headerss)}
        try:
            # ``params`` lets requests escape the keyword instead of
            # splicing it raw into the query string.
            page = requests.get(url=search_url,
                                params={'wd': keyword, 'pn': offset},
                                headers=headers, timeout=timeout)
        except requests.RequestException:
            # A failed result page must not abort the remaining pages.
            continue

        # ``text`` (decoded) rather than ``content`` (bytes) so the text
        # pattern also matches on Python 3.
        for link in link_pattern.findall(page.text):
            if 'link' not in link:
                continue
            root = _resolve_site_root(link, headers)
            if root is None:
                continue
            print(' [*] First Found Url: ' + root)
            if 'gov.cn' in root or 'edu.cn' in root:
                continue
            list_001.append(root)

    # The collected list was previously built but never handed back.
    return list_001