#爱站关键词挖掘
import requests
from lxml import etree
import time
import xlwt
# Shared request headers for all HTTP calls in this script.
# NOTE(review): the original dict literal was truncated in this copy of the
# file and its contents are lost; a browser User-Agent is the usual minimum
# aizhan.com requires — confirm against a working copy.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/90.0.4430.212 Safari/537.36',
}
# One shared session so the login cookie persists across all requests below.
s = requests.Session()
#登陆账号
def dlwz():
    """Interactively log in to aizhan.com via the HTML form.

    Fetches the login page, downloads the captcha image to ``yanzhengma.jpg``
    in the working directory, prompts the operator for the captcha text and
    password, then POSTs the credentials. The login cookie is stored on the
    module-level session ``s`` for subsequent requests.

    Side effects: network I/O, writes a local file, reads from stdin, prints
    the login result.
    """
    url = 'https://www.aizhan.com/login.php'
    html = s.get(url, headers=headers).text
    con = etree.HTML(html)
    # The captcha image path is embedded in the login form markup.
    parameter = con.xpath('//ul[@class="auth-login"]/li/img/@src')[0]
    print(parameter)
    img_url = f'https://www.aizhan.com/{parameter}'
    print(img_url)
    # Download the captcha so the operator can read it and type it in.
    # BUG FIX: the original passed a full URL as the local filename, which is
    # not a valid filesystem path; save to a plain local file instead.
    r_img = s.get(img_url)
    with open('yanzhengma.jpg', 'wb') as fp:
        fp.write(r_img.content)
    code = input('请输入验证码:')
    password = input('请输入密码:')
    # BUG FIX: the original form-data dict was missing its closing brace.
    data = {
        'refer': 'https://ci.aizhan.com/',
        'username': '2504520968@qq.com',
        'password': password,
        'code': code,
    }
    r = s.post(url, headers=headers, data=data)
    # A logged-in page shows a "退出" (logout) link; use it as success marker.
    if "退出" in r.text:
        print("登陆成功!")
    else:
        print("登陆失败!验证码错误?账号输入有误?")
#获取关键词页码
def get_keyword_num(keyword_url):
    """Return the pager labels (list of str) for the first results page.

    BUG FIX: the original referenced ``con`` without ever requesting the
    page, which raised NameError; fetch and parse the page first.
    """
    url = f"https://ci.aizhan.com/{keyword_url}/"
    html = s.get(url, headers=headers).text
    con = etree.HTML(html)
    # Pager entries; may include non-numeric labels such as "下一页".
    page = con.xpath('//div[@class="pager"]/ul/li/a/text()')
    print(page)
    return page
#cookies登陆
def cookdl(cookie):
    """Alternative login path: request the keyword site with a caller-supplied
    cookie string instead of the form login in ``dlwz``.

    Prints the response object and body; returns None.
    BUG FIX: the original header dict was missing its closing brace.
    """
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'cookie': cookie,
        'Host': 'ci.aizhan.com',
        'Referer': 'https://ci.aizhan.com/',
        'Upgrade-Insecure-Requests': '1',
    }
    # Hard-coded sample keyword URL used to verify the cookie works.
    get_url = "https://ci.aizhan.com/5de54e1a8bbe8ba1/"
    req = requests.get(get_url, headers=head)
    print(req)
    print(req.text)
#关键词url编码
def get_keyword_url(keyword):
    """Encode *keyword* into aizhan's URL slug format.

    Mirrors the encoding in http://static.aizhan.com/js/home.js: each
    character becomes the hex of its code point, and two-digit hex values
    (ASCII range) are prefixed with ``"n"``. ``'+'`` characters are stripped
    first. Returns ``""`` for an empty/falsy keyword.

    BUG FIX: the original built the encoded string but never returned it,
    so every caller received None.
    """
    encoded = ""
    if keyword:
        keyword = keyword.replace('+', '')
        for ch in keyword:
            hx = hex(ord(ch))[2:]
            # Two-digit hex (ASCII) gets an "n" marker per home.js.
            if len(hx) == 2:
                hx = "n" + hx
            encoded += hx
    return encoded
def get_keyword_data(keyword_url, num):
    """Scrape one results page and return its rows.

    Each row is a 7-item list: [keyword, PC/mobile index, inclusion count,
    #1 ranked link, #1 ranked title, #2 ranked link, #2 ranked title].

    BUG FIX: the original referenced ``con`` without ever requesting the
    page (NameError); fetch and parse the page first.
    """
    data_list = []
    url = f"https://ci.aizhan.com/{keyword_url}/{num}/"
    time.sleep(5)  # throttle between pages to avoid being rate-limited
    html = s.get(url, headers=headers).text
    con = etree.HTML(html)
    # Keyword text of each result row.
    key_words = con.xpath('//table[@class="table table-striped table-s2"]/tbody/tr/td[@class="title"]/a/@title')
    print(key_words)
    print(len(key_words))
    # PC/mobile search index column.
    pc_mobile_indexs = con.xpath('//table[@class="table table-striped table-s2"]/tbody/tr/td[@class="center"]/span/text()')
    print(pc_mobile_indexs)
    print(len(pc_mobile_indexs))
    # Inclusion (indexed page) counts.
    inclusions = con.xpath('//table[@class="table table-striped table-s2"]/tbody/tr/td[@class="level"]/text()')
    print(inclusions)
    print(len(inclusions))
    # Links for rank #1 and #2 are interleaved in one node list.
    hrefs = con.xpath('//table[@class="table table-striped table-s2"]/tbody/tr/td[@class="url"]/a/i/text()')
    first_hrefs = hrefs[::2]    # rank #1 links
    print(first_hrefs)
    print(len(first_hrefs))
    second_hrefs = hrefs[1::2]  # rank #2 links
    print(second_hrefs)
    print(len(second_hrefs))
    # Titles for rank #1 and #2, interleaved the same way.
    titles = con.xpath('//table[@class="table table-striped table-s2"]/tbody/tr/td[@class="url"]/a/p/text()')
    first_titles = titles[::2]
    print(first_titles)
    print(len(first_titles))
    second_titles = titles[1::2]
    print(second_titles)
    print(len(second_titles))
    # zip() truncates to the shortest column, so ragged rows are dropped.
    for key_word, pc_mobile_index, inclusion, first_href, first_title, second_href, second_title in zip(
            key_words, pc_mobile_indexs, inclusions, first_hrefs, first_titles, second_hrefs, second_titles):
        data = [
            key_word,
            pc_mobile_index,
            inclusion,
            first_href,
            first_title,
            second_href,
            second_title,
        ]
        print(data)
        data_list.append(data)
    return data_list
#保存关键词数据为excel格式
def bcsj(keyword, data):
    """Save scraped keyword rows to ``<keyword>.xls``.

    *data* is a list of 7-column rows (as produced by ``get_keyword_data``);
    a Chinese header row is written first, then every data row, into a sheet
    named after *keyword*.
    """
    header = ['关键词', 'PC/移动指数', '收录数', '首页第1位链接',
              '首页第1位标题', '首页第2位链接', '首页第2位标题']
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet(f'{keyword}', cell_overwrite_ok=True)
    rows = [header]
    rows.extend(data)
    # Write cell-by-cell: xlwt has no bulk row-write API.
    for row_idx, row in enumerate(rows):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)
    book.save(f'{keyword}.xls')
    print(f"保存关键词数据为{keyword}.xls 成功!")
if __name__ == '__main__':
    keyword = input('请输入关键词>>')
    dlwz()
    keyword_url = get_keyword_url(keyword)
    page_num = get_keyword_num(keyword_url)
    datas_list = []
    for num in page_num:
        # The pager also contains non-numeric labels (e.g. "下一页");
        # only digit labels are real page numbers to fetch.
        if not str(num).isdigit():
            continue
        print(num)
        print(f'正在查询采集第{num}页关键词挖掘数据!')
        data_list = get_keyword_data(keyword_url, num)
        datas_list.extend(data_list)
    print(datas_list)
    # BUG FIX: the collected rows were never persisted — bcsj() was defined
    # but never called; save everything to <keyword>.xls at the end.
    bcsj(keyword, datas_list)
# Source article: http://syank.xrbh.cn/quote/6836.html (迅博思语资讯 http://syank.xrbh.cn/)