推广 热搜: 公司  快速  中国  上海  未来    企业  政策  教师  系统 

百度网盘搜索引擎(基于python)

   日期:2024-10-31     作者:caijiyuan    caijiyuan   评论:0    移动:http://kaire.xrbh.cn/news/9589.html
核心提示:#! /usr/bin/env python#coding=utf-8#import osfrom Queue import Queueimport threadingimport timeimport urllibfrom urlpars
#! /usr/bin/env python

百度网盘搜索引擎(基于python)

# coding=utf-8
#
# Crawler for Baidu Pan share pages (Python 2).
#
# NOTE(review): this listing was recovered from a whitespace-collapsed copy
# in which several statements were lost.  `db`, `ren_queue` and `url_queue`
# are used below but their definitions do not appear in the recovered text;
# they must be created before any function that touches them is called.
# TODO: restore the MongoDB / queue setup from the original script.
import contextlib
import datetime
import json
import os
from Queue import Queue
import re
import sys
import threading
import time
import urllib
import urllib2
from urlparse import *
from urlparse import urljoin

import pymongo
import redis

reload(sys)
sys.setdefaultencoding('utf-8')

# The parentheses around the quoted payload are literal characters in the
# page source and therefore have to be escaped; only the payload is captured.
# NOTE(review): presumably the page embeds `parse("<escaped json>"),totalCount`
# -- confirm against a live response.
_REN_LIST_RE = re.compile(r'parse\("(.+?)"\),totalCount')


def get_url(url):
    """Fetch *url* and return the raw response body.

    The request uses a 20 second timeout.  Errors raised by ``urllib2``
    propagate to the caller.  The response is closed once it has been read.
    """
    request = urllib2.Request(url)
    with contextlib.closing(urllib2.urlopen(request, timeout=20)) as response:
        return response.read()


# def get_count(url):
#     data = re.findall(r'totalCount:"(.+?)"', get_url(url))
#     if len(data) == 0:
#         raise Exception('get_count err')
#     return data[0]

# def get_count_all(uk):
#     follow_count = get_count('http://pan.baidu.com/wap/share/home/followers?uk='+str(uk)+'&start=0')
#     fan_count = get_count('http://pan.baidu.com/wap/share/home/fans?uk='+str(uk)+'&start=0')
#     return follow_count, fan_count


def get_ren_info(url, neibie):
    """Download a follower/fan list page and return its users as dicts.

    :param url: page to download.
    :param neibie: ``'follow'`` or ``'fans'``; selects the ``<neibie>_uk`` /
        ``<neibie>_uname`` keys read from each entry.
    :returns: list of dicts with keys ``fangwen`` (always 1), ``uk``,
        ``uname``, ``avatar_url``, ``intro``, ``fans_count``,
        ``follow_count``, ``pubshare_count`` and ``album_count``.
    :raises ValueError: if *neibie* is not one of the two supported kinds.
    :raises Exception: ``'get_ren_info err'`` if the embedded list is not
        found in the page.
    """
    if neibie not in ('follow', 'fans'):
        # Previously an unknown kind left `ren` unbound (or stale from the
        # previous iteration); fail loudly instead.
        raise ValueError('neibie must be "follow" or "fans": %r' % (neibie,))

    data = get_url(url)
    matches = _REN_LIST_RE.findall(data)
    if not matches:
        raise Exception('get_ren_info err')

    # The payload is an escaped string literal; undo the escaping before
    # handing it to the JSON parser.
    data_decode = matches[0].decode("unicode_escape")
    jsondata = json.loads(data_decode)

    uk_key = neibie + '_uk'
    uname_key = neibie + '_uname'
    return [
        {
            'fangwen': 1,
            'uk': i[uk_key],
            'uname': i[uname_key],
            'avatar_url': i['avatar_url'],
            'intro': i['intro'],
            'fans_count': i['fans_count'],
            'follow_count': i['follow_count'],
            'pubshare_count': i['pubshare_count'],
            'album_count': i['album_count'],
        }
        for i in jsondata
    ]


def get_uk_all(url):
    """Parse the user list behind *url*, choosing the kind from the URL text.

    URLs containing ``'followers'`` are parsed as follow lists, URLs
    containing ``'fans'`` as fan lists.

    :raises Exception: if the URL contains neither marker.
    """
    # `str.index` raises ValueError on a miss, which made the fans branch and
    # the final error unreachable; plain membership tests keep all three
    # outcomes reachable.
    if 'followers' in url:
        return get_ren_info(url, 'follow')
    if 'fans' in url:
        return get_ren_info(url, 'fans')
    raise Exception('url str not found followers and fans')


def ren_mongo(uk, number):
    """Set ``fangwen`` to *number* on the ``ren`` document whose ``uk`` matches."""
    db.ren.update({'uk': uk}, {'$set': {'fangwen': number}})


def url_mongo(url, number):
    """Set ``fangwen`` to *number* on the ``url`` document whose ``url`` matches."""
    db.url.update({'url': url}, {'$set': {'fangwen': number}})


def drop():
    """Drop both the ``ren`` and the ``url`` collections."""
    db.ren.drop()
    db.url.drop()


def url_save(dics):
    """Save *dics* into the ``url`` collection, reporting duplicate keys.

    A ``DuplicateKeyError`` is reported on stdout and otherwise ignored, so
    re-queuing a known URL is harmless.
    """
    # The recovered text printed the message unconditionally after the save;
    # the message only makes sense as the duplicate-key handler.
    try:
        db.url.save(dics)
    except pymongo.errors.DuplicateKeyError:
        print('url save suoying chongfu')


def ren_save(dics):
    """Save *dics* into the ``ren`` collection, reporting duplicate keys.

    A ``DuplicateKeyError`` is reported on stdout and otherwise ignored.
    """
    try:
        db.ren.save(dics)
    except pymongo.errors.DuplicateKeyError:
        print('ren save suoying chongfu')


def init():
    """Seed the crawl.

    NOTE(review): most of this function was lost when the listing was
    collapsed.  The surviving fragments, in their original order, were:

        if not ren_one:
        url = 'http://pan.baidu.com/wap/share/home/followers?uk=657260084&start=0'
        url = 'http://pan.baidu.com/wap/share/home/followers?uk=657260084&start=24'
        print pymongo.errors.DuplicateKeyError('zai suoying chongfu')

    TODO: restore the body from the original script.
    """
    raise NotImplementedError('init(): body was lost in the recovered listing')


#queue
# NOTE(review): a module-level loop header survived here without its body:
#
#     for i in range(20):
#
# TODO: restore the loop body from the original script.


def check():
    """Check the queue levels.

    NOTE(review): only the two conditions survived; the statements they
    guarded were lost:

        if ren_queue.qsize()<20:
        if url_queue.qsize() <20:

    TODO: restore the body from the original script.
    """
    raise NotImplementedError('check(): body was lost in the recovered listing')


def deal_ren():
    """Take one user from ``ren_queue`` and queue its first list pages.

    Saves the first followers page and the first fans page of the user (when
    the respective count is positive) via ``url_save`` and then sets the
    user's ``fangwen`` to 3.
    """
    # = '1'
    # NOTE(review): `name` was never bound in the recovered text, which made
    # the log line below raise NameError.  The current thread's name is used
    # as the label -- confirm against the original script.
    name = threading.current_thread().name
    ren_one = ren_queue.get()
    uk = ren_one['uk']
    print("[" + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "][" + name + "号] " + str(uk) + "doing ")
    follow_count = ren_one['follow_count']
    fans_count = ren_one['fans_count']
    print(follow_count)
    print(fans_count)
    # The stop value is capped at 24 with a step of 24, so at most the page
    # with start=0 is queued for each list.
    for i in range(0, min(follow_count, 24), 24):
        url_save({'url': 'http://pan.baidu.com/wap/share/home/followers?uk=' + str(uk) + '&start=' + str(i), 'fangwen': 1})
    for j in range(0, min(fans_count, 24), 24):
        url_save({'url': 'http://pan.baidu.com/wap/share/home/fans?uk=' + str(uk) + '&start=' + str(j), 'fangwen': 1})
    ren_mongo(uk, 3)
    #db.ren.update({'uk':uk}, {'$set':{'fangwen':1}})


def deal_url():
    """Take one entry from ``url_queue`` and read its URL.

    NOTE(review): the recovered listing ends here; whatever followed these
    statements is not available.  TODO: restore from the original script.
    """
    #name = '1'
    url_one = url_queue.get()
    url = url_one['url']
    #db.url.update({'url':url}, {'$set':{'fangwen':2}})
本文地址:http://syank.xrbh.cn/news/9589.html    迅博思语资讯 http://syank.xrbh.cn/ , 查看更多
 
 
更多>同类资讯
0相关评论

新闻列表
企业新闻
推荐企业新闻
推荐图文
推荐资讯
点击排行
网站首页  |  关于我们  |  联系方式  |  使用协议  |  版权隐私  |  网站地图  |  排名推广  |  广告服务  |  积分换礼  |  网站留言  |  RSS订阅  |  违规举报  |  粤ICP备2023022329号