Python爬蟲聯想詞視頻和代碼
視頻中的代碼(Python 2)
# coding:utf-8
"""
Print so.com (360 Search) suggestion words for a few keywords (Python 2).

Written by Huang Ge of Huang Ge Python Training; see the video for details.

Each keyword is URL-quoted, appended to the suggestion endpoint and fetched
through an HTTP proxy picked at random from ``iplist``. Every double-quoted
substring of the response body is printed on its own line.
"""
import urllib2
import urllib
import re
import time
from random import choice

# Heads-up: the proxy addresses below may no longer work; replace them with
# live "host:port" proxies before running.
iplist = ["27.24.158.153:81", "46.209.70.74:8080", "60.29.255.88:8888"]

SUGGEST_URL = (
    "http://sug.so.360.cn/suggest/word"
    "?callback=suggest_so&encodein=utf-8&encodeout=utf-8&word="
)

# Only genuine HTTP headers belong here; the request method and URL are
# carried by the request line, not by a header.
headers = {
    "Host": "sug.so.360.cn",
    "Referer": "http://www.so.com/",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) "
        "AppleWebKit/537.17 (KHTML, like Gecko) "
        "Chrome/24.0.1312.56 Safari/537.17"
    ),
}

keywords = ["集團", "科技"]
for keyword in keywords:
    ip = choice(iplist)
    url = SUGGEST_URL + urllib.quote(keyword)

    # Route plain-HTTP traffic for this request through the chosen proxy.
    proxy_support = urllib2.ProxyHandler({"http": "http://" + ip})
    opener = urllib2.build_opener(proxy_support)
    urllib2.install_opener(opener)

    req = urllib2.Request(url)
    for key in headers:
        req.add_header(key, headers[key])

    try:
        html = urllib2.urlopen(req, timeout=10).read()
    except (urllib2.URLError, IOError) as err:
        # A dead proxy should not abort the remaining keywords.
        print("request via %s failed: %s" % (ip, err))
        continue

    # Every double-quoted substring of the response is treated as a result.
    for word in re.findall('"(.*?)"', html):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(word)

    # Pause between keywords so the endpoint is not hit in rapid succession.
    time.sleep(2)
Python爬蟲聯想詞 Python 3.5 代碼,修改於2016-5-9 上午,北京。
import http.client
import logging
import re
import time
import urllib.error
import urllib.parse
import urllib.request
from random import choice

logger = logging.getLogger(__name__)

# Captures the value of every `"word":"..."` pair in the response text.
_WORD_RE = re.compile(r'"word":"(.*?)"')


def get_associational_word(url, kw, iplist, headers, delay=2, timeout=10):
    """Fetch so.com suggestion ("associational") words for one keyword.

    Written by Huang Ge of Huang Ge Python Training; see the video for
    details.
    Huang Ge Python remote video training class:
    https://github.com/pythonpeixun/article/blob/master/index.md
    Huang Ge Python training preview videos:
    https://github.com/pythonpeixun/article/blob/master/python_shiping.md

    Args:
        url: Endpoint prefix; the URL-quoted keyword is appended to it.
        kw: Keyword to look up.
        iplist: Sequence of "host:port" HTTP proxies; one is picked at
            random. If empty, the request is made without a proxy.
        headers: Iterable of ``(name, value)`` pairs sent with the request.
        delay: Seconds to sleep after a successful fetch (default 2).
        timeout: Socket timeout in seconds for the request (default 10).

    Returns:
        A list of the strings captured from ``"word":"..."`` pairs in the
        response, in order of appearance. The list is empty when the
        request fails or the body cannot be decoded as UTF-8; the failure
        is logged as a warning instead of being raised.
    """
    result = []

    proxies = {}
    if iplist:
        proxy_url = "http://" + choice(iplist)
        # Register the proxy for both schemes so that an https:// endpoint
        # is routed through it as well instead of bypassing it.
        proxies = {"http": proxy_url, "https": proxy_url}

    opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies))
    opener.addheaders = list(headers or [])

    full_url = url + urllib.parse.quote(kw)
    try:
        with opener.open(full_url, timeout=timeout) as response:
            html = response.read().decode("utf-8")
    except (OSError, http.client.HTTPException, ValueError) as err:
        # Best effort: dead proxies are expected, so report and move on.
        logger.warning("suggestion request for %r failed: %s", kw, err)
        return result

    result.extend(_WORD_RE.findall(html))
    if delay > 0:
        # Pause after a successful fetch to avoid hammering the endpoint.
        time.sleep(delay)
    return result


if __name__ == "__main__":
    # Heads-up: the proxy addresses below may no longer work; replace them
    # with live "host:port" proxies before running.
    ip_list = ["1.93.2.191:3128", "14.18.236.99:80", "58.253.238.243:80"]
    keyword_lst = ["集團", "科技"]
    url = "https://sug.so.360.cn/suggest?callback=suggest_so&encodein="
    url += "utf-8&encodeout=utf-8&format=json&fields=word,obdata&word="
    browser = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/"
    browser += "537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"
    # Only genuine HTTP headers belong here; the request method and URL are
    # carried by the request line, not by a header.
    headers = [
        ("Host", "sug.so.360.cn"),
        ("Referer", "http://www.so.com/"),
        ("User-Agent", browser),
    ]
    for keyword in keyword_lst:
        for word in get_associational_word(url, keyword, ip_list, headers):
            print(word)
推薦閱讀:
※[譯] Python 3.7 新特性
※Python數據分析及可視化實例之可視化圖表應用簡介
※Django學習筆記一:搭建簡易博客
※使用 Python 連接 Todoist 與 Pomotodo
※黃哥推薦學習Python 10本好書。
TAG:Python