怎樣用Python設計一個爬蟲模擬登陸知乎?

Python3下的代碼如下,結果登陸失敗,求助大神!
import gzip
import re
import http.cookiejar
import urllib.request
import urllib.parse

def ungzip(data):
    """Return *data* gzip-decompressed; pass it through unchanged if it is not gzip."""
    try:  # attempt decompression
        print("正在解壓.....")
        data = gzip.decompress(data)
        print("解壓完畢!")
    except OSError:  # gzip raises OSError (BadGzipFile subclass) on non-gzip input
        print("未經壓縮, 無需解壓")
    return data

def getXSRF(data):
    """Extract the ``_xsrf`` hidden-input value from the login page HTML."""
    # BUG FIX: the pasted pattern had mangled quoting; single-quote the pattern
    # so the embedded double quotes survive, and match non-greedily so the
    # capture stops at the first closing quote.
    cer = re.compile(r'name="_xsrf" value="(.*?)"', flags=0)
    strlist = cer.findall(data)
    return strlist[0]

def getOpener(head):
    """Build a urllib opener with a fresh CookieJar and *head* as request headers."""
    cj = http.cookiejar.CookieJar()
    pro = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(pro)
    # addheaders expects a list of (name, value) tuples
    opener.addheaders = list(head.items())
    return opener

header = {
    "Connection": "Keep-Alive",
    "Accept": "text/html, application/xhtml+xml, */*",
    "Accept-Language": "en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.zhihu.com",
    "DNT": "1",
}

url = "http://www.zhihu.com/"
opener = getOpener(header)
op = opener.open(url)
data = op.read()
data = ungzip(data)  # decompress if the server honoured Accept-Encoding: gzip
_xsrf = getXSRF(data.decode())

# BUG FIX: posting to /login always fails; the e-mail login endpoint is
# /login/email (use /login/phone_num with a "phone_num" field for phones).
url += "login/email"
account = "這裡填你的知乎帳號"  # renamed from "id", which shadows a builtin
password = "這裡填你的知乎密碼"
postDict = {
    "_xsrf": _xsrf,  # anti-CSRF token scraped from the home page
    "email": account,
    "password": password,
    "rememberme": "y",
}
postData = urllib.parse.urlencode(postDict).encode()
op = opener.open(url, postData)
data = op.read()
data = ungzip(data)

print(data.decode())


寫了個關於模擬登錄常見網站的小項目,

GitHub - xchaoinfo/fuck-login: 模擬登錄一些知名的網站,為了方便爬取需要登錄的網站

其中包括知乎 百度 新浪微博 126 郵箱 web微信等,考慮了 Py2 Py3 版本兼容 以及驗證碼的問題,歡迎大家來圍觀 pull request

既然是問的模擬登錄知乎,那我還是貼出來代碼吧。雖然你們可以直接到 github 上看。

經過多次修改,代碼的兼容和健壯性都有了很大的改進。Py2 Py3 兼容,代碼符合 PEP 8 規範,如果你有更好的改進方案,歡迎 Pull Request

由於網站的規則隨時會調整,請 github 去看最新的代碼

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Required
- requests (必須)
- pillow (可選)
Info
- author : "xchaoinfo"
- email : "xchaoinfo@qq.com"
- date : "2016.2.4"
Update
- name : "wangmengcn"
- email : "eclipse_sv@163.com"
- date : "2016.4.21"
"""
import requests
try:
import cookielib
except:
import http.cookiejar as cookielib
import re
import time
import os.path
try:
from PIL import Image
except:
pass

# Request headers: impersonate a desktop Firefox so Zhihu serves the normal page
agent = "Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0"
headers = {
    "User-Agent": agent
}

# Session with persistent login cookies saved to the file "cookies"
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies")
try:
    session.cookies.load(ignore_discard=True)
except OSError:  # first run: no cookie file yet (LoadError is an OSError too)
    print("Cookie 未能載入")

def get_xsrf():
    """Fetch the Zhihu home page and scrape the dynamic ``_xsrf`` login token."""
    index_url = "http://www.zhihu.com"
    index_page = session.get(index_url, headers=headers)
    html = index_page.text
    # BUG FIX: single-quote the pattern so the double quotes in the HTML
    # attribute survive (the pasted version had mangled quoting)
    pattern = r'name="_xsrf" value="(.*?)"'
    # re.findall returns a list; the first hit is the token
    _xsrf = re.findall(pattern, html)
    return _xsrf[0]

# Fetch the login captcha, show it (pillow if available) and read it from stdin
def get_captcha():
    t = str(int(time.time() * 1000))
    # BUG FIX: the original glued "type=login" straight onto the timestamp;
    # query parameters must be separated with "&"
    captcha_url = "http://www.zhihu.com/captcha.gif?r=" + t + "&type=login"
    r = session.get(captcha_url, headers=headers)
    with open("captcha.jpg", "wb") as f:
        f.write(r.content)  # "with" closes the file; the explicit close was redundant
    # Show the captcha with pillow's Image; without pillow, tell the user
    # where to find the file so it can be typed in manually
    try:
        im = Image.open("captcha.jpg")
        im.show()
        im.close()
    except Exception:  # pillow missing (Image undefined) or the image failed to open
        print(u"請到 %s 目錄找到captcha.jpg 手動輸入" % os.path.abspath("captcha.jpg"))
    captcha = input("please input the captcha\n>")
    return captcha

def isLogin():
    """Return True when the session is logged in.

    Probes the settings page without following redirects: logged-in users
    get a 200, anonymous users are redirected (3xx).
    """
    url = "https://www.zhihu.com/settings/profile"
    login_code = session.get(url, allow_redirects=False).status_code
    # BUG FIX: status_code is already an int; the original "int(x=login_code)"
    # was an invalid call (int() takes no keyword "x" for this purpose)
    return login_code == 200

def login(secret, account):
    """Log in with *account* (phone number or e-mail) and password *secret*."""
    import json  # local import: the snippet's header does not import json

    # An 11-digit number starting with 1 is a Chinese mobile number.
    # BUG FIX: the pasted pattern "^1d{10}$" lost its backslash.
    if re.match(r"^1\d{10}$", account):
        print("手機號登錄\n")
        post_url = "http://www.zhihu.com/login/phone_num"
        postdata = {
            "_xsrf": get_xsrf(),
            "password": secret,
            "remember_me": "true",
            "phone_num": account,
        }
    else:
        print("郵箱登錄\n")
        post_url = "http://www.zhihu.com/login/email"
        postdata = {
            "_xsrf": get_xsrf(),
            "password": secret,
            "remember_me": "true",
            "email": account,
        }
    try:
        # First attempt: login without a captcha
        login_page = session.post(post_url, data=postdata, headers=headers)
        login_code = login_page.text
        # BUG FIX: requests Response exposes .status_code, not .status
        print(login_page.status_code)
        print(login_code)
    except Exception:
        # Retry with a captcha
        postdata["captcha"] = get_captcha()
        login_page = session.post(post_url, data=postdata, headers=headers)
        # SECURITY FIX: parse the JSON reply instead of eval() on server text
        login_code = json.loads(login_page.text)
        print(login_code["msg"])
    session.cookies.save()

# Py2/Py3 compatibility: make input() behave like raw_input on Python 2
try:
    input = raw_input
except NameError:  # Python 3: raw_input does not exist
    pass

if __name__ == "__main__":
    if isLogin():
        print("您已經登錄")
    else:
        account = input("請輸入你的用戶名\n> ")
        secret = input("請輸入你的密碼\n> ")
        login(secret, account)


登錄很簡單,其實上面很多答案的很多內容都是可以去掉的。簡化到最後奉上以下代碼。(是手機號碼登錄的,想要郵箱的話改一下url和參數就可以了)

#encoding=utf8
import cookielib
import urllib2
import urllib

url_start = r"https://www.zhihu.com/topic/19556498/questions?page="
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [("User-agent","Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Iceweasel/38.3.0")]

def login():
username = ""
password = ""
cap_url = "https://www.zhihu.com/captcha.gif?r=1466595391805type=login"
cap_content = urllib2.urlopen(cap_url).read()
cap_file = open("/root/Desktop/cap.gif","wb")
cap_file.write(cap_content)
cap_file.close()
captcha = raw_input("capture:")
url = "https://www.zhihu.com/login/phone_num"
data = urllib.urlencode({"phone_num":username,"password":password,"captcha":captcha})
print urllib2.urlopen(url,data).read()

if __name__=="__main__":
login()


抓包發現使用手機號登陸時,用戶名的key是phone_num。登陸網址是:

http://www.zhihu.com/login/phone_num

樓上都是Python 2,我放個Python 3的,用法見注釋

import requests
import time
import json
import os
import re
import sys
import subprocess
from bs4 import BeautifulSoup as BS

class ZhiHuClient(object):

    """Helper that maintains a logged-in requests Session for Zhihu.
    2015.11.11

    Usage:

        client = ZhiHuClient()

        # Call once on first use to log in and create the cookie file;
        # later runs reuse the saved cookie and can skip this step.
        client.login("username", "password")

        # Perform further requests with this session (see the requests docs).
        session = client.getSession()
    """

    # URL path parameter selecting the account type
    TYPE_PHONE_NUM = "phone_num"
    TYPE_EMAIL = "email"
    loginURL = r"http://www.zhihu.com/login/{0}"
    homeURL = r"http://www.zhihu.com"
    captchaURL = r"http://www.zhihu.com/captcha.gif"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Host": "www.zhihu.com",
        "Upgrade-Insecure-Requests": "1",
    }

    captchaFile = os.path.join(sys.path[0], "captcha.gif")
    cookieFile = os.path.join(sys.path[0], "cookie")

    def __init__(self):
        os.chdir(sys.path[0])  # make the script's directory the working directory

        self.__session = requests.Session()
        self.__session.headers = self.headers  # via self so a future class rename still works
        # Reuse a saved cookie when one exists
        self.__cookie = self.__loadCookie()
        if self.__cookie:
            print("檢測到cookie文件,直接使用cookie登錄")
            self.__session.cookies.update(self.__cookie)
            soup = BS(self.open(r"http://www.zhihu.com/").text, "html.parser")
            print("已登陸賬號: %s" % soup.find("span", class_="name").getText())
        else:
            print("沒有找到cookie文件,請調用login方法登錄一次!")

    def login(self, username, password):
        """Log in interactively (captcha read from stdin) and save the cookie.

        Wrong captcha returns:
        {"errcode": 1991829, "r": 1, "data": {"captcha": "請提交正確的驗證碼 :("}, "msg": "請提交正確的驗證碼 :("}
        Successful login returns:
        {"r": 0, "msg": "登陸成功"}
        """
        self.__username = username
        self.__password = password
        self.__loginURL = self.loginURL.format(self.__getUsernameType())
        # Open any page to obtain the _xsrf token required for login
        html = self.open(self.homeURL).text
        soup = BS(html, "html.parser")
        _xsrf = soup.find("input", {"name": "_xsrf"})["value"]
        # Keep retrying until the captcha is accepted
        while True:
            captcha = self.open(self.captchaURL).content
            with open(self.captchaFile, "wb") as output:
                output.write(captcha)
            # Human-eye recognition
            print("=" * 50)
            print("已打開驗證碼圖片,請識別!")
            # Opens the image file with the OS default viewer
            subprocess.call(self.captchaFile, shell=True)
            captcha = input("請輸入驗證碼:")
            os.remove(self.captchaFile)
            # Send the login POST request
            data = {
                "_xsrf": _xsrf,
                "password": self.__password,
                "remember_me": "true",
                self.__getUsernameType(): self.__username,
                "captcha": captcha
            }
            res = self.__session.post(self.__loginURL, data=data)
            print("=" * 50)
            # print(res.text)  # debug output
            if res.json()["r"] == 0:
                print("登錄成功")
                self.__saveCookie()
                break
            else:
                print("登錄失敗")
                # BUG FIX: "---&>" was HTML-escaping residue for "--->"
                print("錯誤信息 --->", res.json()["msg"])

    def __getUsernameType(self):
        """Classify the account name.
        Observed site rule: all-digit names are phone numbers, anything else e-mail.
        """
        if self.__username.isdigit():
            return self.TYPE_PHONE_NUM
        return self.TYPE_EMAIL

    def __saveCookie(self):
        """Serialize the session cookies (as a plain dict) into the cookie file."""
        with open(self.cookieFile, "w") as output:
            cookies = self.__session.cookies.get_dict()
            json.dump(cookies, output)
            print("=" * 50)
            print("已在同目錄下生成cookie文件:", self.cookieFile)

    def __loadCookie(self):
        """Return the deserialized cookie dict, or None when no cookie file exists."""
        if os.path.exists(self.cookieFile):
            print("=" * 50)
            with open(self.cookieFile, "r") as f:
                return json.load(f)
        return None

    def open(self, url, delay=0, timeout=10):
        """GET *url* and return the Response; optionally sleep *delay* seconds first."""
        if delay:
            time.sleep(delay)
        return self.__session.get(url, timeout=timeout)

    def getSession(self):
        return self.__session

if __name__ == "__main__":
    client = ZhiHuClient()

    # Call once on first use to log in and create the cookie file;
    # later runs can skip this step:
    # client.login("username", "password")

    # Use this session for further requests (see the requests docs)
    session = client.getSession()


https://github.com/lining0806/PythonSpiderNotes/tree/master/ZhihuSpider


可以用Selenium控制瀏覽器來登錄。
利用Selenium來實現知乎和Bilibili的登錄
有一個演示視頻可以看看


知乎現在登錄貌似每次都會有密碼了,修改如下:

import requests
from xtls.util import BeautifulSoup

INDEX_URL = "http://www.zhihu.com"
LOGIN_URL = "http://www.zhihu.com/login/email"
CAPTCHA_URL = "http://www.zhihu.com/captcha.gif?r="

def gen_time_stamp():
    """Return a millisecond-style timestamp string: epoch seconds + 3 random digits."""
    # BUG FIX: the snippet never imported time or random; import locally so it runs
    import random
    import time
    return str(int(time.time())) + "%03d" % random.randint(0, 999)

def login(username, password, oncaptcha):
    """Log in via the e-mail endpoint; *oncaptcha* maps captcha image bytes to text."""
    session = requests.session()

    _xsrf = BeautifulSoup(session.get(INDEX_URL).content).find("input", attrs={"name": "_xsrf"})["value"]
    data = {
        "_xsrf": _xsrf,
        "email": username,
        "password": password,
        "remember_me": "true",
        "captcha": oncaptcha(session.get(CAPTCHA_URL + gen_time_stamp()).content)
    }
    resp = session.post(LOGIN_URL, data)
    # BUG FIX: use floor division so the 2xx check also holds on Python 3,
    # where "/" yields a float (200 / 100 == 2.0 != 2 is False, but be explicit).
    # NOTE(review): the substring test on resp.content assumes Python 2
    # (str content); on Python 3 use resp.text instead — confirm target version.
    if resp.status_code // 100 != 2 or u"登陸成功" not in resp.content:
        raise Exception("captcha error.")
    return session

其中,oncaptcha為一個回調函數(需要自己實現的),接受的參數為驗證碼的二進位內容,返回的為驗證碼內容。

P.S.你可以自己做識別驗證碼,或者手動輸入,其中最簡單的oncaptcha為:

def oncaptcha(data):
    """Simplest captcha callback: save the image bytes, read the answer from stdin."""
    with open("captcha file save path", "wb") as image_file:
        image_file.write(data)
    return raw_input("captcha : ")


要是3,建議你用requests,簡直不要太簡單


初學爬蟲,貼上代碼記錄一下

Python2.7.8/WINDOWS/requests、lxml庫

# -*- coding:utf-8 -*-
import requests
from lxml import etree

url = "http://www.zhihu.com"
headers = {
"Referer":"http://www.zhihu.com/",
"User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36"
}
xsrf = etree.HTML(requests.get(url, headers = headers).content).xpath("//input[@name="_xsrf"]/@value")[0]

data = {
"_xsrf": xsrf,
"email": "xxxxxx",
"password": "xxxxxx",
"remember_me": "true"
}

# captcha_url = "http://www.zhihu.com/captcha.gif"
# captcha = requests.get(captcha_url, stream=True)
# print captcha
# f = open("captcha.gif", "wb")
# for line in captcha.iter_content(10):
# f.write(line)
# f.close()
# print u"輸入驗證碼:"
# captcha_str = raw_input()
# data["captcha"] = captcha_str

loginurl = url + "/login/email"
html = requests.post(loginurl, data = data, headers = headers)
cookies = html.cookies

url1 = "https://www.zhihu.com/question/22591304/followers"
#這是通過fiddler獲取的cookies
# cookies = {"Cookies":"xxxxxx"}
html = requests.get(url1,cookies=cookies,headers=headers).content
zhihu = etree.HTML(html)
content = zhihu.xpath("//a[@class="zg-link"]")
for each in content:
text = each.xpath("string(.)")
print text

------------------------------------------------------------
沒有泄露個人信息吧。。。不小心把賬號密碼貼出來了。。。


最近寫了個python模擬登陸知乎的blog,有我的分析過程,分享給題主,題主可以參考下:http://www.cnblogs.com/ly941122/p/5401950.html


附上一個完整登錄成功python3.5版代碼

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__author__ = "Jhy"
import gzip
import re
import http.cookiejar
import urllib.request
import urllib.parse

def ungzip(data):
    """gzip-decompress *data* when possible; return it untouched otherwise."""
    try:
        # attempt decompression
        print("正在解壓.....")
        data = gzip.decompress(data)
        print("解壓完畢!")
    except OSError:  # not gzip data (BadGzipFile is an OSError subclass)
        print("未經壓縮, 無需解壓")
    return data

def getXSRF(data):
    """Pull the ``_xsrf`` hidden-input value out of the page HTML."""
    # BUG FIX: single-quote the pattern so the embedded double quotes survive;
    # non-greedy capture stops at the first closing quote.
    cer = re.compile(r'name="_xsrf" value="(.*?)"', flags=0)
    strlist = cer.findall(data)
    return strlist[0]

def getOpener(head):
    """Return a urllib opener carrying a CookieJar and the given request headers."""
    cookie_jar = http.cookiejar.CookieJar()
    processor = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(processor)
    # addheaders expects a list of (name, value) tuples
    opener.addheaders = list(head.items())
    return opener

header = {
    "Connection": "Keep-Alive",
    "Accept": "text/html, application/xhtml+xml, */*",
    "Accept-Language": "en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Accept-Encoding": "gzip, deflate",
    "Host": "www.zhihu.com",
    "DNT": "1",
}

url = "http://www.zhihu.com/"
opener = getOpener(header)
op = opener.open(url)
data = op.read()
data = ungzip(data)  # decompress when the reply is gzip-encoded
_xsrf = getXSRF(data.decode())
print(_xsrf)

# Phone-number login endpoint; for e-mail accounts use "login" with an "email" field
url += "login/phone_num"
account = "這裡填你的知乎帳號"  # renamed from "id", which shadows a builtin
password = "這裡填你的知乎密碼"
postDict = {
    "_xsrf": _xsrf,  # anti-CSRF token scraped from the home page
    "phone_num": account,
    "password": password,
    "rememberme": "y",
}
postData = urllib.parse.urlencode(postDict).encode()
op = opener.open(url, postData)
data = op.read()
data = ungzip(data)

print(data.decode("utf-8"))

情況1:
id = "這裡填你的純數字帳號"
當id為純數字時,比如你的手機號
代碼如上

情況2:
id = "這裡填你的郵箱帳號"
當id為郵箱時,比如XXXXX@qq.com
代碼修改如下

url += "login"
"_xsrf":_xsrf,
"email": id,
"password": password,
"rememberme": "y"


可以用 selenium 自動化瀏覽器 不過會比單純的爬蟲慢 http://www.seleniumhq.org


其實我這個最簡單了,用的python3,requests, 只需要驗證一次,就會保存cookies,下次登錄使用cookies登錄。代碼如下:

# -*- coding: utf-8 -*-

""" 知乎登錄分為兩種登錄
一是手機登錄 API : https://www.zhihu.com/login/phone_num
二是郵箱登錄 API : https://www.zhihu.com/login/email

第一步、打開首頁獲取_xref值,驗證圖片
第二步、輸入賬號密碼
第三步、看是否需要驗證、要則下載驗證碼圖片,手動輸入
第四步、判斷是否登錄成功、登錄成功後獲取頁面值。

requests 與 http.cookiejar 相結合使用
session = requests.session
session.cookies = http.cookiejar.LWPCookies(filename="abc")
...
請求網址後
...
session.cookies.save() 保存cookies

載入cookies
try:
session.cookies.load(ignore_discard=True)
except:
print("沒有cookies")
"""

import requests
from bs4 import BeautifulSoup as BS
import time
from subprocess import Popen # 打開圖片
import http.cookiejar
import re

# Headers impersonating a desktop Chrome browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36"
}
home_url = "https://www.zhihu.com"
base_login = "https://www.zhihu.com/login/"  # must be https, not http, or login fails

session = requests.session()
session.cookies = http.cookiejar.LWPCookieJar(filename="ZhiHuCookies")
try:
    # Reuse saved cookies when available
    session.cookies.load(ignore_discard=True)
except OSError:  # no cookie file yet, or it is unreadable/expired
    print("cookie未保存或cookie已過期")

# Step 1: scrape the _xsrf anti-CSRF token from the home page
_xsrf = BS(session.get(home_url, headers=headers).text, "lxml").find("input", {"name": "_xsrf"})["value"]

# Step 2: read the credentials
account = input("請輸入您的賬號:")
password = input("請輸入您的密碼:")

# Step 3: fetch the captcha image.
# BUG FIX: "&" was missing between the r= and type= query parameters.
gifUrl = "http://www.zhihu.com/captcha.gif?r=" + str(int(time.time() * 1000)) + "&type=login"
gif = session.get(gifUrl, headers=headers)
with open("code.gif", "wb") as f:
    f.write(gif.content)
Popen("code.gif", shell=True)  # open with the OS default image viewer
captcha = input("captcha: ")

data = {
    "captcha": captcha,
    "password": password,
    "_xsrf": _xsrf,
}

# Step 4: choose the endpoint by account type (e-mail vs phone number).
# NOTE(review): e-mail regex reconstructed — the pasted version was mangled;
# verify against the project's original source.
if re.match(r"^.+@(\[?)[a-zA-Z0-9\-\.]+\.([a-zA-Z]{2,3}|[0-9]{1,3})(\]?)$", account):
    # e-mail account
    data["email"] = account
    base_login = base_login + "email"
else:
    # phone-number account
    data["phone_num"] = account
    base_login = base_login + "phone_num"

print(data)

# Step 5: log in
response = session.post(base_login, data=data, headers=headers)
print(response.content.decode("utf-8"))

# Step 6: persist the cookies
session.cookies.save()

# Fetch the home page to verify the login worked
resp = session.get(home_url, headers=headers, allow_redirects=False)
print(resp.content.decode("utf-8"))


這段代碼,我在3.5.2版本運行時也出現問題。

#!/usr/bin/env python3
#-*- coding:utf-8 -*-
import gzip
import re
import urllib.request
import http.cookiejar
import urllib.parse

def ungzip(data):
    """Try to gzip-decompress *data*; return the input unchanged if it is not gzip."""
    try:
        print("正在解壓……")
        data = gzip.decompress(data)
        print("解壓完畢!")
    except OSError:  # not gzip-compressed (BadGzipFile is an OSError subclass)
        print("未經壓縮,無需解壓")
    return data

def getXSRF(data):
    """Extract the ``_xsrf`` hidden-input value from the page HTML."""
    # BUG FIX: the original pattern had no space between the name and value
    # attributes (plus mangled quoting), so findall returned [] and
    # strlist[0] raised IndexError — the exact failure described below.
    cer = re.compile(r'name="_xsrf" value="(.*)"', flags=0)
    strlist = cer.findall(data)
    return strlist[0]

def getOpener(head):
    """Build a urllib opener that tracks cookies and sends *head* as headers."""
    cj = http.cookiejar.CookieJar()
    pro = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(pro)
    # addheaders expects a list of (name, value) tuples
    opener.addheaders = list(head.items())
    return opener

header = {
    "Connection": "Keep-Alive",
    "Accept": "text/html,application/xhtml+xml,*/*",
    "Accept-Language": "en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    # BUG FIX: the Host header was misspelled "wwww.zhihu.com"
    "Host": "www.zhihu.com",
    "DNT": "1",
}

url = "http://www.zhihu.com/"
opener = getOpener(header)
op = opener.open(url)
data = op.read()
data = ungzip(data)
_xsrf = getXSRF(data.decode())

# e-mail login endpoint (use login/phone_num with a phone_num field for phones)
url += "login/email"
account = "請輸入你的用戶名"  # renamed from "id", which shadows a builtin
password = "請輸入你的密碼"
postDict = {
    "_xsrf": _xsrf,  # anti-CSRF token scraped from the home page
    "email": account,
    "password": password,
    "rememberme": "y",
}
postData = urllib.parse.urlencode(postDict).encode()
op = opener.open(url, postData)
data = op.read()
data = ungzip(data)

print(data.decode("utf-8"))

運行時出現的錯誤是:

後來我改了getXSRF的返回值,修改如下:

運行通過,顯示登錄了:

不過在Fiddler上,查看到的_xsrf值是[] :

貌似沒有get_xsrf,囧囧,不知道怎麼改這段代碼才能正常獲得xsrf值……-------------------------------------分割線~~2016-9-9-----------------------------------------------
這兩天一直忙著看其他東西,本已忘了這段代碼。剛才花了一點時間重新看了一下這段代碼,使用help()查看re.findall,雖然知道返回的是list,但還是找不出錯誤。於是乎各種Google,無意中看到博客園裡一篇關於正則表達式的博客,耐著性子看了一下規則(小白一枚,表示之前沒有看過正則表達式)上面那段代碼里的 "轉義字元 "很蹊蹺,於是我試著把它刪了,哈哈哈哈哈,然後運行就通過了,在fiddler上查看也能獲取正確的_xsrf值了,成功登錄~~~更改的地方如下:

def getXSRF(data):
    """Corrected extractor: a space between the attributes and proper quoting."""
    # Single-quoted pattern so the embedded double quotes survive
    cer = re.compile(r'name="_xsrf" value="(.*)"', flags=0)
    strlist = cer.findall(data)
    return strlist[0]


如果感興趣,可以查看我的項目littlepai/Unofficial-Zhihu-API,驗證碼可以自動識別,不需要人工輸入.如果有用,記得幫我留個言點個贊之類的


哥們,你跟我前些天找的是同一篇教程啊……爬蟲驗證登錄常常變化,如果自己不擅長分析登錄抓包,可以參考最高贊推薦的fuck-login

悄悄的放上自己的知乎用戶信息爬蟲https://github.com/happyfarmergo/ZhihuCrawler


decode("unicode-escape")
調用下可以正常顯示了


用上面的方法登入成功後,想查看收件箱信息,發現session傳不過去,用cookies也不行
url = "https://www.zhihu.com/inbox"
t= requests.get(url,cookies = cookies, headers = headers)或者
t = session.get(url, headers = headers), 這個是什麼原因呢?


直接放代碼吧:GitHub - nekocode/zhihuSayHi: Say Hi to your new followers in Zhihu.
用的是客戶端的 API 而不是網頁端的 API !所以理論上會更加穩定些。

想測試下效果的話可以關注我 nekocode 測試效果,會收到一條來自機器人發的私信哦~

不想污染 Timeline,可恥地暴露身份匿了。


這個問題是鏈接錯了
url += "login" 錯了,你用Fiddler截取數據或Firefox(Chrome是英語我看不懂不如FireFox透徹),如下圖,請求網站,就會發現post數據的頁面其實是知乎 - 與世界分享你的知識、經驗和見解(如果是email登錄就把phone_num改成email),並且如果是手機號登錄,需要把"email": id,改成"phone_num": id,不然會返回錯誤。然後返回結果用Unicode編碼轉換,Unicode轉換為中文就可以顯示「登陸成功」,祝好運,由於本人說話不是很好理解,不懂可以問我哈。


今天剛做了一個demo,驗證可用,見我的主頁,新手做的,歡迎給點建議…


推薦閱讀:

TAG:Python | 爬蟲計算機網路 | 網頁爬蟲 |