根據小甲魚60課編寫的爬取代理IP代碼(python2.7)
# -*- coding: utf-8 -*-
"""Scrape IPv4 addresses from the xicidaili.com proxy listing page.

Written against Python 2.7 (urllib2); the import fallback below lets the
same code run on Python 3 as well.
"""
import random
import re
import urllib  # kept from the original script; not used directly here
from contextlib import closing

try:  # Python 2
    import urllib2 as urllib_request
except ImportError:  # Python 3
    import urllib.request as urllib_request

# Browser-like User-Agent sent with every request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36"
)

# One decimal octet (0-255). The three-digit alternatives come first so that
# "255" is not cut short to "25" by the shorter one/two-digit alternative.
_OCTET = r"(?:25[0-5]|2[0-4]\d|[01]?\d?\d)"

# Dotted-quad IPv4 address. The digit look-arounds stop the pattern from
# matching a fragment of a longer number (e.g. "99.1.1.1" inside "999.1.1.1").
IP_PATTERN = re.compile(r"(?<!\d)(?:%s\.){3}%s(?!\d)" % (_OCTET, _OCTET))


def url_open(url, proxies=None):
    """Download *url* and return the raw response body.

    Args:
        url: Address to fetch.
        proxies: Optional sequence of "host:port" strings. When non-empty,
            one entry is picked at random and HTTP traffic is routed
            through it; otherwise the request is sent directly.

    Returns:
        The response body as read from the connection (``str`` on
        Python 2, ``bytes`` on Python 3).
    """
    req = urllib_request.Request(url)
    req.add_header("User-Agent", USER_AGENT)

    if proxies:
        # Proxied access: build a one-off opener bound to the chosen proxy.
        proxy_handler = urllib_request.ProxyHandler(
            {"http": random.choice(proxies)}
        )
        opener = urllib_request.build_opener(proxy_handler)
        response = opener.open(req)
    else:
        # Direct access.
        response = urllib_request.urlopen(req)

    # closing() works on both Python 2 and 3 response objects and makes
    # sure the connection is released even if read() raises.
    with closing(response):
        return response.read()


def get_img(html):
    """Print every IPv4 address found in *html*, one per line.

    Despite its name, this function extracts IP addresses, not images.

    Args:
        html: Page content as text, or as undecoded bytes (as returned by
            ``url_open`` on Python 3).

    Returns:
        The list of matched address strings, in order of appearance.
    """
    # On Python 3 the downloaded body is bytes and must be decoded before a
    # text pattern can be applied; on Python 2 ``bytes is str`` so this
    # branch is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    iplist = IP_PATTERN.findall(html)

    for each in iplist:
        print(each)

    return iplist


if __name__ == "__main__":
    url = "http://www.xicidaili.com/"
    get_img(url_open(url))
執行結果:
推薦閱讀:
※自學python3的爬蟲,但是網上普遍是python2的書,想問下有沒有好的爬蟲的python3書?
※Python爬蟲實戰之使用Scrapy爬起點網的完本小說
※用 python 對 github 用戶 followers 分析
※第四章:動態網頁抓取 (解析真實地址 + selenium)
TAG:python爬虫 |