Python 爬取網頁圖片
import os
import re
import urllib.error
import urllib.request

# Directory the script saves into by default.
DEFAULT_SAVE_DIR = "C:/Users/zhanghl/Desktop/Awork/mm"

# Captures the value of a src="..." attribute that ends in ".JPEG".
# [^"] keeps a match inside one attribute value, and the dot is escaped so
# that only a literal ".JPEG" suffix qualifies.
IMAGE_SRC_PATTERN = re.compile(r'src="([^"]*?\.JPEG)"')

# 0-based position, within the list of matches, of the single image that is
# downloaded (the script saves the fifth match).
TARGET_IMAGE_INDEX = 4

# File name the selected image is stored under inside the save directory.
TARGET_FILE_NAME = "1.JPEG"


class Picture:
    """Fetch a web page and download one of the JPEG images it references."""

    def __init__(self, url, save_dir=DEFAULT_SAVE_DIR):
        """Remember the page URL and the directory images are saved into.

        Args:
            url: Address of the page to scan for images.
            save_dir: Target directory; defaults to DEFAULT_SAVE_DIR.
        """
        self.url = url
        self.save_dir = save_dir.strip()

    def getPageContent(self):
        """Return the page body decoded as UTF-8, or None if the fetch fails.

        A urllib.error.URLError is reported on stdout and turned into a
        None result; other exceptions propagate.
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1)"}
        request = urllib.request.Request(self.url, headers=headers)
        try:
            # The context manager closes the response even if read() fails.
            with urllib.request.urlopen(request) as response:
                return response.read().decode("utf-8")
        except urllib.error.URLError as e:
            print(getattr(e, "reason", e))
            return None

    def getImage(self):
        """List every .JPEG source on the page and download the selected one.

        Prints the index and URL of each match. If the page has more than
        TARGET_IMAGE_INDEX matches, the match at that index is saved as
        TARGET_FILE_NAME in the save directory (which must already exist)
        and a completion message is printed. Prints None when the page
        could not be fetched or contains no matching image.
        """
        content = self.getPageContent()
        if content is None:
            # The fetch error was already reported by getPageContent.
            print(None)
            return

        image_urls = [src.strip() for src in IMAGE_SRC_PATTERN.findall(content)]
        if not image_urls:
            print(None)
            return

        for index, image_url in enumerate(image_urls):
            print(index)
            print(image_url)

        # Guard the lookup: pages with too few images used to raise IndexError.
        if len(image_urls) > TARGET_IMAGE_INDEX:
            target = os.path.join(self.save_dir, TARGET_FILE_NAME)
            urllib.request.urlretrieve(image_urls[TARGET_IMAGE_INDEX], target)
            print("下載完成")

    def mkdir(self):
        """Create the save directory if it is missing.

        Returns:
            True if the directory was created, False if the path already
            existed.
        """
        if os.path.exists(self.save_dir):
            print(u"該目錄已經存在")
            return False
        # makedirs also creates missing parent directories.
        os.makedirs(self.save_dir)
        return True

    def start(self):
        """Ensure the save directory exists, then run the image download."""
        self.mkdir()
        self.getImage()


url = "https://mbd.baidu.com/newspage/data/landingsuper?context=%7B%22nid%22%3A%22news_6445741139827118489%22%7D&n_type=0&p_from=1"

# Only hit the network and the filesystem when run as a script, not on import.
if __name__ == "__main__":
    p = Picture(url)
    p.start()
推薦閱讀:
※基於餘弦相似性的404頁面識別
※Python爬蟲學習之(一)| 從零開始
※PYTHON爬蟲將相對路徑轉化為絕對路徑
※用Python爬取網易雲音樂歌曲
※出門旅行沒規劃?python抓取馬蜂窩自由行攻略!
TAG:python爬蟲 |