网站地址:http://sc.chinaz.com/tupian/index.html
直接上代码:
# Scrape the picture listing pages of sc.chinaz.com and put every picture on
# its own blank slide of a PowerPoint deck built with python-pptx.

import io
import os  # retained from the original script; no longer used by this code
from typing import Iterator, Tuple
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from pptx import Presentation
from pptx.util import Inches

# Browser-like request headers collected by the original author.
# NOTE(review): the original script builds this dict but never passes it to
# requests.get(), and that behaviour is kept here. "Upgrade-Insecure-Requests"
# used to be the integer 1; requests only accepts str/bytes header values, so
# it is a string now and the dict is usable if it is ever wired in.
header = {
    "Referer": "http://sc.chinaz.com/tupian/index_2.html",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
    "Host": "sc.chinaz.com",
    "If-None-Match": "b22f28e7941dd41:0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Connection": "keep-alive",
}

# First listing page; only used to read the total number of pages.
INDEX_URL = "http://sc.chinaz.com/tupian/index.html"
# Listing pages 2..N follow this pattern.
PAGE_URL_TEMPLATE = "http://sc.chinaz.com/tupian/index_%s.html"
# NOTE(review): python-pptx writes the .pptx (Open XML) format whatever the
# extension is. The original file name is kept so the output stays where
# existing users expect it; consider renaming it to "ppt1.pptx".
OUTPUT_FILE = "ppt1.ppt"
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Position and size of the picture on each slide.
left, top, width, height = Inches(1), Inches(0.5), Inches(8), Inches(6)


def fetch_soup(url: str) -> BeautifulSoup:
    """Download *url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection errors, timeouts or an
            HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")


def read_max_page(soup: BeautifulSoup) -> int:
    """Return the page count, taken from the text of the last <b> tag.

    Raises:
        ValueError: if the page has no <b> tag or its text is not a number.
    """
    bold_tags = soup.find_all("b")
    if not bold_tags:
        raise ValueError("no <b> tag found; cannot determine the page count")
    return int(bold_tags[-1].text)


def iter_images(soup: BeautifulSoup, page_url: str) -> Iterator[Tuple[str, str]]:
    """Yield ``(name, image_url)`` for every picture in ``div#container``.

    ``name`` is the ``alt`` text (empty if absent) and ``image_url`` is the
    ``src2`` attribute resolved against *page_url*. <img> tags without a
    ``src2`` attribute are skipped; a page without the container yields
    nothing.
    """
    container = soup.find("div", id="container")
    if container is None:
        return
    for img_tag in container.find_all("img"):
        source = img_tag.get("src2")
        if not source:
            continue
        # urljoin leaves absolute URLs untouched and resolves relative or
        # protocol-relative ("//host/...") ones against the listing page.
        yield img_tag.get("alt", ""), urljoin(page_url, source)


def download_image(url: str) -> bytes:
    """Return the raw bytes of the picture at *url*.

    Raises:
        requests.RequestException: on connection errors, timeouts or an
            HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.content


def add_picture_slide(prs, image_bytes: bytes) -> None:
    """Append a blank slide to *prs* and place the picture on it."""
    # Layout 6 of the default template is the blank layout.
    blank_layout = prs.slide_layouts[6]
    slide = prs.slides.add_slide(blank_layout)
    # add_picture accepts a file-like object, so the picture never has to be
    # written to disk (no temp directory, no file-name sanitising, no cleanup
    # that could delete unrelated files).
    slide.shapes.add_picture(io.BytesIO(image_bytes), left, top, width, height)


def main() -> None:
    """Download the pictures of listing pages 2..N into ``OUTPUT_FILE``.

    Page 1 (index.html) is only used to read the page count; its pictures
    are not downloaded, exactly as in the original script. A listing page or
    picture that cannot be downloaded is reported and skipped.
    """
    prs = Presentation()

    # Look up the highest page number.
    max_page = read_max_page(fetch_soup(INDEX_URL))
    print(f"最大页数为{max_page}")

    # Inclusive upper bound: range(2, max_page) skipped the last page.
    for page in range(2, max_page + 1):
        print(f"下载第{page}页的图片")
        page_url = PAGE_URL_TEMPLATE % page
        try:
            page_soup = fetch_soup(page_url)
        except requests.RequestException as exc:
            print(f"第{page}页下载失败,已跳过 ({exc})")
            continue

        for name, image_url in iter_images(page_soup, page_url):
            # Display name of the picture (alt text plus extension).
            imgname = name + ".jpg"
            print(f"正在下载图片--{imgname}")
            try:
                image_bytes = download_image(image_url)
            except requests.RequestException as exc:
                print(f"图片下载失败,已跳过--{imgname} ({exc})")
                continue
            add_picture_slide(prs, image_bytes)

    prs.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()
个人娱乐