下载单章内容
# Download the content of a single chapter.
def download_one_chapter(target_url='http://www.shuquge.com/txt/8659/25441893.html'):
    """Download one chapter page and save it as ``<title>.txt``.

    The page is fetched with ``requests``, the chapter title is taken from
    the first ``.content h1`` text node and the body from the text nodes of
    ``#content``. Each body fragment is stripped of surrounding whitespace
    and the fragments are joined with newlines. The title followed directly
    by the body text is written, UTF-8 encoded, to ``<title>.txt`` (a path
    relative to the current working directory).

    Args:
        target_url: URL of the chapter page. Defaults to the sample chapter
            the original script was written against, so calling the
            function with no argument still works.

    Raises:
        ValueError: If the page has no ``.content h1`` title, since the
            title is needed to name the output file.
    """
    # Fetch the page source; let requests guess the encoding from the body
    # rather than trusting the HTTP headers.
    response = requests.get(target_url)
    response.encoding = response.apparent_encoding
    html = response.text

    # Extract the chapter title and the raw text fragments of the body.
    sel = parsel.Selector(html)
    title = sel.css('.content h1::text').extract_first()
    contents = sel.css('#content::text').extract()

    if title is None:
        # Without a title there is no file name to write to; fail with a
        # clear message instead of a TypeError on ``None + '.txt'``.
        raise ValueError('no chapter title found at ' + target_url)

    # Clean the data: strip surrounding whitespace from every fragment.
    text = '\n'.join(content.strip() for content in contents)

    # Save the chapter; the context manager closes the file even if a
    # write fails.
    with open(title + '.txt', mode='w', encoding='utf-8') as file:
        file.write(title)
        file.write(text)
获取每章URL,下载多章内容
# Collect the chapter links of a book from its table-of-contents page.
def get_chapters_links(target_url):
    """Return the chapter hrefs listed on a table-of-contents page.

    The page at ``target_url`` is fetched with ``requests`` and every
    ``href`` of a ``dd a`` element is collected. Each link is also printed,
    resolved against ``target_url``, as progress output.

    Args:
        target_url: URL of the book's index page, e.g.
            'http://www.shuquge.com/txt/8659/index.html'.

    Returns:
        A list of the ``href`` attribute values exactly as they appear in
        the page (they are NOT resolved to absolute URLs; the caller is
        expected to do that).
    """
    from urllib.parse import urljoin

    # Fetch the index page; let requests guess the encoding from the body.
    response = requests.get(target_url)
    response.encoding = response.apparent_encoding
    html = response.text

    # Extract the chapter hrefs with a CSS selector.
    sel = parsel.Selector(html)
    links = sel.css('dd a::attr(href)').extract()

    for link in links:
        # Resolve against the page actually requested instead of a
        # hard-coded book directory, so the output is right for any book.
        print(urljoin(target_url, link))
    return links
下载一本小说
def get_one_book(book_url):
    """Download every chapter of the book whose index page is ``book_url``.

    The chapter hrefs are obtained from ``get_chapters_links`` and each one
    is resolved against ``book_url`` before being passed to
    ``download_one_chapter``.

    Args:
        book_url: URL of the book's table-of-contents page, e.g.
            'http://www.shuquge.com/txt/8659/index.html'. Chapter hrefs are
            resolved relative to this URL, so it should be the index page
            itself (or the book directory with a trailing slash).
    """
    from urllib.parse import urljoin

    links = get_chapters_links(book_url)
    for link in links:
        # Build the chapter URL from the book being downloaded rather than
        # from a hard-coded book directory.
        download_one_chapter(urljoin(book_url, link))
最后
作者:ZoomToday
链接:https://blog.csdn.net/qq_36477513/article/details/104834629
来源:CSDN
著作权归作者所有,转载请联系作者获得授权,切勿私自转载。