故宫博物院爬虫:基础代码与后续总代码
基础代码
import requests,re,time
from lxml import etree
# Directory URL under which the index page lives.
base_url = r'https://img.dpm.org.cn/Public/static/CCP/'
# Index page URL, built from base_url so the prefix is written only once.
url = base_url + r'index.html'
def getHtml(url):#获取网页源代码
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.3
2022-06-09 13:43:47
295KB
爬虫
1