正文
:
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
html = requests.get(url.format(i),headers = headers)
bs = etree.HTML(html.text)
for
sig
in
bs.xpath(
'//li[@class = "video-item matrix"]'
):
title = sig.xpath(
'div[@class = "info"]/div/a/@title'
)[
0
]
click = sig.xpath(
'div[@class = "info"]/div[3]/span[1]'
)[
0
].xpath(
'string(.)'
).strip(
'\n '
)
danmu = sig.xpath(
'div[@class = "info"]/div[3]/span[2]'
)[
0
].xpath(
'string(.)'
).strip(
'\n '
)
date = sig.xpath(
'div[@class = "info"]/div[3]/span[3]'
)[
0
].xpath(
'string(.)'
).strip(
'\n '
)
up = sig.xpath(
'div[@class = "info"]/div[3]/span[4]'
)[
0
].xpath(
'string(.)'
).strip(
'\n '
)
df = pd.DataFrame({
'标题'
:[title],
'播放量'
:[click],
'弹幕'
:[danmu],
'日期'
:[date],
'UP主'
:[up]})
result = pd.concat([result,df])
time.sleep(random.random() +
0.5
)
print(
'已经完成b站第 {} 页爬取'
.format(i))
return
result