Python爬虫+ffmpeg爬取b站高清视频(最新可用版 附源码)
以此视频为例 玩法一直被误解,但数值碾压五虎的存在!!_哔哩哔哩bilibili_英雄联盟_第一视角
b站视频网址经过更新已变成了这种形式,由这三个 BV_id,spm_id_from,vd_source 参数构成
1.导入相关包
2.写入headers,里面要加 Cookie(用于获取最高清画质)、Referer 和 User-Agent
3.向上面的url发送请求,获取视频标题
这里利用 正则 提取标题数据
4.获取视频和音频的链接
也是用 正则 提取相关的视频和音频链接
5.保存视频和音频的数据
6.音视频合成(用到ffmpeg软件)
这里得先安装 ffmpeg 并配置相关环境变量(网上有相关教程),配置完后一定要重启电脑,否则无法生效。
7.主函数
"""Download a Bilibili video's DASH audio/video streams and merge them with ffmpeg.

Usage: run the script and paste a video URL of the form
``https://www.bilibili.com/video/<BV_ID>/?spm_id_from=...&vd_source=...``.

To fetch the highest quality (1080p), export your own logged-in browser cookie
as the ``BILIBILI_COOKIE`` environment variable before running.
"""
import html
import json
import os
import re
import subprocess
import sys
from urllib.parse import parse_qs, urlsplit

import requests

# Directory that receives the merged output file; created at import time,
# exactly as the original script did.
OUTPUT_DIR = 'b_video'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Seconds to wait for a connection / for the next chunk of data before giving up.
REQUEST_TIMEOUT = 30
# Media is streamed to disk in pieces of this size instead of held in memory.
CHUNK_SIZE = 1 << 20

# Characters that cannot appear in file names on Windows and/or POSIX.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
    'referer': 'https://www.bilibili.com/?spm_id_from=333.1365.0.0',
}

# The login cookie is a personal credential, so it is read from the
# environment rather than committed to source. Without it the requests are
# still sent, just without the account's session.
_cookie = os.environ.get('BILIBILI_COOKIE', '').strip()
if _cookie:
    headers['Cookie'] = _cookie


class PageParseError(IndexError):
    """Raised when an expected fragment is missing from the video page.

    Subclasses ``IndexError`` so code that caught the failure of the former
    ``re.findall(...)[0]`` lookups keeps working.
    """


def _search_group(pattern, text, what, flags=0):
    """Return group 1 of the first match of *pattern* in *text*.

    Raises:
        PageParseError: if the pattern does not match; *what* names the
            missing fragment in the error message.
    """
    match = re.search(pattern, text, flags)
    if match is None:
        raise PageParseError(f'could not find {what} in the page')
    return match.group(1)


def _safe_filename(title):
    """Turn a page title into a string that is usable as a file name."""
    # Spaces are removed (as before) so existing output names stay the same.
    cleaned = html.unescape(title).replace(' ', '')
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', cleaned)
    return cleaned or 'video'


def get_resopnse(html_url, params=None):
    """GET *html_url* with the shared headers and return the response.

    Args:
        html_url: URL to request.
        params: optional dict of query-string parameters.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond in time.
    """
    response = requests.get(
        url=html_url, params=params, headers=headers, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response


def get_viedeo_info(html_url):
    """Fetch the video page and return ``[title]``.

    The title is taken from the ``<h1 class="video-title">`` element and
    normalised so it can be used directly as a file name.

    Raises:
        PageParseError: if the title element is not present in the page.
    """
    page_text = get_resopnse(html_url).text
    raw_title = _search_group(
        r'<h1 title="(.*?)" class="video-title" .*>', page_text, 'the video title'
    )
    return [_safe_filename(raw_title)]


def get_video_content(BV_ID, spm_id_from, vd_source):
    """Return ``[audio_url, video_url]`` for the given video.

    The URLs are read from the ``window.__playinfo__`` JSON embedded in the
    page; the first entry of each DASH stream list is used.

    Raises:
        PageParseError: if the page has no ``window.__playinfo__`` script.
        KeyError: if the play info carries no DASH streams.
    """
    index_url = 'https://www.bilibili.com/video/' + BV_ID + '/'
    query = {
        'spm_id_from': spm_id_from,
        'vd_source': vd_source,
    }
    page_text = get_resopnse(index_url, params=query).text
    playinfo_json = _search_group(
        r'window\.__playinfo__=({.*?})\s*</script>',
        page_text,
        'window.__playinfo__',
        re.S,
    )
    dash = json.loads(playinfo_json)['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']
    return [audio_url, video_url]


def _download(url, path):
    """Stream *url* into the file at *path*, replacing any existing file."""
    with requests.get(
        url=url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT
    ) as response:
        response.raise_for_status()
        # 'wb' (not 'ab'): a re-run must replace the old file, not append to it.
        with open(path, mode='wb') as fp:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                fp.write(chunk)


def save(title, audio_url, video_url):
    """Download the audio to ``<title>.mp3`` and the video to ``<title>.mp4``."""
    _download(audio_url, title + '.mp3')
    _download(video_url, title + '.mp4')
    print(title, '保存完成')


def merge_data(video_name):
    """Mux ``<video_name>.mp4`` and ``<video_name>.mp3`` into the output directory.

    The result is written to ``b_video/<video_name>output.mp4``. Requires the
    ``ffmpeg`` executable on PATH; problems are reported on stderr instead of
    raising, matching the original best-effort behaviour.
    """
    output_path = os.path.join(OUTPUT_DIR, f'{video_name}output.mp4')
    # An argument list (no shell) keeps titles containing quotes, '&', ';' and
    # similar from being interpreted as shell syntax. The explicit './' prefix
    # stops a title that starts with '-' from being parsed as an ffmpeg option.
    cmd = [
        'ffmpeg',
        '-i', os.path.join(os.curdir, f'{video_name}.mp4'),
        '-i', os.path.join(os.curdir, f'{video_name}.mp3'),
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        output_path,
    ]
    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        print('ffmpeg was not found on PATH; cannot merge audio and video',
              file=sys.stderr)
        return
    if result.returncode != 0:
        print(f'ffmpeg exited with status {result.returncode}; '
              f'{output_path} may be missing or incomplete', file=sys.stderr)


def main(BV_ID, spm_id_from, vd_source):
    """Download and merge the video identified by the three URL parameters."""
    html_url = f'https://www.bilibili.com/video/{BV_ID}/?spm_id_from={spm_id_from}&vd_source={vd_source}'
    video_info = get_viedeo_info(html_url)
    video_content = get_video_content(BV_ID, spm_id_from, vd_source)
    save(video_info[0], video_content[0], video_content[1])
    merge_data(video_info[0])


def _parse_video_url(b_url):
    """Split a video URL into ``(BV_ID, spm_id_from, vd_source)``.

    Query parameters may appear in any order; a missing one becomes ``''``.

    Raises:
        ValueError: if the URL has no ``/video/<BV_ID>`` path component.
    """
    parts = urlsplit(b_url.strip())
    segments = [segment for segment in parts.path.split('/') if segment]
    try:
        bv_id = segments[segments.index('video') + 1]
    except (ValueError, IndexError):
        raise ValueError(f'not a Bilibili video URL: {b_url!r}') from None
    query = parse_qs(parts.query)
    spm_id_from = query.get('spm_id_from', [''])[0]
    vd_source = query.get('vd_source', [''])[0]
    return bv_id, spm_id_from, vd_source


if __name__ == '__main__':
    # Example input:
    # https://www.bilibili.com/video/BV1et421a7E1/?spm_id_from=333.337.search-card.all.click&vd_source=77c296188837a388ccd6343ff122de09
    b_url = input('输入爬取视频的网址:')
    BV_ID, spm_id_from, vd_source = _parse_video_url(b_url)
    main(BV_ID, spm_id_from, vd_source)

# NOTE: trailing text scraped from the original web page (not part of the
# script): 2024校源行“就业技能培训系列专题课”正式启动
所有评论(0)