添加链接
link管理
链接快照平台
  • 输入网页链接,自动生成快照
  • 标签化管理网页链接
# NOTE(review): this listing was recovered from a page where the whole script
# had been collapsed onto a single line. The import lines that bind
# `requests`, `etree` and `pd`, and the `def` line of `get_html`, are not
# present in the visible source. The signature below is reconstructed from the
# `get_html(url)` call sites -- confirm against the original script.
def get_html(url):
    """Fetch ``url`` and return the response body decoded as GBK.

    Returns the decoded text when the response status code is 200. For any
    other status code, or when an exception is raised inside the ``try``,
    a message is printed and the function falls through, so the caller
    receives ``None``.
    """
    # The user-agent value is a placeholder literal (it reads "your own
    # browser information"); presumably meant to be replaced by the user.
    header = {'user-agent': '你自己的浏览器信息'}
    try:
        response = requests.get(url, headers=header)
        # Check whether the page was returned successfully.
        if response.status_code == 200:
            return response.content.decode('gbk')
        else:
            print("{0}网页请求状态码错误!{0}".format("-" * 10))
    except Exception as e:
        print("{0}请求参数出现错误:{1}{0}".format("-" * 10, e))


def parse_url(url, xpath_path):
    """Return ``(text, link)`` pairs for the nodes matched by ``xpath_path``.

    The page at ``url`` is downloaded with ``get_html``. The text and the
    ``href`` attribute of the nodes selected by ``xpath_path`` are collected
    separately and paired positionally with ``zip``. Each link is built by
    appending the ``href`` to ``url`` with its last ``/``-separated segment
    removed.

    Returns an empty list when ``get_html`` produced no HTML (it returns
    ``None`` after printing its own error message), instead of passing
    ``None`` on to the parser.
    """
    html = get_html(url)
    if not html:
        # The failure has already been reported by get_html; skip this page.
        return []
    # Base part of the next-level URLs: ``url`` without its last segment.
    next_base_url = "/".join(url.split("/")[:-1])
    tree = etree.HTML(html)
    # Names at this level and the matching next-level links.
    all_area = tree.xpath(f' {xpath_path} /text()')
    next_link = tree.xpath(f' {xpath_path} /@href')
    return [
        (name, next_base_url + "/" + href)
        for name, href in zip(all_area, next_link)
    ]


def parse_url2(url, xpath_path):
    """Return the text of the nodes matched by ``xpath_path`` on the last level.

    Same as ``parse_url`` but for the final level, which has no links to
    follow: only the ``text()`` results are returned. Returns an empty list
    when ``get_html`` produced no HTML.
    """
    html = get_html(url)
    if not html:
        # The failure has already been reported by get_html; skip this page.
        return []
    tree = etree.HTML(html)
    villagetr = tree.xpath(f' {xpath_path} /text()')
    return villagetr


result = []
xpath_path = '//tr[@class="countytr"]/td[2]/a'
url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/34/3401.html"
# XPath expressions for the two deeper levels. They do not change between
# iterations, so they are defined once, under their own names, instead of
# rebinding `xpath_path` inside the loops.
town_xpath = '//tr[@class="towntr"]/td[2]/a'
village_xpath = '//tr[@class ="villagetr"]/td[3]'

# City page -> districts: name and link.
for area1, area_url in parse_url(url, xpath_path):
    # District page -> towns: name and link.
    for area2, town_url in parse_url(area_url, town_xpath):
        # Town page -> last level: names only.
        result.extend(
            [area1, area2, k] for k in parse_url2(town_url, village_xpath)
        )

# One row per last-level entry; the column labels and the output file name
# are runtime strings and are kept exactly as in the original.
df = pd.DataFrame(result, columns=["区", "镇/街道", "居委会"])
df.to_excel("合肥市行政区域划分.xlsx", index=False)

# Footer of the page this listing was taken from (kept verbatim):
# 【版权声明】本文为华为云社区用户原创内容,未经允许不得转载,如需转载请自行联系原作者进行授权。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: [email protected]