每一颗渺小的种子都能创造伟大的力量!
完整代码:github
#全国省市区信息api
https://ncov.html5.qq.com/api/getPosition
#全国疫情api
https://ncov.html5.qq.com/api/getCommunity?province=省&city=市&district=区
bs4==0.0.1
requests==2.23.0
pandas==1.0.2
{
"position":{
"云南省":{
"玉溪市":{
"全部":"",
"红塔区":""
},
"德宏傣族景颇族自治州":{
"全部":"",
"瑞丽市":""
}
}
}
}
# Send a browser-like User-Agent header so the requests look like they come
# from a regular browser.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/80.0.3987.100 Safari/537.36 "
)
headers = {"User-Agent": user_agent}
def get_city_info(timeout=10):
    """
    Fetch the province / city / district listing and flatten it into a table.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection raises instead of hanging.

    Returns:
        pandas.DataFrame with the columns ``province``, ``city`` and
        ``district``: one row per district key found under ``position`` in
        the response. The columns are present even when the listing is empty.
    """
    url_position = 'https://ncov.html5.qq.com/api/getPosition'
    r_position = requests.get(url_position, headers=headers, timeout=timeout)
    # The body is JSON, so parse it directly. Round-tripping it through an
    # HTML parser re-escapes characters such as '&' and '<' and can corrupt
    # the payload before json.loads ever sees it.
    positions = json.loads(r_position.text)['position']

    data = []
    for province, cities in positions.items():
        for city, districts in cities.items():
            # Only the district names (the keys) are used.
            for district in districts:
                data.append({'province': province, 'city': city, 'district': district})
    return pd.DataFrame(data, columns=['province', 'city', 'district'])
def get_info(province, city, district, timeout=10):
    """
    Fetch the epidemic information reported for one area.

    Args:
        province: Province name.
        city: City name.
        district: District (county) name.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection raises instead of hanging.

    Returns:
        The decoded JSON response body, as produced by ``json.loads``.
    """
    url_community = 'https://ncov.html5.qq.com/api/getCommunity'
    # Hand the query to requests via ``params`` so every value is
    # percent-encoded; concatenating raw values into the URL breaks as soon
    # as one of them contains '&', '#' or '='.
    query = {
        'province': str(province),
        'city': str(city),
        'district': str(district),
    }
    r_community = requests.get(url_community, params=query, headers=headers, timeout=timeout)
    # The body is JSON, so parse it directly. Serialising it through an HTML
    # parser first would turn '&' in values (e.g. article URLs) into '&amp;'.
    return json.loads(r_community.text)
{
"code":0,
"community":{
"云南省":{
"玉溪市":{
"红塔区":[
{
"province":"云南省",
"city":"玉溪市",
"district":"红塔区",
"county":"",
"street":"北城街道",
"community":"大石板社区秧草塘村",
"show_address":"大石板社区秧草塘村",
"cnt_inc_uncertain":"-1",
"cnt_inc_certain":"-1",
"cnt_inc_die":"-1",
"cnt_inc_recure":"-1",
"cnt_sum_uncertain":"-1",
"cnt_sum_certain":"2",
"cnt_sum_die":"-1",
"cnt_sum_recure":"-1",
"full_address":"云南省玉溪市红塔区北城街道大石板社区秧草塘村",
"release_date":"",
"article_source":[
{
"title":"玉溪新增4例确诊病例 详细路线公布!",
"url":"https://mp.weixin.qq.com/s/THaIpahx_5VTWP2jcMMRMw"
},
{
"title":"玉溪市新增确诊病例1例!红塔大道50号华瑞小区实施隔离封闭管理",
"url":"https://mp.weixin.qq.com/s/AjB4YCWmfSiJHzBJNhzHpg"
}
],
"id":"60bd8bd1be4e00780a29336dab42f18e",
"lng":"102.51207",
"lat":"24.46442",
"doc_id":"90000052_60bd8bd1be4e00780a29336dab42f18e",
"source":[
{
"name":"玉溪发布",
"url":""
}
],
"communitytype":1,
"distance":-1
}
]
}
}
}
}
解析接口返回的 JSON 数据,将其转换为由 dict 组成的结果集,以便后续以其他格式输出处理(此处的实现比较粗糙)。
def format_data(info, result):
    """
    Flatten epidemic information into flat rows appended to ``result``.

    Walks ``info['community']`` (province -> city -> district -> list of
    community records). For each community record it appends one dict per
    entry in its ``article_source`` list, carrying that article's ``title``
    and ``url``. A record with no articles still gets a single row, with
    empty ``title`` and ``url``.

    Args:
        info: Epidemic information; a mapping with a ``community`` key laid
            out as described above.
        result: List that the generated row dicts are appended to; it is
            modified in place.

    Returns:
        None
    """
    for cities in info['community'].values():
        for districts in cities.values():
            for communities in districts.values():
                for x in communities:
                    # Fields shared by every row generated for this community.
                    base = {'province': x['province'], 'city': x['city'], 'district': x['district'],
                            'street': x['street'], 'community': x['community'],
                            'full_address': x['full_address'],
                            'lat': x['lat'], 'lng': x['lng']}
                    # A missing or null article list is treated like an empty one.
                    articles = x.get('article_source') or []
                    if not articles:
                        result.append(dict(base, title='', url=''))
                        continue
                    # Append inside the loop: appending once after it would
                    # keep only the last article of each community.
                    for y in articles:
                        result.append(dict(base, title=y['title'], url=y['url']))