|
# [python014] Scraping and parsing the tide / weather briefing (tide-calendar
# data) with Python -- source code download.
#
# Likes, follows, criticism and corrections are welcome -- let's follow each
# other!
#
# Contents
#   1. Brief introduction
#   2. Python code for scraping and parsing the data
#      2.1 Source code
#      2.2 Python source code for further parsing of the field contents
#   3. References
#
# 1. Brief introduction
#   Uses the tide data published on the "Tide Table Quick Navigation" website
#   (潮汐表快速导航) as the basis for further applications.  The article shows a
#   data-visualisation figure at this point (figure not included here).
#   Code written by a large language model did not solve the problem (possibly
#   because the free tier was used, haha), so this code was written in spare
#   time and is provided for reference.
#
# 2. Python code for scraping and parsing the data
# 2.1 Source code

import re
import warnings
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

warnings.filterwarnings('ignore')
pd.set_option('display.width', 500)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_colwidth', 1000)

# Zhejiang - Ningbo City - Songlanshan
zj_nb_sl_url = 'https://www.eisk.cn/Calendar/1259.html'
zj_nb_sl_name = '浙江-宁波市-松兰山'
# Zhejiang - Xiangshan County - Xize
zj_xs_xz_url = 'https://www.eisk.cn/Calendar/463.html'
zj_xs_xz_name = '浙江-象山县-西泽'
# Zhejiang - Xiangshan County - Shipu Harbour
zj_xs_sp_url = 'https://www.eisk.cn/Calendar/460.html'
zj_xs_sp_name = '浙江-象山县-石浦港'
# Suffix of the output workbook name ("tide table for the next 15 days").
wl15tcxb = '未来15天潮汐表'

# Column labels applied to the concatenated result, in the same order as the
# values appended per forecast entry in parser_response_html().
RESULT_COLUMNS = [
    'title', 'hour_date', 'day', 'lunar_calendar', 'humidity', 'temperature',
    'tide2', 'skycon', 'visibility', 'dswrf', 'wave2', 'wave1', 'wave3',
    'wind2', 'wind1', 'wind3', 'description',
]

# Seconds to wait for the site before giving up; without a timeout
# requests.get() can block indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# NOTE(review): parser_ymd_date() and ret_timestamp_bystr() are called below
# but are not defined anywhere in this text; they must be supplied before the
# script can run.  Their exact input/output formats cannot be determined from
# here.


def _div_text(node, css_class):
    """Return the stripped text of the first ``<div class=css_class>`` in *node*."""
    return node.find('div', class_=css_class).text.strip()


def parser_response_html(_url, _name, timeout=REQUEST_TIMEOUT_SECONDS):
    """Download one tide-calendar page and return its entries as a DataFrame.

    Args:
        _url: Address of the calendar page to fetch.
        _name: Human-readable location name.  Not used by the parser; kept so
            existing callers keep working.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        A ``pandas.DataFrame`` with one row per matching ``<a>`` element and
        ``len(RESULT_COLUMNS)`` integer-labelled columns (also when the page
        yields no rows, so frames can be concatenated safely).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(_url, timeout=timeout)  # send the HTTP request
    response.raise_for_status()  # raise HTTPError if the request failed

    # Parse the HTML.
    soup = BeautifulSoup(response.text, 'html.parser')
    og_title_desc = soup.find(attrs={"property": "og:description"})['content']

    result_html = []
    entry_links = soup.find_all(
        'a', href=re.compile(r'/Tides/\d+\.html\?date=.*?'))
    for a_slice in entry_links:
        hour = _div_text(a_slice, 'hour')
        hour_date = parser_ymd_date(hour)
        day = _div_text(a_slice, 'day')
        # The first 'temperature' div ends up in the 'lunar_calendar' column.
        _temperature = _div_text(a_slice, 'temperature')
        humidity = _div_text(a_slice, 'humidity')
        # The 'temperature' div carrying a colour style is the value stored
        # in the 'temperature' column.
        temperature = a_slice.find(
            'div', class_='temperature',
            style=re.compile(r'color:.*?')).text.strip()
        # Children of one tide2 div are joined with ',', the divs with ';'.
        tide2 = ';'.join(
            ','.join(c.string.strip() for c in t.contents)
            for t in a_slice.find_all('div', attrs={'class': 'tide2'})
        )
        skycon = _div_text(a_slice, 'skycon')
        visibility = _div_text(a_slice, 'visibility')
        dswrf = _div_text(a_slice, 'dswrf')
        wave2 = _div_text(a_slice, 'wave2')
        wave1 = _div_text(a_slice, 'wave1')
        # wave3 is optional on the page; fall back to an empty string.
        wave3_div = a_slice.find('div', class_='wave3')
        wave3 = wave3_div.text.strip() if wave3_div is not None else ''
        wind2 = _div_text(a_slice, 'wind2')
        wind1 = _div_text(a_slice, 'wind1')
        wind3 = _div_text(a_slice, 'wind3')
        # NOTE(review): the last replace() read replace('', '') in the
        # whitespace-stripped text, which is a no-op; restored as removing
        # spaces -- confirm against the original article.
        description = ''.join(
            desc.text.strip()
            .replace('\n', '').replace('\r', '').replace(' ', '')
            for desc in a_slice.find_all('div', class_='description')
        )
        result_html.append([
            og_title_desc, hour_date, day, _temperature, humidity,
            temperature, tide2, skycon, visibility, dswrf, wave2, wave1,
            wave3, wind2, wind1, wind3, description,
        ])

    # Fixing the column count keeps an empty page from producing a
    # zero-column frame that would break the concat / rename below.
    result_df = pd.DataFrame(result_html, columns=range(len(RESULT_COLUMNS)))
    return result_df


zj_nb_sl_df = parser_response_html(zj_nb_sl_url, zj_nb_sl_name)
zj_xs_xz_df = parser_response_html(zj_xs_xz_url, zj_xs_xz_name)
zj_xs_sp_df = parser_response_html(zj_xs_sp_url, zj_xs_sp_name)

union_df = pd.concat([zj_nb_sl_df, zj_xs_xz_df, zj_xs_sp_df])
union_df.columns = RESULT_COLUMNS
# The 'encoding' argument of to_excel() was removed in pandas 2.0 and is
# therefore no longer passed.
union_df.to_excel(
    './%s-%s-%s-%s.xlsx' % (zj_nb_sl_name, zj_xs_xz_name, zj_xs_sp_name,
                            wl15tcxb),
    index=False,
)

# A sample of the parsed data is shown as a figure in the article (figure not
# included here).
#
# 2.2 Python source code for further parsing of the field contents

datas = pd.read_excel(r'..\XX-XXX-未来15天潮汐表.xlsx')
# Keep the part of the title before the first comma.
# NOTE(review): the delimiter appears as a full-width comma in the extracted
# text, where every comma is full-width; both comma forms are accepted here.
datas['xscxqy'] = datas.title.apply(
    lambda title: re.split(r'[,,]', title, maxsplit=1)[0])
# Keep the part of the description that precedes '退潮时间' (presumably the
# flood-tide times -- confirm).
datas['zcsj'] = datas.description.apply(
    lambda description: description.split('退潮时间')[0])
datas2 = datas[['hour_date', 'xscxqy', 'zcsj']]

# The pattern was garbled to r'(\d+)\d+)' (unbalanced parenthesis) in the
# extracted text; restored as hour:minute, accepting either colon form.
time_pattern = re.compile(r'(\d+)[::](\d+)')

result = []
for inx, rw in datas2.iterrows():
    zcsj_lis = time_pattern.findall(rw['zcsj'])
    # Consecutive matches form (start, end) pairs; an unpaired trailing
    # match is ignored instead of raising IndexError.
    for (ks_h, ks_m), (js_h, js_m) in zip(zcsj_lis[0::2], zcsj_lis[1::2]):
        # NOTE(review): the space between date and time is restored on the
        # assumption that it was lost with the rest of the whitespace.
        result.append([
            rw['hour_date'],
            rw['xscxqy'],
            rw['zcsj'],
            '%s %02d:%02d' % (rw['hour_date'], int(ks_h), int(ks_m)),
            '%s %02d:%02d' % (rw['hour_date'], int(js_h), int(js_m)),
        ])

# Naming the columns at construction also works when no rows were collected.
result_df = pd.DataFrame(
    result, columns=['cxsj', 'cxqy', 'zcsj_desc', 'zcsj_ks', 'zcsj_js'])
result_df['zcsj_desc'] = result_df.zcsj_desc.apply(
    lambda zcsj_desc: zcsj_desc.replace('?', 'N'))
result_df['zcsj_ks_sjc'] = result_df.zcsj_ks.apply(
    lambda zcsj_ks: ret_timestamp_bystr(zcsj_ks))
result_df['zcsj_js_sjc'] = result_df.zcsj_js.apply(
    lambda zcsj_js: ret_timestamp_bystr(zcsj_js))
result_df['cxqy2'] = 'XXX-XX市-XX县'

# 3. References
#   - Tide Table Quick Navigation (潮汐表快速导航)
#   - [python014] Scraping and parsing the tide / weather briefing
#     (tide-calendar data) with Python -- source code download
|
|