之前爬取了各省一年来的天气状况、气温等数据,这次对这些数据进行整理,目标是:

1.批量输出每个省的每个月份出现频次最多的天气情况;

2.批量输出每个省每个月的平均气温(对形如“5℃/10℃”的气温记录,先取其中各数值的均值,再按月求平均)。

代码:

########################### Weather condition summary ###############################
# For every province listed in the first column of "省份及省会.xlsx":
#   1. read that province's daily weather CSV from `input_dir`;
#   2. count how often each weather condition occurs in each month;
#   3. keep, per month, the condition(s) with the highest count (ties are kept);
#   4. write the result to a CSV named after the province in `output_dir`.
library(readxl)
library(lubridate)
library(data.table)
library(sqldf)

input_dir <- 'D:\\working directory\\天气数据\\'
output_dir <- 'D:\\working directory\\天气结果\\'

pro_capital <- read_excel("省份及省会.xlsx")

# Take the province names as a plain character vector and iterate over however
# many rows the sheet actually has, instead of assuming a fixed row count.
provinces <- as.character(pro_capital[[1]])
provinces <- provinces[!is.na(provinces)]

for (province in provinces) {
  data0 <- read.csv(paste0(input_dir, province, '.csv'))

  # Drop the first column (presumably a row-index column written when the data
  # was saved -- TODO confirm against the scraping script).
  data0 <- data0[, -1]

  # The remaining first column is expected to be `日期`, formatted like
  # "2018年01月31日".
  data0[, 1] <- as.Date(data0[, 1], format = "%Y年%m月%d日")

  # months() returns the month *name*, which depends on the session locale.
  data0$month <- months(data0$日期)

  data0 <- as.data.table(data0)

  # Number of days per (month, weather condition) pair.
  bydata <- data0[, .N, by = c('month', '天气状况')]

  # Keep every row whose count equals the maximum count within its month.
  data1 <- sqldf('select * from bydata a where exists (select 1
                  from bydata where month=a.month group by month having
                  a.N=max(N))')
  names(data1) <- c('月份', '天气状况', 'N')

  write.csv(data1, paste0(output_dir, province, '.csv'))
}

########################### Temperature summary #################################
# For every province listed in the first column of "省份及省会.xlsx":
#   1. read that province's daily weather CSV from `input_dir`;
#   2. turn each day's temperature text (e.g. "5℃/10℃") into one number, the
#      mean of the readings it contains;
#   3. average those daily values per month and round to whole degrees;
#   4. write the result to a CSV named after the province in `output_dir`.
library(readxl)
library(lubridate)
library(data.table)
library(stringr)

input_dir <- 'D:\\working directory\\天气数据\\'
output_dir <- 'D:\\working directory\\气温结果\\'

# Mean of all numeric readings found in each element of `x`.
#
# x: vector of temperature texts such as "5℃/10℃" or "-3℃/4℃".
# Returns a numeric vector of the same length; NA where no number is found.
#
# Whole numbers are extracted (optional leading minus, optional decimals).
# Matching single digits would average "15℃/25℃" as 1, 5, 2, 5 and would also
# lose the sign of sub-zero readings.
# NOTE(review): a range written with a hyphen ("5-10℃") would be read as 5 and
# -10; the data is assumed to use "/" between readings -- confirm.
mean_daily_temperature <- function(x) {
  readings <- str_extract_all(as.character(x), "-?\\d+(?:\\.\\d+)?")
  vapply(
    readings,
    function(values) {
      if (length(values) == 0L) NA_real_ else mean(as.numeric(values))
    },
    numeric(1)
  )
}

pro_capital <- read_excel("省份及省会.xlsx")

# Take the province names as a plain character vector and iterate over however
# many rows the sheet actually has, instead of assuming a fixed row count.
provinces <- as.character(pro_capital[[1]])
provinces <- provinces[!is.na(provinces)]

for (province in provinces) {
  data0 <- read.csv(paste0(input_dir, province, '.csv'))

  # Drop the first column (presumably a row-index column written when the data
  # was saved -- TODO confirm against the scraping script).
  data0 <- data0[, -1]

  # The remaining first column is expected to be `日期`, formatted like
  # "2018年01月31日".
  data0[, 1] <- as.Date(data0[, 1], format = "%Y年%m月%d日")

  # months() returns the month *name*, which depends on the session locale.
  data0$month <- months(data0$日期)
  data0$temperature <- mean_daily_temperature(data0$气温)

  data0 <- as.data.table(data0)

  # Monthly mean of the daily values; days without a parsable reading are
  # ignored rather than turning the whole month into NaN.
  bydata <- data0[
    ,
    .(temperature = round(mean(temperature, na.rm = TRUE))),
    by = 'month'
  ]
  bydata$temperature <- paste0(bydata$temperature, "℃")

  write.csv(bydata, paste0(output_dir, province, '.csv'))
}
声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。