之前爬取了各省一年来的天气状况、气温等数据，这次对其进行整理，目标是：
1. 批量输出每个省每个月份出现频次最多的天气状况；
2. 批量输出每个省每个月的平均气温（包括对类似“5℃/10℃”这种含多个数值的数据的处理，即先对其中的数值取平均）。
代码:
########################### Weather condition summary ###############################
# For every province named in the first column of "省份及省会.xlsx", read that
# province's daily weather CSV, count how often each weather condition occurs
# in each month, and write the most frequent condition(s) of every month to a
# result CSV named after the province. Conditions tied for first place in a
# month are all kept.
#
# NOTE: the original transcript began with rm(list = ls(all = TRUE)); gc().
# That wipes the caller's whole workspace and nothing below depends on it, so
# it has been dropped. The "> " console prompts were removed as well so the
# script can be pasted or sourced directly.
library(readxl)
library(lubridate)
library(data.table)
library(sqldf)

input_dir <- 'D:\\working directory\\天气数据'
output_dir <- 'D:\\working directory\\天气结果'

# write.csv() fails if the target directory is missing, so create it up front.
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

pro_capital <- read_excel("省份及省会.xlsx")

# read_excel() returns a tibble, so `[i, 1]` would be a 1x1 tibble rather than
# a string. Pull the first column out as a plain character vector and iterate
# over however many provinces it holds instead of a hard-coded 1:34.
provinces <- as.character(pro_capital[[1]])

for (province in provinces) {
  data0 <- read.csv(paste0(input_dir, '\\', province, '.csv'))

  # Drop the first column (presumably the row index written by the scraper's
  # write.csv -- confirm against the input files).
  data0 <- data0[, -1]

  # The remaining first column holds dates such as "2018年01月05日"; it is
  # read back below as `日期`, so it is assumed to carry that name.
  data0[, 1] <- as.Date(data0[, 1], "%Y年%m月%d日")

  # months() returns the locale-dependent month name.
  data0$month <- months(data0$日期)
  data0 <- as.data.table(data0)

  # Number of days per (month, weather condition) pair.
  bydata <- data0[, .N, by = c('month', '天气状况')]

  # Keep the rows whose count equals the maximum count within their month.
  data1 <- sqldf('select * from bydata a where exists (select 1
from bydata where month=a.month group by month having
a.N=max(N))')
  names(data1) <- c('月份', '天气状况', 'N')

  write.csv(data1, paste0(output_dir, '\\', province, '.csv'))
}
########################### Temperature summary #################################
# For every province named in the first column of "省份及省会.xlsx", read that
# province's daily weather CSV, turn each day's temperature string (for example
# "5℃/10℃") into the mean of the numbers it contains, average those daily
# values per month, round to a whole degree and write the result, suffixed
# with "℃", to a CSV named after the province.
#
# NOTE: the original transcript began with rm(list = ls(all = TRUE)); gc().
# That wipes the caller's whole workspace and nothing below depends on it, so
# it has been dropped. The "> " console prompts were removed as well so the
# script can be pasted or sourced directly.
library(readxl)
library(lubridate)
library(data.table)
library(stringr)

# Mean of all numbers found in each element of `x`.
#
# x: character vector of temperature strings such as "5℃/10℃".
# Returns a numeric vector of the same length as `x`; elements that are NA or
# contain no number yield NA.
#
# The pattern matches whole (optionally negative, optionally decimal) numbers.
# The previous pattern "\\d" matched single digits, so "5℃/10℃" was read as
# 5, 1, 0 (mean 2) instead of 5, 10 (mean 7.5), and minus signs were lost.
# Assumes the readings are separated by "/" as in the example above; a "-"
# placed directly before a digit is treated as a minus sign.
mean_temperature <- function(x) {
  number_tokens <- str_extract_all(as.character(x), "-?\\d+(?:\\.\\d+)?")
  vapply(
    number_tokens,
    function(tokens) {
      if (length(tokens) == 0L) {
        return(NA_real_)
      }
      mean(as.numeric(tokens))
    },
    numeric(1)
  )
}

input_dir <- 'D:\\working directory\\天气数据'
output_dir <- 'D:\\working directory\\气温结果'

# write.csv() fails if the target directory is missing, so create it up front.
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

pro_capital <- read_excel("省份及省会.xlsx")

# read_excel() returns a tibble, so `[i, 1]` would be a 1x1 tibble rather than
# a string. Pull the first column out as a plain character vector and iterate
# over however many provinces it holds instead of a hard-coded 1:34.
provinces <- as.character(pro_capital[[1]])

for (province in provinces) {
  data0 <- read.csv(paste0(input_dir, '\\', province, '.csv'))

  # Drop the first column (presumably the row index written by the scraper's
  # write.csv -- confirm against the input files).
  data0 <- data0[, -1]

  # The remaining first column holds dates such as "2018年01月05日"; it is
  # read back below as `日期`, so it is assumed to carry that name.
  data0[, 1] <- as.Date(data0[, 1], "%Y年%m月%d日")

  # months() returns the locale-dependent month name.
  data0$month <- months(data0$日期)

  # Vectorised over all rows, so an empty file no longer trips a 1:0 loop.
  data0$temperature <- mean_temperature(data0$气温)
  data0 <- as.data.table(data0)

  # Monthly mean of the daily means; days without a usable reading are
  # skipped so one bad row does not turn the whole month into NA.
  bydata <- data0[
    ,
    .(temperature = round(mean(temperature, na.rm = TRUE))),
    by = 'month'
  ]
  bydata$temperature <- paste0(bydata$temperature, "℃")

  write.csv(bydata, paste0(output_dir, '\\', province, '.csv'))
}
声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。