一、数据分析中常常需要对批量同构数据进行回溯分析
1.具体原始数据如图
2.批量数据分析需求
公司要求对各季度节点的数据进行分析,因此需要批量筛选数据并逐表分析。实现方式是:先读取文件列表,循环导入相关文件,再对各数据表逐个分析,最后将结果合并为一张表导出。
二.具体实现的代码
# 载入包 ----
{
library(rJava)
library(xlsxjars)
library(xlsx)
library(readxl)
library(data.table)
library(dplyr)
}
# 批量循环导表 ----
{
  # Batch import: read the lookup sheets, then load every daily channel
  # report (.csv) found in `path` into a variable named after the file.
  path <- "D:/R实战/渠道日报/"   # directory holding the report CSV files
  path0 <- "D:/数据/R统计/"      # directory holding the lookup workbook

  # Lookup sheets: city ordering and the column-name table for the reports.
  city_order <- read_excel(
    paste0(path0, "固定表头.xlsx"),
    sheet = "地市排序",
    skip = 0
  )
  list_name <- read_excel(
    paste0(path0, "固定表头.xlsx"),
    sheet = "发展报表列名"
  )

  # Keep only files whose name ENDS in ".csv". The dot is escaped and the
  # pattern anchored; a bare ".csv" is a regex in which "." matches any
  # character and the match may occur anywhere in the name.
  file_names <- list.files(path, pattern = "\\.csv$")

  # seq_along() yields an empty sequence when no file matches, whereas
  # 1:length(x) would iterate over c(1, 0) and fail.
  for (i in seq_along(file_names)) {
    # Variable name = file name without its ".csv" extension.
    name <- sub("\\.csv$", "", file_names[i])
    # skip = 5 drops the first five lines of each file
    # (presumably report preamble rows -- confirm against the raw files).
    assign(name, fread(paste0(path, file_names[i]), skip = 5))
  }
}
# 批量循环命名 ----
{
  # Batch rename: give every imported report table the same column names.

  # Rows 1-40 of the column(s) of `list_name` whose header contains
  # "渠道日报"; these values become the new column names.
  chan_head <- list_name[1:40, grepl("渠道日报", names(list_name))]

  # Names of the report tables to process (one per reporting period).
  chan_rep_list <- paste0(
    "渠道日报",
    c("1801", "1803", "1806", "1809", "1812", "1903")
  )

  for (i in seq_along(chan_rep_list)) {
    # data.table::setnames() renames by reference, so the table fetched with
    # get() is updated in place and needs no re-assignment.
    setnames(get(chan_rep_list[i]), as.vector(as.matrix(chan_head)))
  }

  # Debug helper (disabled): print the column names of every report table.
  if (FALSE) {
    for (i in seq_along(chan_rep_list)) {
      print(i)
      print(names(get(chan_rep_list[i])))
    }
  }
}
# 数据计算 ----
# 循环赋值为单独变量
# Disabled alternative: store one summary table per report in its own
# variable named "def_name<report name>". Each summary counts, per city
# (地市), the rows whose channel name (渠道名称) contains "A" or "B".
if (FALSE) {
  # seq_along() is safe even if chan_rep_list is empty.
  for (i in seq_along(chan_rep_list)) {
    assign(
      paste0("def_name", chan_rep_list[i]),
      get(chan_rep_list[i]) %>%
        filter(grepl("A", 渠道名称) | grepl("B", 渠道名称)) %>%
        group_by(地市) %>%
        summarise(XX类渠道数 = n())
    )
  }
}
# 循环合并为多列结果 ----
# Build `qq_n`: one row per city from `city_order`, plus one column per
# report period holding the number of rows whose channel name (渠道名称)
# contains "A" or "B".
qq_n <- city_order
for (i in seq_along(chan_rep_list)) {
  # Characters 5-8 of the table name (e.g. "1801") become the column name.
  temp1 <- substr(chan_rep_list[i], 5, 8)

  # Per-city row count for this period.
  temp2 <- get(chan_rep_list[i]) %>%
    filter(grepl("A", 渠道名称) | grepl("B", 渠道名称)) %>%
    group_by(地市) %>%
    summarise(渠道数量 = n())
  setnames(temp2, c("地市", temp1))

  # Left join: merge() defaults to an inner join, which would drop a city
  # from the whole result as soon as it has no matching rows in one period.
  qq_n <- merge(qq_n, temp2, by = "地市", all.x = TRUE)
  # A city absent from this period's summary had zero matching rows.
  qq_n[[temp1]][is.na(qq_n[[temp1]])] <- 0L
}
# Build `hxqd_n`: one row per city from `city_order`, plus one column per
# report period holding the number of rows with 月发展 >= 50 whose channel
# name (渠道名称) contains none of "A", "B" or "C".
hxqd_n <- city_order
for (i in seq_along(chan_rep_list)) {
  # Characters 5-8 of the table name (e.g. "1801") become the column name.
  temp1 <- substr(chan_rep_list[i], 5, 8)

  # Per-city row count for this period.
  temp2 <- get(chan_rep_list[i]) %>%
    filter(
      月发展 >= 50 &
        !grepl("A", 渠道名称) &
        !grepl("B", 渠道名称) &
        !grepl("C", 渠道名称)
    ) %>%
    group_by(地市) %>%
    summarise(n())
  setnames(temp2, c("地市", temp1))

  # Left join: merge() defaults to an inner join, which would drop a city
  # from the whole result as soon as it has no qualifying rows in one period.
  hxqd_n <- merge(hxqd_n, temp2, by = "地市", all.x = TRUE)
  # A city absent from this period's summary had zero qualifying rows.
  hxqd_n[[temp1]][is.na(hxqd_n[[temp1]])] <- 0L
}
# 排序导出 ----
# Sort both result tables by the 序列 column (presumably the display order
# supplied by `city_order` -- confirm) and export them as CSV files.
hxqd_n <- dplyr::arrange(hxqd_n, 序列)
qq_n <- dplyr::arrange(qq_n, 序列)

# write.csv() fails if the target directory is missing, so create it first.
out_dir <- paste0(path, "business_analysis")
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned.
write.csv(
  hxqd_n,
  file = file.path(out_dir, "hxqd_n.csv"),
  row.names = FALSE,
  quote = FALSE
)
write.csv(
  qq_n,
  file = file.path(out_dir, "qq_n.csv"),
  row.names = FALSE,
  quote = FALSE
)