- grep
# Load the semicolon-separated table, keeping text columns as character.
alldata <- read.table(
  "household_power_consumption.txt",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)
# Combine the Date and Time columns into one date-time column.
# strptime() returns POSIXlt, a list-based class that is awkward inside a
# data frame, so the parsed value is stored as POSIXct instead.
alldata$fulltime <- as.POSIXct(
  strptime(paste(alldata$Date, alldata$Time), format = "%d/%m/%Y %H:%M:%S")
)
# Select the rows whose date is 2007-02-01.
#   alldata: the data frame
#   pattern: the text a value must contain to be selected
#   format(alldata$fulltime, "%Y-%m-%d"): the values being searched; the column
#     is addressed by name (alldata[, "fulltime"] works too) so the selection
#     does not depend on the column's position
newd1 <- alldata[
  grep(pattern = "2007-02-01", format(alldata$fulltime, "%Y-%m-%d")),
]
- which
# Read the source-classification table and the PM2.5 emissions summary.
SCC <- readRDS(file = "Source_Classification_Code.rds")
sumSCC <- readRDS(file = "summarySCC_PM25.rds")
# Collect, as character strings, the SCC codes of every row whose EI.Sector
# contains "Coal".
v1 <- as.character(SCC$SCC[grep(pattern = "Coal", x = SCC$EI.Sector)])
# which() turns the logical membership test into row indices; use them to
# keep only the emission records carrying one of those codes.
data_coal <- sumSCC[which(sumSCC$SCC %in% v1), ]
# Sum the emissions within each year.
total_coal <- aggregate(Emissions ~ year, data = data_coal, FUN = sum)
- subset
# Load the source-classification table and the PM2.5 emissions summary.
SCC <- readRDS("Source_Classification_Code.rds")
sumSCC <- readRDS("summarySCC_PM25.rds")
# Collect the SCC codes of every row whose EI.Sector contains "Vehicles".
v2 <- SCC[grep(pattern = "Vehicles", SCC$EI.Sector), ]
v2 <- as.character(v2$SCC)
# Keep only the emission records that carry one of those codes.
data_Bal <- sumSCC[which(sumSCC$SCC %in% v2), ]
# subset(data frame, condition) keeps the rows for which the condition holds:
#   data_Bal: the data frame
#   fips == "24510": rows whose fips column equals "24510"
# The string must be delimited by plain ASCII quotes; typographic (curly)
# quotes are not string delimiters in R and make the line fail to parse.
data_Bal <- subset(data_Bal, fips == "24510")
# Sum the emissions within each year.
total_Bal <- aggregate(Emissions ~ year, data_Bal, sum)
# Alternatively, filter against a set of values read from a file.
# scan() reads the values stored in g3.txt into a vector.
new_d3 <- scan(file = "g3.txt")
# subset() keeps the rows of allData whose name column is one of those values.
# NOTE(review): allData is not defined in the visible code (the table loaded
# earlier is spelled alldata) -- confirm which object is intended.
freq_d3 <- subset(x = allData, subset = name %in% new_d3)