# Basic syntax
# NA handling
a <- c(1, 2, NA)
sum(a) # a single NA makes the whole sum NA
## [1] NA
sum(a, na.rm = TRUE) # drop NA values before summing (TRUE, not the reassignable T)
## [1] 3
# NA  : Not Available; a bare NA is of logical type.
# NaN : Not a Number, an undefined numeric result; double type.
is.na(NA) # TRUE
## [1] TRUE
is.na(NaN) # TRUE: is.na() also reports NaN as missing
## [1] TRUE
# The string "NA" coerces the whole vector to character, so NaN becomes the
# string "NaN" and only the real NA is reported as missing.
is.na(c("NA", NA, NaN)) # FALSE TRUE FALSE
## [1] FALSE TRUE FALSE
is.na(c(NaN, NA)) # TRUE TRUE
## [1] TRUE TRUE
# factor: stores character values as numeric codes plus level labels,
# which is convenient for plotting and statistical processing.
a <- c(
  "서울", "부산", "제주", "제주",
  "서울", "대전", "부산", "서울"
)
fa <- factor(x = a)
print(fa)
## [1] 서울 부산 제주 제주 서울 대전 부산 서울
## Levels: 대전 부산 서울 제주
mode(fa) # the underlying storage mode is numeric
## [1] "numeric"
plot(fa) # plot the factor
# Date
Sys.Date()
## [1] "2017-03-16"
# Extract the year with format() instead of relying on substr() implicitly
# coercing the Date to a character string.
format(Sys.Date(), "%Y")
## [1] "2017"
startDate <- as.Date("2016-03-01")
endDate <- as.Date("2016-04-09")
endDate - startDate # subtracting two Dates yields a difftime
## Time difference of 39 days
as.numeric(endDate - startDate, units = "days") # plain number of days
## [1] 39
as.Date("05072016", format = "%d%m%Y") # parse day-month-year without separators
## [1] "2016-07-05"
as.Date(30, origin = "2016-05-01") # 30 days after the origin
## [1] "2016-05-31"
as.Date(-30, origin = "2016-05-01") # 30 days before the origin
## [1] "2016-04-01"
# Built-in functions
ceiling(1.6) # round up
## [1] 2
floor(1.6) # round down
## [1] 1
exp(2) # e^2
## [1] 7.389056
factorial(5) # 5!
## [1] 120
sqrt(4) # square root
## [1] 2
x <- 1:10
max(x) # largest value
## [1] 10
min(x) # smallest value
## [1] 1
length(x) # number of elements
## [1] 10
mean(x) # arithmetic mean
## [1] 5.5
median(x) # median
## [1] 5.5
sd(x) # standard deviation
## [1] 3.02765
rev(x) # elements in reverse order
## [1] 10 9 8 7 6 5 4 3 2 1
# Set operations
a <- c(1, 2, 3)
b <- c(2, 3, 4)
a + b # element-wise addition, not a set operation
## [1] 3 5 7
# Named `chars` rather than `c` so the variable does not mask base::c().
chars <- c("3", "4", "5")
union(a, chars) # mixing numeric and character yields a character result
## [1] "1" "2" "3" "4" "5"
setdiff(a, b) # elements of a that are not in b
## [1] 1
setdiff(b, a) # elements of b that are not in a
## [1] 4
intersect(a, b) # elements present in both
## [1] 2 3
# Vector
# Every element of a vector shares one data type;
# mixed inputs are coerced automatically to a single common type.
a <- c(1, 2, "3")
print(a)
## [1] "1" "2" "3"
names(a) <- c("x1", "x2", "x3") # each element can be given a name
print(a)
## x1 x2 x3
## "1" "2" "3"
a["x1"] # look an element up by its name
## x1
## "1"
length(a)
## [1] 3
nrow(a) # NULL for a plain vector; meant for matrix-like objects
## NULL
NROW(a) # counts the elements of a vector as rows
## [1] 3
seq(from = 1, to = 10)
## [1] 1 2 3 4 5 6 7 8 9 10
seq(from = 5, to = -5)
## [1] 5 4 3 2 1 0 -1 -2 -3 -4 -5
seq(from = -5, to = 5, by = 2) # step of 2
## [1] -5 -3 -1 1 3 5
rep(1:5, times = 2) # repeat the whole vector
## [1] 1 2 3 4 5 1 2 3 4 5
rep(1:5, times = 2, each = 2) # repeat each element, then the whole result
## [1] 1 1 2 2 3 3 4 4 5 5 1 1 2 2 3 3 4 4 5 5
1 %in% a # membership test; here the number 1 matches the string "1"
## [1] TRUE
# Vector indexing
a <- c(1, 2, 3, 4, 5, 6)
a[c(1, 3)] # keep the 1st and 3rd elements
## [1] 1 3
a[-(1:3)] # negative indices drop elements 1 through 3
## [1] 4 5 6
a[-length(a)] # drop the last element
## [1] 1 2 3 4 5
# Matrix
x <- matrix(c(1, 2, 3, 4)) # no dimensions given: a single column
x <- matrix(c(1, 2, 3, 4), nrow = 2) # 2 rows, filled column by column
x <- matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE) # 2 rows, filled row by row
x
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
x[1, ] # all values of row 1
## [1] 1 2
x[, 1] # all values of column 1
## [1] 1 3
x <- matrix(c(1, 2, 3, 4), nrow = 2)
x <- rbind(x, c(77, 88)) # append a row
x <- cbind(x, c(9, 8, 7)) # append a column
x
## [,1] [,2] [,3]
## [1,] 1 3 9
## [2,] 2 4 8
## [3,] 77 88 7
colnames(x) <- c("no1", "no2", "no3") # name the columns
x
## no1 no2 no3
## [1,] 1 3 9
## [2,] 2 4 8
## [3,] 77 88 7
# List
member <- list(
  name = "Kate",
  address = "Seoul",
  tall = 170,
  pay = 10000
)
print(member)
## $name
## [1] "Kate"
##
## $address
## [1] "Seoul"
##
## $tall
## [1] 170
##
## $pay
## [1] 10000
member[["name"]] # extract a single element
## [1] "Kate"
member[c("address", "tall")] # sub-list holding the 2nd and 3rd elements
## $address
## [1] "Seoul"
##
## $tall
## [1] 170
member[["birth"]] <- "1990-05-07" # add an element to the list
member[["birth"]] <- NULL # assigning NULL removes the element
length(member) # number of elements
## [1] 4
# Data frame
# Creating a data frame
# 1. Build it directly from vectors
no <- 1:3
name <- c("xx", "yy", "zz")
price <- c(500, 100, 300)
qty <- c(2, 7, 9)
item <- data.frame(
  NO = no,
  Name = name,
  Price = price,
  QTY = qty
)
print(item)
## NO Name Price QTY
## 1 1 xx 500 2
## 2 2 yy 100 7
## 3 3 zz 300 9
# 2. From a matrix
# c() coerces the mixed values to character, so the matrix holds only strings.
x <- c(1, "James", 300, 2, "Kate", 500, 3, "Risa", 700, 4, "Liz", 900)
data <- matrix(x, nrow = 4, byrow = TRUE) # 4 rows, filled row by row
data
## [,1] [,2] [,3]
## [1,] "1" "James" "300"
## [2,] "2" "Kate" "500"
## [3,] "3" "Risa" "700"
## [4,] "4" "Liz" "900"
member <- data.frame(data)
names(member) <- c("NO", "NAME", "PAY") # replace the default column names
member
## NO NAME PAY
## 1 1 James 300
## 2 2 Kate 500
## 3 3 Risa 700
## 4 4 Liz 900
# 3. From a text file
# sep = "" splits fields on any run of whitespace.
fruitData <- read.table("data/fruits.txt", header = TRUE, sep = "")
fruitData
## no name price qty
## 1 1 apple 500 5
## 2 2 orange 300 3
## 3 3 peach 200 7
## 4 4 berry 100 10
# NOTE(review): skip is applied before the header is read, so with
# header = TRUE the first line left after skipping supplies the column
# names; confirm this is the intended way to ignore three rows.
fruitData <- read.table("data/fruits.txt", header = TRUE, sep = "", skip = 3) # skip the first 3 lines
fruitData <- read.table("data/fruits.txt", header = TRUE, sep = "", nrows = 3) # read at most 3 data rows
# 4. From a CSV or Excel file
cnames <- c("no", "name", "price1", "price2", "qty")
# header = FALSE treats the first line as data; the column names come from cnames.
df <- read.csv("data/fruits.csv", header = FALSE, col.names = cnames)
df
## no name price1 price2 qty
## 1 1 apple 500 700 5
## 2 2 orange 300 550 3
## 3 3 peach 200 400 7
## 4 4 berry 100 180 10
library("readxl")
# Read the sheet named "Sheet1", taking the column names from its first row.
df <- read_excel(path = "data/fruits.xlsx", sheet = "Sheet1", col_names = TRUE)
df
## no name lowprice highprice qty
## 1 1 apple 500 700 5
## 2 2 orange 600 800 3
## 3 3 peach 200 400 7
## 4 4 berry 100 200 10
# save to xml
# library("XML")
# library("kulife")
# write.xml(df, file="fruits.xml")
# Working with a data frame
df <- read_excel("data/fruits.xlsx", sheet = "Sheet1", col_names = TRUE)
print(df)
## no name lowprice highprice qty
## 1 1 apple 500 700 5
## 2 2 orange 600 800 3
## 3 3 peach 200 400 7
## 4 4 berry 100 200 10
summary(object = df) # per-column summary statistics
## no name lowprice highprice
## Min. :1.00 Length:4 Min. :100 Min. :200
## 1st Qu.:1.75 Class :character 1st Qu.:175 1st Qu.:350
## Median :2.50 Mode :character Median :350 Median :550
## Mean :2.50 Mean :350 Mean :525
## 3rd Qu.:3.25 3rd Qu.:525 3rd Qu.:725
## Max. :4.00 Max. :600 Max. :800
## qty
## Min. : 3.00
## 1st Qu.: 4.50
## Median : 6.00
## Mean : 6.25
## 3rd Qu.: 7.75
## Max. :10.00
str(object = df) # compact view of each column's type and leading values
## Classes 'tbl_df', 'tbl' and 'data.frame': 4 obs. of 5 variables:
## $ no : num 1 2 3 4
## $ name : chr "apple" "orange" "peach" "berry"
## $ lowprice : num 500 600 200 100
## $ highprice: num 700 800 400 200
## $ qty : num 5 3 7 10
df[2, 2] # row 2, column 2
## [1] "orange"
df[["lowprice"]] # one column as a vector
## [1] 500 600 200 100
df[2:3] # columns 2 and 3
## name lowprice
## 1 apple 500
## 2 orange 600
## 3 peach 200
## 4 berry 100
df[2:3, ] # rows 2 and 3
## no name lowprice highprice qty
## 2 2 orange 600 800 3
## 3 3 peach 200 400 7
ncol(df) # number of columns
## [1] 5
nrow(df) # number of rows
## [1] 4
names(df) # column names
## [1] "no" "name" "lowprice" "highprice" "qty"
rownames(df) # row names
## [1] "1" "2" "3" "4"
# sort / order / rank
sort(df$highprice) # values in ascending order
## [1] 200 400 700 800
sort(df$highprice, decreasing = TRUE) # values in descending order
## [1] 800 700 400 200
order(df$highprice) # indices that would put the vector in ascending order
## [1] 4 3 1 2
rank(df$highprice) # rank of each element, reported in the original order
## [1] 3 4 2 1
# split
split(df, f = df[["name"]]) # one piece per distinct value of the column
## $apple
## no name lowprice highprice qty
## 1 1 apple 500 700 5
##
## $berry
## no name lowprice highprice qty
## 4 4 berry 100 200 10
##
## $orange
## no name lowprice highprice qty
## 2 2 orange 600 800 3
##
## $peach
## no name lowprice highprice qty
## 3 3 peach 200 400 7
split(df, f = df[["no"]] > 2) # two pieces keyed by the logical condition
## $`FALSE`
## no name lowprice highprice qty
## 1 1 apple 500 700 5
## 2 2 orange 600 800 3
##
## $`TRUE`
## no name lowprice highprice qty
## 3 3 peach 200 400 7
## 4 4 berry 100 200 10
# merge
x <- data.frame(names = c("A", "B", "C"), address = c("Seoul", "Busan", "Tyokyo"))
y <- data.frame(names = c("A", "B", "D"), telno = c("001", "003", "888"))
merge(x, y) # keeps only the keys present in both data frames
## names address telno
## 1 A Seoul 001
## 2 B Busan 003
merge(x, y, by = "names") # same join with the key column stated explicitly
## names address telno
## 1 A Seoul 001
## 2 B Busan 003
merge(x, y, by = "names", all = TRUE) # keep unmatched rows from both sides, filled with NA
## names address telno
## 1 A Seoul 001
## 2 B Busan 003
## 3 C Tyokyo <NA>
## 4 D <NA> 888
# subset: extract the part of a data frame that satisfies a condition;
# the result is again a data frame.
subset(df, subset = qty > 5)
## no name lowprice highprice qty
## 3 3 peach 200 400 7
## 4 4 berry 100 200 10
subset(df, subset = highprice > 500)
## no name lowprice highprice qty
## 1 1 apple 500 700 5
## 2 2 orange 600 800 3
subset(df, subset = name == "apple")
## no name lowprice highprice qty
## 1 1 apple 500 700 5
subset(df, subset = qty > 5, select = c(name, qty)) # filter rows and pick columns
## name qty
## 3 peach 7
## 4 berry 10
subset(df, select = -no) # every column except `no`
## name lowprice highprice qty
## 1 apple 500 700 5
## 2 orange 600 800 3
## 3 peach 200 400 7
## 4 berry 100 200 10
# Cars93 is a data frame made available by the MASS package.
library(MASS)
str(object = Cars93) # inspect the column types before filtering
## 'data.frame': 93 obs. of 27 variables:
## $ Manufacturer : Factor w/ 32 levels "Acura","Audi",..: 1 1 2 2 3 4 4 4 4 5 ...
## $ Model : Factor w/ 93 levels "100","190E","240",..: 49 56 9 1 6 24 54 74 73 35 ...
## $ Type : Factor w/ 6 levels "Compact","Large",..: 4 3 1 3 3 3 2 2 3 2 ...
## $ Min.Price : num 12.9 29.2 25.9 30.8 23.7 14.2 19.9 22.6 26.3 33 ...
## $ Price : num 15.9 33.9 29.1 37.7 30 15.7 20.8 23.7 26.3 34.7 ...
## $ Max.Price : num 18.8 38.7 32.3 44.6 36.2 17.3 21.7 24.9 26.3 36.3 ...
## $ MPG.city : int 25 18 20 19 22 22 19 16 19 16 ...
## $ MPG.highway : int 31 25 26 26 30 31 28 25 27 25 ...
## $ AirBags : Factor w/ 3 levels "Driver & Passenger",..: 3 1 2 1 2 2 2 2 2 2 ...
## $ DriveTrain : Factor w/ 3 levels "4WD","Front",..: 2 2 2 2 3 2 2 3 2 2 ...
## $ Cylinders : Factor w/ 6 levels "3","4","5","6",..: 2 4 4 4 2 2 4 4 4 5 ...
## $ EngineSize : num 1.8 3.2 2.8 2.8 3.5 2.2 3.8 5.7 3.8 4.9 ...
## $ Horsepower : int 140 200 172 172 208 110 170 180 170 200 ...
## $ RPM : int 6300 5500 5500 5500 5700 5200 4800 4000 4800 4100 ...
## $ Rev.per.mile : int 2890 2335 2280 2535 2545 2565 1570 1320 1690 1510 ...
## $ Man.trans.avail : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 1 1 ...
## $ Fuel.tank.capacity: num 13.2 18 16.9 21.1 21.1 16.4 18 23 18.8 18 ...
## $ Passengers : int 5 5 5 6 4 6 6 6 5 6 ...
## $ Length : int 177 195 180 193 186 189 200 216 198 206 ...
## $ Wheelbase : int 102 115 102 106 109 105 111 116 108 114 ...
## $ Width : int 68 71 67 70 69 69 74 78 73 73 ...
## $ Turn.circle : int 37 38 37 37 39 41 42 45 41 43 ...
## $ Rear.seat.room : num 26.5 30 28 31 27 28 30.5 30.5 26.5 35 ...
## $ Luggage.room : int 11 15 14 17 13 16 17 21 14 18 ...
## $ Weight : int 2705 3560 3375 3405 3640 2880 3470 4105 3495 3620 ...
## $ Origin : Factor w/ 2 levels "USA","non-USA": 2 2 2 2 2 1 1 1 1 1 ...
## $ Make : Factor w/ 93 levels "Acura Integra",..: 1 2 4 3 5 6 7 9 8 10 ...
# Cars with city mileage above 30, showing three columns only.
subset(Cars93, subset = MPG.city > 30, select = c(Model, Type, Price))
## Model Type Price
## 31 Festiva Small 7.4
## 39 Metro Small 8.4
## 42 Civic Small 12.1
## 73 LeMans Small 9.0
## 80 Justy Small 8.4
## 83 Swift Small 8.6
## 84 Tercel Small 9.8
# Subaru cars whose highway mileage is above the median of all cars.
subset(
  Cars93,
  subset = Manufacturer == "Subaru" & MPG.highway > median(MPG.highway),
  select = c(Manufacturer, Model, Type, Price, Make)
)
## Manufacturer Model Type Price Make
## 80 Subaru Justy Small 8.4 Subaru Justy
## 81 Subaru Loyale Small 10.9 Subaru Loyale
## 82 Subaru Legacy Compact 19.5 Subaru Legacy
# Save the contents of the data frame to a text file.
# quote = FALSE writes strings without surrounding quotes;
# append = FALSE overwrites the file instead of appending to it.
write.table(df, "data/save_fruits.txt", quote = FALSE, append = FALSE)
# apply family
# apply, lapply, sapply, by, tapply, aggregate
# apply  : works over a margin (1 = rows, 2 = columns) and returns a vector
# lapply : returns the results as a list
# sapply : returns the results as a vector or a matrix
s1 <- c(91, 87, 95, 96, 89, 87, 86, 85, 92, 93)
s2 <- c(89, 86, 85, 92, 93, 91, 90, 95, 87, 89)
s3 <- c(89, 86, 78, 99, 95, 87, 89, 86, 85, 92)
# List of score vectors, one per subject
score <- list(
  korean = s1,
  english = s2,
  math = s3
)
print(score)
## $korean
## [1] 91 87 95 96 89 87 86 85 92 93
##
## $english
## [1] 89 86 85 92 93 91 90 95 87 89
##
## $math
## [1] 89 86 78 99 95 87 89 86 85 92
lapply(X = score, FUN = mean) # result is a list
## $korean
## [1] 90.1
##
## $english
## [1] 89.7
##
## $math
## [1] 88.6
sapply(X = score, FUN = mean) # result is a named vector
## korean english math
## 90.1 89.7 88.6
sapply(X = score, FUN = range) # two values per element, so the result is a matrix
## korean english math
## [1,] 85 85 78
## [2,] 96 95 99
sapply(X = score, FUN = t.test) # one column of test results per element
## korean english math
## statistic 73.5936 89.65021 48.33526
## parameter 9 9 9
## p.value 7.986569e-14 1.354942e-14 3.480671e-12
## conf.int Numeric,2 Numeric,2 Numeric,2
## estimate 90.1 89.7 88.6
## null.value 0 0 0
## alternative "two.sided" "two.sided" "two.sided"
## method "One Sample t-test" "One Sample t-test" "One Sample t-test"
## data.name "X[[i]]" "X[[i]]" "X[[i]]"
# Return the smallest and largest value of x as a vector whose
# elements are named "min" and "max".
extremes <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  c(min = lowest, max = highest)
}
sapply(X = score, FUN = extremes) # the names given by extremes become the row names
## korean english math
## min 85 85 78
## max 96 95 99
# Matrix
# The 30 scores are laid out column by column into 3 rows, so each row
# receives every third value of c(s1, s2, s3).
score <- matrix(
  c(s1, s2, s3),
  nrow = 3,
  ncol = 10,
  dimnames = list(
    c("K", "L", "M"),
    c("t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "t10")
  )
)
print(score)
## t1 t2 t3 t4 t5 t6 t7 t8 t9 t10
## K 91 96 86 93 85 91 87 86 95 86
## L 87 89 85 89 92 90 89 78 87 85
## M 95 87 92 86 93 95 89 99 89 92
apply(score, MARGIN = 1, FUN = mean) # mean of each row
## K L M
## 89.6 87.1 91.7
apply(score, MARGIN = 2, FUN = max) # maximum of each column
## t1 t2 t3 t4 t5 t6 t7 t8 t9 t10
## 95 96 92 93 93 95 89 99 95 92
# Data frame
df2 <- data.frame(score = c(s1, s2, s3))
# The three names cycle down the rows in K, L, M order.
df2$name <- rep(c("K", "L", "M"), length.out = nrow(df2))
print(df2)
## score name
## 1 91 K
## 2 87 L
## 3 95 M
## 4 96 K
## 5 89 L
## 6 87 M
## 7 86 K
## 8 85 L
## 9 92 M
## 10 93 K
## 11 89 L
## 12 86 M
## 13 85 K
## 14 92 L
## 15 93 M
## 16 91 K
## 17 90 L
## 18 95 M
## 19 87 K
## 20 89 L
## 21 89 M
## 22 86 K
## 23 78 L
## 24 99 M
## 25 95 K
## 26 87 L
## 27 89 M
## 28 86 K
## 29 85 L
## 30 92 M
# This does not stop with an error: the columns have different types, and
# every row mean comes back as NA, as the output below shows.
apply(df2, MARGIN = 1, FUN = mean)
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [24] NA NA NA NA NA NA NA
# Three ways to compute the mean score per name
by(df2$score, df2$name, mean)
## df2$name: K
## [1] 89.6
## --------------------------------------------------------
## df2$name: L
## [1] 87.1
## --------------------------------------------------------
## df2$name: M
## [1] 91.7
tapply(X = df2$score, INDEX = df2$name, FUN = mean)
## K L M
## 89.6 87.1 91.7
aggregate(score ~ name, data = df2, FUN = mean)
## name score
## 1 K 89.6
## 2 L 87.1
## 3 M 91.7
# apply examples
# 1. Split "NAME:YEAR" strings and keep the name part
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
# NOTE: the variables `split` and `names` reuse the names of base functions.
# Calls such as split(...) still reach the functions, but the reuse is easy
# to misread.
split <- strsplit(x = pioneers, split = ":")
print(split)
## [[1]]
## [1] "GAUSS" "1777"
##
## [[2]]
## [1] "BAYES" "1702"
##
## [[3]]
## [1] "PASCAL" "1623"
##
## [[4]]
## [1] "PEARSON" "1857"
names <- lapply(X = split, FUN = function(piece) piece[1]) # first part of each pair
print(names)
## [[1]]
## [1] "GAUSS"
##
## [[2]]
## [1] "BAYES"
##
## [[3]]
## [1] "PASCAL"
##
## [[4]]
## [1] "PEARSON"
# 2. Helper that picks out the element(s) of x at the given index
select_el <- function(x, index) {
  picked <- x[index]
  picked
}
# index = 2 is passed through lapply to select_el, keeping the year part.
years <- lapply(X = split, FUN = select_el, index = 2)
print(years)
## [[1]]
## [1] "1777"
##
## [[2]]
## [1] "1702"
##
## [[3]]
## [1] "1623"
##
## [[4]]
## [1] "1857"
# Input / output
scan("data/fruits.txt", what = "") # read every whitespace-separated token as a string
## [1] "no" "name" "price" "qty" "1" "apple" "500"
## [8] "5" "2" "orange" "300" "3" "3" "peach"
## [15] "200" "7" "4" "berry" "100" "10"
readLines("data/fruits.txt") # one string per line
## [1] "no\tname\tprice\tqty" "1\tapple\t500\t5" "2\torange\t300\t3"
## [4] "3\tpeach\t200\t7" "4\tberry\t100\t10"
read.table("data/fruits.txt") # without header = TRUE the header line is read as a data row
## V1 V2 V3 V4
## 1 no name price qty
## 2 1 apple 500 5
## 3 2 orange 300 3
## 4 3 peach 200 7
## 5 4 berry 100 10
read.table("data/fruits.txt", header = TRUE) # the first line supplies the column names
## no name price qty
## 1 1 apple 500 5
## 2 2 orange 300 3
## 3 3 peach 200 7
## 4 4 berry 100 10
# Joining, slicing, splitting and replacing strings
paste("abc", "xyz") # joined with a single space by default
## [1] "abc xyz"
paste("abc", "xyz", sep = ":") # joined with a custom separator
## [1] "abc:xyz"
substr("123456789", start = 3, stop = 5) # start position, end position
## [1] "345"
substr("123456789", start = -2, stop = 2) # a start before position 1 begins at the first character
## [1] "12"
strsplit("2016-04-19", split = "-") # returns a list with one vector per input string
## [[1]]
## [1] "2016" "04" "19"
d <- readLines("data/alert_log.txt")
d <- gsub(" ", "_", d) # gsub replaces every occurrence of the pattern
length(d)
## [1] 15908
# Keep the lines longer than 100 characters. Stored as `long_lines` rather
# than `c` so the variable does not mask base::c().
long_lines <- subset(d, nchar(d) > 100) # nchar: length of each string
length(long_lines)
## [1] 42
# Regular expressions
regexpr("ORACLE", long_lines) # position of the first match per element, -1 when there is none
## [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [24] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## attr(,"match.length")
## [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [24] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## attr(,"useBytes")
## [1] TRUE
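# \\d a digit
# \\D a non-digit
#
# \\s a whitespace character
# \\S a non-whitespace character
#
# \\t tab
# \\n newline (enter)
# \\. a literal dot
#
# ^ start of the string
# $ end of the string
# . any single character (.* matches any run of characters, including none)
#
# [ab] a or b
# [^ab] any character except a and b
# [0-9] any digit
# [A-Z] uppercase English letters
# [a-z] lowercase English letters
# [A-Za-z] all English letters ([A-z] would also match the punctuation between Z and a)
#
# i+ i occurs at least once
# i* i occurs zero or more times
# i? i occurs zero times or once
# i{n} i occurs exactly n times in a row
# i{n1,n2} i occurs between n1 and n2 times
# i{n,} i occurs n or more times
#
# The POSIX classes below must be written inside a bracket expression, e.g. [[:alnum:]]
# [:alnum:] letters and digits
# [:alpha:] letters
# [:blank:] space and tab
# [:cntrl:] control characters
# [:digit:] 0 ~ 9
# [:lower:] lowercase letters
# [:print:] printable characters: letters, digits, punctuation, and space
# [:punct:] punctuation characters
# [:space:] whitespace characters (space, tab, newline, ...)
# [:upper:] uppercase letters
# [:xdigit:] hexadecimal digits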
grep("ORACLE", d) # indices of the elements that contain the pattern
## [1] 2 69 141 208 455 521 697 762 944 1009 1197
## [12] 1262 1354 1419 1490 1555 1625 1690 1752 1818 1908 1973
## [23] 2059 2124 2239 2307 2421 2489 2636 2704 2890 2958 3058
## [34] 3126 3773 3841 3957 4025 4136 4204 4281 4297 4356 4424
## [45] 4523 4591 4634 4702 4770 4838 4898 4930 4952 5020 5288
## [56] 5356 5473 5541 5618 5686 5830 5898 6008 6076 6158 6226
## [67] 6329 6373 6441 6528 6596 6706 6774 6885 6953 7067 7133
## [78] 7349 7415 7540 7608 7617 7638 7706 7851 7919 8001 8047
## [89] 8115 8669 8684 11993 12061 12257 12325 12510 12578 12695 12761
## [100] 12989 13057 13170 13238 13360 13426 13644 13712 13827 13893 14118
## [111] 14186 14378 14446 14505 14573 14706 14774 14944 15012 15499 15567
## [122] 15829
grep("ORACLE_instance", d, value = TRUE) # value = TRUE returns the matching strings instead of indices
## [1] "Starting_ORACLE_instance_(normal)"
## [2] "Starting_ORACLE_instance_(normal)"
## [3] "Starting_ORACLE_instance_(restrict)"
## [4] "Starting_ORACLE_instance_(normal)"
## [5] "Starting_ORACLE_instance_(normal)"
## [6] "Starting_ORACLE_instance_(normal)"
## [7] "Starting_ORACLE_instance_(normal)"
## [8] "Starting_ORACLE_instance_(normal)"
## [9] "Starting_ORACLE_instance_(normal)"
## [10] "Starting_ORACLE_instance_(normal)"
## [11] "Starting_ORACLE_instance_(normal)"
## [12] "Starting_ORACLE_instance_(normal)"
## [13] "Starting_ORACLE_instance_(normal)"
## [14] "Starting_ORACLE_instance_(normal)"
## [15] "Starting_ORACLE_instance_(normal)"
## [16] "Starting_ORACLE_instance_(normal)"
## [17] "Starting_ORACLE_instance_(normal)"
## [18] "Starting_ORACLE_instance_(normal)"
## [19] "Starting_ORACLE_instance_(normal)"
## [20] "Starting_ORACLE_instance_(normal)"
## [21] "Starting_ORACLE_instance_(normal)"
## [22] "Starting_ORACLE_instance_(normal)"
## [23] "Starting_ORACLE_instance_(normal)"
## [24] "Starting_ORACLE_instance_(normal)"
## [25] "Starting_ORACLE_instance_(normal)"
## [26] "Starting_ORACLE_instance_(normal)"
## [27] "Starting_ORACLE_instance_(normal)"
## [28] "Starting_ORACLE_instance_(normal)"
## [29] "Starting_ORACLE_instance_(normal)"
## [30] "Starting_ORACLE_instance_(normal)"
## [31] "Starting_ORACLE_instance_(normal)"
## [32] "Starting_ORACLE_instance_(normal)"
## [33] "Starting_ORACLE_instance_(normal)"
## [34] "Starting_ORACLE_instance_(normal)"
## [35] "Starting_ORACLE_instance_(normal)"
## [36] "Starting_ORACLE_instance_(normal)"
## [37] "Starting_ORACLE_instance_(normal)"
## [38] "Starting_ORACLE_instance_(normal)"
## [39] "Starting_ORACLE_instance_(normal)"
## [40] "Starting_ORACLE_instance_(normal)"
## [41] "Starting_ORACLE_instance_(normal)"
## [42] "Starting_ORACLE_instance_(normal)"
## [43] "Starting_ORACLE_instance_(normal)"
## [44] "Starting_ORACLE_instance_(normal)"
## [45] "Starting_ORACLE_instance_(normal)"
## [46] "Starting_ORACLE_instance_(normal)"
## [47] "Starting_ORACLE_instance_(normal)"
## [48] "Starting_ORACLE_instance_(normal)"
## [49] "Starting_ORACLE_instance_(normal)"
## [50] "Starting_ORACLE_instance_(normal)"
## [51] "Starting_ORACLE_instance_(normal)"
## [52] "Starting_ORACLE_instance_(normal)"
## [53] "Starting_ORACLE_instance_(normal)"
## [54] "Starting_ORACLE_instance_(normal)"
## [55] "Starting_ORACLE_instance_(normal)"
## [56] "Starting_ORACLE_instance_(normal)"
## [57] "Starting_ORACLE_instance_(normal)"
## [58] "Starting_ORACLE_instance_(normal)"
## [59] "Starting_ORACLE_instance_(normal)"
## [60] "Starting_ORACLE_instance_(normal)"
## [61] "Starting_ORACLE_instance_(normal)"
grep(pattern = "^Setting", x = d) # ^ anchors the match to the start of the string
## [1] 322 808 809 897 898 1068 1069 1089 1090 1142 1143
## [12] 1606 1607 2012 2013 2160 2161 2181 2182 3264 3265 5154
## [23] 7223 8216 8217 12131 12132 12895 13547 13994 15118 15119 15195
## [34] 15196 15701 15702 15867 15868
grep(pattern = "ing$", x = d) # $ anchors the match to the end of the string
## [1] 334 2369 2373 4283 4299 5173 7253 8671 8686 12906 13558
## [12] 14023
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)
# Pattern: an "@", then any characters, then a literal ".edu" at the end.
grepl("@.*\\.edu$", emails) # logical vector, one value per element
## [1] TRUE FALSE FALSE FALSE TRUE FALSE
grep("@.*\\.edu$", emails) # indices of the matching elements
## [1] 1 5
hits <- grep("@.*\\.edu$", emails) # assigned with `<-`, as everywhere else in this file
emails[hits]
## [1] "john.doe@ivyleague.edu" "quant@bigdatacollege.edu"
# Replace everything from the "@" onwards in the matching addresses only.
gsub(pattern = "@.*\\.edu$", replacement = "@datacamp.edu", emails)
## [1] "john.doe@datacamp.edu" "education@world.gov"
## [3] "dalai.lama@peace.org" "invalid.edu"
## [5] "quant@datacamp.edu" "cookie.monster@sesame.tv"