R 데이터마이닝 dplyr

Data Mining - Data Munging with dplyr

library(dplyr)

# 0. Data Setting
cs <- read.csv("data/dataCustomers.csv", header = T, stringsAsFactors = T)
head(cs)

##   custid gender age marriage    residence        job
## 1  10070     여  28     미혼   Yongsan-gu     제조업
## 2  10139     여  28     미혼  Gangdong-gu 정보서비스
## 3  10208     여  28     미혼  Gwangjin-gu     제조업
## 4  10275     여  28     미혼 Eunpyeong-gu   개인사업
## 5  10350     남  28     미혼   Gangnam-gu     제조업
## 6  10425     남  28     미혼    Songpa-gu   교육기관

str(cs)

## 'data.frame':    500 obs. of  6 variables:
##  $ custid   : int  10070 10139 10208 10275 10350 10425 10499 10567 10632 10707 ...
##  $ gender   : Factor w/ 2 levels "남","여": 2 2 2 2 1 1 2 2 2 2 ...
##  $ age      : int  28 28 28 28 28 28 28 28 28 28 ...
##  $ marriage : Factor w/ 2 levels "기혼","미혼": 2 2 2 2 2 2 2 2 2 1 ...
##  $ residence: Factor w/ 25 levels "Dobong-gu","Dongdaemun-gu",..: 25 6 12 4 7 22 19 3 4 3 ...
##  $ job      : Factor w/ 16 levels "개인사업","건설업",..: 16 15 16 1 16 5 1 15 1 1 ...

cs$custid <- as.character(cs$custid)

tr <- read.csv("data/dataTransactions.csv", header = T, stringsAsFactors = T)
head(tr)

##           datetime custid  store      product      brand   corner import
## 1 2000-05-01 10:43  18313 신촌점 4.104840e+12       샤넬   화장품      1
## 2 2000-05-01 11:00  18313 신촌점 2.700000e+12       식품 일반식품      0
## 3 2000-05-01 11:33  27222 신촌점 4.545371e+12   까사미아     가구      0
## 4 2000-05-01 11:43  27222 신촌점 4.500860e+12   대아통상     기타      0
## 5 2000-05-01 11:53  27222 신촌점 4.538130e+12 토이플러스 문화완구      0
## 6 2000-05-01 12:00  27222 신촌점 4.406010e+12       베베 유아동복      0
##   amount installment
## 1 113000           3
## 2  91950           3
## 3 598000           3
## 4  20100           1
## 5  24000           1
## 6  28000           1

str(tr)

## 'data.frame':    19125 obs. of  9 variables:
##  $ datetime   : Factor w/ 13804 levels "2000-05-01 10:43",..: 1 2 3 4 5 6 6 7 7 8 ...
##  $ custid     : int  18313 18313 27222 27222 27222 27222 47084 27222 31090 27222 ...
##  $ store      : Factor w/ 4 levels "무역점","본점",..: 3 3 3 3 3 3 1 3 1 3 ...
##  $ product    : num  4.10e+12 2.70e+12 4.55e+12 4.50e+12 4.54e+12 ...
##  $ brand      : Factor w/ 1191 levels "012베네통","1492",..: 533 619 136 204 1058 456 785 45 911 379 ...
##  $ corner     : Factor w/ 26 levels "가구","가전",..: 26 16 1 3 7 15 11 15 11 15 ...
##  $ import     : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ amount     : int  113000 91950 598000 20100 24000 28000 150000 43800 118000 47000 ...
##  $ installment: int  3 3 3 1 1 1 3 1 3 1 ...

tr$datetime <- as.character(tr$datetime)
tr$custid <- as.character(tr$custid)
tr$import <- as.factor(tr$import)
tr$installment <- as.factor(tr$installment)
tr$product <- as.character(format(tr$product, digits = 13))

summary(tr)

##    datetime            custid             store        product         
##  Length:19125       Length:19125       무역점:4955   Length:19125      
##  Class :character   Class :character   본점  :3722   Class :character  
##  Mode  :character   Mode  :character   신촌점:6110   Mode  :character  
##                                        천호점:4338                     
##                                                                        
##                                                                        
##                                                                        
##         brand              corner     import        amount       
##  식품      : 5267   일반식품  :5267   0:17063   Min.   :    650  
##  지오다노  :  301   화장품    :2158   1: 2062   1st Qu.:  25000  
##  랑콤      :  251   유아동복  :1345             Median :  53000  
##  크리니크  :  188   유니캐주얼:1138             Mean   : 109198  
##  샤넬      :  181   스포츠    :1129             3rd Qu.: 118000  
##  에스티로더:  177   피혁      : 931             Max.   :8000000  
##  (Other)   :12760   (Other)   :7157                              
##   installment   
##  1      :12295  
##  3      : 6368  
##  6      :  201  
##  5      :  106  
##  2      :   55  
##  10     :   39  
##  (Other):   61

# 1. Selects a subset

filter(cs, age > 60 & gender == "여")

##    custid gender age marriage    residence      job
## 1   47842     여  61     기혼    Songpa-gu 개인사업
## 2   48044     여  62     기혼     Nowon-gu   전문직
## 3   48169     여  62     기혼    Seocho-gu 공공기관
## 4   48278     여  63     기혼 Seodaemun-gu 개인사업
## 5   48401     여  63     기혼   Gangnam-gu 교육기관
## 6   48556     여  64     기혼 Seodaemun-gu 개인사업
## 7   48686     여  63     기혼    Songpa-gu 도소매업
## 8   48997     여  66     기혼   Gangnam-gu 개인사업
## 9   49112     여  65     기혼 Seodaemun-gu 개인사업
## 10  49232     여  67     기혼 Seongdong-gu 개인사업
## 11  49351     여  68     기혼      Mapo-gu 개인사업

filter(tr, installment == 10, amount > mean(amount))

##            datetime custid  store       product            brand
## 1  2000-05-08 17:54  33751 천호점 4300471019900         헨리코튼
## 2  2000-05-14 15:03  38747 신촌점 4131690003200         세라구두
## 3  2000-05-14 15:50  38747 신촌점 4202205111020           CC클럽
## 4  2000-05-16 19:33  16805 신촌점 4130790009100   룩선글라스ck외
## 5  2000-07-13 14:00  33751 천호점 4405600026020           퀵실버
## 6  2000-07-18 16:53  33751 천호점 4139141008000             랑콤
##              corner import  amount installment
## 1  트래디셔널캐주얼      0  247000          10
## 2              피혁      0  276000          10
## 3          영캐주얼      0  238400          10
## 4              섬유      0  325000          10
## 5        유니캐주얼      0  133600          10
## 6            화장품      0  330000          10

x1 <- select(tr, custid:amount)         # range fields
head(x1)

##   custid  store       product      brand   corner import amount
## 1  18313 신촌점 4104840008000       샤넬   화장품      1 113000
## 2  18313 신촌점 2700000000000       식품 일반식품      0  91950
## 3  27222 신촌점 4545370944500   까사미아     가구      0 598000
## 4  27222 신촌점 4500860043900   대아통상     기타      0  20100
## 5  27222 신촌점 4538130048700 토이플러스 문화완구      0  24000
## 6  27222 신촌점 4406010020474       베베 유아동복      0  28000

x2 <- select(tr, -datetime, -import)    # remove fields
head(x2)

##   custid  store       product      brand   corner amount installment
## 1  18313 신촌점 4104840008000       샤넬   화장품 113000           3
## 2  18313 신촌점 2700000000000       식품 일반식품  91950           3
## 3  27222 신촌점 4545370944500   까사미아     가구 598000           3
## 4  27222 신촌점 4500860043900   대아통상     기타  20100           1
## 5  27222 신촌점 4538130048700 토이플러스 문화완구  24000           1
## 6  27222 신촌점 4406010020474       베베 유아동복  28000           1

x3 <- select(cs, custid, job, gender:residence)     # reorder fields
head(x3)

##   custid        job gender age marriage    residence
## 1  10070     제조업     여  28     미혼   Yongsan-gu
## 2  10139 정보서비스     여  28     미혼  Gangdong-gu
## 3  10208     제조업     여  28     미혼  Gwangjin-gu
## 4  10275   개인사업     여  28     미혼 Eunpyeong-gu
## 5  10350     제조업     남  28     미혼   Gangnam-gu
## 6  10425   교육기관     남  28     미혼    Songpa-gu

x4 <- rename(cs, juso = residence, upjong = job)    # rename fields
head(x4)

##   custid gender age marriage         juso     upjong
## 1  10070     여  28     미혼   Yongsan-gu     제조업
## 2  10139     여  28     미혼  Gangdong-gu 정보서비스
## 3  10208     여  28     미혼  Gwangjin-gu     제조업
## 4  10275     여  28     미혼 Eunpyeong-gu   개인사업
## 5  10350     남  28     미혼   Gangnam-gu     제조업
## 6  10425     남  28     미혼    Songpa-gu   교육기관

# 2. Select random sample

a <- sample_n(cs, 30)       # fixed number
nrow(a)

## [1] 30

a <- sample_frac(cs, 0.1)   # fixed fraction
nrow(a)

## [1] 50

a <- cs[as.logical((1:nrow(cs)%%20 == 1)), ]   # 1-in-n sampling
head(a, 10)

##     custid gender age marriage    residence        job
## 1    10070     여  28     미혼   Yongsan-gu     제조업
## 21   11489     남  29     미혼   Gangbuk-gu   개인사업
## 41   12867     여  29     기혼    Jongno-gu   금융기관
## 61   14258     여  29     기혼 Seodaemun-gu   개인사업
## 81   15641     여  30     미혼 Seongdong-gu   도소매업
## 101  17007     여  31     기혼      Mapo-gu   개인사업
## 121  18374     여  31     미혼      Mapo-gu   개인사업
## 141  19752     남  32     미혼 Seongdong-gu     전문직
## 161  21161     남  32     미혼   Gangnam-gu 정보서비스
## 181  22592     여  34     기혼      Mapo-gu   개인사업

# 3. Summarize information

cs %>% group_by(gender) %>% summarise(Total = n())

## # A tibble: 2 × 2
##   gender Total
##   <fctr> <int>
## 1     남   154
## 2     여   346

cs %>% group_by(gender, marriage) %>% summarise(Count = n())

## Source: local data frame [4 x 3]
## Groups: gender [?]
## 
##   gender marriage Count
##   <fctr>   <fctr> <int>
## 1     남     기혼   115
## 2     남     미혼    39
## 3     여     기혼   234
## 4     여     미혼   112

tr %>% group_by(store) %>% 
    summarise(Total = sum(amount), Mean = mean(amount))

## # A tibble: 4 × 3
##    store     Total      Mean
##   <fctr>     <int>     <dbl>
## 1 무역점 597280920 120541.05
## 2   본점 478251982 128493.28
## 3 신촌점 564742855  92429.27
## 4 천호점 448129075 103303.15

tr %>% group_by(store) %>% 
    summarise_each(funs(sum, min, max), amount)

## # A tibble: 4 × 4
##    store       sum   min     max
##   <fctr>     <int> <int>   <int>
## 1 무역점 597280920  2000 3930000
## 2   본점 478251982  1400 7544000
## 3 신촌점 564742855   650 2780000
## 4 천호점 448129075  1400 8000000

# 4. Reorder records

tr %>% group_by(custid, store) %>% 
    summarise(Total = sum(amount), Count = n()) %>%
    arrange(desc(Total), desc(Count))

## Source: local data frame [837 x 4]
## Groups: custid [500]
## 
##    custid  store    Total Count
##     <chr> <fctr>    <int> <int>
## 1   42800 무역점 59353579   235
## 2   15968 천호점 44014591   225
## 3   13493 무역점 35091556   178
## 4   42322 신촌점 34305296   172
## 5   48278 신촌점 30149391   216
## 6   27074   본점 29419710    92
## 7   32232 천호점 28250679   267
## 8   33829   본점 19554792   116
## 9   37340 천호점 19509317    88
## 10  41313 천호점 17824062    90
## # ... with 827 more rows

cs %>% group_by(residence) %>% 
    summarise(Count = n()) %>%
    arrange(desc(Count))

## # A tibble: 25 × 2
##       residence Count
##          <fctr> <int>
## 1    Gangnam-gu    90
## 2     Seocho-gu    54
## 3       Mapo-gu    47
## 4  Seodaemun-gu    47
## 5   Gangdong-gu    45
## 6     Songpa-gu    33
## 7   Gwangjin-gu    29
## 8  Seongdong-gu    21
## 9    Yongsan-gu    19
## 10   Dongjak-gu    16
## # ... with 15 more rows

# 5. Distinct values in specified fields

filter(cs, !duplicated(custid))

##     custid gender age marriage       residence        job
## 1    10070     여  28     미혼      Yongsan-gu     제조업
## 2    10139     여  28     미혼     Gangdong-gu 정보서비스
## 3    10208     여  28     미혼     Gwangjin-gu     제조업
## 4    10275     여  28     미혼    Eunpyeong-gu   개인사업
## 5    10350     남  28     미혼      Gangnam-gu     제조업
## 6    10425     남  28     미혼       Songpa-gu   교육기관
## 7    10499     여  28     미혼    Seodaemun-gu   개인사업
## 8    10567     여  28     미혼      Dongjak-gu 정보서비스
## 9    10632     여  28     미혼    Eunpyeong-gu   개인사업
## 10   10707     여  28     기혼      Dongjak-gu   개인사업

distinct(cs, custid)

##     custid
## 1    10070
## 2    10139
## 3    10208
## 4    10275
## 5    10350
## 6    10425
## 7    10499
## 8    10567
## 9    10632
## 10   10707

# 6. Derive new fields based on existing fields

score <- read.csv("data/score.csv", header = T)
score

##   ID exam1 exam2 quiz
## 1  1   3.3   2.0  3.7
## 2  2   4.0   4.0  4.0
## 3  3   2.3   0.0  3.3
## 4  4   2.2   1.0  3.3
## 5  5   3.1   1.2  3.9

score <- mutate(score, ExamSum = exam1 + exam2, ExamMean = ExamSum / 2)
score

##   ID exam1 exam2 quiz ExamSum ExamMean
## 1  1   3.3   2.0  3.7     5.3     2.65
## 2  2   4.0   4.0  4.0     8.0     4.00
## 3  3   2.3   0.0  3.3     2.3     1.15
## 4  4   2.2   1.0  3.3     3.2     1.60
## 5  5   3.1   1.2  3.9     4.3     2.15

# 7. Replace with new values

score$extra <- c(1, 1, NA, NA, 2)
score

##   ID exam1 exam2 quiz ExamSum ExamMean extra
## 1  1   3.3   2.0  3.7     5.3     2.65     1
## 2  2   4.0   4.0  4.0     8.0     4.00     1
## 3  3   2.3   0.0  3.3     2.3     1.15    NA
## 4  4   2.2   1.0  3.3     3.2     1.60    NA
## 5  5   3.1   1.2  3.9     4.3     2.15     2

score$extra[is.na(score$extra)] <- 0
score

##   ID exam1 exam2 quiz ExamSum ExamMean extra
## 1  1   3.3   2.0  3.7     5.3     2.65     1
## 2  2   4.0   4.0  4.0     8.0     4.00     1
## 3  3   2.3   0.0  3.3     2.3     1.15     0
## 4  4   2.2   1.0  3.3     3.2     1.60     0
## 5  5   3.1   1.2  3.9     4.3     2.15     2

# 8. Merge records

score2 <- read.csv("data/score2.csv", header = T)
score2

##   CID exam3 exam4 finalexam
## 1   1   3.1   2.2       3.5
## 2   2   3.9   4.5       4.0
## 3   3   2.2   3.9       3.7
## 4   4   2.1   1.1       3.3
## 5   5   3.0   2.0       3.8

merge(score, score2, by.x = "ID", by.y = "CID")

##   ID exam1 exam2 quiz ExamSum ExamMean extra exam3 exam4 finalexam
## 1  1   3.3   2.0  3.7     5.3     2.65     1   3.1   2.2       3.5
## 2  2   4.0   4.0  4.0     8.0     4.00     1   3.9   4.5       4.0
## 3  3   2.3   0.0  3.3     2.3     1.15     0   2.2   3.9       3.7
## 4  4   2.2   1.0  3.3     3.2     1.60     0   2.1   1.1       3.3
## 5  5   3.1   1.2  3.9     4.3     2.15     2   3.0   2.0       3.8

# 9. Melt & Cast

library(reshape)

id <- c(1, 1, 1, 1, 2, 2, 2)
site <- c("a", "b", "c", "a", "a", "b", "b")
pageview <- c(1, 2, 3, 4, 5, 6, 7)
dwelltime <- c(7, 6, 5, 4, 3, 2, 1)

mydata <- data.frame(id, site, pageview, dwelltime)
mydata

##   id site pageview dwelltime
## 1  1    a        1         7
## 2  1    b        2         6
## 3  1    c        3         5
## 4  1    a        4         4
## 5  2    a        5         3
## 6  2    b        6         2
## 7  2    b        7         1

tx <- melt(mydata, id = c("id", "site"))
tx

##    id site  variable value
## 1   1    a  pageview     1
## 2   1    b  pageview     2
## 3   1    c  pageview     3
## 4   1    a  pageview     4
## 5   2    a  pageview     5
## 6   2    b  pageview     6
## 7   2    b  pageview     7
## 8   1    a dwelltime     7
## 9   1    b dwelltime     6
## 10  1    c dwelltime     5
## 11  1    a dwelltime     4
## 12  2    a dwelltime     3
## 13  2    b dwelltime     2
## 14  2    b dwelltime     1

cast(tx, id ~ site, sum)

##   id  a  b c
## 1  1 16  8 8
## 2  2  8 16 0

cast(tx, id ~ site, sum, subset = variable == "pageview")

##   id a  b c
## 1  1 5  2 3
## 2  2 5 13 0

cast(tx, id + site ~ variable, sum)

##   id site pageview dwelltime
## 1  1    a        5        11
## 2  1    b        2         6
## 3  1    c        3         5
## 4  2    a        5         3
## 5  2    b       13         3

cast(tx, id ~ variable, mean, subset = variable == "pageview")

##   id pageview
## 1  1      2.5
## 2  2      6.0

cast(tx, id ~ variable, range)

##   id pageview_X1 pageview_X2 dwelltime_X1 dwelltime_X2
## 1  1           1           4            4            7
## 2  2           5           7            1            3

# 10. Binning : Converts numeric fields into discrete pieces

cust <- tr %>% group_by(custid) %>% summarise(visits = n())
head(cust)

## # A tibble: 6 × 2
##   custid visits
##    <chr>  <int>
## 1  10070     20
## 2  10139     16
## 3  10208    173
## 4  10275      7
## 5  10350     17
## 6  10425      3

summary(cust)

##     custid              visits      
##  Length:500         Min.   :  1.00  
##  Class :character   1st Qu.: 10.00  
##  Mode  :character   Median : 24.00  
##                     Mean   : 38.25  
##                     3rd Qu.: 48.00  
##                     Max.   :268.00

cust$level <- cut(cust$visits, breaks = 10, labels = F)
head(cust);tail(cust)

## # A tibble: 6 × 3
##   custid visits level
##    <chr>  <int> <int>
## 1  10070     20     1
## 2  10139     16     1
## 3  10208    173     7
## 4  10275      7     1
## 5  10350     17     1
## 6  10425      3     1

## # A tibble: 6 × 3
##   custid visits level
##    <chr>  <int> <int>
## 1  48890     46     2
## 2  48997    171     7
## 3  49112     12     1
## 4  49232     15     1
## 5  49351      9     1
## 6  49470     40     2

cust$level <- cut(cust$visits, c(0, 50, 100, 150, 200, 300), labels = F)
head(cust);tail(cust)

## # A tibble: 6 × 3
##   custid visits level
##    <chr>  <int> <int>
## 1  10070     20     1
## 2  10139     16     1
## 3  10208    173     4
## 4  10275      7     1
## 5  10350     17     1
## 6  10425      3     1

## # A tibble: 6 × 3
##   custid visits level
##    <chr>  <int> <int>
## 1  48890     46     1
## 2  48997    171     4
## 3  49112     12     1
## 4  49232     15     1
## 5  49351      9     1
## 6  49470     40     1

# Practice

# 지점별 수입/국산품 판매건수
tr$importGubun <- ifelse(tr$import == 1, "수입", "국산")
table(tr$store, tr$importGubun)

##         
##          국산 수입
##   무역점 4363  592
##   본점   3090  632
##   신촌점 5622  488
##   천호점 3988  350

# 남녀별 건당 구매액 최소, 중앙, 최대값
mg <- merge(cs, tr)   # tr 수만큼 custid로 merge
mg %>% group_by(gender) %>% summarise_each(funs(min, median, max), amount)

## # A tibble: 2 × 4
##   gender   min median     max
##   <fctr> <int>  <dbl>   <int>
## 1     남   650  51020 8000000
## 2     여   840  54354 3930000

# 총구매액 상위 10명

tr %>% group_by(custid) %>% summarise(Total = sum(amount)) %>%
    arrange(desc(Total)) %>%
    head(10)

## # A tibble: 10 × 2
##    custid    Total
##     <chr>    <int>
## 1   42800 61672778
## 2   15968 44478591
## 3   13493 35480804
## 4   42322 34305296
## 5   48278 31569011
## 6   27074 29419710
## 7   32232 28308279
## 8   33829 26050612
## 9   30026 23227040
## 10  37340 20128817

library(ggplot2)

head(diamonds)

## # A tibble: 6 × 10
##   carat       cut color clarity depth table price     x     y     z
##   <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
## 2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
## 3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
## 4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
## 5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48

# 1
aggregate(price ~ cut, diamonds, mean)

##         cut    price
## 1      Fair 4358.758
## 2      Good 3928.864
## 3 Very Good 3981.760
## 4   Premium 4584.258
## 5     Ideal 3457.542

diamonds %>% group_by(cut) %>% summarise(price = mean(price))

## # A tibble: 5 × 2
##         cut    price
##       <ord>    <dbl>
## 1      Fair 4358.758
## 2      Good 3928.864
## 3 Very Good 3981.760
## 4   Premium 4584.258
## 5     Ideal 3457.542

# 2
aggregate(price ~ cut + color, diamonds, mean)

##          cut color    price
## 1       Fair     D 4291.061
## 2       Good     D 3405.382
## 3  Very Good     D 3470.467
## 4    Premium     D 3631.293
## 5      Ideal     D 2629.095
## 6       Fair     E 3682.312
## 7       Good     E 3423.644
## 8  Very Good     E 3214.652
## 9    Premium     E 3538.914
## 10     Ideal     E 2597.550
## 11      Fair     F 3827.003
## 12      Good     F 3495.750
## 13 Very Good     F 3778.820
## 14   Premium     F 4324.890
## 15     Ideal     F 3374.939
## 16      Fair     G 4239.255
## 17      Good     G 4123.482
## 18 Very Good     G 3872.754
## 19   Premium     G 4500.742
## 20     Ideal     G 3720.706
## 21      Fair     H 5135.683
## 22      Good     H 4276.255
## 23 Very Good     H 4535.390
## 24   Premium     H 5216.707
## 25     Ideal     H 3889.335
## 26      Fair     I 4685.446
## 27      Good     I 5078.533
## 28 Very Good     I 5255.880
## 29   Premium     I 5946.181
## 30     Ideal     I 4451.970
## 31      Fair     J 4975.655
## 32      Good     J 4574.173
## 33 Very Good     J 5103.513
## 34   Premium     J 6294.592
## 35     Ideal     J 4918.186

diamonds %>% group_by(cut, color) %>% summarise(price = mean(price))

## Source: local data frame [35 x 3]
## Groups: cut [?]
## 
##      cut color    price
##    <ord> <ord>    <dbl>
## 1   Fair     D 4291.061
## 2   Fair     E 3682.312
## 3   Fair     F 3827.003
## 4   Fair     G 4239.255
## 5   Fair     H 5135.683
## 6   Fair     I 4685.446
## 7   Fair     J 4975.655
## 8   Good     D 3405.382
## 9   Good     E 3423.644
## 10  Good     F 3495.750
## # ... with 25 more rows

# 3
aggregate(cbind(price, carat) ~ cut, diamonds, mean)

##         cut    price     carat
## 1      Fair 4358.758 1.0461366
## 2      Good 3928.864 0.8491847
## 3 Very Good 3981.760 0.8063814
## 4   Premium 4584.258 0.8919549
## 5     Ideal 3457.542 0.7028370

diamonds %>% group_by(cut) %>% summarise_each(funs(mean), price, carat)

## # A tibble: 5 × 3
##         cut    price     carat
##       <ord>    <dbl>     <dbl>
## 1      Fair 4358.758 1.0461366
## 2      Good 3928.864 0.8491847
## 3 Very Good 3981.760 0.8063814
## 4   Premium 4584.258 0.8919549
## 5     Ideal 3457.542 0.7028370

R 데이터마이닝 dplyr

woosa7