# R Plot Options

#   main = ""               제목
#   sub = ""                부제목
#   xlab = "", ylab = ""    x, y 축 제목 문자열
#   xlim = , ylim =         x, y 축 한계값
#   ann = F                 x, y 축 제목 표시하지 않음
#   axes = F                x, y 축 표시하지 않음
#   tmag = 2                제목 등에 사용되는 문자의 확대배율
# 
#   axis(...)           사용자지정 x, y 축
#   title               메인, x, y 축 제목 지정
# 
#   <그래프 타입>
#   type = "p"          점 (기본값)
#   type = "l"          꺾은 선
#   type = "b"          점과 선
#   type = "c"          "b"에서 점 생략
#   type = "o"          점과 선 중첩
#   type = "h"          각 점에서 x축까지 수직선 그래프
#   type = "s"          왼쪽 값을 기초로 계단 모양으로 연결
#   type = "S"          오른쪽 값을 기초로 계단 모양으로 연결
#   type = "n"          축만 그리고 그래프는 그리지 않음
#   
#   <선>
#   lty = 0, lty = "blank" 
#   lty = 1, lty = "solid"
#   lty = 2, lty = "dashed"
#   lty = 3, lty = "dotted"
#   lty = 4, lty = "dotdash"
#   lty = 5, lty = "longdash"
#   lty = 6, lty = "twodash"
#   
#   <색, 기호>
#   col = 1, col = "blue"   색상 1 ~ 8
#   pch = 0, pch = "문자" 점 모양 지정
#   bg = "blue"             그래프 배경색
#   lwd = "숫자"          선 굵기
#   cex = "숫자"          점 또는 문자 크기 (확대 배율)
#   
#   par(mfrow = c(행수, 열수))      : 한 페이지에 여러 그래프 배치
#   par(new = T)                    : 그래프 중첩
#
#   lines                           : 그래프 위에 새로운 그래프 그리기
#   
#   legend                          : 범례
# Sales quantity per fruit, five observations each (used as Mon-Fri below).
apple  <- c(260, 400, 250, 200, 310)
peach  <- c(180, 200, 210, 190, 170)
orange <- c(210, 250, 260, 330, 300)

# Pool all fifteen values and print a stem-and-leaf display at half scale.
fruits <- c(apple, peach, orange)
stem(x = fruits, scale = 0.5)
## 
##   The decimal point is 2 digit(s) to the right of the |
## 
##   1 | 789
##   2 | 00115566
##   3 | 013
##   4 | 0
# plot
#   plot(y data, options)          : y values against their index
#   plot(x data, y data, options)  : y values against x values

# Positions 1..20 on the x axis, a random permutation of 1..20 on the y axis.
x <- 1:20
y <- sample(1:20)
plot(
  x, y,
  type = "o", lty = 2, col = 2,
  main = "sample",
  xlim = c(0, 20), ylim = c(0, 25)
)

# Custom x and y axes --- 01_fruits_line.png

# Draw the apple series without default axes or annotation so that both can
# be added by hand below.
plot(apple, type = "o", col = "red", ylim = c(0, 400), axes = FALSE, ann = FALSE)
axis(1, at = 1:5, labels = c("Mon", "Tue", "Wed", "Thu", "Fri"))
# The y range is already fixed by plot(ylim = ...); axis() has no ylim
# argument, so none is passed here.
axis(2)
title(main = "Fruits", col.main = "red", font.main = 4)
title(xlab = "Day", col.lab = "black")
title(ylab = "Qty", col.lab = "blue")

# lines() is a low-level call that adds to the current plot, so par(new = TRUE)
# is not needed; setting it would only leave the flag active for the next
# high-level plot.
lines(peach, type = "o", pch = 21, col = "green", lty = 2)
lines(orange, type = "o", pch = 22, col = "blue", lty = 2)

# Legend symbols and line types mirror what is drawn above:
# apple = default symbol (pch 1) with a solid line, peach = pch 21 dashed,
# orange = pch 22 dashed.
legend(4, 130, c("apple", "peach", "orange"), col = c("red", "green", "blue"),
       cex = 0.8, pch = c(1, 21, 22), lty = c(1, 2, 2))

# Several graphs in one window: one row, three columns.
par(mfrow = c(1, 3))
fruit_series <- list(apple = apple, peach = peach, orange = orange)
for (fruit_name in names(fruit_series)) {
  # Axis titles are spelled out so they match what plot() derives on its own
  # when it is handed the bare variable ("Index" and the variable name).
  plot(fruit_series[[fruit_name]], type = "o", col = "red", ylim = c(0, 400),
       xlab = "Index", ylab = fruit_name)
}

# Back to a single graph per window.
par(mfrow = c(1, 1))
# barplot
#   angle, density, col : angle, density and colour of the shading lines
#   legend.text         : legend
#   names.arg           : label of each bar
#   width               : relative width of each bar
#   space               : gap between bars
#   beside              : if TRUE, one bar is drawn per value (side by side)
#   horiz               : if TRUE, bars are drawn horizontally


# Apple sales by weekday

# density --- 02_apple_bar_density

weekday_names <- c("Mon", "Tue", "Wed", "Thu", "Fri")
# Shading gets denser from Monday (10) to Friday (90).
shading_density <- seq(10, 90, by = 20)
barplot(
  apple,
  names.arg = weekday_names,
  density = shading_density,
  border = "blue",
  main = "Apple", xlab = "Day", ylab = "Qty"
)

# Conditional colour: highlight the bars that satisfy a condition
# --- 03_apple_bar_conditionColor

# Vectorised threshold test: "red" where the value is at least 300, "grey"
# elsewhere. This replaces growing the vector inside a 1:length() loop, which
# fails when the input is empty.
colors <- ifelse(apple >= 300, "red", "grey")

barplot(apple, main = "Apple", xlab = "Day", ylab = "Qty", border = "blue",
        col = colors, names.arg = c("Mon", "Tue", "Wed", "Thu", "Fri"))

# Sales by fruit and weekday, bars grouped per fruit --- 04_fruits_bar_multi

# One column per fruit, one row per weekday.
fruits10 <- cbind(apple, peach, orange)
fruits10
##      apple peach orange
## [1,]   260   180    210
## [2,]   400   200    250
## [3,]   250   210    260
## [4,]   200   190    330
## [5,]   310   170    300
# cbind() already returns a matrix, so it is handed to barplot() directly.
weekday_palette <- rainbow(5)
barplot(fruits10, beside = TRUE, col = weekday_palette,
        main = "Fruits", ylab = "Qty", ylim = c(0, 450))
legend(10, 450, c("Mon", "Tue", "Wed", "Thu", "Fri"),
       fill = weekday_palette, cex = 0.8)

# Sales by weekday and fruit, bars stacked per weekday
# --- 05_fruits_bar_multisection

# Transpose so that each column is a weekday and each row a fruit.
fruits11 <- t(fruits10)
colnames(fruits11) <- 1:5
fruits11
##          1   2   3   4   5
## apple  260 400 250 200 310
## peach  180 200 210 190 170
## orange 210 250 260 330 300
fruit_palette <- rainbow(3)
barplot(
  fruits11,
  names.arg = c("Mon", "Tue", "Wed", "Thu", "Fri"),
  col = fruit_palette, space = 0.1,
  main = "Fruits", ylab = "Qty", ylim = c(0, 1100),
  cex = 0.8, cex.axis = 0.8, las = 1
)
legend(4, 1100, rownames(fruits11), fill = fruit_palette, cex = 0.8)

# Pie chart
#   angle, density, col : angle, density and colour of the shading
#   labels              : name of each slice
#   radius              : size of the circle
#   clockwise           : drawing direction (default is counter-clockwise)
#   init.angle          : angle at which the first slice starts


apple <- c(260, 400, 250, 200, 310)
pie(apple)

# 1. Weekday names as slice labels
f_days <- c("Mon", "Tue", "Wed", "Thu", "Fri")
slice_colors <- rainbow(length(apple))
pie(apple, labels = f_days, col = slice_colors, radius = 1, init.angle = 90)

# 2. Percentages as slice labels, weekdays in a legend --- 06_apple_pie

# Share of each value in the total, as a percentage with one decimal.
f_ratio <- round(apple / sum(apple) * 100, digits = 1)
f_labels <- paste0(f_ratio, "%")

pie(apple, labels = f_labels, col = slice_colors, radius = 0.8,
    init.angle = 90, main = "Apple", cex = 0.8)
legend(1, 1, f_days, fill = slice_colors, cex = 0.8)

# 3. Weekday and percentage on two lines per label --- 06_apple_pie_label

# f_ratio and f_days from above are reused; only the label text changes.
f_labels <- paste(f_days, "\n", f_ratio, "%")
pie(apple, labels = f_labels, col = slice_colors, radius = 1,
    init.angle = 90, main = "Apple", cex = 1.0)

# 3D pie chart
#   explode : gap between the slices --- 07_apple_3Dpie

library(plotrix)

# Two-line labels: weekday on top, percentage share (one decimal) below.
f_days <- c("Mon", "Tue", "Wed", "Thu", "Fri")
f_ratio <- round(apple / sum(apple) * 100, digits = 1)
f_labels <- paste(f_days, "\n", f_ratio, "%")

pie3D(
  apple,
  labels = f_labels,
  explode = 0.05,
  col = rainbow(length(apple)),
  main = "Apple",
  cex = 0.8
)

# boxplot
#   col        : box colour
#   names      : label of each box
#   range      : how far the whiskers extend from the box (default 1.5)
#   width      : box width
#   notch      : if TRUE, the boxes are drawn with a narrowed waist
#   horizontal : if TRUE, the boxes are drawn horizontally


# One horizontal box per fruit, labelled and coloured in the same order.
boxplot(
  apple, peach, orange,
  horizontal = TRUE,
  names = c("apple", "peach", "orange"),
  col = c("red", "yellow", "green")
)

# treemap
#   vSize : column that determines the area of each rectangle
#   index : column(s) that define the grouping hierarchy

library(treemap)

# Load the per-student score table and print it.
data <- read.csv(file = "data/all_student_score.csv")
data
##    rank     name team score score100 rankrate
## 1     1   Lesley    1    48    100.0     1.00
## 2     2     Kate    2    46     95.8     0.96
## 3     3   Hannah    1    45     93.8     0.94
## 4     4  Heather    3    43     89.6     0.90
## 5     5   Jennet    1    41     85.4     0.85
## 6     6    Laura    1    41     85.4     0.85
## 7     7    Paige    4    41     85.4     0.85
## 8     8    Sunny    3    40     83.3     0.83
## 9     9  Melissa    1    39     81.3     0.81
## 10   10  Vitoria    1    39     81.3     0.81
## 11   11 Katriana    2    39     81.3     0.81
## 12   12   Gloria    2    39     81.3     0.81
## 13   13     Lina    3    39     81.3     0.81
## 14   14  Scarlet    1    38     79.2     0.79
## 15   15   Serena    4    37     77.1     0.77
## 16   16    Diana    4    37     77.1     0.77
## 17   17    Kelly    2    36     75.0     0.75
## 18   18    Sally    2    34     70.8     0.71
## 19   19   Rachel    3    34     70.8     0.71
## 20   20     Emma    4    33     68.8     0.69
## 21   21    Emily    2    31     64.6     0.65
## 22   22    Julia    2    30     62.5     0.63
## 23   23    Juana    4    30     62.5     0.63
## 24   24   Yesica    4    27     56.3     0.56
## 25   25    Erica    3    24     50.0     0.50
## 26   26   Marcia    3    23     47.9     0.48
## 27   27      Zoe    4    23     47.9     0.48
## 28   28    Chloe    3    13     27.1     0.27
# --- 08_score_treemap

# Grouped by score, area taken from score.
treemap(data, index = "score", vSize = "score")

# Grouped by score and then name, area taken from the team column.
treemap(data, index = c("score", "name"), vSize = "team")

# Grouped by team and then score, area taken from score.
treemap(data, index = c("team", "score"), vSize = "score")

# stars
#   draw.segments = TRUE : draw each variable as a coloured segment
#   full                 : TRUE full circle / FALSE half circle


data <- read.csv(file = "data/all_subject_score.csv")
data
##       name Korean English Math History Chemistry Physics
## 1    Sunny     90      85   55      88        91      79
## 2  Melissa     70      65   80      75        76      89
## 3  Vitoria     92      95   76      65        89      91
## 4 Katriana     76      89   88      98       100      91
## 5   Gloria     97      87   83      91        86      91
## 6     Lina     80      86   97      85        69      77
## 7  Scarlet     20      30   40      50        70      90
## 8   Serena     50      52   54      56        58      60
## 9    Diana     60      70   80      75        65      55
nrow(data)
## [1] 9
# Use the name column (column 1) as row names, then drop it by keeping only
# columns 2 to 7.
rownames(data) <- data$name
row.names(data)
## [1] "Sunny"    "Melissa"  "Vitoria"  "Katriana" "Gloria"   "Lina"    
## [7] "Scarlet"  "Serena"   "Diana"
data <- data[, 2:7]


# Star charts

# 1. Outline stars --- 09_stardiagram_line

stars(data, full = TRUE, draw.segments = FALSE, flip.labels = FALSE,
      frame.plot = TRUE, main = "Score Diagram of subject/student")

# Small pie labelled with the column names: table() counts each name once,
# so every slice has the same size.
label <- names(data)
value <- table(label)
piechart <- pie(value, labels = label, radius = 0.2, cex = 0.6, col = NA)

# 2. Coloured segments --- 09_stardiagram_color

stars(data, full = TRUE, draw.segments = TRUE, flip.labels = TRUE,
      frame.plot = TRUE, main = "Score Diagram of subject/student")

# Same pie as above (label and value are unchanged), smaller and coloured.
color <- c("black", "red", "green", "blue", "cyan", "violet")
pie(value, labels = label, radius = 0.1, cex = 0.6, col = color)

# 3. Coloured segments, half circle --- 09_stardiagram_color_half

stars(data, full = FALSE, draw.segments = TRUE, flip.labels = TRUE,
      frame.plot = TRUE, main = "Score Diagram of subject/student")

# From stars to bars: grouped bar chart per student --- 10_bar_multi

# Transpose so that each column is a student and each row a subject.
data2 <- t(data)
data2
##           Sunny Melissa Vitoria Katriana Gloria Lina Scarlet Serena Diana
## Korean       90      70      92       76     97   80      20     50    60
## English      85      65      95       89     87   86      30     52    70
## Math         55      80      76       88     83   97      40     54    80
## History      88      75      65       98     91   85      50     56    75
## Chemistry    91      76      89      100     86   69      70     58    65
## Physics      79      89      91       91     91   77      90     60    55
label <- rownames(data2)
label
## [1] "Korean"    "English"   "Math"      "History"   "Chemistry" "Physics"
# One colour per subject. A fixed rainbow(5) is one colour short for the six
# subjects, so it would be recycled and the sixth subject would share the
# first subject's colour in both the bars and the legend.
subject_colors <- rainbow(length(label))
# t() already returned a matrix, so data2 is passed to barplot() as is.
barplot(data2, main = "Test", ylab = "score", beside = TRUE,
        col = subject_colors, ylim = c(0, 150))
# Horizontal reference line at a score of 60.
abline(h = 60, col = "red", lwd = 2)
legend(50, 140, label, cex = 0.8, fill = subject_colors)

# ggplot2
#   ggplot(data, aes(x = x-axis column, y = y-axis column)) + geom function

library(ggplot2)

data <- read.table("data/korean_score.txt", header = TRUE, sep = "")
data
##       name subject score
## 1    Sunny  Korean    90
## 2    Diana  Korean    70
## 3   Serena  Korean    92
## 4     Lina  Korean    76
## 5   Gloria  Korean    97
## 6 Victoria  Korean    80
## 7  Scarlet  Korean    20
## 8   Melisa  Korean    50
## 9     Kate  Korean    60
# Shared base for the plots below: name on the x axis, score on the y axis.
score_by_name <- ggplot(data, aes(x = name, y = score))

# geom_point() : 11_ggplot_point

score_by_name + geom_point()

# geom_bar()
#   stat : how the values needed by the geom are derived from the data;
#          "identity" takes the y values as they are.
score_by_name + geom_bar(stat = "identity")

# Same bars with a cyan fill and red outline, plus tilted blue x-axis labels.
gpbar <- score_by_name +
  geom_bar(stat = "identity", fill = "cyan", colour = "red")
tilted_x_labels <- theme(
  axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1,
                             color = "blue", size = 10)
)
gpbar + tilted_x_labels

# geom_bar + geom_text

library(plyr)  # split-apply-combine paradigm

data <- read.csv("data/student_score_3.csv", header = TRUE, sep = ",")
head(data)
##      name subject score
## 1  Lesley    Math    90
## 2    Kate    Math    70
## 3  Hannah    Math    92
## 4 Heather    Math    76
## 5  Jennet    Math    97
## 6   Laura    Math    80
transData <- arrange(data, name, subject)   # plyr: sort by name, then subject
# Add the total column: running sum of score within each name.
transData <- ddply(transData, "name", transform, total = cumsum(score))
# Add the label column: vertical midpoint of each score segment
# (running sum minus half of the segment's own score).
transData <- ddply(transData, "name", transform,
                   label = cumsum(score) - score * 0.5)
head(transData)
##      name subject score total label
## 1  Hannah English    95    95  47.5
## 2  Hannah    Math    92   187 141.0
## 3  Hannah Physics    76   263 225.0
## 4 Heather English    89    89  44.5
## 5 Heather    Math    76   165 127.0
## 6 Heather Physics    88   253 209.0
# --- 12_ggplot_bar_multisection

# The label positions above accumulate from the first subject upwards, so the
# bars must be stacked in that same order. Current ggplot2 stacks groups in
# the opposite order by default, which would put the labels on the wrong
# segments; position_stack(reverse = TRUE) lines the two up again.
ggplot(transData, aes(x = name, y = score, fill = subject)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
  geom_text(aes(y = label, label = paste(score, "p")),
            color = "black", size = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1,
                                   color = "blue", size = 10))

# geom_segment

data <- read.csv("data/all_subject_score.csv")
# Keep only the first two columns (name and Korean).
kdata <- data[, 1:2]
kdata
##       name Korean
## 1    Sunny     90
## 2  Melissa     70
## 3  Vitoria     92
## 4 Katriana     76
## 5   Gloria     97
## 6     Lina     80
## 7  Scarlet     20
## 8   Serena     50
## 9    Diana     60
# theme_bw()                        : black-and-white theme
# panel.grid.major.y = element_line : draw the horizontal grid lines

# --- 13_ggplot_point_grid

# Names on the y axis, ordered by their Korean score; only dashed red
# horizontal grid lines are kept.
ggplot(kdata, aes(x = Korean, y = reorder(name, Korean))) +
  geom_point(size = 5) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_line(color = "red", linetype = "dashed"))

# --- 13_ggplot_point_grid2

# Each point gets a segment running back to x = 0. xend and color are
# constants, so they are set outside aes(): inside aes() the string "red"
# would be mapped as a data value, producing a default-palette colour and a
# legend entry instead of red segments.
ggplot(kdata, aes(x = Korean, y = reorder(name, Korean))) +
  geom_segment(aes(yend = name), xend = 0, color = "red") +
  geom_point(size = 5) +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_blank())

# geom_line --- 14_ggplot_line

data <- read.csv("data/student_subject_score_3.csv")
data2 <- arrange(data, name, subject)   # plyr: sort by name, then subject
data2
##     name   subject score
## 1   Jane Chemistry    76
## 2   Jane   English    65
## 3   Jane   History    89
## 4   Jane    Korean    70
## 5   Jane      Math    80
## 6   Jane   Physics    75
## 7   Kate Chemistry    91
## 8   Kate   English    85
## 9   Kate   History    79
## 10  Kate    Korean    90
## 11  Kate      Math    55
## 12  Kate   Physics    88
## 13 Sunny Chemistry    89
## 14 Sunny   English    95
## 15 Sunny   HIstory    91
## 16 Sunny    Korean    92
## 17 Sunny      Math    76
## 18 Sunny   Physics    65
# NOTE(review): row 15 of the printed data reads "HIstory" (capital I), which
# is a different subject value from "History" -- confirm against the CSV.

# One line per student across the subjects.
line_mapping <- aes(x = subject, y = score, color = name, group = name)
ggplot(data2, line_mapping) +
  geom_line()

# Same lines, with a size-3 marker of shape 22 at every score; fill is mapped
# to the student as well.
ggplot(
  data2,
  aes(x = subject, y = score, color = name, group = name, fill = name)
) +
  geom_line() +
  geom_point(size = 3, shape = 22)