구간 (범위) 데이터 분석 - boxplot
require(ggplot2)
# boxplot

tempratureData <- read.csv("data/Trend_Temperature_Seoul.csv", header=T)
str(tempratureData)
## 'data.frame':    365 obs. of  5 variables:
##  $ Date    : int  20130101 20130102 20130103 20130104 20130105 20130106 20130107 20130108 20130109 20130110 ...
##  $ Month   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ MeanTemp: int  -6 -11 -14 -14 -10 -10 -8 -7 -12 -11 ...
##  $ MaxTemp : int  0 -6 -10 -8 -2 -4 -2 -2 -8 -5 ...
##  $ MinTemp : int  -11 -16 -19 -19 -17 -16 -14 -12 -15 -17 ...
head(tempratureData)
##       Date Month MeanTemp MaxTemp MinTemp
## 1 20130101     1       -6       0     -11
## 2 20130102     1      -11      -6     -16
## 3 20130103     1      -14     -10     -19
## 4 20130104     1      -14      -8     -19
## 5 20130105     1      -10      -2     -17
## 6 20130106     1      -10      -4     -16
# ggplot > boxplot
# aes(x축, y축)

ggplot(tempratureData, aes(factor(Month), MeanTemp)) + geom_boxplot()

ggplot(tempratureData, aes(factor(Month), MeanTemp)) + geom_point()

# boxplot

# geom_jitter : 데이터의 분포를 점으로 표시

sales <- read.csv("data/emp_monthly_score.csv", header=T)
head(sales)
##   name month score target ratio
## 1 Kate     1   100    100  1.00
## 2 Kate     2    85    100  0.85
## 3 Kate     3    75    100  0.75
## 4 Kate     4    98    100  0.98
## 5 Kate     5    92    100  0.92
## 6 Kate     6    97    100  0.97
# 1. box만
ggplot(sales,aes(factor(name),score)) + geom_boxplot()

# 2. 분포위치
ggplot(sales,aes(factor(name),score)) + geom_boxplot() + geom_jitter()

# 3. 색상넣기
ggplot(sales,aes(factor(name),score)) + geom_boxplot(aes(fill=(name))) + geom_jitter()

# boxplot

total <- read.csv("data/all_student_score.csv", header=T)
head(total)
##   rank    name team score score100 rankrate
## 1    1  Lesley    1    48    100.0     1.00
## 2    2    Kate    2    46     95.8     0.96
## 3    3  Hannah    1    45     93.8     0.94
## 4    4 Heather    3    43     89.6     0.90
## 5    5  Jennet    1    41     85.4     0.85
## 6    6   Laura    1    41     85.4     0.85
ggplot(total,aes(factor(team),score)) + geom_boxplot(aes(fill=(team)))

ggplot(total,aes(factor(team),score)) + geom_boxplot(aes(fill=(team))) + geom_jitter()

# dash board : polygon
# example(polygon)

sales <- read.csv("data/emp_monthly_score_1.csv", header=T, stringsAsFactors=FALSE, sep=",")
sales
##   month    name score target ratio
## 1     1    Kate   100    100  1.00
## 2     1   Sunny    90    100  0.90
## 3     1   Jenny    85    100  0.85
## 4     1 Jessica    92    100  0.92
## 5     1 Taeyeon    95    100  0.95
## 6     1    Yuri    70    100  0.70
## 7     1 Seohyun    78    100  0.78
# function
# x : 표현할 값, angle1 : 좌측 게이지 각도, angle3 : 우측 게이지 각도, title : Dashboard 제목

# matrix(c(-1,0,0,1,1,0), 3, 2, byrow=T)
#       [,1] [,2]
# [1,]   -1    0
# [2,]    0    1
# [3,]    1    0

dash_t <- function(x, angle1, angle3, title) {
    i <- matrix(c(-1,0,0,1,1,0), 3, 2, byrow=T)
    plot(i, xlab="", ylab="", axes=F, type="n")
    x.cir <- cos(seq(0,180,1)*pi/180)
    y.cir <- sin(seq(0,180,1)*pi/180)
    
    #첫번째 polygon
    cir <- rbind(cbind(x.cir[(181-angle1):181], y.cir[(181-angle1):181]) ,
                 cbind(0.8*x.cir[181:(181-angle1)], 0.8*y.cir[181:(181-angle1)]),
                 col="green")
    polygon(cir, col="green", border="white")
    
    #두번째 polygon
    cir <- rbind(cbind(x.cir[(angle3+1):(181-angle1)], y.cir[(angle3+1):(181-angle1)]) ,
                 cbind(0.8*x.cir[(181-angle1):(angle3+1)], 0.8*y.cir[(181-angle1):(angle3+1)]),
                 col="blue")
    polygon(cir,col="blue", border="white")
    
    #세번째 polygon
    cir <- rbind(cbind(x.cir[1:(angle3+1)], y.cir[1:(angle3+1)]) ,
                 cbind(0.8*x.cir[(angle3+1):1], 0.8*y.cir[(angle3+1):1]),
                 col="red")
    polygon(cir,col="red", border="white")   
    
    for (i in 0:36) segments(0,0,cos(i*pi/36),sin(i*pi/36),col="white")
    
    arrows(0,0.1,0.75*cos(pi-x*pi),0.75*sin(pi-x*pi),lwd=3,length=0.2)  # 화살표
    text(0,0.1,"o",cex=3)  # 가운데 동그라미
    title(title)
}

empNames <- sales$name

windows(height=4, width=6)
# par(mfrow=) : 한 윈도우에 여러 개의 plot 그리기
# oma : 각 plot간의 좌,우,상,하 간격.
par(mfrow=c(3,3), oma=c(3,3,3,3), mar=c(1,1,1,2))

for (i in 1:nrow(sales)) {
    dash_t(sales[i,5], 60, 30, empNames[i])
    text(0,0.5, paste(sales[i,5]*100,"%"), cex=1.5)
}

# barplot / achievement
# 전체 높이 1 기준 달성률 표시 차트

a <- read.csv("data/emp_monthly_score_1.csv", header=T, stringsAsFactors=FALSE, sep=",")

# 달성률:미달성률 (0.9:0.1)
x <- matrix(c(a[1,5], 1-a[1,5],
              a[2,5], 1-a[2,5], 
              a[3,5], 1-a[3,5],
              a[4,5], 1-a[4,5],
              a[5,5], 1-a[5,5],
              a[6,5], 1-a[6,5],
              a[7,5], 1-a[7,5]), 2, 7)

empNames <- a$name
bp <- barplot(x, names=empNames, col=c("blue","red"), main="achievement rate (Jan)")
abline(h=seq(0.05,0.95,0.05), col="white", lwd=2) # 0.5 단위 구분선
text(x=bp, y=a$ratio*0.95, labels =paste(a$ratio*100,"%"), col = "yellow", cex = 1.2)