library(MVA)
demo("Ch-MVA")
par(mfcol=c(1,1))
# Covariance, correlation coefficient, and distance
# Can structure or patterns in the data be revealed by the relationships between
# variables, or by the relative distances between observations?
# Measure of linear dependence between two random variables = correlation coefficient
# If the relationship between two variables is not linear, the correlation
# coefficient can be misleading.
# Chest, waist and hip measurements for 20 subjects (10 male, 10 female),
# from the MVA package.
measure
summary(measure)

# Keep the three numeric measurements, overall and split by gender.
df <- measure[, c("chest", "waist", "hips")]
male <- subset(measure, gender == "male", select = c(chest, waist, hips))
female <- subset(measure, gender == "female", select = c(chest, waist, hips))

# Correlation matrices: pooled data vs. within each gender.
# The within-gender correlations differ noticeably from the pooled ones.
cor(df)
cor(male)
cor(female)

library(psych)
# Scatterplot matrices with univariate panels, per gender.
pairs.panels(male)
pairs.panels(female)
# Hence the notion of distance between observations in the data is important.
# Euclidean distance is the usual choice.
# Variables are measured on different scales (units), so standardize first,
# then compute distances.
# NOTE: the original had `scale_measure <- scale(df, center = F)` here, a dead
# store immediately overwritten by the next line; it has been removed.
scale_measure <- scale(df)  # standardize: mean 0, sd 1 per column
scale_measure
summary(scale_measure)  # every column now has mean 0
pairs.panels(scale_measure)
# Build the Euclidean distance matrix on the standardized data and
# print it rounded to two decimals.
measure_dist <- dist(scale_measure)
round(measure_dist, 2)
# Outlier detection with the local outlier factor (LOF).
df <- USairpollution
head(df)
pairs.panels(df)

library(DMwR)
# LOF score per city, using the 5 nearest neighbours.
outlier.score <- lofactor(df, k = 5)
outlier.score
plot(density(outlier.score), main = "outlier score")
sort(outlier.score, decreasing = TRUE)[1:10]

# Treat the three highest scores (score > 2.2) as outliers.
outliers <- order(outlier.score, decreasing = TRUE)[1:3]
outliers
df[outliers, ]  # Chicago, Philadelphia, Detroit

# Correlations after removing the outliers.
pairs.panels(df[-outliers, ])
# Visualizing multivariate data
# NOTE(review): attach() is discouraged (it can mask globals); kept to preserve
# the original script's use of bare column names below.
attach(USairpollution)
head(USairpollution)

# 1. Scatterplot
plot(popul ~ manu, data = USairpollution)

# 1-1. Bivariate boxplot
# Split the device into 3 panels.
layer <- matrix(c(2, 0, 1, 3), nrow = 2, byrow = TRUE)
layout(layer, widths = c(2, 1), heights = c(1, 2), respect = TRUE)
# Fill the panels in order: labelled scatterplot, histogram, boxplot.
xlim <- range(manu) * 1.1
plot(popul ~ manu, data = USairpollution, cex = 0.9, type = "n", xlim = xlim)
text(manu, popul, cex = 0.6, labels = row.names(USairpollution))
hist(manu, main = "", xlim = xlim)
boxplot(popul)
par(mfcol = c(1, 1))

# Compare the correlation with and without the outliers.
df <- USairpollution
rownames(df) <- seq_len(nrow(df))  # original `rep(1:nrow(df))` was a no-op wrapper
plot(popul ~ manu, data = df, cex = 0.9, type = "n")
text(manu, popul, cex = 0.9, labels = rownames(df))
outlier <- c(7, 30, 14)  # Chicago, Philadelphia, Detroit
cor(df$manu, df$popul)  # 0.955
cor(df$manu[-outlier], df$popul[-outlier])  # 0.769: the correlation actually decreased.
# 2. Boxplot
head(chickwts)  # effect of different feed supplements on chick growth rate
summary(chickwts)
boxplot(weight ~ feed, chickwts)
# casein vs horsebean: means and ranges differ a lot, so the groups look distinct.
# linseed vs meatmeal: means differ but ranges overlap, so hard to call distinct.

# 3. Bubble chart - shows a third variable
# USairpollution - SO2 encoded as circle size
symbols(temp, wind, circles = SO2, inches = 0.5)
legend(70, 13, "circle : SO2")
# Doubling the radius quadruples the area.
# A sqrt transform makes the sizes similar and reduces discrimination.
symbols(temp, wind, circles = sqrt(SO2), inches = 0.5)
ylim <- range(wind) * c(0.95, 1)
plot(wind ~ temp, data = USairpollution, pch = 10, ylim = ylim)
symbols(temp, wind, circles = SO2, inches = 0.5, add = TRUE)
# 4. Mosaic plot
UCBAdmissions  # aggregated counts of 1973 Berkeley graduate applicants
adm <- UCBAdmissions
adm[, , 1]  # Admit x Gender table for department A only
adm[, 1, ]  # Admit x Dept table for males only
mosaicplot(~ Dept + Gender, data = adm, color = TRUE)
mosaicplot(~ Gender + Dept, data = adm, color = TRUE)

library(reshape)
# Flatten the 3-way table to long form, then re-tabulate pairs of factors.
ucb <- melt(UCBAdmissions)
head(ucb)
df <- xtabs(value ~ Dept + Gender, data = ucb)
mosaicplot(~ Dept + Gender, data = df, color = TRUE)
df2 <- xtabs(value ~ Dept + Admit, data = ucb)
mosaicplot(~ Dept + Admit, data = df2, color = TRUE)

# 5. Star plot
stars(USairpollution)
stars(USairpollution, key.loc = c(15, 2), cex = 0.8)  # show legend
stars(USairpollution, key.loc = c(15, 2), cex = 0.8, draw.segments = TRUE)
# 6. Heat map (rows and columns are clustered as a side effect).
bball <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
head(bball)
# Move the player names into row names and keep only the numeric stats
# as a matrix, which heatmap() requires.
rownames(bball) <- bball$Name
bball <- as.matrix(bball[, -1])
# Brighter cells = larger values. The columns have different units,
# so scaling is needed for a meaningful picture.
heatmap(bball)
heatmap(bball, scale = "column", Colv = NA)
# 7. Visualizing a bivariate distribution & clustering
library(MASS)
attach(geyser)
head(geyser)
dim(geyser)

# 2-D kernel density estimate on a coarse 25x25 grid, shown as an image.
coarse_density <- kde2d(waiting, duration, n = 25)
image(coarse_density, xlab = "waiting", ylab = "duration")
str(coarse_density)

# Same estimate on a finer 100x100 grid.
fine_density <- kde2d(waiting, duration, n = 100)
image(fine_density, xlab = "waiting", ylab = "duration")
# Contour plot of the fine-grid density.
contour(fine_density)
# Practice
# 2005 US crime rate data: incidents per 100,000 population.
data <- read.csv("data/crime.csv", header = TRUE)
head(data, 10)

# Drop the first row (the "United States" national aggregate) and renumber rows.
crime <- data[-1, ]
rownames(crime) <- seq_len(nrow(crime))  # `c(1:nrow(...))` was redundant
head(crime, 10)

# A. Draw the scatterplot of murder vs burglary together with the univariate
#    distributions, and examine the correlation coefficient.
library(psych)
pairs.panels(crime)  # cor = 0.28
cor.test(crime$murder, crime$burglary)

# B. Decide whether outliers exist; if so, identify and remove the state,
#    then see how the relationships between the variables change.
cdata <- crime[, c("murder", "burglary")]
head(cdata)
# The rates have different scales, so standardize before comparing.
boxplot(scale(cdata))  # murder has one unusually high outlier
# Quartile-based outlier check
# fivenum: minimum, lower-hinge, median, upper-hinge, maximum
a <- cdata$murder
which(a > fivenum(a)[4] + 1.5 * IQR(a))  # 9
## [1] 9
# Identify the outlier from a labelled scatterplot.
cdata <- crime[, c("state", "murder", "burglary")]
head(cdata, 10)
plot(burglary ~ murder, data = cdata, cex = 0.8, type = "n")
text(cdata$murder, cdata$burglary, cex = 0.8, labels = rownames(cdata))  # outlier = 9
outlier <- 9  # was `outlier = 9`; use `<-` for assignment
cdata[outlier, c("state", "murder", "burglary")]  # District of Columbia

# Correlation after removing the outlier.
cdata <- cdata[-outlier, ]
head(cdata, 10)
cor.test(cdata$murder, cdata$burglary)
pairs.panels(cdata[, c(2:3)])  # cor = 0.62: the correlation increased
# C. Draw a bubble plot relating murder, burglary and population,
#    and describe what you observe.
# Full data:
symbols(crime$murder, crime$burglary, circles = crime$population, inches = 0.5)
# With the outlier removed:
cdata <- crime[-outlier, c("state", "murder", "burglary", "population")]
symbols(cdata$murder, cdata$burglary, circles = cdata$population, inches = 0.5)
# Murder and burglary rates are somewhat correlated, but neither is
# related to population.

# D. Show the seven crime rates as a heatmap, star plot and Nightingale chart,
#    and look for states with similar crime patterns.
cdata2 <- crime
rownames(cdata2) <- cdata2$state   # states become row names
cdata2 <- cdata2[, -c(1, 9)]       # drop the state and population columns
head(cdata2, 10)

# star & Nightingale charts take a data frame
stars(cdata2, key.loc = c(11, 2), cex = 0.8, ncol = 10)
stars(cdata2, key.loc = c(11, 2), cex = 0.8, draw.segments = TRUE, ncol = 10)

# heatmap needs a matrix
cdata2 <- as.matrix(cdata2)
heatmap(cdata2, scale = "column", Colv = NA, cexCol = 0.9, margins = c(8, 5))