다차원 척도법 Multi-Dimensional Scaling
개체들 사이의 유사성/비유사성을 측정하여 2차원 또는 3차원 공간상에 점으로 표현하는 분석 방법.
개체들간의 근접성(proximity)을 시각화하여 데이터 속에 잠재해 있는 패턴이나 구조를 찾아내는 통계 기법.
개체들 간의 거리는 주로 유클리드 거리로 계산하며, 그 결과를 거리 행렬로 정리하여 분석에 사용한다.
개체들을 저차원 공간에 배치했을 때 원래의 거리 관계가 얼마나 잘 보존되는지(적합도)는 스트레스 값(stress value)으로 나타내며, 값이 작을수록 적합도가 높다.
1. 계량적 MDS
데이터가 연속형 변수(구간척도, 비율척도)인 경우 사용.
각 개체들간의 유클리드 거리 행렬을 계산하고 개체들간의 비유사성을 공간상에 표현한다.
library(MASS)
# Load the eurodist distance object (distances between European cities)
# from the datasets package, then display it.
data("eurodist", package = "datasets")
eurodist
## Athens Barcelona Brussels Calais Cherbourg Cologne
## Barcelona 3313
## Brussels 2963 1318
## Calais 3175 1326 204
## Cherbourg 3339 1294 583 460
## Cologne 2762 1498 206 409 785
## Copenhagen 3276 2218 966 1136 1545 760
## Geneva 2610 803 677 747 853 1662
## Gibraltar 4485 1172 2256 2224 2047 2436
## Hamburg 2977 2018 597 714 1115 460
## Hook of Holland 3030 1490 172 330 731 269
## Lisbon 4532 1305 2084 2052 1827 2290
## Lyons 2753 645 690 739 789 714
## Madrid 3949 636 1558 1550 1347 1764
## Marseilles 2865 521 1011 1059 1101 1035
## Milan 2282 1014 925 1077 1209 911
## Munich 2179 1365 747 977 1160 583
## Paris 3000 1033 285 280 340 465
## Rome 817 1460 1511 1662 1794 1497
## Stockholm 3927 2868 1616 1786 2196 1403
## Vienna 1991 1802 1175 1381 1588 937
# Classical (metric) MDS: map the cities onto a 2-dimensional configuration.
# The result is a matrix with one row per city and one column per dimension.
loc <- cmdscale(d = eurodist, k = 2)
loc
## [,1] [,2]
## Athens 2290.274680 1798.80293
## Barcelona -825.382790 546.81148
## Brussels 59.183341 -367.08135
## Calais -82.845973 -429.91466
## Cherbourg -352.499435 -290.90843
## Cologne 293.689633 -405.31194
## Copenhagen 681.931545 -1108.64478
## Geneva -9.423364 240.40600
## Gibraltar -2048.449113 642.45854
## Hamburg 561.108970 -773.36929
## Hook of Holland 164.921799 -549.36704
## Lisbon -1935.040811 49.12514
## Lyons -226.423236 187.08779
## Madrid -1423.353697 305.87513
## Marseilles -299.498710 388.80726
## Milan 260.878046 416.67381
## Munich 587.675679 81.18224
## Paris -156.836257 -211.13911
## Rome 709.413282 1109.36665
## Stockholm 839.445911 -1836.79055
## Vienna 911.230500 205.93020
# First MDS coordinate is used as the horizontal axis.
x <- loc[, 1L]
# Flip the sign of the second coordinate so that northern cities are drawn
# at the top of the plot.
y <- loc[, 2L] * -1
# Set up an empty plotting region (type = "n") with a y/x aspect ratio of 1,
# then label each point with its city name.
plot(x = x, y = y, type = "n", asp = 1, main = "Metric MDS")
text(x = x, y = y, labels = rownames(loc), cex = 0.8)
# Dashed reference lines through the origin.
abline(h = 0, v = 0, lty = "dashed", lwd = 1)
2. 비계량적 MDS
데이터가 순서척도인 경우 사용.
개체들 간의 거리가 순서(순위)로만 주어진 경우에는, 그 순위 관계가 유지되도록 순서척도를 거리의 속성을 갖는 값으로 변환한 뒤 공간상에 표현한다.
# Load the swiss data set (socio-economic indicators for Swiss provinces)
# and show its first six rows.
data("swiss", package = "datasets")
head(swiss, n = 6L)
## Fertility Agriculture Examination Education Catholic
## Courtelary 80.2 17.0 15 12 9.96
## Delemont 83.1 45.1 6 9 84.84
## Franches-Mnt 92.5 39.7 5 5 93.40
## Moutier 85.8 36.5 12 7 33.77
## Neuveville 76.9 43.5 17 15 5.16
## Porrentruy 76.1 35.3 9 7 90.57
## Infant.Mortality
## Courtelary 22.2
## Delemont 22.2
## Franches-Mnt 20.2
## Moutier 20.3
## Neuveville 20.6
## Porrentruy 26.6
# (1) isoMDS : Kruskal's Non-metric Multidimensional Scaling
# Convert the data frame to a matrix; its row names are reused later as
# plot labels.
swissA <- as.matrix(swiss)
# Pairwise Euclidean distances between the rows.
# NOTE: the result is stored under the name `dist`, which masks the function
# name for variable lookup; later calls such as dist(x) still resolve to the
# function because R skips non-function objects when looking up a call.
dist <- stats::dist(swissA, method = "euclidean")
dist
## Courtelary Delemont Franches-Mnt Moutier Neuveville
## Delemont 80.591776
## Franches-Mnt 88.214588 14.553130
## Moutier 31.876890 52.278149 60.529967
## Neuveville 27.417877 80.928749 91.039868 32.184346
## Porrentruy 83.246814 14.499410 18.902087 58.055663 86.756833
## Broye 98.701125 28.295231 33.805362 68.242409 92.325707
## Glane 101.772344 28.737648 30.262974 71.187795 96.805114
## Gruyere 95.136818 16.546265 18.943149 66.162678 93.667231
## Sarine 86.247240 12.793029 18.141951 58.922170 86.557602
## Veveyse 101.019664 25.695581 27.781902 70.799404 96.866674
## Aigle 48.526010 82.084361 93.952937 43.349077 23.647190
## Aubonne 53.121428 87.519226 99.091306 48.123799 27.560880
## Avenches 45.653159 84.196307 95.733332 42.565780 19.436638
## Cossonay 56.910189 89.750378 101.591271 52.164284 32.105538
## Echallens 59.546180 69.616805 81.459745 41.986366 38.126783
## Grandson 20.747183 83.837829 93.467534 34.052473 13.042607
## Lausanne 31.490197 86.405456 96.666406 48.006100 36.451509
## La Vallee 34.423918 97.144460 106.655579 55.371603 40.469125
## Lavaux 58.690241 89.444117 101.354840 52.635491 32.484341
## Morges 46.270324 84.488118 96.051022 43.402783 21.270047
## Moudon 42.358985 83.540124 94.778185 40.725576 20.804317
## Nyone 42.570558 76.741319 88.820761 39.306957 24.812505
## Orbe 45.144630 86.584696 98.214154 45.614744 24.730985
## Oron 56.440443 87.700420 98.685815 49.005580 31.865775
## Payerne 42.037399 81.569125 92.468151 37.853687 16.991024
## Paysd'enhaut 49.637587 85.354604 96.168787 44.167795 26.505282
## Rolle 48.490799 82.707644 94.717910 44.160871 24.885610
## Vevey 28.259335 76.281875 86.882355 37.391123 29.695623
## Yverdon 36.142490 81.330976 92.552850 36.954417 15.024766
## Conthey 114.537210 45.298200 50.022656 83.830624 105.520958
## Entremont 113.510609 44.838216 51.243325 83.577438 104.342659
## Herens 116.688095 48.143905 52.794034 85.830664 106.993063
## Martigwy 108.689328 38.814745 45.252664 78.895666 100.751824
## Monthey 101.162629 25.015083 28.946198 71.009383 96.791186
## St Maurice 108.182346 38.817759 46.222133 79.164601 100.580416
## Sierre 113.946566 44.006981 45.563512 82.357429 105.708467
## Sion 98.459265 23.748897 29.427451 69.052253 93.912507
## Boudry 26.409195 83.033718 93.195699 35.414580 12.592125
## La Chauxdfnd 22.611919 85.355214 93.231336 44.019205 40.516255
## Le Locle 10.900349 81.348106 89.606263 34.786671 28.363596
## Neuchatel 33.152400 84.200394 93.616507 47.543480 39.800975
## Val de Ruz 22.042234 80.966394 90.279150 30.116109 10.183128
## ValdeTravers 17.194653 84.346998 92.994960 38.062244 29.033086
## V. De Geneve 74.322166 94.666309 103.578297 81.525854 82.328685
## Rive Droite 63.876685 56.363624 69.856073 50.726577 57.433291
## Rive Gauche 64.802137 57.393729 68.628018 55.808903 66.799618
# Fit Kruskal's non-metric MDS in two dimensions. The returned list holds the
# fitted configuration ($points) and the final stress ($stress).
mds <- isoMDS(d = dist, k = 2)
## initial value 5.463800
## iter 5 value 4.499103
## iter 5 value 4.495335
## iter 5 value 4.492669
## final value 4.492669
## converged
mds
## $points
## [,1] [,2]
## Courtelary 38.850496 -16.1546743
## Delemont -42.676573 -13.7209890
## Franches-Mnt -53.587659 -21.3357627
## Moutier 6.735536 -4.6041161
## Neuveville 35.622307 4.6339724
## Porrentruy -44.739479 -25.4957015
## Broye -55.301247 2.9985892
## Glane -61.510950 -0.5029742
## Gruyere -56.196434 -11.5873817
## Sarine -47.880261 -18.4937959
## Veveyse -60.573600 -3.3177231
## Aigle 28.500730 18.4040743
## Aubonne 31.622253 26.0543764
## Avenches 31.955939 19.3455733
## Cossonay 32.951993 27.2866822
## Echallens 11.653211 24.5294932
## Grandson 39.623322 -0.1906417
## Lausanne 40.455512 -24.2790922
## La Vallee 51.099610 -23.2691859
## Lavaux 30.753053 29.7236322
## Morges 32.051544 18.1638440
## Moudon 33.349605 17.2202105
## Nyone 26.363999 7.9625625
## Orbe 35.822440 15.4595563
## Oron 29.301157 31.3756933
## Payerne 30.448866 19.5104430
## Paysd'enhaut 30.389346 26.4350474
## Rolle 29.595391 18.6942289
## Vevey 30.316991 -16.0544171
## Yverdon 33.168755 11.4999792
## Conthey -67.045836 16.9000059
## Entremont -66.130908 14.2235838
## Herens -67.831773 19.3460319
## Martigwy -63.493801 8.8769860
## Monthey -59.675844 -1.3044352
## St Maurice -63.678801 7.2356724
## Sierre -69.462428 17.6354948
## Sion -57.385309 -4.8572223
## Boudry 37.667244 0.0118818
## La Chauxdfnd 40.842274 -29.0069374
## Le Locle 38.285582 -17.6212453
## Neuchatel 35.745340 -30.5746402
## Val de Ruz 37.226824 2.1006842
## ValdeTravers 41.086622 -15.3626392
## V. De Geneve 24.329270 -73.1278621
## Rive Droite -4.756696 -17.5026420
## Rive Gauche -3.887613 -37.2642199
##
## $stress
## [1] 4.492669
# Plot the isoMDS configuration with each point labelled by its row name.
# asp = 1 keeps both axes on the same scale, so distances seen on screen are
# not stretched along one axis (same setting as the metric MDS plot); the
# title follows the naming used for the other MDS plots.
plot(mds$points, type = "n", asp = 1, main = "Nonmetric MDS : isoMDS")
text(mds$points, labels = rownames(swissA), cex = 0.7)
# Dashed reference lines through the origin.
abline(v = 0, h = 0, lty = 2, lwd = 1)
# Repeat the analysis with the Agriculture variable left out, for comparison.
# The column is excluded by name rather than by position, so the exclusion
# still matches the intent if the column order of the data frame changes.
swissC <- as.matrix(swiss[, setdiff(names(swiss), "Agriculture")])
# Euclidean distances between rows on the remaining variables.
distC <- dist(swissC)
# Kruskal's non-metric MDS on the reduced distance matrix.
mdsC <- isoMDS(distC)
## initial value 3.194808
## iter 5 value 2.487603
## iter 10 value 2.330586
## iter 15 value 2.263097
## final value 2.236301
## converged
# Plot the isoMDS configuration fitted without the Agriculture variable.
# asp = 1 keeps both axes on the same scale so that on-screen distances are
# not distorted (same setting as the metric MDS plot); the title identifies
# which fit is shown.
plot(mdsC$points, type = "n", asp = 1,
     main = "Nonmetric MDS : isoMDS (Agriculture excluded)")
text(mdsC$points, labels = rownames(swissC), cex = 0.8)
# Dashed reference lines through the origin.
abline(v = 0, h = 0, lty = 2, lwd = 1)
# (2) sammon : Non-Linear Mapping
# Matrix form of the full swiss data; its row names label the plot later on.
swissK <- as.matrix(swiss)
# Sammon mapping of the Euclidean distance matrix into two dimensions.
sam <- sammon(d = stats::dist(swissK, method = "euclidean"), k = 2)
## Initial stress : 0.01959
## stress after 0 iters: 0.01959
# Plot the Sammon configuration with each point labelled by its row name.
# asp = 1 keeps both axes on the same scale so that on-screen distances are
# not distorted (same setting as the metric MDS plot).
plot(sam$points, type = "n", asp = 1, main = "Nonmetric MDS : sammon")
text(sam$points, labels = rownames(swissK), cex = 0.7)
# Dashed reference lines through the origin.
abline(v = 0, h = 0, lty = 2, lwd = 1)