Projekt „Nowa oferta edukacyjna Uniwersytetu Wrocławskiego odpowiedzią na współczesne potrzeby rynku pracy i gospodarki opartej na wiedzy”
*
> moto <- read.table("motorins.txt",header=T)
> tmoto <- transform(moto, LPay = log(Payment+1,10),
+ fKilo = factor(Kilometres), fZone = factor(Zone),
+ fMake = factor(Make))
Recursive Partitioning
rpart(rpart)
> trm <- rpart(LPay ~ Claims+ fKilo,tmoto)
> plot(trm,uniform=T,main="LPay ~ Claims+ fKilo, optymalny")
> text(trm, use.n=TRUE, all=TRUE, cex=.8)
LPay ~ Claims+ fKilo, optymalny
Claims< 0.5
|
Claims< 12.5
Claims< 3.5 Claims< 77.5
3.819 n=2182
0 n=385
4.638 n=1797
4.086 n=1051
3.718 n=514
4.439 n=537
5.414 n=746
5.102 n=503
6.061 n=243 Claims< 0.5
Claims< 12.5
Claims< 3.5 Claims< 77.5
3.819 n=2182
0 n=385
4.638 n=1797
4.086 n=1051
3.718 n=514
4.439 n=537
5.414 n=746
5.102 n=503
6.061 n=243
*
> printcp(trm)
Regression tree:
rpart(formula = LPay ~ Claims + fKilo, data = tmoto)
Variables actually used in tree construction:
[1] Claims
Root node error: 8142.3/2182 = 3.7316 n= 2182
CP nsplit rel error xerror xstd
1 0.837550      0  1.000000 1.000956 0.0303662
2 0.094500      1  0.162450 0.162688 0.0054371
3 0.018505      2  0.067950 0.068899 0.0023215
4 0.016734      3  0.049445 0.049520 0.0017718
5 0.010000      4  0.032711 0.033364 0.0012468
> trm.p <- prune(trm, cp=0.05)
> plot(trm.p,uniform=T,main="LPay ~ Claims+ fKilo Pruned cp=.05")
> text(trm.p, use.n=TRUE, all=TRUE, cex=.8)
LPay ~ Claims+ fKilo Pruned cp=.05
Claims< 0.5
|
Claims< 12.5 3.819
n=2182
0 n=385
4.638 n=1797
4.086 n=1051
5.414 n=746
*
LPay ~ Claims+ fKilo Pruned cp=.1
Claims< 0.5
|
3.819 n=2182
0 n=385
4.638 n=1797
*
trm10 pruned cp=0.017
Claims< 0.5
|
Claims< 12.5
Claims< 77.5 3.819
n=2182
0 n=385
4.638 n=1797
4.086 n=1051
5.414 n=746
5.102
n=503 6.061
n=243
> LPay.pred <- predict(trm10.p,newdata=tmoto)
> xlim <- range(tmoto$LPay)
> plot(LPay.pred~tmoto$LPay,data=tmoto,xlab="Rzeczywiste",ylab="Prognoza",
+ ylim=xlim,xlim=xlim)
> abline(a=0,b=1)
0 2 4 6
0 2 4 6
Rzeczywiste
Prognoza
> data("GlaucomaM",package = "ipred")
> glaucoma.t <- rpart(Class ~ .,data=GlaucomaM,control = rpart.control(xval=100))
> glaucoma.t$cptable
CP nsplit rel error xerror xstd
1 0.65306122      0 1.0000000 1.4897959 0.06227408
2 0.07142857      1 0.3469388 0.3877551 0.05647630
3 0.01360544      2 0.2755102 0.3673469 0.05531681
4 0.01000000      5 0.2346939 0.4693878 0.06054391
> opt <- which.min(glaucoma.t$cptable[,"xerror"])
> cp.opt <- glaucoma.t$cptable[opt,"CP"]
> glaucoma.tpr <-prune(glaucoma.t,cp=cp.opt)
> plot(glaucoma.tpr, uniform=TRUE, + main="glaucoma, pruned")
> text(glaucoma.tpr, use.n=TRUE, all=TRUE, cex=.8)
glaucoma, pruned
varg< 0.209
|
mhcg>=0.1695 glaucoma
98/98
glaucoma 70/6
normal 28/92
glaucoma
7/0 normal
21/92
mhcg - mean height contour global.
varg - volume above reference global.
#stabilność drzewa
nsplitopt <- vector(mode="integer",length=25)
> for (i in 1:length(nsplitopt)) {
+ cp <- rpart(Class ~ .,data=GlaucomaM)$cptable
+ nsplitopt[i] <- cp[which.min(cp[,"xerror"]),"nsplit"]
+ }
> table(nsplitopt)
nsplitopt
 1  2  5
17  5  3
#bagging = bootstrap aggregating
> trees <- vector(mode="list",length=25)
> n <- nrow(GlaucomaM)
> boot <- rmultinom(length(trees),n,rep(1,n)/n)
> mod <- rpart(Class ~ .,data=GlaucomaM, control =rpart.control(xval=0))
> for(i in 1:length(trees)) trees[[i]] <- update(mod,weights=boot[,i])
#zmienne użyte w korzeniu
> table(sapply(trees, function(x) as.character(x$frame$var[1])))
 tmi varg vari vars
   1   11   12    1
> pstwo <- matrix(0,nrow=n,ncol=length(trees))
> for (i in 1: length(trees)){
+ pstwo[,i] <- predict(trees[[i]],newdata = GlaucomaM)[,1]
+ pstwo[boot[,i]>0,i] <- NA
+ }
> srd <- rowMeans(pstwo,na.rm=T)
> prognoza <-factor(ifelse(srd>0.5,"JASKRA","NORMALNE"))
> progTab <-table(prognoza,GlaucomaM$Class)
> progTab
prognoza   glaucoma normal
  JASKRA         78     19
  NORMALNE       20     79
> czulosc <- round(progTab[1,1]/colSums(progTab)[1]*100)
> specyficznosc <- round(progTab[2,2]/colSums(progTab)[2]*100)
> cat("czułość=", czulosc," specyficzność=" , specyficznosc) czułość= 80 specyficzność= 81
> library("randomForest")
LASY LOSOWE
Learning algorithm
Each tree is constructed using the following algorithm:
1. Let the number of training cases be N, and the number of variables in the classifier be M.
2. We are told the number m of input variables to be used to determine the decision at a node of the tree;
m should be much less than M.
3. Choose a training set for this tree by choosing n times with replacement from all N available training cases (i.e. take a bootstrap sample). Use the rest of the cases to estimate the error of the tree, by predicting their classes.
4. For each node of the tree, randomly choose m variables on which to base the decision at that node.
Calculate the best split based on these m variables in the training set.
5. Each tree is fully grown and not pruned (as may be done in constructing a normal tree classifier).
For prediction a new sample is pushed down the tree. It is assigned the label of the training sample in the terminal node it ends up in. This procedure is iterated over all trees in the ensemble, and the average vote of all trees is reported as random forest prediction.
> las <- randomForest(Class ~ .,data=GlaucomaM)
> table(predict(las),GlaucomaM$Class)
           glaucoma normal
  glaucoma       81      9
  normal         17     89
> las.p<-table(predict(las),GlaucomaM$Class)
> czulosc <- round(las.p[1,1]/colSums(las.p)[1]*100)
> specyficznosc <- round(las.p[2,2]/colSums(las.p)[2]*100)
> cat("czułość=", czulosc," specyficzność=" , specyficznosc) czułość= 83 specyficzność= 91
> print(las) Call:
randomForest(formula = Class ~ ., data = GlaucomaM) Type of random forest: classification
Number of trees: 500 No. of variables tried at each split: 7 OOB estimate of error rate: 13.27%
Confusion matrix:
glaucoma normal class.error glaucoma 81 17 0.17346939 normal 9 89 0.09183673
> trm1 <- rpart(Claims ~ fZone,tmoto)
> plot(trm1,uniform=T,main="Claims ~ fZone, optymalny")
> text(trm1, use.n=TRUE, all=TRUE, cex=.8)
Claims ~ fZone, optymalny
fZone=efg
|
51.87 n=2182
18.27 n=922
76.45 n=1260
> printcp(trm1) Regression tree:
rpart(formula = Claims ~ fZone, data = tmoto) Variables actually used in tree construction:
[1] fZone
Root node error: 88738792/2182 = 40669 n= 2182
CP nsplit rel error xerror xstd
1 0.020309      0    1.0000 1.00041 0.20894
2 0.010000      1    0.9797 0.98058 0.20473
> trm2 <- rpart(Claims ~ fZone, method="poisson",tmoto)
> plot(trm2,uniform=T,main="Claims ~ fZone, Poisson, optymalny")
> text(trm2, use.n=TRUE, all=TRUE, cex=.8)
Claims ~ fZone, Poisson, optymalny
fZone=efg
|
fZone=g
51.87 113171/2182
18.27 16844/922
2.112 620/294
25.84 16224/628
76.45 96327/1260
trm3 <- rpart(fZone ~ Claims+LPay+Insured, method="class",tmoto)
> plot(trm3,uniform=T,main="fZone ~ Claims+LPay+Insured, class, optymalny")
> text(trm3, use.n=TRUE, all=TRUE, cex=.8)
fZone ~ Claims+LPay+Insured, class, optymalny
Insured>=6.73
|
Claims>=1.5
Insured>=757.4
1
315/315/315/315/313/315/294
4
303/307/310/315/264/299/137
4
276/270/270/291/183/229/55
4
53/56/63/104/25/50/3
1
223/214/207/187/158/179/52
7
27/37/40/24/81/70/82
7
12/8/5/0/49/16/157
> printcp(trm3) Classification tree:
rpart(formula = fZone ~ Claims + LPay + Insured, data = tmoto, method = "class")
Variables actually used in tree construction:
[1] Claims Insured
Root node error: 1867/2182 = 0.85564 n= 2182
CP nsplit rel error xerror xstd
1 0.084092      0   1.00000 1.03374 0.0079966
2 0.031066      1   0.91591 0.94162 0.0098997
3 0.019282      2   0.88484 0.90252 0.0104932
4 0.010000      3   0.86556 0.88859 0.0106808
> trm3.p <- prune(trm3, cp=0.02)
> plot(trm3.p,uniform=T,main="fZone ~ Claims+LPay+Insured Pruned cp=0.02")
> text(trm3.p, use.n=TRUE, all=TRUE, cex=.8)
fZone ~ Claims+LPay+Insured Pruned cp=0.02
Insured>=6.73
|
Claims>=1.5
1
315/315/315/315/313/315/294
4
303/307/310/315/264/299/137
4
276/270/270/291/183/229/55
7
27/37/40/24/81/70/82
7
12/8/5/0/49/16/157
> library("party")
> glauct <- ctree(Class ~ ., data = GlaucomaM)
> plot(glauct) vari
volume above reference inferior.
vasg
volume above surface global.
tms
third moment superior.
vari p < 0.001
1
0.059 0.059
vasg p < 0.001
2
0.066 0.066 Node 3 (n = 79)
normal glaucoma
0 0.2 0.4 0.6 0.8
1 Node 4 (n = 8)
normal glaucoma
0 0.2 0.4 0.6 0.8 1
tms p = 0.049
5
-0.066 -0.066 Node 6 (n = 65)
normal glaucoma
0 0.2 0.4 0.6 0.8
1 Node 7 (n = 44)
normal glaucoma
0 0.2 0.4 0.6 0.8 1
> trm4 <- ctree(fZone ~ Claims+LPay+Insured, tmoto)
> plot(trm4,main="fZone ~ Claims+LPay+Insured, ctree(party)")
fZone ~ Claims+LPay+Insured, ctree(party) LPay
p < 0.001 1
3.179 3.179
Insured p < 0.001
2
5.1 5.1 Insured
p = 0.003 3
4.65 4.65 Insured
p = 0.005 4
3.7 3.7 Node 5 (n = 158)
1 3 5 7 0.4 0 0.8
Node 6 (n = 23)
1 3 5 7 0.4 0 0.8
Node 7 (n = 11)
1 3 5 7 0.4 0 0.8
Node 8 (n = 266)
1 3 5 7 0.4 0 0.8
LPay p < 0.001
9
4.523 4.523 Claims
p < 0.001 10
10 10 Claims
p = 0.002 11
1 1 Node 12 (n = 146)
1 3 5 7 0.4 0 0.8
Node 13 (n = 529)
1 3 5 7 0.4 0 0.8
Node 14 (n = 32)
1 3 5 7 0.4 0 0.8
LPay p = 0.004
15
5.685 5.685
Node 16 (n = 815)
1 3 5 7 0.4 0 0.8
Node 17 (n = 202)
1 3 5 7 0.4 0 0.8
> table(predict(trm4),tmoto$fZone)
1 2 3 4 5 6 7
1 169 142 152 151  97 116  20
2  74  92  85  76  74  90  38
3   0   0   0   0   0   0   0
4  34  36  33  63  13  23   0
5  12  17  15  13  34  29  26
6   0   0   0   0   0   0   0
7  26  28  30  12  95  57 210
> trm5 <- ctree(LPay ~ fKilo, tmoto)
> plot(trm5,main="LPay ~ fKilo, ctree(party)")
LPay ~ fKilo, ctree(party)
fKilo p < 0.001
1
{1, 2, 3} {4, 5}
fKilo p = 0.004
2
2 {1, 3}
Node 3 (n = 441)
0 2 4 6
Node 4 (n = 880)
0 2 4 6
Node 5 (n = 861)
0
2
4
6
> moton<-with(tmoto, cbind(Insured,Claims,LPay))
> motos<-scale(moton)
> wss <- (nrow(motos)-1)*sum(apply(motos,2,var))
> for (i in 2:15) wss[i] <- sum(kmeans(motos,
+ centers=i)$withinss)
> plot(1:15, wss, type="b", xlab="Number of Clusters",
+ ylab="Within groups sum of squares")
2 4 6 8 10 12 14
0 1 0 0 0 3 0 0 0 5 0 0 0
Number of Clusters
Within groups sum of squares
> mkm4<-kmeans(motos,4)
> aggregate(motos,by=list(mkm4$cluster),FUN=mean) Group.1 Insured Claims LPay
1 1 12.6947997 11.0472899 1.6739558 2 2 1.8133558 2.8398186 1.3510500 3 3 -0.1086526 -0.1331254 0.3750055 4 4 -0.1911118 -0.2571036 -1.9713412
> mkm4$centers
Insured Claims LPay
1 12.6947997 11.0472899 1.6739558 2 1.8133558 2.8398186 1.3510500 3 -0.1086526 -0.1331254 0.3750055 4 -0.1911118 -0.2571036 -1.9713412 Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size"
> library(cluster)
> clusplot(motos, mkm4$cluster, color=TRUE, shade=TRUE, + labels=2, lines=0)
0 5 10 15 20 25
-8 -6 -4 -2 0
CLUSPLOT( motos )
Component 1
Component 2
These two components explain 97.12 % of the point variability.
2 1 3 78 56 4
9 11 10
12 13 14 16 15
17 20 19 18 21
23 24 25 22
26 29 28 27 30 32 34 33 31
35 37 36 38 39 40 41 42 43
44 46 45 48 52 47 49 50 51
53 56 57 61 59 58 60 55 54 62
63 64
65 6667 68 69 71 70
72 73
74 75 77 79 78 76 80
82 81 84 83 86 88 87 85 89
91 90 93 95 92 94 96 97
98
99 101 100 102 103 106 104 105 107
109 108 110 111 115 113 114 112
116 120 119 124 122 121 123 118 117 125
126 128 127
129 134 133 131 132 130
135 137 136
138 140 139 141 142 143
144 146 145
147 148 149 150 151 152 153 154 155
156 158 157 159 161 160
163 162 164 169 165 166 167 168 170
172 171 174 173 175 176 177
179 178 188 183 182 187 185 184 186 181 180
189 191 190
192 194 195 193 197 196
198 199
201 200 203 205 202 204
206
207 209 208
210 212 211 213
215 214 217 216 218 219 221 220 222
224 223 228 227 230 229 231 226 225 233 232 235 234
236 237 242 239 241 238 240
244 243 245
246 250 248 247 249 251
252 253
254 255
257 256 258 259
260 262 261 263 264
265 266 267 268 269
271 270
272 273
275 274 276 277 278 279 281 280 282 284 283 285
286 287
289 288 290
291 292 293 294 295
296 298 297 300 299 301 302 303 305 304
306 308 309 313 310 311 312 307 314
315 317 316
318 319 320 321
322 323 325 324 326
327 328 329 331 330
332
333 334 335 336
338 340 337 339
341 343 342 344
345 347 346 348 349 350
352 351 353
354 355 356
358 357 359
361 360
362 363 364 365 367 366 368
369 371 372 376 374 373 375 370 377
378 379
380 381
382 383
384 385
386 387
388 389
390
391 392
393 394
395
396 397 398
399 400
401
402 403
404
405 406
407 408 409 410 411
412
413 414
415 416
417 418
419 420
422 423 421
424 425 426 427 428 429
431 430 432 433
434
435 436
437 438
440 439 441 442 443 444 445
447 446 448 450 449
451 452 453 454
456 455 460 461 462 459 463 458 457 464 465 468 469 470 467 466 471 474 475 472 473 476
477 478 479 480 482 481 483
485 484 486 488 491 492 487 490 489
494 493 496 495 497 500 498 499 501
502 504 503
505 507 506 508
510 509 511 513 512
514 518 517 515 516
519 521 520 523 522 525 524 526
528 527 530 529 532 533 534 531 536 535
537 539 538 541 542 543 545 540 544
546 551 555 554 550 564 549 552 553 560 559 561 548 563 558 562 547 557 556
565 567 566
568 569 570 571
573 572 574 576 575
577 578 581 579 580
582 585 584 583
586 587 588 590 589
591 593 592 594 595
596 597 599 598 600 601 602 603 604 605 606 607 609 610 608 611 612 613 614
615 616
618 617 623 627 622 621 626 624 625 620 619
628 630 629
631 632 633 634 636 635
637 639 638
640 645 644 642 641 643
646 648 647
649 653 650 654 651 652 656 655 658 657
659 660 662 661
663 665 664 667 666
668 669 670
672 671 677 681 680 676 675 678 679 674 673 683 682 685 684
686 687 689 688 690
691 692
694 693 695 696 698 697 699
701 700 702
703 704 705 706 707 708
710 709 712 711 713 714 716 715
717 719 718 720 721 722 725 724 723 726 728 727
729
730 731
733 732 734 735
737 736 739 738 740
741 749 742 743 744745 748 747 750 751 746 753 752
754 755
756 757 758 761 759 760 762 764 763 765
766 767 768 769 770 771 772 773 774
775 776 777 778 780 779
782 781 783
784 786 785 788 787 789
791 790 793 792
794 795 796 797 798 802 801 800 799 803 804 805 806 807808 811 810 809
812 816 813 815 814
817 818
819 820
821
822 823
824 825
826 827
828 829
830 832 831
833 834
835 836
837 838
839 840
842 841
843 844 845 846 847 848
849 850 851 852
854 853
855 856 857 858 859 860 861
863 864 862
865 866 867 868 869 870
872 871 874 875 876 873 877 878 879
881 880 883 885 884 886 882 887 888 889 891 893 894 895 892 890 896
897
898 900 899 902 901 903 904 905 906 910 909 908 907
911 913 912 914
915 917 916 919 918 920 921 922 923 924
926 925 927 928
929 933 938 942 930 931 932 941 939 937 940 936 935 934
943 945 944
946 947 949 950 948 951 953 952 955 954 956
957 959 958 960
962 961 964 963 965 968 966 967 969
971 970 973 972 974
975 976 978 977
980 979 981 982 983 986 987 985 984 989 988 990 991 992
993 994
996 995 1000 999 998 997 1001 1005 1004 1002 1003
1006 1008 1009 1010 1011 1012 1007
1013 1014 1017 1016 1015 1018 1019 1022 1020 1021 1023
1025 1024 1027 1028 1038 1037 1032 1040 1041 1026 1031 1036 1029 1035 1044 1039 1030 1034 1043 1042 1033 1046 1045 1047 1049 1048 1050
1052 1051 1054 1053
1055 1064 1056 1058 1059 1060 1057 1068 1063 1065 1067 1062 1066 1061
1069 1071 1070
1072 1073 1074 1075 1076 1077 1079 1078 1081 1080 1082 1085 1083 1084 1086 1088 1087 1090 1089
1092 1091 1094 1093 10951096 1098 1097 1099 1100 1101 1103 1102 1104 1107 1108 1106 1105 1109 1112 1110 1111 1113 1118 1121 1117 1122 1119 1116 1120 1115 1114
1124 1123 1125
1126 1127 1131 1128 1130 1129
1132 1133
1134
1135 1136 1137 1138 1139 1140
1142 1141 1143 1144 1145
1146 1147
1148 1149
1151 1150 1153 1155 1154 1152 1156
1157 1158
1160 1159 1161 1162 1163 1164
1166 1165 1167 1170 1169 1168 1171
1172 1173 1174 1175 1176 1179 1178 1177 1180
1181 1183 1182 1185 1184
1187 1186 1189 1188 1190
1191 1192
1193 1194 1195 1196 1197 1198
1199 1200 1201 1202 1203
1205 1204 1206
1207 1208 1209 1211 1210 1212
1214 1213
1215 1216 1217
1218 1219
1220 1221
1223 1222 1224
1225 1226
1227 1228 1229 1230
1232 1231 1234 1233
1235 1236 1238 1239 1237 1243 1242 1241 1240
1244 1245 1246 1247 1248 1252 1251 1249 1250 1253 1256 1254 1257 1255
1258 1259
1260 1261 1262
1263 1264 1265 1266
1267
1268 1269 1270
1271 1272
1273 1274 1275
1276 1277 1278 1279 1280 1281 1282 1283 1284
1285
1286 1287 1288 1289 1290 1291
1292
1293 1294
1295 1296 1297 1298 1299 1300 1301 1302
1304 1303
1305 1306 1307
1308 1309
1310 1311
1312 1313 1314
1316 1315 1317 1318 1319 1320
1322 1321 1323 1324
1325 1327 1326
1328 1329
1331 1330 1332 1333 1335 1334 1336 1337 1338
1339 1341 1342 1340 1343
1344 1345 1346 1347 1349 1348 1351 1352 1350 1353 1354
1355 1356
1358 1357 1360 1359 1361
1363 1362 1365 1364
1367 1366 1368 1369
1370 1372 1374 1373 1371 1378 1377 1376 1375 1379 1382 1386 1383 1381 1385 1380 1384
1387 1388 1389 1391 1392 1390
1394 1393 1395 1396 1397
1398 1399 1400
1401 1403 1402 1404 1405
1406 1407 1408 1409 1410 1412 1411 1413 1414
1415 1417 1416 1418
1419 1421 1420 1423 1422
1424 1425 1426 1427 1428 1432 1431 1430 1429 1433 1437 1435 1434 1436
1438 1440 1439 1441
1442 1446 1448 1443 1445 1444 1447 1450 1449 1451
1452 1453
1454 1455
1457 1456 1458 1459 1460 1461 1463 1462 1464 1466 1465 1467 1468 1470 1469
1472 1471
1473 1475 1474
1476 1477 1478
1480 1479 1481 1482 1484 1483
1485 1486
1487 1488 1490 1489
1491 1493 1492 1495 1494
1496 1497 1498 1499 1500 1505 1506 1508 1509 1503 1507 1504 1502 1501
1510 1511
1512 1513
1514 1515 1516 1518 1517
1519 1521 1520 1523 1522 1524 1525 1527 1526
1529 1528 1531 1530 1532 1533 1534
1535 1536 1538 1537 1540 1539
1541 1542 1543 1544 1545
1547 1546 1548 1549 1550 1551 1552
1553 1554 1558 1557 1556 1555
1559 1560 1561 1562 15631564 1566 1567 1565 1568
1569 1570 1572 1571
1573 1574 1575
1576 1577
1578 1579 1580 1581
1582 1583 1584 1585 1586 1587
1588 1589 1590
1591 1592 1593
1594 1595
1596 1597 1598
1599 1601 1602 1600 1603 1604 1605
1606
1607 1608
1610 1609
1611 1612
1613 1614 1615 1616 1617
1619 1618 1620
1621
1622 1623 1625 1626 1624 1630 1629 1628 1627
1631 1632 1633 1634
1635 1637 1636
1638 1639
1640 1641 1642 1643
1644 1646 1645
1647 1648 1649
1651 1652 1650
1653 1655 1654 1656 1657 1658
1659 1662 1660 1661 1664 1663
1665 1666
1667 1668
1669 1671 1670
1673 1672 1675 1674
1676 1677
1679 1678 1680 1682 1681 1683 1684
1685 1686 1687 1688 1689 1694 1697 1695 1696 1693 1692 1698 1691 1690
1699
1700 1701 1702 1703 1704 1705
1706
1707 1708 1709 1710 1711 1712 1713
1714
1715 1716 1717 1718
1719 1720 1721 1722 1723 1724 1725 1726 1727 1728
1730 1729
1731 1732 1733 1734 1735 1736
1737
1738 1739
1740 1741
1742
1743 1744 1745
1746 1747 1748 1749 1750 1751 1752 1753 1754
1756 1757 1755 1758
1759 1760 1761 1762 1763
1765 1764 1766 1767 1768
1769 1770
1771 1772 1773 1774 1776 1775
1777 1778
1779 1780 1784 1781 1783 1782 1785 1786
1787 1789 1790 1788 1791 1792 1794 1793
1795 1797 1796 1799 1798 1801 1803 1802 1800
1804 1806 1805 1807
1808 1811 1812 1810 1809 1813
1815 1814
1816 1819 1817 1818 1821 1822 1823 1820
1824 1825 1826
1828 1827 1829 1830
1831 1832 1833 1834
1835 1837 1836 1838 1839
1840 1841 1842 1843 1844 1846 1845 1848 1847
1849 1850
1852 1851 1853
1854 1855 1856
1857 1858
1860 1862 1861 1859 1864 1863 1865 1866
1867 1868 1870 1869 1871 1875 1874 1873 1872
1876 1878 1877 1879 1880 1882 1881 1884 1883 1885
1887 1888 1889 1886 1891 1890 1892 1894 1893 1895 1896 1897
1898 1900 1899 1901 1902 1903
1904
1905 1906
1907 1909 1908 1910 1911
1912 1913 1914 1915
1916 1918 1917 1920 1919
1921 1923 1922 1924 1925 1927 1926 1929 1928
1930 1931 1932 1933 1934
1936 1935 1937 1938 1939
1941 1940 1942 1943 1945 1944 1947 1946 1949 1948
1951 1950 1952 1954 1953 1956 1955 1957 1961 1960 1958 1959 1963 1962 1965 1964
1966 1967
1969 1968 1970
1972 1971 1973 1974 1975 1976
1978 1977 1979
1981 1980 1983 1982
1984 1985 1987 1986 1988 1990 1989
1991 1992
1993 1995 1994 1996 1997 2000 2001 1999 1998 2002
2003 2004 2005 2006
2007 2008
2009 2010
2011 2012
2013 2014
2015 2016
2017 2018
2019 2020
2021
2022 2023 2024
2025
2026 2027
2028 2029 2030 2031 2032
2033 2035 2034
2036 2037
2038 2039 2040 2041
2042 2043 2045 2044 2046 2047 2048 2049
2050 2051 2052 2053
2054 2055
2057 2056 2058 2062 2061 2060 2059
2063 2064 2065 2066
2067 2068 2069 2070 2071
2072 2073
2074 2075
2076 2078 2077
2079 2080 2081
2082 2083 2084
2085 2086 2087 2088
2089 2090
2091 2092 2094 2093 2096 2095 2097 2098
2099 2100
2101 2102 2103
2105 2104 2106
2107 2108
2109 2110
2111 2112
2114 2115 2113 2116
2117 2118 2119 2120
2121 2125 2124 2123 2122 2126
2127 2128 2129 21302131
2132 2133 2134
2135
2136 2137 2138 2139 2140 2141
2142 2143 2144 2145 2146 2147 2148
2149
2150 2151 2152
2153 2154 2155
2156
2157 2158 2159 2160 2161
2162
2163 2164 2165 2166 2167 2168 2169 2170 2171 2172
2173 2174 2175
2176 2177 2178 2179
2180 2181
2182
1 3 2
4
> mcl<-clara(motos,4,samples=5) Available components:
[1] "sample" "medoids" "i.med" "clustering" "objective" "clusinfo"
[7] "diss" "call" "silinfo" "data"
> mcl$medoids
Insured Claims LPay
[1,] -0.1407973 -0.1926805 0.49435498 [2,] -0.1880138 -0.2472141 -0.09886265 [3,] 9.5373352 8.3889171 1.60959951 [4,] -0.1911686 -0.2571293 -1.97673944
Each sub-dataset is partitioned into k clusters using the same algorithm as in pam.
Once k representative objects have been selected from the sub-dataset, each observation of the entire dataset is assigned to the nearest medoid.
The mean (equivalent to the sum) of the dissimilarities of the observations to their closest medoid is used as a measure of the quality of the clustering. The sub-dataset for which the mean (or sum) is minimal, is retained. A further analysis is carried out on the final partition.
Each sub-dataset is forced to contain the medoids obtained from the best sub-dataset until then. Randomly drawn observations are added to this set until sampsize has been reached.
> mcl$clustering[1:50]
[1] 1 1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 1 1 4 1 1 2 1 1 [41] 1 1 2 4 1 1 1 2 1 1
> mcl$clusinfo
size max_diss av_diss isolation
[1,] 1202 7.09933530 0.467048311 11.87999889 [2,] 577 0.91353537 0.166321453 1.52870639 [3,] 18 14.11580451 5.194189970 1.08726727 [4,] 385 0.01382580 0.001500992 0.00736235
> mcl<-clara(motos,4,samples=10)
> mcl$medoids
Insured Claims LPay
[1,] -0.1407973 -0.1926805 0.49435498 [2,] -0.1880138 -0.2472141 -0.09886265 [3,] 9.5373352 8.3889171 1.60959951 [4,] -0.1911686 -0.2571293 -1.97673944
> mcl$clusinfo
size max_diss av_diss isolation
[1,] 1202 7.09933530 0.467048311 11.87999889 [2,] 577 0.91353537 0.166321453 1.52870639 [3,] 18 14.11580451 5.194189970 1.08726727 [4,] 385 0.01382580 0.001500992 0.00736235
> mcl<-clara(motos,4,samples=10,metric="manhattan")
> mcl$medoids
Insured Claims LPay
[1,] 0.1441004 0.2435879 0.9679640 [2,] -0.1826668 -0.2323413 0.3868266 [3,] 9.5373352 8.3889171 1.6095995 [4,] -0.1922673 -0.2571293 -1.9767394
> mcl$clusinfo
size max_diss av_diss isolation [1,] 349 8.316140 1.2077138 6.0094936 [2,] 1428 1.108045 0.3312003 0.8007066 [3,] 18 19.769313 6.7872396 1.0874090 [4,] 387 1.132083 0.0068611 0.4721034
> motos[1:20,]
Insured Claims LPay
[1,] -0.11253271 0.27829105 0.918354009 [2,] -0.18070960 -0.16293494 0.437558288 [3,] -0.18005426 -0.19268051 0.194781567 [4,] 0.03536287 0.35761257 0.934754934 [5,] -0.15918749 -0.05882544 0.650820930 [6,] -0.10855296 0.02545369 0.731490533 [7,] -0.17427805 -0.14310456 0.484436183 [8,] -0.18717824 -0.18772292 0.553689369 [9,] 1.57322362 8.19061326 1.559631267 [10,] -0.13735980 -0.03403746 0.782034829 [11,] -0.18200792 -0.20755330 0.515238080 [12,] -0.18461516 -0.23234127 0.258855818 [13,] -0.05469117 -0.01916467 0.810490022 [14,] -0.17253812 -0.20259570 0.285951211 [15,] -0.13321223 -0.14310456 0.402797217 [16,] -0.18048526 -0.22242608 0.449610093 [17,] -0.19023415 -0.24721406 -0.001262288 [18,] 0.94044299 2.90581659 1.365817629
[19,] -0.13817235 -0.13814696 0.678357347 [20,] -0.18424068 -0.22738368 0.459267401
> magn <- agnes(motos[1:20,],metric="manhattan")
> plot(magn)