# Example 6.7
#
# Random-subspace ensemble of classification trees on the Satellite data.
# For each of `n.rep` random train/test splits and for each ensemble size in
# `ks`, trees are fitted with rpart() on a random half of the predictors,
# their test-set predictions are combined by majority vote, and the test
# misclassification rate is stored in `blad.sat.c` (rows: ensemble sizes,
# columns: repetitions). The row means are then plotted (Figure 6.6).

options(OutDec = ",")
library("tree")
library("ElemStatLearn")
library("cluster")
library("mlbench")
library("rpart")

# Majority vote with random tie-breaking.
#
# x: vector of class labels (one vote per base model).
# Returns the most frequent label as a character string; when several labels
# share the highest count, one of them is drawn at random with sample().
mvote0 <- function(x) {
  counts <- table(x)
  labels <- names(counts)
  tied <- which(counts == max(counts))
  if (length(tied) > 1) {
    # `tied` has at least two elements here, so sample() draws from its
    # elements rather than from 1:tied.
    return(labels[sample(tied, 1)])
  }
  labels[tied]
}

data(Satellite)
mo <- nrow(Satellite)
ks <- seq(5, 100, by = 5)  # ensemble sizes to evaluate
n.rep <- 100               # number of random train/test splits
n.ucz <- 4290              # number of observations in the training part

blad.sat.c <- matrix(nrow = length(ks), ncol = n.rep)

for (j in seq_len(n.rep)) {
  print(j)  # progress indicator

  ucz <- sample(seq_len(mo), size = n.ucz, replace = FALSE)
  sat.ucz <- Satellite[ucz, ]
  sat.test <- Satellite[-ucz, ]
  y.test <- sat.test$classes
  y <- sat.ucz$classes
  y <- as.data.frame(y)
  # Drop the last column so that only predictors remain
  # (assumes the class label `classes` is the last column -- TODO confirm).
  sat.ucz <- sat.ucz[, -ncol(sat.ucz)]
  m <- ncol(sat.ucz)

  for (k in seq_along(ks)) {
    lmod <- ks[k]  # number of base models in the ensemble

    # One column of predicted labels per base model; preallocated instead of
    # being grown with cbind() on every iteration.
    yklas <- matrix(NA_character_, nrow = nrow(sat.test), ncol = lmod)

    for (i in seq_len(lmod)) {
      # Trees: each one is fitted on a random half of the predictors.
      zmienne <- sample(seq_len(m), size = round(m / 2), replace = FALSE)
      zb.ucz <- sat.ucz[, zmienne, drop = FALSE]
      zb.ucz <- cbind(y, zb.ucz)
      sat.tr <- rpart(y ~ ., zb.ucz)
      Z.test <- predict(sat.tr, sat.test, type = "class")
      yklas[, i] <- as.character(Z.test)
    }

    # Majority vote over the base models for every test observation.
    mm <- nrow(yklas)
    ypred <- vapply(
      seq_len(mm),
      function(i) mvote0(yklas[i, ]),
      character(1)
    )

    blad.sat.c[k, j] <- 1 - sum(y.test == ypred) / length(ypred)
  }
}

# Figure 6.6
# Mean test error over all repetitions, one value per ensemble size.
sat.rsm <- rowMeans(blad.sat.c)
print(sat.rsm)
plot(ks, sat.rsm, type = "n", xlab = "Liczba modeli bazowych",
     ylab = "Błąd predykcji", main = "Zbiór Satellite")
lines(ks, sat.rsm, type = "l", lty = 1)
points(ks, sat.rsm, pch = 19)