Model Evaluation

Machine Learning

Method of Model Evaluation for Categorical Target

Yeongeun Jeon , Jeongwook Lee , Jung In Seo

1. Classification Matrix

Version 1

# pp : predicted probability of belonging to class 1
pp <- c(
  0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
  0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
  0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
  0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036
)

# ac : actual class of each case
ac <- c(
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
  1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
)

# Cutoff value (classification threshold)
cv <- 0.5

# Predict "1" when the probability is >= cv, otherwise "0"
pred <- ifelse(pp >= cv, 1, 0)

# table(rows, columns) : classification (misclassification) matrix
table(ac, pred)
ac   0  1
  0 10  2
  1  1 11

Version 2 (“function” 이용!)

# CM : build the classification matrix for a given cutoff value.
#   cv      : cutoff value; cases with predicted probability >= cv are
#             predicted as class 1, all others as class 0.
#   returns : table(ac, pred) with actual classes in rows and predicted
#             classes in columns.
# Example: CM(0.75) gives the counts 11, 1 / 5, 7.
CM <- function(cv) {
  pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
          0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
          0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
          0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)  # pp : predicted probability of class 1
  ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
          1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)              # ac : actual class
  pred <- rep(0, length(pp))                               # start with every case predicted as "0"
  pred[pp >= cv] <- 1                                      # probability >= cv -> "1", otherwise "0"
  table(ac, pred)                                          # table(rows, columns) : classification matrix
}

ac   0  1
  0 11  1
  1  5  7

2. ConfusionMatrix

Version 1

pacman::p_load("e1071", "caret")                         # For confusionMatrix

pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
        0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
        0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
        0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)  # pp : predicted probability of class 1

ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)              # ac : actual class

pred <- rep(0, length(pp))                               # start with every case predicted as "0"

cv <- 0.5                                                # Cutoff value (classification threshold)
pred[pp >= cv] <- 1                                      # probability >= cv -> "1", otherwise "0"

# confusionMatrix() is given factors for both the predictions and the
# actual classes, so both vectors are converted here.
pred <- as.factor(pred)
ac   <- as.factor(ac)

# confusionMatrix(predicted class, actual class, positive = class of interest)
confusionMatrix(pred, ac, positive = "1")
Confusion Matrix and Statistics

Prediction  0  1
         0 10  1
         1  2 11
               Accuracy : 0.875           
                 95% CI : (0.6764, 0.9734)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.0001386       
                  Kappa : 0.75            
 Mcnemar's Test P-Value : 1.0000000       
            Sensitivity : 0.9167          
            Specificity : 0.8333          
         Pos Pred Value : 0.8462          
         Neg Pred Value : 0.9091          
             Prevalence : 0.5000          
         Detection Rate : 0.4583          
   Detection Prevalence : 0.5417          
      Balanced Accuracy : 0.8750          
       'Positive' Class : 1               

Version 2 (“function” 이용!)

# CM : confusion matrix and summary statistics for a given cutoff value.
#   cv      : cutoff value; cases with predicted probability >= cv are
#             predicted as class 1, all others as class 0.
#   returns : the result of confusionMatrix(pred, ac, positive = "1").
# Requires the "caret" package to be loaded (for confusionMatrix).
CM <- function(cv) {
  pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
          0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
          0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
          0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)    # pp : predicted probability of class 1
  ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
          1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)                # ac : actual class
  pred <- rep(0, length(pp))                                 # start with every case predicted as "0"
  pred[pp >= cv] <- 1                                        # probability >= cv -> "1", otherwise "0"
  pred <- as.factor(pred)                                    # as.factor : convert to categorical
  ac   <- as.factor(ac)                                      # the actual classes must be a factor as well
  confusionMatrix(pred, ac, positive = "1")                  # confusionMatrix(predicted, actual, positive = class of interest)
}

Confusion Matrix and Statistics

Prediction  0  1
         0 10  1
         1  2 11
               Accuracy : 0.875           
                 95% CI : (0.6764, 0.9734)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.0001386       
                  Kappa : 0.75            
 Mcnemar's Test P-Value : 1.0000000       
            Sensitivity : 0.9167          
            Specificity : 0.8333          
         Pos Pred Value : 0.8462          
         Neg Pred Value : 0.9091          
             Prevalence : 0.5000          
         Detection Rate : 0.4583          
   Detection Prevalence : 0.5417          
      Balanced Accuracy : 0.8750          
       'Positive' Class : 1               

3. ROC 곡선

3-1. Package “ROCR”


pacman::p_load("ROCR")                                    # For prediction and performance

pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
        0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
        0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
        0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)   # pp : predicted probability of class 1

ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)               # ac : actual class

pred <- prediction(pp, ac)                                # prediction(predicted probability, actual class)
perf <- performance(pred, "tpr", "fpr", colorize = TRUE)  # tpr : sensitivity, fpr : 1 - specificity
plot(perf, col = "blue", lwd = 3)                         # ROC curve

perf.auc <- performance(pred, "auc")                      # area under the ROC curve
perf.auc@y.values[[1]]                                    # print the AUC value held in the performance object
[1] 0.9375

3-2. Package “Epi”

Version 1 (실제 클래스와 예측 확률을 아는 경우)

pacman::p_load("Epi", "devtools")                          # For ROC
# install_version("etm", version="1.1", repos = "")

# pp : predicted probability of class 1
pp <- c(
  0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
  0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
  0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
  0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036
)

# ac : actual class
ac <- c(
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
  1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
)

# ROC(predicted probability, actual class)
ROC(test = pp, stat = ac, plot = "ROC")

Version 2 (모형을 바로 이용하는 경우)

# The "radial" data set comes from the moonBook package
pacman::p_load("moonBook")

# ROC(model) : separate male from female using height
ROC(form = male ~ height, data = radial, plot = "ROC")

3-3. Package “pROC”

Version 1 (실제 클래스와 예측 확률을 아는 경우)

pacman::p_load("pROC")     # For roc and roc.test

# pp : predicted probability of class 1
pp <- c(
  0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
  0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
  0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
  0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036
)

# ac : actual class
ac <- c(
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
  1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
)

# roc(actual class, predicted probability)
perf.roc <- roc(ac, pp,
                ci = TRUE, percent = FALSE,
                plot = TRUE, col = "blue")
auc(perf.roc)      # AUC
Area under the curve: 0.9375

Version 2 (모형을 바로 이용하는 경우)

pacman::p_load("pROC", "moonBook")    # For roc and roc.test

# b1 : separate male from female using height
b1 <- roc(male ~ height, radial,
          ci = TRUE, percent = FALSE, plot = TRUE)
# b2 : separate male from female using weight, drawn on the same plot in red
b2 <- roc(male ~ weight, radial,
          ci = TRUE, percent = FALSE, plot = TRUE,
          add = TRUE, col = "red")
# Test whether the AUCs of the two ROC curves are equal
roc.test(b1, b2, plot = TRUE)

    DeLong's test for two correlated ROC curves

data:  b1 and b2
Z = 3.9231, p-value = 8.743e-05
alternative hypothesis: true difference in AUC is not equal to 0
sample estimates:
AUC of roc1 AUC of roc2 
  0.9510468   0.8075739 

4. 향상 차트

4-1. 단순 랜덤과 향상 차트

pacman::p_load("gains")  # For gains
pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
        0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
        0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
        0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)  # pp : predicted probability of class 1

ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)              # ac : actual class

# One group per case, so the curve advances one observation at a time
gain <- gains(ac, pp, groups = length(ac))

# Model curve: cumulative share of class-1 cases scaled by sum(ac),
# plotted against the cumulative number of observations
plot(c(0, gain$cume.pct.of.total * sum(ac)) ~ c(0, gain$cume.obs),
     xlab = "데이터 개수", ylab = "누적", type = "l")
# Reference line from (0, 0) to (number of cases, number of class-1 cases)
lines(c(0, sum(ac)) ~ c(0, length(ac)), col = "gray", lty = 2)

4-2. 막대그래프를 이용한 향상 차트

pacman::p_load("gains")    # For gains

# pp : predicted probability of class 1
pp <- c(
  0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
  0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
  0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
  0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036
)

# ac : actual class
ac <- c(
  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
  1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
)

gain <- gains(ac, pp)

# Bar height: mean response of each group divided by the overall mean of ac
barplot(
  height    = gain$mean.resp / mean(ac),
  names.arg = gain$depth,
  xlab      = "Percentile",
  ylab      = "Mean Response",
  main      = "Decile-wise lift chart"
)

4-3. Package “ROCR”


pacman::p_load("ROCR")                                     # For prediction and performance

pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
        0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
        0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
        0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)    # pp : predicted probability of class 1

ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)                # ac : actual class

pred <- prediction(pp, ac)                                 # prediction(predicted probability, actual class)

perf <- performance(pred, "lift", "rpp")                   # lift : lift value
                                                           # rpp  : rate of positive predictions
plot(perf, main = "lift curve", colorize = TRUE, lwd = 2)  # Lift chart

4-4. Package “lift”


pacman::p_load("lift")                                      # For plotLift and TopDecileLift

pp <- c(0.9959, 0.9875, 0.9844, 0.9804, 0.9481, 0.8893,
        0.8476, 0.7628, 0.7070, 0.6807, 0.6563, 0.6224,
        0.5055, 0.4713, 0.3371, 0.2180, 0.1992, 0.1495,
        0.0480, 0.0383, 0.0248, 0.0218, 0.0161, 0.0036)     # pp : predicted probability of class 1

ac <- c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)                 # ac : actual class

plotLift(pp, ac, cumulative = TRUE, n.buckets = 24)         # plotLift(predicted probability, actual class, n.buckets = number of cases)
TopDecileLift(predicted = pp, labels = ac)                  # Print the lift of the top 10%
[1] 2

5. Unbalanced Target


# Confusion matrix for the oversampled result (requires the "caret" package)
OS.table <- as.table(matrix(c(390, 110, 80, 420), 2, 2))
confusionMatrix(OS.table)
Confusion Matrix and Statistics

    A   B
A 390  80
B 110 420
               Accuracy : 0.81            
                 95% CI : (0.7843, 0.8339)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2e-16         
                  Kappa : 0.62            
 Mcnemar's Test P-Value : 0.03539         
            Sensitivity : 0.7800          
            Specificity : 0.8400          
         Pos Pred Value : 0.8298          
         Neg Pred Value : 0.7925          
             Prevalence : 0.5000          
         Detection Rate : 0.3900          
   Detection Prevalence : 0.4700          
      Balanced Accuracy : 0.8100          
       'Positive' Class : A               
# Confusion matrix after applying the oversampling weights (requires the "caret" package)
Ori.table <- as.table(matrix(c(764.4, 215.6, 3.2, 16.8), 2, 2))
confusionMatrix(Ori.table)
Confusion Matrix and Statistics

      A     B
A 764.4   3.2
B 215.6  16.8
               Accuracy : 0.7812   
                 95% CI : (NA, NA) 
    No Information Rate : NA       
    P-Value [Acc > NIR] : NA       
                  Kappa : 0.1      
 Mcnemar's Test P-Value : < 2.2e-16
            Sensitivity : 0.78000  
            Specificity : 0.84000  
         Pos Pred Value : 0.99583  
         Neg Pred Value : 0.07229  
             Prevalence : 0.98000  
         Detection Rate : 0.76440  
   Detection Prevalence : 0.76760  
      Balanced Accuracy : 0.81000  
       'Positive' Class : A        

6. 모형 선택

예제로 사용될 데이터는 R에 내장되어 있는 “iris” 데이터이다.


pacman::p_load("rpart", "randomForest", "caret", "kernlab")

set.seed(12345)                                                    # Fix the seed so the results are reproducible
DATA <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)  # Take 70% of the rows as training data
TrD  <- iris[DATA, ]                                               # Training Data
TeD  <- iris[-DATA, ]                                              # Test Data

set.seed(12345)                                                    # Fix the seed so the results are reproducible
control <- trainControl(method = "cv", number = 10)                # 10-fold cross validation
tree <- train(Species ~ ., data = TrD, method = "rpart",
              metric = "Accuracy", trControl = control)            # Decision tree
rf   <- train(Species ~ ., data = TrD, method = "rf",
              metric = "Accuracy", trControl = control)            # Random forest
svm  <- train(Species ~ ., data = TrD, method = "svmRadial",
              metric = "Accuracy", trControl = control)            # Support vector machine
knn  <- train(Species ~ ., data = TrD, method = "knn",
              metric = "Accuracy", trControl = control)            # K-nearest neighbours

# Collect the resampling results of the four models; the list names are the
# model labels that appear in the printed summary.
resamp <- resamples(list("의사결정나무" = tree, "랜덤포레스트" = rf,
                         SVM = svm, kNN = knn))                    # Resampling Results

summary(resamp)                                                    # dispatches to the resamples method of summary()

Models: 의사결정나무, 랜덤포레스트, SVM, kNN 
Number of resamples: 10 

Accuracy 
                  Min. 1st Qu. Median      Mean 3rd Qu. Max. NA's
의사결정나무 0.8181818  1.0000      1 0.9707071       1    1    0
랜덤포레스트 0.7777778  0.9250      1 0.9577778       1    1    0
SVM          0.9000000  0.9375      1 0.9716667       1    1    0
kNN          0.9000000  1.0000      1 0.9809091       1    1    0

Kappa 
                  Min.   1st Qu. Median      Mean 3rd Qu. Max. NA's
의사결정나무 0.7250000 1.0000000      1 0.9558333       1    1    0
랜덤포레스트 0.6666667 0.8880597      1 0.9365898       1    1    0
SVM          0.8461538 0.9062500      1 0.9571900       1    1    0
kNN          0.8507463 1.0000000      1 0.9714944       1    1    0
sort(resamp, decreasing = TRUE)    # Sort the resampling results in descending order
[1] "kNN"          "SVM"          "의사결정나무" "랜덤포레스트"
dotplot(resamp)                    # Dot plot of the resampling results


