Support Vector Machine with Linear Kernel using Package e1071

Data Mining

Description for Support Vector Machine with Linear Kernel using Package e1071

Yeongeun Jeon , Jung In Seo
2023-04-12

Support Vector Machine의 장점


Support Vector Machine의 단점


실습 자료 : 유니버셜 은행의 고객 2,500명에 대한 자료(출처 : Data Mining for Business Intelligence, Shmueli et al. 2010)이며, 총 13개의 변수를 포함하고 있다. 이 자료에서 TargetPersonal Loan이다.




1. 데이터 불러오기

pacman::p_load("data.table", "dplyr",
               "caret",
               "ggplot2", "GGally",
               "e1071")


UB <- fread("../Universal Bank_Main.csv")                               # 데이터 불러오기

UB %>%
  as_tibble
# A tibble: 2,500 × 14
      ID   Age Experience Income `ZIP Code` Family CCAvg Education
   <int> <int>      <int>  <int>      <int>  <int> <dbl>     <int>
 1     1    25          1     49      91107      4   1.6         1
 2     2    45         19     34      90089      3   1.5         1
 3     3    39         15     11      94720      1   1           1
 4     4    35          9    100      94112      1   2.7         2
 5     5    35          8     45      91330      4   1           2
 6     6    37         13     29      92121      4   0.4         2
 7     7    53         27     72      91711      2   1.5         2
 8     8    50         24     22      93943      1   0.3         3
 9     9    35         10     81      90089      3   0.6         2
10    10    34          9    180      93023      1   8.9         3
# ℹ 2,490 more rows
# ℹ 6 more variables: Mortgage <int>, `Personal Loan` <int>,
#   `Securities Account` <int>, `CD Account` <int>, Online <int>,
#   CreditCard <int>

2. 데이터 전처리 I

UB %<>%
  data.frame() %>%                                                      # Data Frame 형태로 변환 
  mutate(Personal.Loan = ifelse(Personal.Loan == 1, "yes", "no")) %>%   # Target을 문자형 변수로 변환
  select(-1)                                                            # ID 변수 제거

# 1. Convert to Factor
fac.col <- c("Family", "Education", "Securities.Account", 
             "CD.Account", "Online", "CreditCard",
             # Target
             "Personal.Loan")

UB <- UB %>% 
  mutate_at(fac.col, as.factor)                                         # 범주형으로 변환

glimpse(UB)                                                             # 데이터 구조 확인
Rows: 2,500
Columns: 13
$ Age                <int> 25, 45, 39, 35, 35, 37, 53, 50, 35, 34, 6…
$ Experience         <int> 1, 19, 15, 9, 8, 13, 27, 24, 10, 9, 39, 5…
$ Income             <int> 49, 34, 11, 100, 45, 29, 72, 22, 81, 180,…
$ ZIP.Code           <int> 91107, 90089, 94720, 94112, 91330, 92121,…
$ Family             <fct> 4, 3, 1, 1, 4, 4, 2, 1, 3, 1, 4, 3, 2, 4,…
$ CCAvg              <dbl> 1.6, 1.5, 1.0, 2.7, 1.0, 0.4, 1.5, 0.3, 0…
$ Education          <fct> 1, 1, 1, 2, 2, 2, 2, 3, 2, 3, 3, 2, 3, 2,…
$ Mortgage           <int> 0, 0, 0, 0, 0, 155, 0, 0, 104, 0, 0, 0, 0…
$ Personal.Loan      <fct> no, no, no, no, no, no, no, no, no, yes, …
$ Securities.Account <fct> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
$ CD.Account         <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Online             <fct> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1,…
$ CreditCard         <fct> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
# 2. Convert One-hot Encoding for 범주형 예측 변수
dummies <- dummyVars(formula = ~ .,                                     # formula : ~ 예측 변수 / "." : data에 포함된 모든 변수를 의미
                     data = UB[,-9],                                    # Dataset including Only 예측 변수 -> Target 제외
                     fullRank = FALSE)                                  # fullRank = TRUE : Dummy Variable, fullRank = FALSE : One-hot Encoding

UB.Var   <- predict(dummies, newdata = UB) %>%                          # 범주형 예측 변수에 대한 One-hot Encoding 변환
  data.frame()                                                          # Data Frame 형태로 변환 

glimpse(UB.Var)                                                         # 데이터 구조 확인
Rows: 2,500
Columns: 21
$ Age                  <dbl> 25, 45, 39, 35, 35, 37, 53, 50, 35, 34,…
$ Experience           <dbl> 1, 19, 15, 9, 8, 13, 27, 24, 10, 9, 39,…
$ Income               <dbl> 49, 34, 11, 100, 45, 29, 72, 22, 81, 18…
$ ZIP.Code             <dbl> 91107, 90089, 94720, 94112, 91330, 9212…
$ Family.1             <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
$ Family.2             <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
$ Family.3             <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, …
$ Family.4             <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, …
$ CCAvg                <dbl> 1.6, 1.5, 1.0, 2.7, 1.0, 0.4, 1.5, 0.3,…
$ Education.1          <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Education.2          <dbl> 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, …
$ Education.3          <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Mortgage             <dbl> 0, 0, 0, 0, 0, 155, 0, 0, 104, 0, 0, 0,…
$ Securities.Account.0 <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
$ Securities.Account.1 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ CD.Account.0         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ CD.Account.1         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Online.0             <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Online.1             <dbl> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, …
$ CreditCard.0         <dbl> 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, …
$ CreditCard.1         <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, …
# 3. Combine Target with 변환된 예측 변수
UB.df <- data.frame(Personal.Loan = UB$Personal.Loan, 
                    UB.Var)

UB.df %>%
  as_tibble
# A tibble: 2,500 × 22
   Personal.Loan   Age Experience Income ZIP.Code Family.1 Family.2
   <fct>         <dbl>      <dbl>  <dbl>    <dbl>    <dbl>    <dbl>
 1 no               25          1     49    91107        0        0
 2 no               45         19     34    90089        0        0
 3 no               39         15     11    94720        1        0
 4 no               35          9    100    94112        1        0
 5 no               35          8     45    91330        0        0
 6 no               37         13     29    92121        0        0
 7 no               53         27     72    91711        0        1
 8 no               50         24     22    93943        1        0
 9 no               35         10     81    90089        0        0
10 yes              34          9    180    93023        1        0
# ℹ 2,490 more rows
# ℹ 15 more variables: Family.3 <dbl>, Family.4 <dbl>, CCAvg <dbl>,
#   Education.1 <dbl>, Education.2 <dbl>, Education.3 <dbl>,
#   Mortgage <dbl>, Securities.Account.0 <dbl>,
#   Securities.Account.1 <dbl>, CD.Account.0 <dbl>,
#   CD.Account.1 <dbl>, Online.0 <dbl>, Online.1 <dbl>,
#   CreditCard.0 <dbl>, CreditCard.1 <dbl>
glimpse(UB.df)                                                          # 데이터 구조 확인
Rows: 2,500
Columns: 22
$ Personal.Loan        <fct> no, no, no, no, no, no, no, no, no, yes…
$ Age                  <dbl> 25, 45, 39, 35, 35, 37, 53, 50, 35, 34,…
$ Experience           <dbl> 1, 19, 15, 9, 8, 13, 27, 24, 10, 9, 39,…
$ Income               <dbl> 49, 34, 11, 100, 45, 29, 72, 22, 81, 18…
$ ZIP.Code             <dbl> 91107, 90089, 94720, 94112, 91330, 9212…
$ Family.1             <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
$ Family.2             <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
$ Family.3             <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, …
$ Family.4             <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, …
$ CCAvg                <dbl> 1.6, 1.5, 1.0, 2.7, 1.0, 0.4, 1.5, 0.3,…
$ Education.1          <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Education.2          <dbl> 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, …
$ Education.3          <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Mortgage             <dbl> 0, 0, 0, 0, 0, 155, 0, 0, 104, 0, 0, 0,…
$ Securities.Account.0 <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
$ Securities.Account.1 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ CD.Account.0         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ CD.Account.1         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Online.0             <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Online.1             <dbl> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, …
$ CreditCard.0         <dbl> 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, …
$ CreditCard.1         <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, …

3. 데이터 탐색

ggpairs(UB,                                           # In 2-1
        columns = c("Age", "Experience", "Income",    # 수치형 예측 변수
                    "ZIP.Code", "CCAvg", "Mortgage"),                            
        aes(colour = Personal.Loan)) +                # Target의 범주에 따라 색깔을 다르게 표현
  theme_bw()
ggpairs(UB,                                           # In 2-1
        columns = c("Age", "Experience", "Income",    # 수치형 예측 변수
                    "ZIP.Code", "CCAvg", "Mortgage"), 
        aes(colour = Personal.Loan)) +                # Target의 범주에 따라 색깔을 다르게 표현
  scale_color_brewer(palette="Purples") +             # 특정 색깔 지정
  scale_fill_brewer(palette="Purples") +              # 특정 색깔 지정
  theme_bw()
ggpairs(UB,                                           # In 2-1
        columns = c("Age", "Income",                  # 수치형 예측 변수
                    "Family", "Education"),           # 범주형 예측 변수
        aes(colour = Personal.Loan, alpha = 0.8)) +   # Target의 범주에 따라 색깔을 다르게 표현
  scale_colour_manual(values = c("purple","cyan4")) + # 특정 색깔 지정
  scale_fill_manual(values = c("purple","cyan4")) +   # 특정 색깔 지정
  theme_bw()


4. 데이터 분할

# Partition (Training Dataset : Test Dataset = 7:3)
y      <- UB.df$Personal.Loan                            # Target
 
set.seed(200)
ind    <- createDataPartition(y, p = 0.7, list = T)      # Index를 이용하여 7:3으로 분할
UB.trd <- UB.df[ind$Resample1,]                          # Training Dataset
UB.ted <- UB.df[-ind$Resample1,]                         # Test Dataset

5. 데이터 전처리 II

# Standardization
preProcValues <- preProcess(UB.trd, 
                            method = c("center", "scale"))  # Standardization 정의 -> Training Dataset에 대한 평균과 표준편차 계산 

UB.trd <- predict(preProcValues, UB.trd)                    # Standardization for Training Dataset
UB.ted <- predict(preProcValues, UB.ted)                    # Standardization for Test Dataset

glimpse(UB.trd)                                             # 데이터 구조 확인
Rows: 1,751
Columns: 22
$ Personal.Loan        <fct> no, no, no, no, no, no, no, yes, no, no…
$ Age                  <dbl> -0.05431273, -0.57446728, -0.92123699, …
$ Experience           <dbl> -0.12175295, -0.46882565, -0.98943471, …
$ Income               <dbl> -0.85867297, -1.35649686, 0.56986515, -…
$ ZIP.Code             <dbl> -1.75250883, 0.88354520, 0.53745994, -1…
$ Family.1             <dbl> -0.6355621, 1.5725118, 1.5725118, -0.63…
$ Family.2             <dbl> -0.5774051, -0.5774051, -0.5774051, -0.…
$ Family.3             <dbl> 2.0037210, -0.4987865, -0.4987865, -0.4…
$ Family.4             <dbl> -0.5967491, -0.5967491, -0.5967491, 1.6…
$ CCAvg                <dbl> -0.25119120, -0.53150921, 0.42157204, -…
$ Education.1          <dbl> 1.1482386, 1.1482386, -0.8704018, -0.87…
$ Education.2          <dbl> -0.6196534, -0.6196534, 1.6128838, 1.61…
$ Education.3          <dbl> -0.6408777, -0.6408777, -0.6408777, -0.…
$ Mortgage             <dbl> -0.5664192, -0.5664192, -0.5664192, -0.…
$ Securities.Account.0 <dbl> -2.7998134, 0.3569627, 0.3569627, 0.356…
$ Securities.Account.1 <dbl> 2.7998134, -0.3569627, -0.3569627, -0.3…
$ CD.Account.0         <dbl> 0.2613337, 0.2613337, 0.2613337, 0.2613…
$ CD.Account.1         <dbl> -0.2613337, -0.2613337, -0.2613337, -0.…
$ Online.0             <dbl> 1.2486195, 1.2486195, 1.2486195, 1.2486…
$ Online.1             <dbl> -1.2486195, -1.2486195, -1.2486195, -1.…
$ CreditCard.0         <dbl> 0.6408777, 0.6408777, 0.6408777, -1.559…
$ CreditCard.1         <dbl> -0.6408777, -0.6408777, -0.6408777, 1.5…
glimpse(UB.ted)                                             # 데이터 구조 확인
Rows: 749
Columns: 22
$ Personal.Loan        <fct> no, no, no, no, no, no, no, no, no, no,…
$ Age                  <dbl> -1.7881612, -0.7478521, 1.2460737, 0.81…
$ Experience           <dbl> -1.68358012, -0.64236200, 0.83269699, 0…
$ Income               <dbl> -0.53400522, -0.96689556, -1.11840718, …
$ ZIP.Code             <dbl> -1.17304370, -0.59585545, 1.07366441, 0…
$ Family.1             <dbl> -0.6355621, -0.6355621, 1.5725118, 1.57…
$ Family.2             <dbl> -0.5774051, -0.5774051, -0.5774051, -0.…
$ Family.3             <dbl> -0.4987865, -0.4987865, -0.4987865, -0.…
$ Family.4             <dbl> 1.6747892, 1.6747892, -0.5967491, -0.59…
$ CCAvg                <dbl> -0.19512759, -0.86789083, -0.25119120, …
$ Education.1          <dbl> 1.1482386, -0.8704018, -0.8704018, -0.8…
$ Education.2          <dbl> -0.6196534, 1.6128838, -0.6196534, 1.61…
$ Education.3          <dbl> -0.6408777, -0.6408777, 1.5594690, -0.6…
$ Mortgage             <dbl> -0.5664192, 0.9609885, -0.5664192, -0.5…
$ Securities.Account.0 <dbl> -2.7998134, 0.3569627, 0.3569627, -2.79…
$ Securities.Account.1 <dbl> 2.7998134, -0.3569627, -0.3569627, 2.79…
$ CD.Account.0         <dbl> 0.2613337, 0.2613337, 0.2613337, 0.2613…
$ CD.Account.1         <dbl> -0.2613337, -0.2613337, -0.2613337, -0.…
$ Online.0             <dbl> 1.2486195, -0.8004271, -0.8004271, 1.24…
$ Online.1             <dbl> -1.2486195, 0.8004271, 0.8004271, -1.24…
$ CreditCard.0         <dbl> 0.6408777, 0.6408777, -1.5594690, -1.55…
$ CreditCard.1         <dbl> -0.6408777, -0.6408777, 1.5594690, 1.55…

6. 모형 훈련

Package "e1071"는 Support Vector Machine을 효율적으로 구현할 수 있는 “libsvm”을 R에서 사용할 수 있도록 만든 Package이며, 함수 svm()을 이용하여 Support Vector Machine을 수행할 수 있다. 함수에서 사용할 수 있는 자세한 옵션은 여기를 참고한다.

svm(formula, data, kernel, cost, probability, ...)
svm.model.li <- svm(Personal.Loan ~.,     
                    data = UB.trd,  
                    kernel = "linear", 
                    cost = 1,              
                    probability = TRUE)       

summary(svm.model.li)

Call:
svm(formula = Personal.Loan ~ ., data = UB.trd, kernel = "linear", 
    cost = 1, probability = TRUE)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  linear 
       cost:  1 

Number of Support Vectors:  186

 ( 92 94 )


Number of Classes:  2 

Levels: 
 no yes

Result! Number of Support Vectors는 결정경계와 가까이 위치한 case의 수이다. 해당 데이터에서는 총 186개의 case로, "Personal.Loan = no"에 해당하는 case는 92개, "Personal.Loan = yes"에 해당하는 case는 94개이다. case의 행 번호는 svm.model.li$index를 이용하여 확인할 수 있다.

# Support Vector Index
svm.model.li$index   
  [1]    9   48   68  108  109  123  156  157  203  221  228  230  294
 [14]  299  323  352  366  373  384  413  430  452  476  525  532  560
 [27]  582  614  626  643  645  760  767  781  784  845  863  870  876
 [40]  880  883  890  899  904  920  936  953  960  996 1036 1056 1061
 [53] 1083 1095 1113 1121 1130 1140 1190 1193 1197 1201 1202 1226 1252
 [66] 1254 1259 1274 1309 1318 1323 1324 1327 1336 1359 1365 1384 1387
 [79] 1395 1400 1408 1421 1530 1588 1600 1619 1624 1637 1666 1669 1714
 [92] 1715   14   24   34   46   69  110  115  128  210  219  220  224
[105]  225  226  246  268  277  293  305  308  359  375  380  436  455
[118]  458  472  493  517  539  547  567  583  622  629  664  670  672
[131]  679  708  715  720  727  742  782  783  794  796  809  816  829
[144]  861  887  909  916  951  952  978  979 1045 1059 1090 1100 1103
[157] 1110 1122 1131 1170 1172 1175 1213 1241 1253 1257 1260 1262 1284
[170] 1308 1317 1326 1342 1403 1429 1504 1505 1585 1586 1622 1630 1674
[183] 1680 1689 1709 1713

7. 모형 평가

Caution! 모형 평가를 위해 Test Dataset에 대한 예측 class/확률 이 필요하며, 함수 predict()를 이용하여 생성한다.

# 예측 class 생성 
svm.li.pred <- predict(svm.model.li,
                       newdata = UB.ted[,-1],        # Test Dataset including Only 예측 변수   
                       type = "class")               # 예측 class 생성       

svm.li.pred %>%
  as_tibble
# A tibble: 749 × 1
   value
   <fct>
 1 no   
 2 no   
 3 no   
 4 no   
 5 no   
 6 no   
 7 no   
 8 no   
 9 no   
10 no   
# ℹ 739 more rows


7-1. ConfusionMatrix

CM   <- caret::confusionMatrix(svm.li.pred, UB.ted$Personal.Loan, 
                               positive = "yes")     # confusionMatrix(예측 class, 실제 class, positive="관심 class")
CM
Confusion Matrix and Statistics

          Reference
Prediction  no yes
       no  668  29
       yes   5  47
                                          
               Accuracy : 0.9546          
                 95% CI : (0.9371, 0.9684)
    No Information Rate : 0.8985          
    P-Value [Acc > NIR] : 1.426e-08       
                                          
                  Kappa : 0.7105          
                                          
 Mcnemar's Test P-Value : 7.998e-05       
                                          
            Sensitivity : 0.61842         
            Specificity : 0.99257         
         Pos Pred Value : 0.90385         
         Neg Pred Value : 0.95839         
             Prevalence : 0.10147         
         Detection Rate : 0.06275         
   Detection Prevalence : 0.06943         
      Balanced Accuracy : 0.80550         
                                          
       'Positive' Class : yes             
                                          


7-2. ROC 곡선

# 예측 확률 생성
test.svm.prob <- predict(svm.model.li, 
                         newdata = UB.ted[,-1],      # Test Dataset including Only 예측 변수  
                         probability = TRUE)         # 예측 확률 생성       

attr(test.svm.prob, "probabilities") %>%
  as_tibble
# A tibble: 749 × 2
      no      yes
   <dbl>    <dbl>
 1 1.00  0.000156
 2 0.993 0.00735 
 3 0.999 0.00136 
 4 1.00  0.000249
 5 0.991 0.00940 
 6 0.994 0.00558 
 7 0.969 0.0308  
 8 0.864 0.136   
 9 0.885 0.115   
10 0.967 0.0334  
# ℹ 739 more rows
test.svm.prob <- attr(test.svm.prob, "probabilities")[,2]   # "Personal.Loan = yes"에 대한 예측 확률

ac  <- UB.ted$Personal.Loan                                 # Test Dataset의 실제 class 
pp  <- as.numeric(test.svm.prob)                            # 예측 확률을 수치형으로 변환

1) Package “pROC”

pacman::p_load("pROC")

svm.roc  <- roc(ac, pp, plot = T, col = "gray")             # roc(실제 class, 예측 확률)
auc      <- round(auc(svm.roc), 3)
legend("bottomright", legend = auc, bty = "n")

Caution! Package "pROC"를 통해 출력한 ROC 곡선은 다양한 함수를 이용해서 그래프를 수정할 수 있다.

# 함수 plot.roc() 이용
plot.roc(svm.roc,   
         col="gray",                                        # Line Color
         print.auc = TRUE,                                  # AUC 출력 여부
         print.auc.col = "red",                             # AUC 글씨 색깔
         print.thres = TRUE,                                # Cutoff Value 출력 여부
         print.thres.pch = 19,                              # Cutoff Value를 표시하는 도형 모양
         print.thres.col = "red",                           # Cutoff Value를 표시하는 도형의 색깔
         auc.polygon = TRUE,                                # 곡선 아래 면적에 대한 여부
         auc.polygon.col = "gray90")                        # 곡선 아래 면적의 색깔

# 함수 ggroc() 이용
ggroc(svm.roc) +
annotate(geom = "text", x = 0.9, y = 1.0,
label = paste("AUC = ", auc),
size = 5,
color="red") +
theme_bw()

2) Package “Epi”

pacman::p_load("Epi")       
# install_version("etm", version = "1.1", repos = "http://cran.us.r-project.org")

ROC(pp, ac, plot = "ROC")                                   # ROC(예측 확률, 실제 class)  

3) Package “ROCR”

pacman::p_load("ROCR")

svm.pred <- prediction(pp, ac)                              # prediction(예측 확률, 실제 class)    

svm.perf <- performance(svm.pred, "tpr", "fpr")             # performance(, "민감도", "1-특이도")                      
plot(svm.perf, col = "gray")                                # ROC Curve

perf.auc   <- performance(svm.pred, "auc")                  # AUC
auc        <- attributes(perf.auc)$y.values 
legend("bottomright", legend = auc, bty = "n")


7-3. 향상 차트

1) Package “ROCR”

svm.perf <- performance(svm.pred, "lift", "rpp")            # Lift Chart
plot(svm.perf, main = "lift curve", 
     colorize = T,                                          # Coloring according to cutoff
     lwd = 2)  

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".