Nearest Neighborhood Method using knn3

Data Mining

Description for KNN algorithm using knn3

Yeongeun Jeon , Jung In Seo
2023-03-06

Nearest Neighborhood Algorithm의 장점


Nearest Neighborhood Algorithm의 단점


실습 자료 : 유니버셜 은행의 고객 2,500명에 대한 자료(출처 : Data Mining for Business Intelligence, Shmueli et al. 2010)이며, 총 13개의 변수를 포함하고 있다. 이 자료에서 Target은 Personal Loan이다.




1. 데이터 불러오기

# Load (and auto-install if missing) the packages used in this script:
# data.table (fread), dplyr (data wrangling), caret (modeling utilities),
# ggplot2/GGally (visualization).
pacman::p_load("data.table", "dplyr",
               "caret",
               "ggplot2", "GGally")

UB <- fread("../Universal Bank_Main.csv")                                # Load the dataset (path relative to this script)


# Print the raw data as a tibble for a quick overview.
as_tibble(UB)
# A tibble: 2,500 × 14
      ID   Age Experience Income `ZIP Code` Family CCAvg Education
   <int> <int>      <int>  <int>      <int>  <int> <dbl>     <int>
 1     1    25          1     49      91107      4   1.6         1
 2     2    45         19     34      90089      3   1.5         1
 3     3    39         15     11      94720      1   1           1
 4     4    35          9    100      94112      1   2.7         2
 5     5    35          8     45      91330      4   1           2
 6     6    37         13     29      92121      4   0.4         2
 7     7    53         27     72      91711      2   1.5         2
 8     8    50         24     22      93943      1   0.3         3
 9     9    35         10     81      90089      3   0.6         2
10    10    34          9    180      93023      1   8.9         3
# ℹ 2,490 more rows
# ℹ 6 more variables: Mortgage <int>, `Personal Loan` <int>,
#   `Securities Account` <int>, `CD Account` <int>, Online <int>,
#   CreditCard <int>

2. 데이터 전처리 I

# Preprocessing I: reshape the raw data for modeling.
# NOTE(fix): the original used magrittr's compound pipe (`UB %<>% ...`), but
# magrittr is never loaded above -- dplyr re-exports only `%>%` -- so that
# line would fail with "could not find function %<>%". Plain assignment with
# `%>%` is equivalent and needs no extra package.
UB <- UB %>%
  data.frame() %>%                                                      # Convert to a plain data frame
  mutate(Personal.Loan = ifelse(Personal.Loan == 1, "yes", "no")) %>%   # Recode the target as "yes"/"no"
  select(-1)                                                            # Drop the ID column

# 1. Convert to Factor
fac.col <- c("Family", "Education", "Securities.Account",
             "CD.Account", "Online", "CreditCard",
             # Target
             "Personal.Loan")

UB <- UB %>%
  mutate_at(fac.col, as.factor)                                         # Convert the listed columns to factors

glimpse(UB)                                                             # Check the data structure
Rows: 2,500
Columns: 13
$ Age                <int> 25, 45, 39, 35, 35, 37, 53, 50, 35, 34, 6…
$ Experience         <int> 1, 19, 15, 9, 8, 13, 27, 24, 10, 9, 39, 5…
$ Income             <int> 49, 34, 11, 100, 45, 29, 72, 22, 81, 180,…
$ ZIP.Code           <int> 91107, 90089, 94720, 94112, 91330, 92121,…
$ Family             <fct> 4, 3, 1, 1, 4, 4, 2, 1, 3, 1, 4, 3, 2, 4,…
$ CCAvg              <dbl> 1.6, 1.5, 1.0, 2.7, 1.0, 0.4, 1.5, 0.3, 0…
$ Education          <fct> 1, 1, 1, 2, 2, 2, 2, 3, 2, 3, 3, 2, 3, 2,…
$ Mortgage           <int> 0, 0, 0, 0, 0, 155, 0, 0, 104, 0, 0, 0, 0…
$ Personal.Loan      <fct> no, no, no, no, no, no, no, no, no, yes, …
$ Securities.Account <fct> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
$ CD.Account         <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Online             <fct> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1,…
$ CreditCard         <fct> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
# 2. One-hot encode the categorical predictors.
dummies <- dummyVars(formula = ~ .,                                     # "." : every variable in `data` is treated as a predictor
                     data = select(UB, -Personal.Loan),                 # Predictors only: exclude the target by NAME.
                                                                        # (The original used UB[,-9], which silently breaks
                                                                        # if the column order ever changes.)
                     fullRank = FALSE)                                  # fullRank = TRUE : dummy coding / FALSE : one-hot encoding

UB.Var   <- predict(dummies, newdata = UB) %>%                          # Apply the one-hot encoding
  data.frame()                                                          # Convert back to a data frame

glimpse(UB.Var)                                                         # Check the data structure
Rows: 2,500
Columns: 21
$ Age                  <dbl> 25, 45, 39, 35, 35, 37, 53, 50, 35, 34,…
$ Experience           <dbl> 1, 19, 15, 9, 8, 13, 27, 24, 10, 9, 39,…
$ Income               <dbl> 49, 34, 11, 100, 45, 29, 72, 22, 81, 18…
$ ZIP.Code             <dbl> 91107, 90089, 94720, 94112, 91330, 9212…
$ Family.1             <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
$ Family.2             <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
$ Family.3             <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, …
$ Family.4             <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, …
$ CCAvg                <dbl> 1.6, 1.5, 1.0, 2.7, 1.0, 0.4, 1.5, 0.3,…
$ Education.1          <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Education.2          <dbl> 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, …
$ Education.3          <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Mortgage             <dbl> 0, 0, 0, 0, 0, 155, 0, 0, 104, 0, 0, 0,…
$ Securities.Account.0 <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
$ Securities.Account.1 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ CD.Account.0         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ CD.Account.1         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Online.0             <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Online.1             <dbl> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, …
$ CreditCard.0         <dbl> 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, …
$ CreditCard.1         <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, …
# 3. Recombine the target with the one-hot encoded predictors.
UB.df <- data.frame(Personal.Loan = UB$Personal.Loan, 
                    UB.Var)

# Quick overview of the combined modeling dataset.
UB.df %>%
  as_tibble
# A tibble: 2,500 × 22
   Personal.Loan   Age Experience Income ZIP.Code Family.1 Family.2
   <fct>         <dbl>      <dbl>  <dbl>    <dbl>    <dbl>    <dbl>
 1 no               25          1     49    91107        0        0
 2 no               45         19     34    90089        0        0
 3 no               39         15     11    94720        1        0
 4 no               35          9    100    94112        1        0
 5 no               35          8     45    91330        0        0
 6 no               37         13     29    92121        0        0
 7 no               53         27     72    91711        0        1
 8 no               50         24     22    93943        1        0
 9 no               35         10     81    90089        0        0
10 yes              34          9    180    93023        1        0
# ℹ 2,490 more rows
# ℹ 15 more variables: Family.3 <dbl>, Family.4 <dbl>, CCAvg <dbl>,
#   Education.1 <dbl>, Education.2 <dbl>, Education.3 <dbl>,
#   Mortgage <dbl>, Securities.Account.0 <dbl>,
#   Securities.Account.1 <dbl>, CD.Account.0 <dbl>,
#   CD.Account.1 <dbl>, Online.0 <dbl>, Online.1 <dbl>,
#   CreditCard.0 <dbl>, CreditCard.1 <dbl>
glimpse(UB.df)                                                          # Check the data structure
Rows: 2,500
Columns: 22
$ Personal.Loan        <fct> no, no, no, no, no, no, no, no, no, yes…
$ Age                  <dbl> 25, 45, 39, 35, 35, 37, 53, 50, 35, 34,…
$ Experience           <dbl> 1, 19, 15, 9, 8, 13, 27, 24, 10, 9, 39,…
$ Income               <dbl> 49, 34, 11, 100, 45, 29, 72, 22, 81, 18…
$ ZIP.Code             <dbl> 91107, 90089, 94720, 94112, 91330, 9212…
$ Family.1             <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
$ Family.2             <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
$ Family.3             <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, …
$ Family.4             <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, …
$ CCAvg                <dbl> 1.6, 1.5, 1.0, 2.7, 1.0, 0.4, 1.5, 0.3,…
$ Education.1          <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Education.2          <dbl> 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, …
$ Education.3          <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Mortgage             <dbl> 0, 0, 0, 0, 0, 155, 0, 0, 104, 0, 0, 0,…
$ Securities.Account.0 <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
$ Securities.Account.1 <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ CD.Account.0         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ CD.Account.1         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ Online.0             <dbl> 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, …
$ Online.1             <dbl> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, …
$ CreditCard.0         <dbl> 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, …
$ CreditCard.1         <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, …

3. 데이터 탐색

# Pairwise plots of the numeric predictors, colored by the target's class.
ggpairs(UB,                                           # In 2-1
        columns = c("Age", "Experience", "Income",    # Numeric predictors
                    "ZIP.Code", "CCAvg", "Mortgage"),                            
        aes(colour = Personal.Loan)) +                # Color by the target's class
  theme_bw()
# Same plot with a Brewer color palette.
ggpairs(UB,                                           # In 2-1
        columns = c("Age", "Experience", "Income",    # Numeric predictors
                    "ZIP.Code", "CCAvg", "Mortgage"), 
        aes(colour = Personal.Loan)) +                # Color by the target's class
  scale_color_brewer(palette="Purples") +             # Use a specific palette
  scale_fill_brewer(palette="Purples") +              # Use a specific palette
  theme_bw()
# Mixed numeric/categorical predictors with manually chosen colors.
ggpairs(UB,                                           # In 2-1
        columns = c("Age", "Income",                  # Numeric predictors
                    "Family", "Education"),           # Categorical predictors
        aes(colour = Personal.Loan, alpha = 0.8)) +   # Color by the target's class
  scale_colour_manual(values = c("purple","cyan4")) + # Use specific colors
  scale_fill_manual(values = c("purple","cyan4")) +   # Use specific colors
  theme_bw()


4. 데이터 분할

# Partition (Training Dataset : Test Dataset = 7:3)
y      <- UB.df$Personal.Loan                            # Target

set.seed(200)                                            # Reproducible split
ind    <- createDataPartition(y, p = 0.7, list = TRUE)   # Stratified indices for a 7:3 split
                                                         # (TRUE instead of T: T is a reassignable variable, TRUE is not)
UB.trd <- UB.df[ind$Resample1,]                          # Training Dataset
UB.ted <- UB.df[-ind$Resample1,]                         # Test Dataset

5. 데이터 전처리 II

# Standardization: estimate center/scale on the TRAINING data only, then
# apply the same transform to both sets (prevents test-set leakage).
preProcValues <- preProcess(UB.trd, 
                            method = c("center", "scale"))  # Means and SDs computed from the training set

UB.trd <- predict(preProcValues, UB.trd)                    # Standardize the training set
UB.ted <- predict(preProcValues, UB.ted)                    # Standardize the test set with the TRAINING parameters

glimpse(UB.trd)                                             # Check the data structure
Rows: 1,751
Columns: 22
$ Personal.Loan        <fct> no, no, no, no, no, no, no, yes, no, no…
$ Age                  <dbl> -0.05431273, -0.57446728, -0.92123699, …
$ Experience           <dbl> -0.12175295, -0.46882565, -0.98943471, …
$ Income               <dbl> -0.85867297, -1.35649686, 0.56986515, -…
$ ZIP.Code             <dbl> -1.75250883, 0.88354520, 0.53745994, -1…
$ Family.1             <dbl> -0.6355621, 1.5725118, 1.5725118, -0.63…
$ Family.2             <dbl> -0.5774051, -0.5774051, -0.5774051, -0.…
$ Family.3             <dbl> 2.0037210, -0.4987865, -0.4987865, -0.4…
$ Family.4             <dbl> -0.5967491, -0.5967491, -0.5967491, 1.6…
$ CCAvg                <dbl> -0.25119120, -0.53150921, 0.42157204, -…
$ Education.1          <dbl> 1.1482386, 1.1482386, -0.8704018, -0.87…
$ Education.2          <dbl> -0.6196534, -0.6196534, 1.6128838, 1.61…
$ Education.3          <dbl> -0.6408777, -0.6408777, -0.6408777, -0.…
$ Mortgage             <dbl> -0.5664192, -0.5664192, -0.5664192, -0.…
$ Securities.Account.0 <dbl> -2.7998134, 0.3569627, 0.3569627, 0.356…
$ Securities.Account.1 <dbl> 2.7998134, -0.3569627, -0.3569627, -0.3…
$ CD.Account.0         <dbl> 0.2613337, 0.2613337, 0.2613337, 0.2613…
$ CD.Account.1         <dbl> -0.2613337, -0.2613337, -0.2613337, -0.…
$ Online.0             <dbl> 1.2486195, 1.2486195, 1.2486195, 1.2486…
$ Online.1             <dbl> -1.2486195, -1.2486195, -1.2486195, -1.…
$ CreditCard.0         <dbl> 0.6408777, 0.6408777, 0.6408777, -1.559…
$ CreditCard.1         <dbl> -0.6408777, -0.6408777, -0.6408777, 1.5…
glimpse(UB.ted)                                             # Check the data structure
Rows: 749
Columns: 22
$ Personal.Loan        <fct> no, no, no, no, no, no, no, no, no, no,…
$ Age                  <dbl> -1.7881612, -0.7478521, 1.2460737, 0.81…
$ Experience           <dbl> -1.68358012, -0.64236200, 0.83269699, 0…
$ Income               <dbl> -0.53400522, -0.96689556, -1.11840718, …
$ ZIP.Code             <dbl> -1.17304370, -0.59585545, 1.07366441, 0…
$ Family.1             <dbl> -0.6355621, -0.6355621, 1.5725118, 1.57…
$ Family.2             <dbl> -0.5774051, -0.5774051, -0.5774051, -0.…
$ Family.3             <dbl> -0.4987865, -0.4987865, -0.4987865, -0.…
$ Family.4             <dbl> 1.6747892, 1.6747892, -0.5967491, -0.59…
$ CCAvg                <dbl> -0.19512759, -0.86789083, -0.25119120, …
$ Education.1          <dbl> 1.1482386, -0.8704018, -0.8704018, -0.8…
$ Education.2          <dbl> -0.6196534, 1.6128838, -0.6196534, 1.61…
$ Education.3          <dbl> -0.6408777, -0.6408777, 1.5594690, -0.6…
$ Mortgage             <dbl> -0.5664192, 0.9609885, -0.5664192, -0.5…
$ Securities.Account.0 <dbl> -2.7998134, 0.3569627, 0.3569627, -2.79…
$ Securities.Account.1 <dbl> 2.7998134, -0.3569627, -0.3569627, 2.79…
$ CD.Account.0         <dbl> 0.2613337, 0.2613337, 0.2613337, 0.2613…
$ CD.Account.1         <dbl> -0.2613337, -0.2613337, -0.2613337, -0.…
$ Online.0             <dbl> 1.2486195, -0.8004271, -0.8004271, 1.24…
$ Online.1             <dbl> -1.2486195, 0.8004271, 0.8004271, -1.24…
$ CreditCard.0         <dbl> 0.6408777, 0.6408777, -1.5594690, -1.55…
$ CreditCard.1         <dbl> -0.6408777, -0.6408777, 1.5594690, 1.55…

6. 모형 훈련

Nearest Neighborhood Algorithm은 다양한 Package(예를 들어, "caret", "class")를 통해 수행할 수 있다. Package "class"의 함수 knn()를 이용하면 특정 class에 대한 예측 확률만 얻을 수 있는 반면, Package "caret"의 함수 knn3()를 이용하면 각 class에 대한 예측 확률을 얻을 수 있다. 그래서 여기서는 Package "caret"을 이용하여 모형 훈련을 수행한다.

# Fit a k-nearest-neighbor classifier on the standardized training data.
knn.model <- knn3(Personal.Loan ~ .,                        # Target ~ predictors
                  data = UB.trd,                            # Training Dataset
                  k = 4)                                    # Number of neighbors

knn.model
4-nearest neighbor model
Training set outcome distribution:

  no  yes 
1571  180 

Caution! Package "caret"에서 제공하는 함수 knn3Train()를 이용하면 Training Dataset에 대한 모형 훈련과 Test Dataset에 대한 예측을 한 번에 수행할 수 있다.

# Train the model AND predict the test set in a single call
knn3Train(UB.trd[, -1],                              # Training Dataset, predictors only (column 1 is the target)
          UB.ted[, -1],                              # Test Dataset, predictors only
          cl = UB.trd[, 1],                          # Target of the Training Dataset
          k = 4)                                     # Number of neighbors
  [1] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
 [11] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "yes" "no"  "no" 
 [21] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
 [31] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
 [41] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
 [51] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
 [61] "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no" 
 [71] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no" 
 [81] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes"
 [91] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[101] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no" 
[111] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[121] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[131] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[141] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[151] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[161] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[171] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[181] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[191] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[201] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[211] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[221] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[231] "yes" "no"  "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no" 
[241] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[251] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[261] "no"  "no"  "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no" 
[271] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[281] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[291] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[301] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[311] "no"  "no"  "yes" "no"  "no"  "yes" "no"  "no"  "no"  "no" 
[321] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no" 
[331] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[341] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[351] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no"  "no" 
[361] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[371] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[381] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[391] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[401] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[411] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[421] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[431] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[441] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[451] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[461] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[471] "no"  "no"  "no"  "yes" "no"  "yes" "yes" "no"  "no"  "no" 
[481] "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[491] "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no" 
[501] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[511] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[521] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[531] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[541] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[551] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[561] "no"  "yes" "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no" 
[571] "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[581] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[591] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no" 
[601] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[611] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no" 
[621] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[631] "no"  "no"  "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no" 
[641] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[651] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[661] "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[671] "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[681] "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[691] "yes" "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no" 
[701] "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no"  "no" 
[711] "no"  "yes" "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no" 
[721] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no"  "no" 
[731] "no"  "no"  "no"  "no"  "no"  "no"  "no"  "yes" "no"  "no" 
[741] "no"  "no"  "no"  "yes" "no"  "no"  "no"  "no"  "no" 
attr(,"prob")
         no  yes
  [1,] 1.00 0.00
  [2,] 1.00 0.00
  [3,] 1.00 0.00
  [4,] 1.00 0.00
  [5,] 1.00 0.00
  [6,] 1.00 0.00
  [7,] 1.00 0.00
  [8,] 0.75 0.25
  [9,] 1.00 0.00
 [10,] 1.00 0.00
 [11,] 1.00 0.00
 [12,] 0.50 0.50
 [13,] 1.00 0.00
 [14,] 1.00 0.00
 [15,] 1.00 0.00
 [16,] 1.00 0.00
 [17,] 1.00 0.00
 [18,] 0.00 1.00
 [19,] 0.75 0.25
 [20,] 1.00 0.00
 [21,] 0.75 0.25
 [22,] 1.00 0.00
 [23,] 1.00 0.00
 [24,] 1.00 0.00
 [25,] 1.00 0.00
 [26,] 1.00 0.00
 [27,] 1.00 0.00
 [28,] 1.00 0.00
 [29,] 1.00 0.00
 [30,] 1.00 0.00
 [31,] 0.75 0.25
 [32,] 1.00 0.00
 [33,] 1.00 0.00
 [34,] 1.00 0.00
 [35,] 1.00 0.00
 [36,] 1.00 0.00
 [37,] 1.00 0.00
 [38,] 1.00 0.00
 [39,] 1.00 0.00
 [40,] 1.00 0.00
 [41,] 1.00 0.00
 [42,] 1.00 0.00
 [43,] 1.00 0.00
 [44,] 1.00 0.00
 [45,] 1.00 0.00
 [46,] 1.00 0.00
 [47,] 1.00 0.00
 [48,] 1.00 0.00
 [49,] 1.00 0.00
 [50,] 1.00 0.00
 [51,] 1.00 0.00
 [52,] 0.25 0.75
 [53,] 1.00 0.00
 [54,] 1.00 0.00
 [55,] 1.00 0.00
 [56,] 1.00 0.00
 [57,] 1.00 0.00
 [58,] 1.00 0.00
 [59,] 1.00 0.00
 [60,] 1.00 0.00
 [61,] 1.00 0.00
 [62,] 1.00 0.00
 [63,] 0.00 1.00
 [64,] 1.00 0.00
 [65,] 1.00 0.00
 [66,] 1.00 0.00
 [67,] 1.00 0.00
 [68,] 1.00 0.00
 [69,] 0.75 0.25
 [70,] 1.00 0.00
 [71,] 1.00 0.00
 [72,] 1.00 0.00
 [73,] 0.75 0.25
 [74,] 1.00 0.00
 [75,] 1.00 0.00
 [76,] 1.00 0.00
 [77,] 1.00 0.00
 [78,] 1.00 0.00
 [79,] 0.25 0.75
 [80,] 0.50 0.50
 [81,] 1.00 0.00
 [82,] 1.00 0.00
 [83,] 1.00 0.00
 [84,] 1.00 0.00
 [85,] 1.00 0.00
 [86,] 1.00 0.00
 [87,] 1.00 0.00
 [88,] 1.00 0.00
 [89,] 1.00 0.00
 [90,] 0.50 0.50
 [91,] 1.00 0.00
 [92,] 1.00 0.00
 [93,] 1.00 0.00
 [94,] 1.00 0.00
 [95,] 1.00 0.00
 [96,] 1.00 0.00
 [97,] 1.00 0.00
 [98,] 1.00 0.00
 [99,] 0.75 0.25
[100,] 1.00 0.00
[101,] 1.00 0.00
[102,] 0.75 0.25
[103,] 1.00 0.00
[104,] 1.00 0.00
[105,] 1.00 0.00
[106,] 1.00 0.00
[107,] 1.00 0.00
[108,] 1.00 0.00
[109,] 0.50 0.50
[110,] 1.00 0.00
[111,] 1.00 0.00
[112,] 1.00 0.00
[113,] 1.00 0.00
[114,] 1.00 0.00
[115,] 1.00 0.00
[116,] 1.00 0.00
[117,] 1.00 0.00
[118,] 1.00 0.00
[119,] 0.75 0.25
[120,] 1.00 0.00
[121,] 1.00 0.00
[122,] 1.00 0.00
[123,] 1.00 0.00
[124,] 1.00 0.00
[125,] 1.00 0.00
[126,] 1.00 0.00
[127,] 1.00 0.00
[128,] 1.00 0.00
[129,] 1.00 0.00
[130,] 1.00 0.00
[131,] 1.00 0.00
[132,] 1.00 0.00
[133,] 1.00 0.00
[134,] 1.00 0.00
[135,] 1.00 0.00
[136,] 1.00 0.00
[137,] 1.00 0.00
[138,] 1.00 0.00
[139,] 1.00 0.00
[140,] 0.75 0.25
[141,] 0.75 0.25
[142,] 0.00 1.00
[143,] 1.00 0.00
[144,] 1.00 0.00
[145,] 1.00 0.00
[146,] 1.00 0.00
[147,] 1.00 0.00
[148,] 1.00 0.00
[149,] 1.00 0.00
[150,] 1.00 0.00
[151,] 1.00 0.00
[152,] 1.00 0.00
[153,] 1.00 0.00
[154,] 1.00 0.00
[155,] 1.00 0.00
[156,] 1.00 0.00
[157,] 1.00 0.00
[158,] 1.00 0.00
[159,] 1.00 0.00
[160,] 1.00 0.00
[161,] 1.00 0.00
[162,] 1.00 0.00
[163,] 1.00 0.00
[164,] 1.00 0.00
[165,] 1.00 0.00
[166,] 1.00 0.00
[167,] 1.00 0.00
[168,] 1.00 0.00
[169,] 1.00 0.00
[170,] 1.00 0.00
[171,] 1.00 0.00
[172,] 1.00 0.00
[173,] 0.75 0.25
[174,] 1.00 0.00
[175,] 1.00 0.00
[176,] 0.50 0.50
[177,] 1.00 0.00
[178,] 1.00 0.00
[179,] 1.00 0.00
[180,] 0.75 0.25
[181,] 1.00 0.00
[182,] 1.00 0.00
[183,] 1.00 0.00
[184,] 1.00 0.00
[185,] 1.00 0.00
[186,] 0.75 0.25
[187,] 1.00 0.00
[188,] 1.00 0.00
[189,] 1.00 0.00
[190,] 1.00 0.00
[191,] 1.00 0.00
[192,] 1.00 0.00
[193,] 1.00 0.00
[194,] 1.00 0.00
[195,] 1.00 0.00
[196,] 1.00 0.00
[197,] 1.00 0.00
[198,] 1.00 0.00
[199,] 1.00 0.00
[200,] 1.00 0.00
[201,] 1.00 0.00
[202,] 1.00 0.00
[203,] 1.00 0.00
[204,] 1.00 0.00
[205,] 0.75 0.25
[206,] 1.00 0.00
[207,] 1.00 0.00
[208,] 1.00 0.00
[209,] 1.00 0.00
[210,] 1.00 0.00
[211,] 1.00 0.00
[212,] 1.00 0.00
[213,] 1.00 0.00
[214,] 1.00 0.00
[215,] 1.00 0.00
[216,] 0.75 0.25
[217,] 1.00 0.00
[218,] 1.00 0.00
[219,] 1.00 0.00
[220,] 1.00 0.00
[221,] 0.50 0.50
[222,] 1.00 0.00
[223,] 1.00 0.00
[224,] 1.00 0.00
[225,] 1.00 0.00
[226,] 1.00 0.00
[227,] 1.00 0.00
[228,] 1.00 0.00
[229,] 1.00 0.00
[230,] 1.00 0.00
[231,] 0.00 1.00
[232,] 1.00 0.00
[233,] 1.00 0.00
[234,] 0.50 0.50
[235,] 1.00 0.00
[236,] 0.00 1.00
[237,] 1.00 0.00
[238,] 1.00 0.00
[239,] 1.00 0.00
[240,] 1.00 0.00
[241,] 1.00 0.00
[242,] 1.00 0.00
[243,] 1.00 0.00
[244,] 1.00 0.00
[245,] 1.00 0.00
[246,] 1.00 0.00
[247,] 1.00 0.00
[248,] 1.00 0.00
[249,] 1.00 0.00
[250,] 1.00 0.00
[251,] 1.00 0.00
[252,] 1.00 0.00
[253,] 0.75 0.25
[254,] 1.00 0.00
[255,] 1.00 0.00
[256,] 1.00 0.00
[257,] 1.00 0.00
[258,] 1.00 0.00
[259,] 1.00 0.00
[260,] 1.00 0.00
[261,] 1.00 0.00
[262,] 0.50 0.50
[263,] 1.00 0.00
[264,] 1.00 0.00
[265,] 1.00 0.00
[266,] 0.50 0.50
[267,] 1.00 0.00
[268,] 1.00 0.00
[269,] 1.00 0.00
[270,] 1.00 0.00
[271,] 0.75 0.25
[272,] 1.00 0.00
[273,] 1.00 0.00
[274,] 1.00 0.00
[275,] 1.00 0.00
[276,] 1.00 0.00
[277,] 0.75 0.25
[278,] 1.00 0.00
[279,] 1.00 0.00
[280,] 1.00 0.00
[281,] 1.00 0.00
[282,] 1.00 0.00
[283,] 1.00 0.00
[284,] 1.00 0.00
[285,] 1.00 0.00
[286,] 1.00 0.00
[287,] 1.00 0.00
[288,] 1.00 0.00
[289,] 1.00 0.00
[290,] 1.00 0.00
[291,] 1.00 0.00
[292,] 1.00 0.00
[293,] 1.00 0.00
[294,] 0.75 0.25
[295,] 1.00 0.00
[296,] 1.00 0.00
[297,] 0.75 0.25
[298,] 1.00 0.00
[299,] 1.00 0.00
[300,] 1.00 0.00
[301,] 1.00 0.00
[302,] 1.00 0.00
[303,] 1.00 0.00
[304,] 1.00 0.00
[305,] 1.00 0.00
[306,] 1.00 0.00
[307,] 1.00 0.00
[308,] 1.00 0.00
[309,] 0.50 0.50
[310,] 1.00 0.00
[311,] 1.00 0.00
[312,] 1.00 0.00
[313,] 0.25 0.75
[314,] 1.00 0.00
[315,] 1.00 0.00
[316,] 0.00 1.00
[317,] 1.00 0.00
[318,] 1.00 0.00
[319,] 1.00 0.00
[320,] 1.00 0.00
[321,] 1.00 0.00
[322,] 1.00 0.00
[323,] 1.00 0.00
[324,] 1.00 0.00
[325,] 1.00 0.00
[326,] 1.00 0.00
[327,] 1.00 0.00
[328,] 1.00 0.00
[329,] 0.25 0.75
[330,] 1.00 0.00
[331,] 1.00 0.00
[332,] 1.00 0.00
[333,] 1.00 0.00
[334,] 1.00 0.00
[335,] 1.00 0.00
[336,] 1.00 0.00
[337,] 1.00 0.00
[338,] 1.00 0.00
[339,] 1.00 0.00
[340,] 1.00 0.00
[341,] 1.00 0.00
[342,] 1.00 0.00
[343,] 1.00 0.00
[344,] 1.00 0.00
[345,] 0.75 0.25
[346,] 1.00 0.00
[347,] 0.75 0.25
[348,] 1.00 0.00
[349,] 1.00 0.00
[350,] 1.00 0.00
[351,] 1.00 0.00
[352,] 1.00 0.00
[353,] 1.00 0.00
[354,] 1.00 0.00
[355,] 0.75 0.25
[356,] 1.00 0.00
[357,] 1.00 0.00
[358,] 0.00 1.00
[359,] 1.00 0.00
[360,] 1.00 0.00
[361,] 1.00 0.00
[362,] 1.00 0.00
[363,] 1.00 0.00
[364,] 1.00 0.00
[365,] 1.00 0.00
[366,] 1.00 0.00
[367,] 1.00 0.00
[368,] 1.00 0.00
[369,] 1.00 0.00
[370,] 1.00 0.00
[371,] 1.00 0.00
[372,] 0.25 0.75
[373,] 1.00 0.00
[374,] 1.00 0.00
[375,] 1.00 0.00
[376,] 0.75 0.25
[377,] 0.75 0.25
[378,] 1.00 0.00
[379,] 1.00 0.00
[380,] 1.00 0.00
[381,] 1.00 0.00
[382,] 0.00 1.00
[383,] 1.00 0.00
[384,] 1.00 0.00
[385,] 1.00 0.00
[386,] 1.00 0.00
[387,] 1.00 0.00
[388,] 1.00 0.00
[389,] 1.00 0.00
[390,] 1.00 0.00
[391,] 1.00 0.00
[392,] 1.00 0.00
[393,] 1.00 0.00
[394,] 1.00 0.00
[395,] 1.00 0.00
[396,] 1.00 0.00
[397,] 0.75 0.25
[398,] 1.00 0.00
[399,] 1.00 0.00
[400,] 1.00 0.00
[401,] 1.00 0.00
[402,] 0.50 0.50
[403,] 1.00 0.00
[404,] 1.00 0.00
[405,] 1.00 0.00
[406,] 0.75 0.25
[407,] 1.00 0.00
[408,] 1.00 0.00
[409,] 1.00 0.00
[410,] 1.00 0.00
[411,] 1.00 0.00
[412,] 1.00 0.00
[413,] 1.00 0.00
[414,] 1.00 0.00
[415,] 1.00 0.00
[416,] 0.75 0.25
[417,] 1.00 0.00
[418,] 1.00 0.00
[419,] 1.00 0.00
[420,] 1.00 0.00
[421,] 1.00 0.00
[422,] 1.00 0.00
[423,] 1.00 0.00
[424,] 1.00 0.00
[425,] 1.00 0.00
[426,] 1.00 0.00
[427,] 1.00 0.00
[428,] 1.00 0.00
[429,] 1.00 0.00
[430,] 1.00 0.00
[431,] 1.00 0.00
[432,] 0.50 0.50
[433,] 1.00 0.00
[434,] 1.00 0.00
[435,] 1.00 0.00
[436,] 1.00 0.00
[437,] 0.75 0.25
[438,] 1.00 0.00
[439,] 1.00 0.00
[440,] 1.00 0.00
[441,] 1.00 0.00
[442,] 1.00 0.00
[443,] 1.00 0.00
[444,] 1.00 0.00
[445,] 1.00 0.00
[446,] 0.50 0.50
[447,] 1.00 0.00
[448,] 0.75 0.25
[449,] 1.00 0.00
[450,] 1.00 0.00
[451,] 0.75 0.25
[452,] 1.00 0.00
[453,] 1.00 0.00
[454,] 1.00 0.00
[455,] 1.00 0.00
[456,] 1.00 0.00
[457,] 1.00 0.00
[458,] 1.00 0.00
[459,] 1.00 0.00
[460,] 1.00 0.00
[461,] 1.00 0.00
[462,] 1.00 0.00
[463,] 1.00 0.00
[464,] 1.00 0.00
[465,] 1.00 0.00
[466,] 1.00 0.00
[467,] 1.00 0.00
[468,] 1.00 0.00
[469,] 1.00 0.00
[470,] 1.00 0.00
[471,] 1.00 0.00
[472,] 1.00 0.00
[473,] 1.00 0.00
[474,] 0.50 0.50
[475,] 1.00 0.00
[476,] 0.50 0.50
[477,] 0.00 1.00
[478,] 1.00 0.00
[479,] 1.00 0.00
[480,] 1.00 0.00
[481,] 1.00 0.00
[482,] 1.00 0.00
[483,] 0.25 0.75
[484,] 1.00 0.00
[485,] 1.00 0.00
[486,] 1.00 0.00
[487,] 1.00 0.00
[488,] 1.00 0.00
[489,] 1.00 0.00
[490,] 1.00 0.00
[491,] 1.00 0.00
[492,] 1.00 0.00
[493,] 1.00 0.00
[494,] 0.00 1.00
[495,] 0.75 0.25
[496,] 0.75 0.25
[497,] 1.00 0.00
[498,] 1.00 0.00
[499,] 1.00 0.00
[500,] 1.00 0.00
[501,] 1.00 0.00
[502,] 1.00 0.00
[503,] 1.00 0.00
[504,] 1.00 0.00
[505,] 1.00 0.00
[506,] 1.00 0.00
[507,] 1.00 0.00
[508,] 1.00 0.00
[509,] 1.00 0.00
[510,] 1.00 0.00
[511,] 1.00 0.00
[512,] 1.00 0.00
[513,] 1.00 0.00
[514,] 1.00 0.00
[515,] 1.00 0.00
[516,] 1.00 0.00
[517,] 1.00 0.00
[518,] 1.00 0.00
[519,] 1.00 0.00
[520,] 1.00 0.00
[521,] 1.00 0.00
[522,] 1.00 0.00
[523,] 1.00 0.00
[524,] 1.00 0.00
[525,] 1.00 0.00
[526,] 1.00 0.00
[527,] 1.00 0.00
[528,] 1.00 0.00
[529,] 1.00 0.00
[530,] 1.00 0.00
[531,] 1.00 0.00
[532,] 0.75 0.25
[533,] 1.00 0.00
[534,] 0.75 0.25
[535,] 0.75 0.25
[536,] 1.00 0.00
[537,] 1.00 0.00
[538,] 1.00 0.00
[539,] 1.00 0.00
[540,] 1.00 0.00
[541,] 1.00 0.00
[542,] 1.00 0.00
[543,] 1.00 0.00
[544,] 1.00 0.00
[545,] 1.00 0.00
[546,] 1.00 0.00
[547,] 1.00 0.00
[548,] 1.00 0.00
[549,] 1.00 0.00
[550,] 1.00 0.00
[551,] 1.00 0.00
[552,] 1.00 0.00
[553,] 0.75 0.25
[554,] 1.00 0.00
[555,] 1.00 0.00
[556,] 1.00 0.00
[557,] 1.00 0.00
[558,] 1.00 0.00
[559,] 0.50 0.50
[560,] 1.00 0.00
[561,] 1.00 0.00
[562,] 0.50 0.50
[563,] 1.00 0.00
[564,] 0.50 0.50
[565,] 1.00 0.00
[566,] 1.00 0.00
[567,] 1.00 0.00
[568,] 1.00 0.00
[569,] 1.00 0.00
[570,] 1.00 0.00
[571,] 1.00 0.00
[572,] 0.00 1.00
[573,] 1.00 0.00
[574,] 1.00 0.00
[575,] 1.00 0.00
[576,] 1.00 0.00
[577,] 1.00 0.00
[578,] 1.00 0.00
[579,] 1.00 0.00
[580,] 1.00 0.00
[581,] 1.00 0.00
[582,] 1.00 0.00
[583,] 1.00 0.00
[584,] 1.00 0.00
[585,] 1.00 0.00
[586,] 1.00 0.00
[587,] 1.00 0.00
[588,] 1.00 0.00
[589,] 1.00 0.00
[590,] 1.00 0.00
[591,] 1.00 0.00
[592,] 1.00 0.00
[593,] 1.00 0.00
[594,] 1.00 0.00
[595,] 1.00 0.00
[596,] 1.00 0.00
[597,] 0.75 0.25
[598,] 1.00 0.00
[599,] 0.25 0.75
[600,] 1.00 0.00
[601,] 1.00 0.00
[602,] 1.00 0.00
[603,] 1.00 0.00
[604,] 1.00 0.00
[605,] 1.00 0.00
[606,] 1.00 0.00
[607,] 1.00 0.00
[608,] 1.00 0.00
[609,] 1.00 0.00
[610,] 1.00 0.00
[611,] 1.00 0.00
[612,] 1.00 0.00
[613,] 1.00 0.00
[614,] 1.00 0.00
[615,] 1.00 0.00
[616,] 1.00 0.00
[617,] 1.00 0.00
[618,] 1.00 0.00
[619,] 0.00 1.00
[620,] 1.00 0.00
[621,] 1.00 0.00
[622,] 1.00 0.00
[623,] 1.00 0.00
[624,] 1.00 0.00
[625,] 1.00 0.00
[626,] 1.00 0.00
[627,] 1.00 0.00
[628,] 1.00 0.00
[629,] 1.00 0.00
[630,] 1.00 0.00
[631,] 1.00 0.00
[632,] 0.75 0.25
[633,] 1.00 0.00
[634,] 1.00 0.00
[635,] 1.00 0.00
[636,] 0.25 0.75
[637,] 1.00 0.00
[638,] 0.75 0.25
[639,] 1.00 0.00
[640,] 0.75 0.25
[641,] 1.00 0.00
[642,] 1.00 0.00
[643,] 1.00 0.00
[644,] 1.00 0.00
[645,] 1.00 0.00
[646,] 0.75 0.25
[647,] 1.00 0.00
[648,] 1.00 0.00
[649,] 1.00 0.00
[650,] 1.00 0.00
[651,] 1.00 0.00
[652,] 1.00 0.00
[653,] 1.00 0.00
[654,] 1.00 0.00
[655,] 1.00 0.00
[656,] 1.00 0.00
[657,] 1.00 0.00
[658,] 1.00 0.00
[659,] 1.00 0.00
[660,] 1.00 0.00
[661,] 0.50 0.50
[662,] 1.00 0.00
[663,] 1.00 0.00
[664,] 1.00 0.00
[665,] 1.00 0.00
[666,] 1.00 0.00
[667,] 1.00 0.00
[668,] 1.00 0.00
[669,] 1.00 0.00
[670,] 1.00 0.00
[671,] 1.00 0.00
[672,] 1.00 0.00
[673,] 0.25 0.75
[674,] 1.00 0.00
[675,] 1.00 0.00
[676,] 1.00 0.00
[677,] 1.00 0.00
[678,] 1.00 0.00
[679,] 1.00 0.00
[680,] 0.75 0.25
[681,] 1.00 0.00
[682,] 1.00 0.00
[683,] 0.50 0.50
[684,] 1.00 0.00
[685,] 1.00 0.00
[686,] 1.00 0.00
[687,] 1.00 0.00
[688,] 1.00 0.00
[689,] 1.00 0.00
[690,] 1.00 0.00
[691,] 0.25 0.75
[692,] 0.75 0.25
[693,] 1.00 0.00
[694,] 1.00 0.00
[695,] 0.50 0.50
[696,] 1.00 0.00
[697,] 0.75 0.25
[698,] 1.00 0.00
[699,] 1.00 0.00
[700,] 1.00 0.00
[701,] 0.75 0.25
[702,] 1.00 0.00
[703,] 1.00 0.00
[704,] 0.50 0.50
[705,] 1.00 0.00
[706,] 1.00 0.00
[707,] 1.00 0.00
[708,] 1.00 0.00
[709,] 1.00 0.00
[710,] 1.00 0.00
[711,] 1.00 0.00
[712,] 0.00 1.00
[713,] 1.00 0.00
[714,] 1.00 0.00
[715,] 0.00 1.00
[716,] 0.75 0.25
[717,] 1.00 0.00
[718,] 1.00 0.00
[719,] 1.00 0.00
[720,] 1.00 0.00
[721,] 1.00 0.00
[722,] 1.00 0.00
[723,] 1.00 0.00
[724,] 1.00 0.00
[725,] 1.00 0.00
[726,] 1.00 0.00
[727,] 1.00 0.00
[728,] 1.00 0.00
[729,] 1.00 0.00
[730,] 1.00 0.00
[731,] 1.00 0.00
[732,] 1.00 0.00
[733,] 1.00 0.00
[734,] 0.50 0.50
[735,] 1.00 0.00
[736,] 1.00 0.00
[737,] 1.00 0.00
[738,] 0.25 0.75
[739,] 1.00 0.00
[740,] 1.00 0.00
[741,] 1.00 0.00
[742,] 1.00 0.00
[743,] 1.00 0.00
[744,] 0.00 1.00
[745,] 1.00 0.00
[746,] 1.00 0.00
[747,] 1.00 0.00
[748,] 1.00 0.00
[749,] 1.00 0.00

7. 모형 평가

Caution! 모형 평가를 위해 Test Dataset에 대한 예측 class/확률이 필요하며, 함수 predict()를 이용하여 생성한다.

# 예측 class 생성
knn.pred <- predict(knn.model,                                        
                    newdata = UB.ted[,-1],            # Test Dataset including Only 예측 변수   
                    type = "class")                   # 예측 class 생성

knn.pred %>%
  as_tibble
# A tibble: 749 × 1
   value
   <fct>
 1 no   
 2 no   
 3 no   
 4 no   
 5 no   
 6 no   
 7 no   
 8 no   
 9 no   
10 no   
# ℹ 739 more rows


7-1. ConfusionMatrix

CM   <- caret::confusionMatrix(knn.pred, UB.ted$Personal.Loan, 
                               positive = "yes")     # confusionMatrix(예측 class, 실제 class, positive = "관심 class")
CM
Confusion Matrix and Statistics

          Reference
Prediction  no yes
       no  667  41
       yes   6  35
                                          
               Accuracy : 0.9372          
                 95% CI : (0.9174, 0.9535)
    No Information Rate : 0.8985          
    P-Value [Acc > NIR] : 0.0001251       
                                          
                  Kappa : 0.5675          
                                          
 Mcnemar's Test P-Value : 7.071e-07       
                                          
            Sensitivity : 0.46053         
            Specificity : 0.99108         
         Pos Pred Value : 0.85366         
         Neg Pred Value : 0.94209         
             Prevalence : 0.10147         
         Detection Rate : 0.04673         
   Detection Prevalence : 0.05474         
      Balanced Accuracy : 0.72581         
                                          
       'Positive' Class : yes             
                                          


7-2. ROC 곡선

# 예측 확률 생성
test.knn.prob <- predict(knn.model, 
                         newdata = UB.ted[,-1],     # Test Dataset including Only 예측 변수  
                         type = "prob")             # 예측 확률 생성 

test.knn.prob %>%
  as_tibble
# A tibble: 749 × 2
      no   yes
   <dbl> <dbl>
 1  1     0   
 2  1     0   
 3  1     0   
 4  1     0   
 5  1     0   
 6  1     0   
 7  1     0   
 8  0.75  0.25
 9  1     0   
10  1     0   
# ℹ 739 more rows
test.knn.prob <- test.knn.prob[,2]                  # "Personal.Loan = yes"에 대한 예측 확률

ac <- UB.ted$Personal.Loan                          # Test Dataset의 실제 class         
pp <- as.numeric(test.knn.prob)                     # 예측 확률을 수치형으로 변환

1) Package “pROC”

pacman::p_load("pROC")

knn.roc  <- roc(ac, pp, plot=T, col="gray")         # roc(실제 class, 예측 확률)
auc      <- round(auc(knn.roc),3)
legend("bottomright", legend = auc, bty = "n")

Caution! Package "pROC"를 통해 출력한 ROC 곡선은 다양한 함수를 이용해서 그래프를 수정할 수 있다.

# 함수 plot.roc() 이용
plot.roc(knn.roc,   
         col="gray",                                # Line Color
         print.auc = TRUE,                          # AUC 출력 여부
         print.auc.col = "red",                     # AUC 글씨 색깔
         print.thres = TRUE,                        # Cutoff Value 출력 여부
         print.thres.pch = 19,                      # Cutoff Value를 표시하는 도형 모양
         print.thres.col = "red",                   # Cutoff Value를 표시하는 도형의 색깔
         auc.polygon = TRUE,                        # 곡선 아래 면적에 대한 여부
         auc.polygon.col = "gray90")                # 곡선 아래 면적의 색깔

# 함수 ggroc() 이용
ggroc(knn.roc) +
annotate(geom = "text", x = 0.9, y = 1.0,
label = paste("AUC = ", auc),
size = 5,
color="red") +
theme_bw()

2) Package “Epi”

pacman::p_load("Epi")       
# install_version("etm", version = "1.1", repos = "http://cran.us.r-project.org")

ROC(pp, ac, plot="ROC")                             # ROC(예측 확률, 실제 class)  

3) Package “ROCR”

pacman::p_load("ROCR")

knn.pred <- prediction(pp, ac)                      # prediction(예측 확률, 실제 class)  

knn.perf <- performance(knn.pred, "tpr", "fpr")     # performance(prediction 객체, "민감도", "1-특이도")                      
plot(knn.perf, col = "gray")                        # ROC Curve

perf.auc   <- performance(knn.pred, "auc")          # AUC
auc        <- attributes(perf.auc)$y.values 
legend("bottomright", legend = auc, bty = "n")


7-3. 향상 차트

1) Package “ROCR”

knn.perf       <- performance(knn.pred, "lift", "rpp")  # Lift Chart
plot(knn.perf, main = "lift curve", 
     colorize = T,                                      # Coloring according to cutoff
     lwd = 2)  

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".