Ch7, 로지스틱 회귀분석


Book : 비즈니스 활용 사례로 배우는 데이터 분석 : R

Ch7, 로지스틱 회귀분석


[분석 스토리]
- 배경 : 피처폰용 게임을 운영하다가 이후 스마트폰용 버전을 개발 / 피처폰 -> 스마트폰 아이디 이전 기능 개발
- 문제 발견 : 피처폰의 유저 수가 크게 감소
- 현실의 모습 : 피처폰 유저 감소량 > 스마트폰 유저 증가량
- 이상적인 모습 : 피처폰 유저 감소량 = 스마트폰 유저 증가량


[설정] R markdown 사용을 위해 working directory 설정


[데이터셋]

# Read the CSV file into `dau`.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned elsewhere in a session.
dau <- read.csv("section7-dau.csv", header = TRUE, stringsAsFactors = FALSE)
head(dau)
##   region_month region_day app_name  user_id device
## 1      2013-01 2013-01-01  game-02 10061580     FP
## 2      2013-01 2013-01-01  game-02 10154440     FP
## 3      2013-01 2013-01-01  game-02 10164762     SP
## 4      2013-01 2013-01-01  game-02 10165615     FP
## 5      2013-01 2013-01-01  game-02 10321356     FP
## 6      2013-01 2013-01-01  game-02 10406653     SP

[가설] 1월과 2월의 상황을 비교하여 게임을 떠난 유저 형태 분석

| 탈퇴 시 SP | 이유 | 데이터유무 |
|---|---|---|
| Y | 자연탈퇴 | X |
| N | 자연탈퇴 | X |
| N | 새로운 SP계정 | X(극소수) |
| N | FP-SP ID 이전 | O |
| N | FP-SP 이전실패 | X |

FP 사용자가 줄어드는 경우 1) 자연탈퇴 2) ID 이전 실패

자연탈퇴와 ID 이전 실패 유저를 구별할 방법이 없다. 즉, 정답 셋 데이터가 없는 상태

# Prepare per-user data showing whether each user migrated their ID.
# Every monthly table keeps one row per (month, device, user) combination.
mau.cols <- c("region_month", "device", "user_id")
# MAU: all devices
mau <- unique(dau[, mau.cols])
# MAU: feature phone (FP) rows only
fp.mau <- unique(dau[dau$device == "FP", mau.cols])
# MAU: smartphone (SP) rows only
sp.mau <- unique(dau[dau$device == "SP", mau.cols])

# Split the FP and SP monthly tables into January and February
fp.mau1 <- fp.mau[fp.mau$region_month == "2013-01", ]
fp.mau2 <- fp.mau[fp.mau$region_month == "2013-02", ]
sp.mau1 <- sp.mau[sp.mau$region_month == "2013-01", ]
sp.mau2 <- sp.mau[sp.mau$region_month == "2013-02", ]
# Did a user who played on a feature phone in January play at all in February?
mau$is_access <- 1
# `mau` has one row per (month, device, user), so a user active on both FP and
# SP in February appears twice. De-duplicate on user_id before the left join;
# otherwise that user is duplicated in fp.mau1 and double-counted downstream.
# NOTE: printed results recorded further down were produced before this
# de-duplication and will change when the script is re-run.
feb.access <- unique(
  mau[mau$region_month == "2013-02", c("user_id", "is_access")]
)
fp.mau1 <- merge(fp.mau1, feb.access, by = "user_id", all.x = TRUE)
# Users without any February row did not access the game that month
fp.mau1$is_access[is.na(fp.mau1$is_access)] <- 0
head(fp.mau1)
##   user_id region_month device is_access
## 1  397286      2013-01     FP         1
## 2  471341      2013-01     FP         1
## 3  503874      2013-01     FP         0
## 4  512250      2013-01     FP         1
## 5  513811      2013-01     FP         1
## 6  638688      2013-01     FP         1

# Of the January feature-phone users, who also used a feature phone in February?
fp.mau2$is_fp <- 1
# Left join keeps every January FP user; TRUE is spelled out because T can be
# reassigned.
fp.mau1 <- merge(fp.mau1, fp.mau2[, c("user_id", "is_fp")],
                 by = "user_id", all.x = TRUE)
# No match in the February FP table means no FP usage that month
fp.mau1$is_fp[is.na(fp.mau1$is_fp)] <- 0
head(fp.mau1)
##   user_id region_month device is_access is_fp
## 1  397286      2013-01     FP         1     1
## 2  471341      2013-01     FP         1     0
## 3  503874      2013-01     FP         0     0
## 4  512250      2013-01     FP         1     1
## 5  513811      2013-01     FP         1     1
## 6  638688      2013-01     FP         1     1
# Of the January feature-phone users, who used a smartphone in February?
sp.mau2$is_sp <- 1
# Left join keeps every January FP user; TRUE is spelled out because T can be
# reassigned.
fp.mau1 <- merge(fp.mau1, sp.mau2[, c("user_id", "is_sp")],
                 by = "user_id", all.x = TRUE)
# No match in the February SP table means no SP usage that month
fp.mau1$is_sp[is.na(fp.mau1$is_sp)] <- 0
head(fp.mau1)
##   user_id region_month device is_access is_fp is_sp
## 1  397286      2013-01     FP         1     1     0
## 2  471341      2013-01     FP         1     0     1
## 3  503874      2013-01     FP         0     0     0
## 4  512250      2013-01     FP         1     1     0
## 5  513811      2013-01     FP         1     1     0
## 6  638688      2013-01     FP         1     1     0

[아이디어] 1월 이용횟수를 가지고 모델을 만든다고 했을 때,

전제) ID 이전 실패 유저는 사용 횟수가 많고, 자연 탈퇴 유저는 사용 횟수가 서서히 줄어든 상태일 것이다.
1) ID 이전에 실패한 유저가 많으면 이용 횟수에 변화가 별로 없으므로 모델이 성립하지 않음
2) 자연 탈퇴가 많다면 이용 횟수에 차이가 있으므로 모델이 성립할 것임

[Action] 위 1, 2 분석 결과에 따라 Action은 다음과 같이 달라진다.

  1. ID 이전 기능 개선
  2. 스마트폰 신규 유저를 대상으로 한 광고 개선
# Keep January FP users who either did not access the game in February
# or used it on a smartphone in February
fp.mau1 <- fp.mau1[with(fp.mau1, is_access == 0 | is_sp == 1), ]
head(fp.mau1)
##    user_id region_month device is_access is_fp is_sp
## 2   471341      2013-01     FP         1     0     1
## 3   503874      2013-01     FP         0     0     0
## 11 1073544      2013-01     FP         0     0     0
## 12 1073864      2013-01     FP         0     0     0
## 14 1163733      2013-01     FP         1     0     1
## 15 1454629      2013-01     FP         0     0     0
# Build a per-user, per-day usage table from January feature-phone activity
library(reshape2)
fp.dau1 <- dau[dau$device == "FP" & dau$region_month == "2013-01", ]
fp.dau1$is_access <- 1
# One row per user, one column per region_day; each cell is the number of log
# rows for that user on that day. The count is stored as character, so the
# day columns are non-numeric when they are later used as predictors.
fp.dau1.cast <- dcast(fp.dau1, user_id ~ region_day,
                      value.var = "is_access",
                      fun.aggregate = function(x) as.character(length(x)))
# Label columns from the dates actually present (X1day ... X31day) instead of
# assuming all 31 days exist, so a missing day cannot shift or break the names.
day.of.month <- as.integer(format(as.Date(names(fp.dau1.cast)[-1]), "%d"))
names(fp.dau1.cast)[-1] <- paste0("X", day.of.month, "day")
head(fp.dau1.cast)
##   user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 1  397286     1     1     1     1     1     1     1     1     1      1
## 2  471341     1     1     1     1     0     0     0     0     0      0
## 3  503874     1     0     0     0     0     0     0     0     0      0
## 4  512250     1     1     1     1     1     1     1     1     1      1
## 5  513811     0     0     0     0     0     0     0     0     0      0
## 6  638688     1     1     1     1     1     1     1     1     1      1
##   X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 1      1      1      2      1      1      1      2      1      1      1
## 2      0      0      0      0      0      0      0      0      0      0
## 3      0      0      0      0      0      0      0      0      0      0
## 4      1      1      1      1      1      1      1      1      1      1
## 5      0      0      0      0      0      0      0      0      0      0
## 6      1      1      1      1      1      1      1      1      1      1
##   X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 1      1      1      1      1      1      1      1      1      1      1
## 2      0      0      0      0      0      0      0      0      0      0
## 3      0      0      0      0      0      0      0      0      0      0
## 4      1      1      1      1      1      1      1      1      1      1
## 5      0      1      0      0      0      0      0      1      1      0
## 6      1      1      1      1      1      1      1      1      1      1
##   X31day
## 1      1
## 2      0
## 3      0
## 4      1
## 5      1
## 6      1
# Attach the "used a smartphone in February" flag to the daily usage table.
# merge() with its default settings keeps only users found in both tables.
sp.label <- fp.mau1[c("user_id", "is_sp")]
fp.dau1.cast <- merge(x = fp.dau1.cast, y = sp.label, by = "user_id")
head(fp.dau1.cast)
##   user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 1  471341     1     1     1     1     0     0     0     0     0      0
## 2  503874     1     0     0     0     0     0     0     0     0      0
## 3 1073544     0     0     0     0     0     0     0     0     0      1
## 4 1073864     0     0     0     0     0     0     0     0     0      0
## 5 1163733     1     1     0     0     0     0     0     0     0      0
## 6 1454629     0     0     0     0     0     0     0     0     0      0
##   X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 1      0      0      0      0      0      0      0      0      0      0
## 2      0      0      0      0      0      0      0      0      0      0
## 3      0      0      0      0      0      0      0      0      0      0
## 4      1      0      0      1      0      0      0      0      0      0
## 5      0      0      0      0      0      0      1      1      0      0
## 6      0      0      1      0      0      0      0      0      0      0
##   X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 1      0      0      0      0      0      0      0      0      0      0
## 2      0      0      0      0      0      0      0      0      0      0
## 3      0      0      1      1      1      0      0      0      0      0
## 4      0      0      0      0      0      0      0      0      0      0
## 5      1      1      1      1      1      1      1      1      0      0
## 6      0      0      0      0      0      0      0      0      0      0
##   X31day is_sp
## 1      0     1
## 2      0     0
## 3      0     0
## 4      0     0
## 5      0     1
## 6      0     0

is_sp 0 : 탈퇴 유저 (190명), 1 : 스마트폰 이전 유저 (62명)

# Class balance of the target: 0 = did not move to smartphone, 1 = moved
table(fp.dau1.cast[["is_sp"]])
## 
##   0   1 
## 190  62

[모델링] 로지스틱 회귀분석 (binomial)
변수 선택 (계산량 감소와 overfitting 방지) : AIC (아카이케 정보 척도)
AIC(Akaike’s An Information Criterion) : AIC 값이 작을수록 적은 인수를 가지고 높은 적합성을 보이는 모델
Step : AIC 값을 이용하여 단계적 회귀(forward, backward stepwise regression 등)를 수행하여 변수 선택을 자동화

# Build the model with logistic regression: fit is_sp on every remaining
# column (user_id, the first column, is dropped), then let step() select
# variables by AIC.
fit.full <- glm(is_sp ~ ., family = binomial, data = fp.dau1.cast[, -1])
fit.logit <- step(fit.full)
## Start:  AIC=178.32
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day + 
##     X8day + X9day + X10day + X11day + X12day + X13day + X14day + 
##     X15day + X16day + X17day + X18day + X19day + X20day + X21day + 
##     X22day + X23day + X24day + X25day + X26day + X27day + X28day + 
##     X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X23day  2   114.06 176.06
## - X25day  1   112.33 176.33
## - X26day  1   112.36 176.36
## - X28day  1   112.38 176.38
## - X20day  1   112.38 176.38
## - X27day  1   112.39 176.39
## - X2day   1   112.41 176.41
## - X17day  1   112.45 176.45
## - X9day   1   112.46 176.46
## - X3day   1   112.48 176.49
## - X16day  1   112.49 176.49
## - X12day  1   112.50 176.50
## - X19day  1   112.58 176.58
## - X6day   1   112.65 176.65
## - X21day  1   112.90 176.90
## - X8day   1   113.09 177.09
## - X14day  1   113.23 177.23
## - X22day  1   113.43 177.43
## - X11day  1   113.66 177.66
## - X31day  1   113.79 177.79
## - X18day  1   113.81 177.81
## - X15day  1   114.27 178.27
## <none>        112.32 178.32
## - X29day  1   114.55 178.55
## - X13day  1   114.64 178.64
## - X30day  1   114.65 178.65
## - X24day  1   114.78 178.78
## - X5day   1   114.94 178.94
## - X7day   1   115.61 179.61
## - X10day  1   117.86 181.86
## - X4day   1   118.08 182.09
## - X1day   1   118.61 182.61
## 
## Step:  AIC=176.06
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day + 
##     X8day + X9day + X10day + X11day + X12day + X13day + X14day + 
##     X15day + X16day + X17day + X18day + X19day + X20day + X21day + 
##     X22day + X24day + X25day + X26day + X27day + X28day + X29day + 
##     X30day + X31day
## 
##          Df Deviance    AIC
## - X26day  1   114.06 174.06
## - X20day  1   114.06 174.06
## - X28day  1   114.07 174.07
## - X2day   1   114.09 174.09
## - X9day   1   114.09 174.09
## - X27day  1   114.09 174.09
## - X16day  1   114.11 174.12
## - X17day  1   114.14 174.15
## - X21day  1   114.17 174.17
## - X25day  1   114.18 174.18
## - X12day  1   114.21 174.21
## - X3day   1   114.38 174.38
## - X19day  1   114.52 174.52
## - X6day   1   114.62 174.62
## - X11day  1   114.81 174.81
## - X14day  1   114.94 174.94
## - X8day   1   115.12 175.12
## - X22day  1   115.23 175.23
## - X5day   1   115.79 175.79
## - X18day  1   115.79 175.79
## - X30day  1   115.82 175.82
## - X29day  1   115.82 175.82
## - X15day  1   115.98 175.98
## <none>        114.06 176.06
## - X24day  1   116.07 176.07
## - X31day  1   116.11 176.11
## - X13day  1   116.32 176.32
## - X7day   1   116.65 176.65
## - X4day   1   119.59 179.59
## - X10day  1   119.61 179.61
## - X1day   1   119.68 179.68
## 
## Step:  AIC=174.06
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day + 
##     X8day + X9day + X10day + X11day + X12day + X13day + X14day + 
##     X15day + X16day + X17day + X18day + X19day + X20day + X21day + 
##     X22day + X24day + X25day + X27day + X28day + X29day + X30day + 
##     X31day
## 
##          Df Deviance    AIC
## - X20day  1   114.06 172.06
## - X28day  1   114.07 172.07
## - X2day   1   114.09 172.09
## - X9day   1   114.09 172.09
## - X27day  1   114.09 172.09
## - X16day  1   114.12 172.12
## - X17day  1   114.15 172.15
## - X21day  1   114.17 172.17
## - X25day  1   114.20 172.20
## - X12day  1   114.22 172.22
## - X3day   1   114.38 172.38
## - X19day  1   114.53 172.53
## - X6day   1   114.66 172.66
## - X11day  1   114.81 172.81
## - X14day  1   114.94 172.94
## - X8day   1   115.13 173.13
## - X22day  1   115.23 173.23
## - X30day  1   115.82 173.82
## - X29day  1   115.82 173.82
## - X5day   1   115.84 173.84
## - X18day  1   115.97 173.97
## - X15day  1   115.99 173.99
## <none>        114.06 174.06
## - X31day  1   116.11 174.11
## - X24day  1   116.29 174.29
## - X13day  1   116.36 174.36
## - X7day   1   116.71 174.71
## - X4day   1   119.60 177.60
## - X10day  1   119.62 177.62
## - X1day   1   119.75 177.75
## 
## Step:  AIC=172.06
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day + 
##     X8day + X9day + X10day + X11day + X12day + X13day + X14day + 
##     X15day + X16day + X17day + X18day + X19day + X21day + X22day + 
##     X24day + X25day + X27day + X28day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X28day  1   114.07 170.07
## - X2day   1   114.09 170.09
## - X9day   1   114.09 170.09
## - X27day  1   114.09 170.09
## - X16day  1   114.12 170.12
## - X17day  1   114.15 170.15
## - X21day  1   114.17 170.17
## - X25day  1   114.20 170.20
## - X12day  1   114.22 170.22
## - X3day   1   114.39 170.39
## - X19day  1   114.65 170.65
## - X6day   1   114.67 170.67
## - X11day  1   114.84 170.84
## - X14day  1   114.97 170.97
## - X8day   1   115.14 171.14
## - X22day  1   115.27 171.27
## - X30day  1   115.84 171.84
## - X5day   1   115.85 171.85
## - X29day  1   115.85 171.85
## <none>        114.06 172.06
## - X15day  1   116.07 172.07
## - X31day  1   116.12 172.12
## - X18day  1   116.12 172.12
## - X24day  1   116.31 172.31
## - X13day  1   116.37 172.37
## - X7day   1   116.72 172.72
## - X4day   1   119.67 175.67
## - X1day   1   119.82 175.82
## - X10day  1   119.98 175.98
## 
## Step:  AIC=170.07
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day + 
##     X8day + X9day + X10day + X11day + X12day + X13day + X14day + 
##     X15day + X16day + X17day + X18day + X19day + X21day + X22day + 
##     X24day + X25day + X27day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X2day   1   114.10 168.10
## - X9day   1   114.11 168.11
## - X27day  1   114.11 168.11
## - X16day  1   114.13 168.13
## - X17day  1   114.18 168.18
## - X21day  1   114.21 168.21
## - X12day  1   114.25 168.25
## - X25day  1   114.25 168.25
## - X3day   1   114.42 168.42
## - X19day  1   114.65 168.65
## - X6day   1   114.67 168.67
## - X11day  1   114.92 168.92
## - X14day  1   115.04 169.04
## - X8day   1   115.15 169.15
## - X22day  1   115.27 169.27
## - X5day   1   115.95 169.95
## <none>        114.07 170.07
## - X15day  1   116.09 170.09
## - X18day  1   116.12 170.12
## - X29day  1   116.15 170.15
## - X31day  1   116.17 170.17
## - X30day  1   116.21 170.21
## - X24day  1   116.35 170.35
## - X13day  1   116.41 170.41
## - X7day   1   116.75 170.75
## - X4day   1   119.67 173.67
## - X1day   1   119.88 173.88
## - X10day  1   120.12 174.12
## 
## Step:  AIC=168.1
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X9day + X10day + X11day + X12day + X13day + X14day + X15day + 
##     X16day + X17day + X18day + X19day + X21day + X22day + X24day + 
##     X25day + X27day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X9day   1   114.13 166.13
## - X27day  1   114.14 166.14
## - X16day  1   114.15 166.15
## - X17day  1   114.21 166.21
## - X21day  1   114.22 166.22
## - X25day  1   114.28 166.28
## - X12day  1   114.33 166.33
## - X6day   1   114.68 166.68
## - X19day  1   114.70 166.70
## - X3day   1   114.78 166.78
## - X11day  1   115.00 167.00
## - X14day  1   115.05 167.05
## - X8day   1   115.20 167.20
## - X22day  1   115.29 167.29
## - X5day   1   115.97 167.97
## <none>        114.10 168.10
## - X29day  1   116.19 168.19
## - X15day  1   116.21 168.21
## - X31day  1   116.26 168.26
## - X24day  1   116.35 168.35
## - X18day  1   116.37 168.37
## - X30day  1   116.48 168.48
## - X13day  1   116.55 168.55
## - X7day   1   116.75 168.75
## - X10day  1   120.14 172.13
## - X4day   1   120.32 172.32
## - X1day   1   122.03 174.03
## 
## Step:  AIC=166.13
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X10day + X11day + X12day + X13day + X14day + X15day + X16day + 
##     X17day + X18day + X19day + X21day + X22day + X24day + X25day + 
##     X27day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X16day  1   114.17 164.17
## - X27day  1   114.17 164.17
## - X17day  1   114.25 164.25
## - X21day  1   114.25 164.25
## - X25day  1   114.31 164.31
## - X12day  1   114.38 164.38
## - X6day   1   114.69 164.69
## - X19day  1   114.71 164.71
## - X3day   1   114.84 164.84
## - X11day  1   115.05 165.05
## - X14day  1   115.11 165.11
## - X8day   1   115.24 165.24
## - X22day  1   115.32 165.32
## <none>        114.13 166.13
## - X5day   1   116.19 166.19
## - X29day  1   116.20 166.21
## - X15day  1   116.28 166.28
## - X31day  1   116.31 166.31
## - X24day  1   116.38 166.38
## - X18day  1   116.38 166.38
## - X30day  1   116.55 166.54
## - X13day  1   116.57 166.57
## - X7day   1   116.75 166.75
## - X4day   1   120.40 170.40
## - X10day  1   120.41 170.41
## - X1day   1   122.37 172.37
## 
## Step:  AIC=164.17
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X10day + X11day + X12day + X13day + X14day + X15day + X17day + 
##     X18day + X19day + X21day + X22day + X24day + X25day + X27day + 
##     X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X27day  1   114.21 162.21
## - X17day  1   114.27 162.27
## - X21day  1   114.31 162.31
## - X25day  1   114.37 162.37
## - X12day  1   114.42 162.42
## - X19day  1   114.71 162.71
## - X6day   1   114.71 162.71
## - X3day   1   114.87 162.87
## - X11day  1   115.05 163.05
## - X14day  1   115.20 163.20
## - X22day  1   115.32 163.32
## - X8day   1   115.45 163.45
## <none>        114.17 164.17
## - X5day   1   116.19 164.19
## - X29day  1   116.33 164.32
## - X15day  1   116.33 164.33
## - X18day  1   116.43 164.43
## - X31day  1   116.46 164.46
## - X24day  1   116.52 164.52
## - X13day  1   116.58 164.58
## - X30day  1   116.67 164.67
## - X7day   1   116.77 164.77
## - X10day  1   120.42 168.42
## - X4day   1   120.50 168.50
## - X1day   1   122.44 170.44
## 
## Step:  AIC=162.21
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X10day + X11day + X12day + X13day + X14day + X15day + X17day + 
##     X18day + X19day + X21day + X22day + X24day + X25day + X29day + 
##     X30day + X31day
## 
##          Df Deviance    AIC
## - X17day  1   114.31 160.31
## - X25day  1   114.37 160.37
## - X21day  1   114.43 160.43
## - X12day  1   114.46 160.46
## - X6day   1   114.77 160.77
## - X19day  1   114.87 160.87
## - X3day   1   114.93 160.93
## - X11day  1   115.05 161.05
## - X14day  1   115.34 161.34
## - X22day  1   115.34 161.34
## - X8day   1   115.67 161.67
## - X5day   1   116.20 162.20
## <none>        114.21 162.21
## - X15day  1   116.33 162.33
## - X18day  1   116.48 162.48
## - X31day  1   116.48 162.48
## - X29day  1   116.50 162.50
## - X24day  1   116.63 162.63
## - X7day   1   116.81 162.81
## - X30day  1   116.94 162.94
## - X13day  1   117.03 163.03
## - X4day   1   120.55 166.54
## - X10day  1   120.65 166.65
## - X1day   1   122.53 168.53
## 
## Step:  AIC=160.31
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X10day + X11day + X12day + X13day + X14day + X15day + X18day + 
##     X19day + X21day + X22day + X24day + X25day + X29day + X30day + 
##     X31day
## 
##          Df Deviance    AIC
## - X25day  1   114.45 158.45
## - X21day  1   114.48 158.48
## - X12day  1   114.63 158.63
## - X6day   1   114.82 158.82
## - X19day  1   114.92 158.92
## - X3day   1   114.96 158.96
## - X11day  1   115.25 159.25
## - X22day  1   115.41 159.41
## - X14day  1   115.66 159.66
## - X8day   1   115.77 159.77
## - X5day   1   116.20 160.20
## <none>        114.31 160.31
## - X18day  1   116.48 160.48
## - X31day  1   116.64 160.64
## - X24day  1   116.67 160.67
## - X15day  1   116.73 160.73
## - X29day  1   116.86 160.86
## - X7day   1   116.96 160.96
## - X30day  1   117.06 161.06
## - X13day  1   117.12 161.12
## - X4day   1   120.56 164.56
## - X10day  1   121.11 165.11
## - X1day   1   122.67 166.67
## 
## Step:  AIC=158.45
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X10day + X11day + X12day + X13day + X14day + X15day + X18day + 
##     X19day + X21day + X22day + X24day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X21day  1   114.58 156.58
## - X12day  1   114.86 156.86
## - X6day   1   114.95 156.95
## - X3day   1   115.07 157.07
## - X19day  1   115.15 157.15
## - X22day  1   115.47 157.47
## - X11day  1   115.48 157.48
## - X14day  1   115.80 157.79
## - X8day   1   115.90 157.90
## <none>        114.45 158.45
## - X5day   1   116.48 158.48
## - X18day  1   116.64 158.64
## - X29day  1   116.86 158.86
## - X24day  1   116.90 158.90
## - X31day  1   116.92 158.92
## - X15day  1   116.93 158.93
## - X7day   1   117.02 159.02
## - X30day  1   117.11 159.11
## - X13day  1   117.44 159.44
## - X4day   1   120.67 162.67
## - X10day  1   121.13 163.13
## - X1day   1   123.01 165.01
## 
## Step:  AIC=156.58
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day + 
##     X10day + X11day + X12day + X13day + X14day + X15day + X18day + 
##     X19day + X22day + X24day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X6day   1   115.02 155.02
## - X12day  1   115.07 155.07
## - X3day   1   115.13 155.13
## - X19day  1   115.25 155.25
## - X14day  1   115.94 155.94
## - X8day   1   116.09 156.09
## - X11day  1   116.27 156.27
## <none>        114.58 156.58
## - X22day  1   116.64 156.65
## - X5day   1   116.70 156.70
## - X31day  1   117.05 157.05
## - X30day  1   117.13 157.13
## - X15day  1   117.13 157.13
## - X18day  1   117.19 157.19
## - X24day  1   117.22 157.22
## - X7day   1   117.31 157.31
## - X13day  1   117.68 157.68
## - X29day  1   117.92 157.92
## - X4day   1   120.86 160.86
## - X10day  1   122.30 162.30
## - X1day   1   123.01 163.01
## 
## Step:  AIC=155.02
## is_sp ~ X1day + X3day + X4day + X5day + X7day + X8day + X10day + 
##     X11day + X12day + X13day + X14day + X15day + X18day + X19day + 
##     X22day + X24day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X3day   1   115.43 153.43
## - X19day  1   115.56 153.56
## - X12day  1   115.73 153.73
## - X14day  1   116.47 154.47
## - X8day   1   116.51 154.51
## - X11day  1   116.69 154.69
## <none>        115.02 155.02
## - X31day  1   117.32 155.32
## - X18day  1   117.37 155.37
## - X30day  1   117.49 155.49
## - X24day  1   117.52 155.51
## - X7day   1   117.54 155.54
## - X15day  1   117.60 155.60
## - X22day  1   117.71 155.71
## - X13day  1   117.96 155.96
## - X29day  1   118.08 156.08
## - X5day   1   118.17 156.17
## - X4day   1   121.06 159.06
## - X10day  1   122.93 160.93
## - X1day   1   123.25 161.25
## 
## Step:  AIC=153.43
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X11day + 
##     X12day + X13day + X14day + X15day + X18day + X19day + X22day + 
##     X24day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X19day  1   116.02 152.01
## - X12day  1   116.07 152.07
## - X11day  1   116.91 152.91
## - X14day  1   116.94 152.94
## - X8day   1   117.18 153.18
## <none>        115.43 153.43
## - X18day  1   117.59 153.59
## - X15day  1   117.73 153.74
## - X24day  1   117.74 153.74
## - X31day  1   117.79 153.79
## - X7day   1   117.80 153.80
## - X30day  1   117.99 153.99
## - X13day  1   118.05 154.05
## - X5day   1   118.27 154.27
## - X22day  1   118.28 154.28
## - X29day  1   118.30 154.29
## - X10day  1   122.94 158.94
## - X4day   1   123.68 159.68
## - X1day   1   124.50 160.50
## 
## Step:  AIC=152.02
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X11day + 
##     X12day + X13day + X14day + X15day + X18day + X22day + X24day + 
##     X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X12day  1   116.80 150.80
## - X11day  1   117.41 151.41
## - X14day  1   117.77 151.76
## - X18day  1   117.77 151.77
## - X8day   1   117.85 151.85
## <none>        116.02 152.01
## - X24day  1   118.14 152.13
## - X15day  1   118.22 152.22
## - X22day  1   118.28 152.28
## - X30day  1   118.33 152.32
## - X7day   1   118.55 152.55
## - X31day  1   118.79 152.79
## - X5day   1   118.90 152.90
## - X29day  1   118.91 152.91
## - X13day  1   119.11 153.12
## - X4day   1   124.25 158.25
## - X10day  1   124.52 158.52
## - X1day   1   125.22 159.22
## 
## Step:  AIC=150.8
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X11day + 
##     X13day + X14day + X15day + X18day + X22day + X24day + X29day + 
##     X30day + X31day
## 
##          Df Deviance    AIC
## - X11day  1   117.63 149.63
## - X18day  1   118.53 150.53
## - X24day  1   118.73 150.73
## - X8day   1   118.74 150.74
## - X14day  1   118.79 150.79
## <none>        116.80 150.80
## - X22day  1   118.81 150.81
## - X15day  1   118.88 150.88
## - X30day  1   118.99 150.99
## - X13day  1   119.19 151.19
## - X7day   1   119.31 151.31
## - X31day  1   119.39 151.40
## - X5day   1   119.63 151.63
## - X29day  1   121.21 153.21
## - X4day   1   124.37 156.37
## - X1day   1   125.70 157.70
## - X10day  1   127.57 159.57
## 
## Step:  AIC=149.63
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day + 
##     X14day + X15day + X18day + X22day + X24day + X29day + X30day + 
##     X31day
## 
##          Df Deviance    AIC
## - X15day  1   119.28 149.28
## - X24day  1   119.31 149.31
## - X22day  1   119.36 149.36
## - X8day   1   119.37 149.37
## <none>        117.63 149.63
## - X14day  1   119.63 149.63
## - X30day  1   119.67 149.67
## - X18day  1   119.71 149.71
## - X31day  1   120.08 150.08
## - X7day   1   120.28 150.28
## - X5day   1   120.83 150.83
## - X13day  1   121.75 151.75
## - X29day  1   122.39 152.39
## - X1day   1   126.29 156.29
## - X4day   1   127.14 157.15
## - X10day  1   127.64 157.64
## 
## Step:  AIC=149.28
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day + 
##     X14day + X18day + X22day + X24day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X18day  1   120.30 148.30
## - X24day  1   120.66 148.66
## - X8day   1   120.84 148.84
## - X30day  1   121.01 149.01
## - X22day  1   121.06 149.06
## - X31day  1   121.23 149.23
## <none>        119.28 149.28
## - X14day  1   122.22 150.22
## - X13day  1   122.39 150.39
## - X5day   1   122.49 150.49
## - X7day   1   122.76 150.76
## - X29day  1   123.87 151.87
## - X1day   1   127.25 155.25
## - X4day   1   128.08 156.08
## - X10day  1   129.83 157.83
## 
## Step:  AIC=148.3
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day + 
##     X14day + X22day + X24day + X29day + X30day + X31day
## 
##          Df Deviance    AIC
## - X30day  1   121.86 147.85
## - X8day   1   121.86 147.86
## - X31day  1   122.00 148.00
## <none>        120.30 148.30
## - X24day  1   122.42 148.42
## - X14day  1   122.52 148.52
## - X22day  1   123.03 149.03
## - X13day  1   123.18 149.18
## - X7day   1   123.27 149.27
## - X5day   1   123.50 149.50
## - X29day  1   124.96 150.96
## - X1day   1   128.52 154.52
## - X4day   1   128.64 154.64
## - X10day  1   129.85 155.85
## 
## Step:  AIC=147.86
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day + 
##     X14day + X22day + X24day + X29day + X31day
## 
##          Df Deviance    AIC
## - X8day   1   123.47 147.47
## - X14day  1   123.59 147.59
## <none>        121.86 147.85
## - X24day  1   124.11 148.11
## - X22day  1   124.24 148.24
## - X13day  1   124.29 148.29
## - X5day   1   125.14 149.14
## - X7day   1   125.45 149.45
## - X31day  1   126.11 150.11
## - X29day  1   129.25 153.25
## - X4day   1   129.93 153.93
## - X1day   1   130.61 154.61
## - X10day  1   132.07 156.07
## 
## Step:  AIC=147.47
## is_sp ~ X1day + X4day + X5day + X7day + X10day + X13day + X14day + 
##     X22day + X24day + X29day + X31day
## 
##          Df Deviance    AIC
## - X14day  1   125.30 147.30
## <none>        123.47 147.47
## - X24day  1   125.88 147.88
## - X22day  1   126.33 148.33
## - X5day   1   126.43 148.43
## - X31day  1   127.43 149.43
## - X13day  1   127.66 149.66
## - X7day   1   130.74 152.74
## - X1day   1   130.91 152.91
## - X4day   1   131.28 153.28
## - X10day  1   132.11 154.11
## - X29day  1   132.28 154.28
## 
## Step:  AIC=147.3
## is_sp ~ X1day + X4day + X5day + X7day + X10day + X13day + X22day + 
##     X24day + X29day + X31day
## 
##          Df Deviance    AIC
## - X24day  1   126.73 146.73
## <none>        125.30 147.30
## - X5day   1   127.41 147.41
## - X13day  1   128.18 148.18
## - X22day  1   128.21 148.21
## - X31day  1   129.21 149.21
## - X7day   1   131.12 151.12
## - X1day   1   131.85 151.85
## - X4day   1   132.76 152.76
## - X29day  1   133.76 153.76
## - X10day  1   135.00 155.00
## 
## Step:  AIC=146.73
## is_sp ~ X1day + X4day + X5day + X7day + X10day + X13day + X22day + 
##     X29day + X31day
## 
##          Df Deviance    AIC
## <none>        126.73 146.73
## - X5day   1   128.74 146.74
## - X13day  1   130.04 148.04
## - X31day  1   130.92 148.92
## - X7day   1   132.04 150.04
## - X22day  1   132.97 150.97
## - X1day   1   133.86 151.86
## - X4day   1   134.08 152.08
## - X29day  1   135.86 153.86
## - X10day  1   136.13 154.13
# Print coefficients and fit statistics of the model returned by step()
summary(fit.logit)
## 
## Call:
## glm(formula = is_sp ~ X1day + X4day + X5day + X7day + X10day + 
##     X13day + X22day + X29day + X31day, family = binomial, data = fp.dau1.cast[, 
##     -1])
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.95538  -0.45175  -0.23178  -0.06122   2.69461  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.6036     0.4269  -8.441  < 2e-16 ***
## X1day1        1.5334     0.5720   2.681  0.00735 ** 
## X4day1        1.7753     0.6424   2.764  0.00572 ** 
## X5day1       -1.0353     0.7622  -1.358  0.17437    
## X7day1        1.7002     0.7109   2.392  0.01678 *  
## X10day1      -2.6753     0.9418  -2.841  0.00450 ** 
## X13day1       1.3726     0.7547   1.819  0.06893 .  
## X22day1       1.6233     0.6382   2.543  0.01098 *  
## X29day1       2.0012     0.6480   3.088  0.00201 ** 
## X31day1       1.7310     0.8143   2.126  0.03352 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 281.20  on 251  degrees of freedom
## Residual deviance: 126.73  on 242  degrees of freedom
## AIC: 146.73
## 
## Number of Fisher Scoring iterations: 6
# Predict with the fitted model
# Probability of moving to a smartphone (SP); rounded to 2 digits for display
prob.raw <- fitted(fit.logit)
fp.dau1.cast$prob <- round(prob.raw, 2)
# Predict the move from the unrounded probability: thresholding the rounded
# value turns anything in (0.5, 0.505) into 0.50 and wrongly predicts 0.
fp.dau1.cast$pred <- ifelse(prob.raw > 0.5, 1, 0)
head(fp.dau1.cast)
##   user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 1  471341     1     1     1     1     0     0     0     0     0      0
## 2  503874     1     0     0     0     0     0     0     0     0      0
## 3 1073544     0     0     0     0     0     0     0     0     0      1
## 4 1073864     0     0     0     0     0     0     0     0     0      0
## 5 1163733     1     1     0     0     0     0     0     0     0      0
## 6 1454629     0     0     0     0     0     0     0     0     0      0
##   X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 1      0      0      0      0      0      0      0      0      0      0
## 2      0      0      0      0      0      0      0      0      0      0
## 3      0      0      0      0      0      0      0      0      0      0
## 4      1      0      0      1      0      0      0      0      0      0
## 5      0      0      0      0      0      0      1      1      0      0
## 6      0      0      1      0      0      0      0      0      0      0
##   X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 1      0      0      0      0      0      0      0      0      0      0
## 2      0      0      0      0      0      0      0      0      0      0
## 3      0      0      1      1      1      0      0      0      0      0
## 4      0      0      0      0      0      0      0      0      0      0
## 5      1      1      1      1      1      1      1      1      0      0
## 6      0      0      0      0      0      0      0      0      0      0
##   X31day is_sp prob pred
## 1      0     1 0.43    0
## 2      0     0 0.11    0
## 3      0     0 0.00    0
## 4      0     0 0.03    0
## 5      0     1 0.39    0
## 6      0     0 0.10    0
# Prediction vs. actual: cross-tabulate the observed label (is_sp, rows)
# against the predicted label (pred, columns).
with(fp.dau1.cast, table(is_sp, pred))
##      pred
## is_sp   0   1
##     0 180  10
##     1  20  42

[모델검토]

# Infer user groups from the prediction results.
# Group 1: rows with is_sp == 1 and pred == 1 (observed and predicted agree
# on "moved to SP"); show the six rows with the highest probability.
# NOTE(review): the printed result contains repeated user_id values (e.g. rows
# 137/138 and 44/45), so fp.dau1.cast seems to carry duplicate rows that were
# introduced before this point — confirm upstream.
fp.dau1.cast1 <- fp.dau1.cast[with(fp.dau1.cast, is_sp == 1 & pred == 1), ]
head(fp.dau1.cast1[order(fp.dau1.cast1[["prob"]], decreasing = TRUE), ], n = 6L)
##      user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 137 24791702     1     1     0     1     0     1     1     1     1      1
## 138 24791702     1     1     0     1     0     1     1     1     1      1
## 22   5526146     1     1     1     1     1     1     1     1     1      1
## 44   9567562     1     1     1     1     1     1     1     1     1      1
## 45   9567562     1     1     1     1     1     1     1     1     1      1
## 86  16557842     1     1     1     1     1     1     1     1     1      1
##     X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 137      1      1      1      1      1      1      1      1      0      1
## 138      1      1      1      1      1      1      1      1      0      1
## 22       1      1      1      1      1      1      1      1      1      1
## 44       1      1      1      1      1      1      1      1      0      1
## 45       1      1      1      1      1      1      1      1      0      1
## 86       1      1      1      1      1      1      1      1      1      1
##     X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 137      1      1      1      1      1      1      1      1      1      1
## 138      1      1      1      1      1      1      1      1      1      1
## 22       1      1      1      1      1      1      1      1      1      1
## 44       1      1      1      1      1      1      1      1      1      1
## 45       1      1      1      1      1      1      1      1      1      1
## 86       1      1      1      1      1      1      1      1      1      1
##     X31day is_sp prob pred
## 137      1     1 1.00    1
## 138      1     1 1.00    1
## 22       1     1 0.99    1
## 44       1     1 0.99    1
## 45       1     1 0.99    1
## 86       1     1 0.99    1
# Group 2: rows with is_sp == 0 but pred == 1 (predicted to move to SP, yet
# not observed on SP); show the six rows with the highest probability.
# NOTE(review): presumably these are the candidates for a failed ID transfer
# in the analysis story — confirm against the chapter's hypothesis.
fp.dau1.cast2 <- fp.dau1.cast[with(fp.dau1.cast, is_sp == 0 & pred == 1), ]
head(fp.dau1.cast2[order(fp.dau1.cast2[["prob"]], decreasing = TRUE), ], n = 6L)
##      user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 109 19432099     1     1     1     1     0     1     1     1     1      1
## 195 41590801     0     0     0     0     0     0     0     0     0      0
## 204 43451947     1     1     1     1     1     0     1     1     1      1
## 198 42276142     1     1     1     1     1     1     0     1     1      1
## 28   6147878     1     0     0     1     1     1     1     1     1      1
## 210 46285446     0     0     0     0     1     1     1     1     1      0
##     X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 109      1      1      1      1      0      1      1      0      1      1
## 195      0      0      0      0      0      0      0      0      0      0
## 204      0      0      0      0      0      0      0      0      1      0
## 198      0      1      1      0      1      1      1      1      1      1
## 28       1      1      1      1      1      1      1      1      1      1
## 210      0      0      1      1      0      1      0      0      0      1
##     X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 109      1      1      1      0      0      0      0      0      0      0
## 195      1      1      0      0      0      0      0      0      1      0
## 204      0      1      0      0      1      0      0      1      1      0
## 198      1      1      1      1      1      1      1      1      1      0
## 28       1      1      1      1      1      1      0      0      0      0
## 210      1      0      1      1      1      1      1      0      1      0
##     X31day is_sp prob pred
## 109      0     0 0.85    1
## 195      1     0 0.85    1
## 204      0     0 0.79    1
## 198      0     0 0.73    1
## 28       0     0 0.67    1
## 210      0     0 0.61    1
# Group 3: rows with is_sp == 0 and pred == 0 (neither observed nor predicted
# on SP); sorted ascending, so the six lowest probabilities are shown.
fp.dau1.cast3 <- fp.dau1.cast[with(fp.dau1.cast, is_sp == 0 & pred == 0), ]
head(fp.dau1.cast3[order(fp.dau1.cast3[["prob"]]), ], n = 6L)
##      user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 3    1073544     0     0     0     0     0     0     0     0     0      1
## 11   2541741     0     0     0     0     0     0     0     0     0      1
## 150 27249550     0     0     0     1     1     1     0     0     0      1
## 243 60725457     0     0     0     0     0     0     0     0     0      1
## 71  13967453     0     0     0     0     1     0     0     0     0      0
## 88  16601600     0     0     0     0     1     0     0     0     0      0
##     X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 3        0      0      0      0      0      0      0      0      0      0
## 11       0      0      0      0      0      0      0      0      0      0
## 150      0      1      0      0      0      0      0      0      0      0
## 243      0      0      0      0      0      0      0      0      0      0
## 71       0      0      0      0      0      0      0      0      0      0
## 88       0      0      0      0      0      0      0      0      0      0
##     X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 3        0      0      1      1      1      0      0      0      0      0
## 11       0      0      0      0      0      0      0      0      0      0
## 150      0      0      0      0      0      0      0      0      0      0
## 243      0      0      0      0      0      0      0      0      0      0
## 71       0      0      0      0      0      0      0      0      0      0
## 88       0      0      0      0      0      0      0      0      0      0
##     X31day is_sp prob pred
## 3        0     0 0.00    0
## 11       0     0 0.00    0
## 150      0     0 0.00    0
## 243      0     0 0.00    0
## 71       0     0 0.01    0
## 88       0     0 0.01    0

[결론] 모델 성립: 이용 횟수로 자연 탈퇴와 ID 이전 성공을 구별한 모델의 결과가 유효하다. 따라서 감소한 피쳐폰 유저 중에는 자연 탈퇴 유저가 많다고 볼 수 있으며, 이에 따라 스마트폰 신규 가입자를 늘리기 위한 광고가 Action으로 채택될 수 있다. 그렇다면 실무에 적용할 수 있는 케이스는 무엇일까?

태그:

카테고리:

업데이트:

댓글남기기