# Exemplul - zapada in Seatle
#------------------------------
data_snow <- read.csv ("dataIn/snow.csv" )
snow <- data_snow$ x
table (snow)
hist (snow,
probability = TRUE )
# modelul de mixtura > discreta si continua
mixture_model <- function (p, theta, x){
alpha <- theta[1 ]
beta <- theta[2 ]
p* ifelse (x == 0 , 1 , 0 ) + (1 - p)* ifelse (x> 0 , dweibull (x, shape = alpha, scale = beta), 0 )
}
curve (mixture_model (x, p = 0.25 , theta = c (1 , 2 )))
# MLE
n <- length (snow)
n0 <- sum (snow == 0 )
phat <- n0/ n
snow_non_zero <- snow[snow != 0 ]
# logaritmul fctiei de verosimilitate
loglik_weibull <- function (theta, data = snow_non_zero){
alpha <- theta[1 ]
beta <- theta[2 ]
sum (dweibull (data, shape = alpha, scale = beta, log = TRUE ))
}
nloglik_weibull <- function (theta, data = snow_non_zero){
- loglik_weibull (theta, data = data)
}
theta0 <- c (1 , 1 )
theta_hat <- nlm (nloglik_weibull, theta0, data = snow_non_zero)$ estimate
# histograma
hist (snow,
probability = TRUE )
curve (mixture_model (x,
p = phat,
theta = theta_hat),
col = "royalblue" ,
lwd = 2 ,
add = TRUE )
# Functia de repartitie
plot (sort (snow), (1 : n)/ n,
type = "s" ,
bty = "n" )
points (sort (snow), (1 : n)/ n,
pch = 16 )
?ecdf
plot (ecdf (snow))
t <- seq (0 , max (snow), length.out = 300 )
F_T <- ifelse (t == 0 , phat, phat + (1 - phat)* pweibull (t, shape = theta_hat[1 ], scale = theta_hat[2 ]))
plot (sort (snow), (1 : n)/ n,
type = "s" ,
bty = "n" )
points (sort (snow), (1 : n)/ n,
pch = 16 )
lines (c (0 , t),
c (0 , F_T),
lwd = 2 ,
col = "red" )