# OLS likelihood function (my way) -- # Note: I am going to write the LF using sigma2=sigma^2 and not sigma. ols.lf1 <- function(theta, y, X) { beta <- theta[-1] sigma2 <- theta[1] if (sigma2 <= 0) return(NA) n <- nrow(X) e <- y - X%*%beta # t() = matrix transpose logl <- ((-n/2)*log(2*pi)) - ((n/2)*log(sigma2)) - ((t(e)%*%e)/(2*sigma2)) return(-logl) # since optim() does minimisation by default. } # A variant: Shift to logs for sigma2 so I can do unconstrained maximisation -- ols.lf1.inlogs <- function(truetheta, y, X) { ols.lf1(c(exp(truetheta[1]), truetheta[-1]), y, X) } # Write it in another (more readable but costly) way -- ols.lf2 <- function(theta, y, X) { beta <- theta[-1] if (theta[1] <= 0) return(NA) sigma <- sqrt(theta[1]) e <- (y - X%*%beta)/sigma logl <- sum(log(dnorm(e)/sigma)) return(-logl) } # Most people find this more readable than ols.lf1(). # Pretty code, by Douglas Bates -- ols.lf3 <- function(theta, y, X) { if (theta[1] <= 0) return(NA) -sum(dnorm(y, mean = X %*% theta[-1], sd = sqrt(theta[1]), log = TRUE)) } # dnorm(..., log=TRUE) is generally superior to log(dnorm()) # NOTE: The above 4 functions are all just different ways to write # the same thing. ols.lf{1,2,3} produce the identical results. # This is the gradient of ols.lf{1,2,3} but not of ols.lf1.inlogs # It is written in the (mathematical) notation of ols.lf1() ols.gradient <- function(theta, y, X) { beta <- theta[-1] sigma2 <- theta[1] e <- y - X%*%beta n <- nrow(X) g <- numeric(length(theta)) g[1] <- (-n/(2*sigma2)) + (t(e)%*%e)/(2*sigma2*sigma2) # d logl / d sigma g[-1] <- (t(X) %*% e)/sigma2 # d logl / d beta return(-g) }