3
N <- 100
a <- rnorm(1)
b <- rnorm(1)
x <- rnorm(N)
y <- a*x + b + rnorm(N)
plot(y ~ x)
abline(lm(y ~ x), col = "red")
abline(h = 0)
abline(v = 0)
x <- x - mean(x)
y <- y - mean(y)
abline(lm(y ~ x), col = "blue")
abline(h = 0)
abline(v = 0)
legend("topleft", c("Before centering", "After centering"), lty = 1, col = c("red", "blue"))
remove(list = ls())
12
N <- 100
x <- rnorm(N)
y <- rnorm(N)
x.bar <- mean(x)
y.bar <- mean(y)
beta.0 <- sum(y.bar * sum(x^2) - x.bar * sum(x*y)) / sum((x - x.bar) ^ 2)
beta.1 <- sum((x - x.bar) * (y - y.bar)) / sum((x - x.bar) ^ 2)
RSS <- sum((y - beta.0 - beta.1*x) ^ 2)
RSE <- sqrt(RSS / (N - 1 - 1))
B.0 <- sum(x^2) / N / sum((x - x.bar)^2)
B.1 <- 1 / sum((x - x.bar) ^ 2)
se.0 <- RSE * sqrt(B.0)
se.1 <- RSE * sqrt(B.1)
t.0 <- beta.0 / se.0
t.1 <- beta.1 / se.1
p.0 <- 2 * (1 - pt(abs(t.0), N - 2))
p.1 <- 2 * (1 - pt(abs(t.1), N - 2))
beta.0
## [1] 0.09453751
se.0
## [1] 0.1032435
t.0
## [1] 0.9156748
p.0
## [1] 0.3620852
beta.1
## [1] -0.06989407
se.1
## [1] 0.1094299
t.1
## [1] -0.6387111
p.1
## [1] 0.5245009
lm(y ~ x)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.09454 -0.06989
summary(lm(y ~ x))
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2361 -0.7286 -0.1229 0.6611 2.6811
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.09454 0.10324 0.916 0.362
## x -0.06989 0.10943 -0.639 0.525
##
## Residual standard error: 1.03 on 98 degrees of freedom
## Multiple R-squared: 0.004146, Adjusted R-squared: -0.006016
## F-statistic: 0.408 on 1 and 98 DF, p-value: 0.5245
13
N <- 100
r <- 1000
T <- NULL
for (i in 1:r) {
x <- rnorm(N)
y <- rnorm(N)
x.bar <- mean(x)
y.bar <- mean(y)
fit <- lm(y ~ x)
beta <- fit$coefficients
RSS <- sum((y - fit$fitted.values) ^ 2)
RSE <- sqrt(RSS / (N - 1 - 1))
B.1 <- 1 / sum((x - x.bar) ^ 2)
se.1 <- RSE * sqrt(B.1)
T <- c(T, beta[2] / se.1)
}
hist(T, breaks = sqrt(r), probability = TRUE, xlab = "Value of t", ylab = "Probability Density",
main = "Histogram of t values and theoretical values (red)")
curve(dt(x, N - 2), -3, 3, type = "l", col = "red", add = TRUE)
N <- 100
r <- 1000
T <- NULL
for (i in 1:r) {
x <- rnorm(N)
y <- 0.1 * x + rnorm(N)
x.bar <- mean(x)
y.bar <- mean(y)
fit <- lm(y ~ x)
beta <- fit$coefficients
RSS <- sum((y - fit$fitted.values) ^ 2)
RSE <- sqrt(RSS / (N - 1 - 1))
B.1 <- 1 / sum((x - x.bar) ^ 2)
se.1 <- RSE * sqrt(B.1)
T <- c(T, beta[2] / se.1)
}
hist(T, breaks = sqrt(r), probability = TRUE, xlab = "Value of t", ylab = "Probability Density",
main = "Histogram of t values and theoretical values (red) (executed after substitution)")
curve(dt(x, N - 2), -3, 3, type = "l", col = "red", add = TRUE)
15(d)
R2 <- function(x, y) {
y.hat <- lm(y ~ x)$fitted.values
y.bar <- mean(y)
RSS <- sum((y - y.hat) ^ 2)
TSS <- sum((y - y.bar) ^ 2)
return(1 - RSS / TSS)
}
N <- 100
m <- 2
x <- matrix(rnorm(m * N), ncol = m)
y <- rnorm(N)
R2(x, y)
## [1] 0.02966563
N <- 100
m <- 1
x <- matrix(rnorm(m * N), ncol = m)
y <- rnorm(N)
R2(x, y)
## [1] 0.005950415
cor(x,y)^2
## [,1]
## [1,] 0.005950415
16
library(MASS)
X <- as.matrix(Boston)
p <- ncol(X)
T <- NULL
for (j in 1:p)
T <- c(T, 1/(1-R2(X[,-j],X[,j])))
T
## [1] 1.831537 2.352186 3.992503 1.095223 4.586920 2.260374 3.100843 4.396007
## [9] 7.808198 9.205542 1.993016 1.381463 3.581585 3.855684
18
N <- 100
p <- 1
X <- matrix(rnorm(N * p), ncol = p)
X <- cbind(rep(1, N), X)
beta <- rnorm(p + 1)
epsilon <- rnorm(N)
y <- X %*% beta + epsilon
U <- solve(t(X) %*% X)
beta.hat <- U %*% t(X) %*% y
RSS <- sum((y - X %*% beta.hat) ^ 2)
RSE <- sqrt(RSS / (N - p - 1))
alpha <- 0.05
f <- function(x) {
x <- cbind(1, x)
range <- qt(df = N - p - 1, 1 - alpha / 2) * RSE * sqrt(x %*% U %*% t(x))
return(list(lower = x %*% beta.hat - range, upper = x %*% beta.hat + range))
}
g <- function(x) {
x <- cbind(1, x)
range <- qt(df = N - p - 1, 1 - alpha / 2) * RSE * sqrt(1 + x %*% U %*% t(x))
return(list(lower = x %*% beta.hat - range, upper = x %*% beta.hat + range))
}
x.seq <- seq(-10, 10, 0.1)
lower.seq <- NULL
for (x in x.seq)
lower.seq <- c(lower.seq, f(x)$lower)
upper.seq <- NULL
for (x in x.seq)
upper.seq <- c(upper.seq, f(x)$upper)
x.lim <- c(min(x.seq), max(x.seq))
y.lim <- c(min(lower.seq), max(upper.seq))
plot(x.seq, lower.seq, col = "blue", xlim = x.lim, ylim = y.lim,
xlab = "x", ylab = "y", type = "l")
par(new = TRUE)
plot(x.seq, upper.seq, col = "red", xlim = x.lim, ylim = y.lim,
xlab = "", ylab = "", type = "l", axes = FALSE)
par(new = TRUE)
lower.seq <- NULL
for (x in x.seq)
lower.seq <- c(lower.seq, g(x)$lower)
upper.seq <- NULL
for (x in x.seq)
upper.seq <- c(upper.seq, g(x)$upper)
x.lim <- c(min(x.seq), max(x.seq))
y.lim <- c(min(lower.seq), max(upper.seq))
plot(x.seq, lower.seq, col = "blue", xlim = x.lim, ylim = y.lim,
xlab = "", ylab = "", type = "l", lty = 4, axes = FALSE)
par(new = TRUE)
plot(x.seq, upper.seq, col = "red", xlim = x.lim, ylim = y.lim,
xlab = "", ylab = "", type = "l", lty = 4, axes = FALSE)
abline(beta.hat[1], beta.hat[2])