# Sample Multiple Regression Model Development with R using CPS1985 Data in
# the AER Package
#
# This script demonstrates the multiple regression model with R using the
# CPS1985 data.

# Setup ----
# Install the "Applied Econometrics with R" (AER) and psych packages only when
# they are missing, so re-running the script does not reinstall them each time.
if (!requireNamespace("AER", quietly = TRUE)) {
  install.packages("AER")
}
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}

# AER is attached first and psych second, matching the original attach order.
library(AER)
library(psych)

# Load the CPS1985 data set from AER and open its help page.
data("CPS1985", package = "AER")
?CPS1985
## Wage ----
# Compare the distribution of wage on the raw, square-root and log scales.
# Each skewness value is stored so the plot titles below can reuse it; the
# surrounding parentheses keep the value auto-printing at the console.
boxplot(CPS1985$wage)
(skew_wage <- skew(CPS1985$wage))
boxplot(sqrt(CPS1985$wage))
(skew_sqrt_wage <- skew(sqrt(CPS1985$wage)))
boxplot(log(CPS1985$wage))
(skew_log_wage <- skew(log(CPS1985$wage)))

# Draw the three scales side by side. The titles are built from the computed
# skewness values instead of hard-coded numbers, so they always match the data.
# The 1 x 3 layout stays in effect for later plots until par() is changed.
par(mfrow = c(1, 3))
boxplot(
  CPS1985$wage,
  main = sprintf("skew(wage)=%.3f", skew_wage),
  col = "darkblue"
)
boxplot(
  sqrt(CPS1985$wage),
  main = sprintf("skew(sqrt(wage))=%.3f", skew_sqrt_wage),
  col = "blue"
)
boxplot(
  log(CPS1985$wage),
  main = sprintf("skew(log(wage))=%.3f", skew_log_wage),
  col = "lightblue"
)
## Education ----
# Box plot and skewness of education on the raw scale ...
boxplot(CPS1985[["education"]])
skew(CPS1985[["education"]])
# ... and after squaring it.
boxplot(CPS1985[["education"]]^2)
skew(CPS1985[["education"]]^2)
## Model 1: regress log(wage) on education ----
help("lm")
model1 <- lm(
  formula = log(wage) ~ education,
  data = CPS1985
)
summary(model1)
# Fitted equation:
#   log(wage) = 1.06 + 0.077 * education
# R-squared = 0.145, adjusted R-squared = 0.1431.
# Both the constant and the slope are statistically significant (p < 0.001).
## Experience ----
## The next step is to improve the model by adding a second predictor;
## the candidate considered here is experience.
# Go back to a single-panel plotting layout.
par(mfrow = c(1, 1))
# Box plot and skewness of experience on the raw scale ...
boxplot(CPS1985[["experience"]])
skew(CPS1985[["experience"]])
# ... and on the square-root scale.
boxplot(sqrt(CPS1985[["experience"]]))
skew(sqrt(CPS1985[["experience"]]))
# sqrt(experience) is the form that will be added to model1.
## Model 2: add sqrt(experience) ----
model2 <- lm(
  formula = log(wage) ~ education + sqrt(experience),
  data = CPS1985
)
summary(model2)
# Fitted equation:
#   log(wage) = 0.39 + 0.096 * education + 0.105 * sqrt(experience)
# All coefficients are statistically significant (p < 0.01).
# R-squared = 0.2344, adjusted R-squared = 0.2315.
# R-squared is higher than for model1, but the difference still needs to be
# tested formally.
anova(model1, model2)
# Conclusion: model2 is statistically better than model1 (p < 0.001).
## Model 3: add gender ----
model3 <- lm(
  formula = log(wage) ~ education + sqrt(experience) + gender,
  data = CPS1985
)
summary(model3)
# Fitted equation:
#   log(wage) = 0.470 + 0.097 * education + 0.112 * sqrt(experience)
#               - 0.258 * D_female
# R-squared = 0.296, adjusted R-squared = 0.2906.
# Compare against model2.
anova(model2, model3)
## Model 4: add union membership ----
model4 <- lm(
  formula = log(wage) ~ education + sqrt(experience) + gender + union,
  data = CPS1985
)
summary(model4)
# Fitted equation:
#   log(wage) = 0.457 + 0.0969 * education + 0.105 * sqrt(experience)
#               - 0.232 * D_female + 0.201 * D_union
# R-squared = 0.3138, adjusted R-squared = 0.3086.
# Compare against model3.
anova(model3, model4)
## Model 5: add region ----
model5 <- lm(
  log(wage) ~ education + sqrt(experience) + gender + union + region,
  data = CPS1985
)
summary(model5)
# Fitted equation:
#   log(wage) = 0.432 + 0.0939 * education + 0.103 * sqrt(experience)
#               - 0.235 * D_female + 0.189 * D_union + 0.105 * D_region
# R-squared = 0.3217, adjusted R-squared = 0.3153.
# As with every earlier step, test whether the added variable (region)
# improves on the previous nested model before preferring model5.
anova(model4, model5)




