Sample Multiple Regression Model Development with R using CPS1985 Data in AER Package
This page demonstrates how to develop a multiple regression model in R using the CPS1985 data from the AER package.
# Multiple regression on the CPS1985 data: exploration, model1, model2 ----
# Inspects the skewness of wage, education and experience under different
# transformations, then fits log(wage) on education (model1) and on
# education + sqrt(experience) (model2).

# Setup ----
# Install the "Applied Econometrics with R" package only when it is missing,
# so re-running the script does not reinstall it every time.
if (!requireNamespace("AER", quietly = TRUE)) {
  install.packages("AER")
}
library(AER)

data(CPS1985)
?CPS1985

# psych supplies skew(); same install-if-missing guard as above.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)

# Wage: raw, square-root and log scales ----
boxplot(CPS1985$wage)
skew(CPS1985$wage)
boxplot(sqrt(CPS1985$wage))
skew(sqrt(CPS1985$wage))
boxplot(log(CPS1985$wage))
skew(log(CPS1985$wage))

# Three panels side by side. The skewness values in the titles are
# hard-coded; re-check them against the skew() calls above if the data change.
par(mfrow = c(1, 3))
boxplot(CPS1985$wage, main = "skew(wage)=1.688", col = "darkblue")
boxplot(sqrt(CPS1985$wage), main = "skew(sqrt(wage))=0.817", col = "blue")
boxplot(log(CPS1985$wage), main = "skew(log(wage))=0.099", col = "lightblue")

## Education
boxplot(CPS1985$education)
skew(CPS1985$education)
boxplot((CPS1985$education)^2)
skew((CPS1985$education)^2)

## Regress log(wage) on education
?lm
model1 <- lm(log(wage) ~ education, data = CPS1985)
summary(model1)
# log(wage) = 1.06 + 0.077education
# R square = 0.145
# Adj R square = 0.1431
# Both constant and slope are statistically significant (p < 0.001)

## Next we will improve the model by adding a variable.
## In this case that variable is experience.
par(mfrow = c(1, 1)) # back to a single plot per page
boxplot(CPS1985$experience)
skew(CPS1985$experience)
boxplot(sqrt(CPS1985$experience))
skew(sqrt(CPS1985$experience))

# So we add sqrt(experience) to model1.
model2 <- lm(log(wage) ~ education + sqrt(experience), data = CPS1985)
summary(model2)
# log(wage) = 0.39 + 0.096education + 0.105sqrt(experience)
# All coefficients are statistically significant (p < 0.01)
# R square = 0.2344
# Adj R square = 0.2315
# This R square is greater than model1's, but we need to test the difference.
# Nested-model comparisons and larger models (model3 to model5) ----
# Each step adds one predictor to the previous model and compares the two
# fits with anova(). model1, model2 and CPS1985 come from earlier in the
# script.

anova(model1, model2)
# We can conclude that model2 is statistically better than model1 (p < 0.001)

# Add gender.
model3 <- lm(
  log(wage) ~ education + sqrt(experience) + gender,
  data = CPS1985
)
summary(model3)
# log(wage) = 0.470 + 0.097education + 0.112sqrt(experience) - 0.258D_female
# R square = 0.296
# Adj R square = 0.2906
anova(model2, model3)

# Add union membership.
model4 <- lm(
  log(wage) ~ education + sqrt(experience) + gender + union,
  data = CPS1985
)
summary(model4)
# log(wage) = 0.457 + 0.0969education + 0.105sqrt(exp) - 0.232D_female
#             + 0.201D_union
# R square = 0.3138
# Adj R square = 0.3086
anova(model3, model4)

# Add region.
model5 <- lm(
  log(wage) ~ education + sqrt(experience) + gender + union + region,
  data = CPS1985
)
summary(model5)
# log(wage) = 0.432 + 0.0939education + 0.103sqrt(experience) - 0.235D_female
#             + 0.189D_Union + 0.105D_region
# R square = 0.3217
# Adj R square = 0.3153