# Sample Multiple Regression Model Development with R using CPS1985 Data in AER Package

This page demonstrates how to develop a multiple regression model in R, step by step, using the CPS1985 data from the AER package.

```
# Install (only when missing) and load the packages used on this page:
#   AER   - "Applied Econometrics with R"; provides the CPS1985 data set
#   psych - provides skew()
if (!requireNamespace("AER", quietly = TRUE)) {
  install.packages("AER")
}
library(AER)

if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)

# Load the CPS1985 data frame into the workspace and open its help page
data(CPS1985)
?CPS1985

## Wage
# Inspect the distribution of wage on three scales (raw, square root, log)
# and compute the skew of each.
boxplot(CPS1985$wage)
skew(CPS1985$wage)

boxplot(sqrt(CPS1985$wage))
skew(sqrt(CPS1985$wage))

boxplot(log(CPS1985$wage))
skew(log(CPS1985$wage))

# Draw the three boxplots side by side. The titles carry hard-coded skew
# values (presumably copied from the skew() output above).
# par() returns the previous settings, which are restored afterwards so that
# later plots are not squeezed into the 1 x 3 layout.
old_par <- par(mfrow = c(1, 3))
boxplot(CPS1985$wage, main = "skew(wage)=1.688", col = "darkblue")
boxplot(sqrt(CPS1985$wage), main = "skew(sqrt(wage))=0.817", col = "blue")
boxplot(log(CPS1985$wage), main = "skew(log(wage))=0.099", col = "lightblue")
par(old_par)

## Education
# Compare the distribution and skew of education on its raw scale
# and after squaring.
boxplot(CPS1985$education)
skew(CPS1985$education)

boxplot(CPS1985$education^2)
skew(CPS1985$education^2)

## Model 1: simple regression of log(wage) on education
?lm
model1 <- lm(formula = log(wage) ~ education, data = CPS1985)
summary(model1)
# Fitted equation: log(wage) = 1.06 + 0.077 * education
# R-squared = 0.145; adjusted R-squared = 0.1431
# Both the intercept and the slope are statistically significant (p < 0.001)

## Next we improve the model by adding a second predictor: experience
# Use a single-panel plotting layout for the plots below
par(mfrow = c(1, 1))
boxplot(CPS1985$experience)
skew(CPS1985$experience)

boxplot(sqrt(CPS1985$experience))
skew(sqrt(CPS1985$experience))

# Add sqrt(experience) to the predictors of model1
model2 <- lm(log(wage) ~ education + sqrt(experience), data = CPS1985)
summary(model2)

# log(wage) = 0.39 + 0.096education + 0.105sqrt(experience)
# All coefficients are statistically significant (p < 0.01)
# R square = 0.2344
# Adj R square = 0.2315
# This R square is greater than model1's, but we need to test the difference.

# Compare the nested models model1 and model2
anova(model1, model2)
# We can conclude that model2 is statistically better than model1 (p < 0.001)

## Model 3: model2 plus gender
model3 <- lm(
  formula = log(wage) ~ education + sqrt(experience) + gender,
  data = CPS1985
)
summary(model3)

# Fitted equation:
#   log(wage) = 0.470 + 0.097education + 0.112sqrt(experience) - 0.258D_female
# R-squared = 0.296; adjusted R-squared = 0.2906

# Compare the nested models model2 and model3
anova(model2, model3)

## Model 4: model3 plus union
model4 <- lm(
  formula = log(wage) ~ education + sqrt(experience) + gender + union,
  data = CPS1985
)
summary(model4)

# Fitted equation:
#   log(wage) = 0.457 + 0.0969education + 0.105sqrt(exp)
#               - 0.232D_female + 0.201D_union
# R-squared = 0.3138; adjusted R-squared = 0.3086

# Compare the nested models model3 and model4
anova(model3, model4)

## Model 5: model4 plus region
model5 <- lm(
  log(wage) ~ education + sqrt(experience) + gender + union + region,
  data = CPS1985
)
summary(model5)

# log(wage) = 0.432 + 0.0939education + 0.103sqrt(experience) - 0.235D_female
# + 0.189D_Union + 0.105D_region
# R square = 0.3217
# Adj R square = 0.3153

# Compare the nested models model4 and model5, as done for every earlier step
anova(model4, model5)

```