# 习题1 数据集:mpg{ggplot2}

要求:

1.1 报告displ, cyl, cty,hwy的相关系数矩阵,并对相关系数矩阵进行可视化呈现。

提示:cor(), corrplot::corrplot()

# Correlation matrix (cor()'s default method) of displ, cyl, cty and hwy,
# rounded to three decimals for reporting.
round(cor(select(mpg, displ, cyl, cty, hwy)), digits = 3)
       displ    cyl    cty    hwy
displ  1.000  0.930 -0.799 -0.766
cyl    0.930  1.000 -0.806 -0.762
cty   -0.799 -0.806  1.000  0.956
hwy   -0.766 -0.762  0.956  1.000
# Visualise the rounded correlation matrix; each cell also shows its
# coefficient in white text with three decimals.
corrplot(
  round(cor(select(mpg, displ, cyl, cty, hwy)), digits = 3),
  col = c(4, 5),
  title = "Coefficient of Correlation Matrix",
  mar = c(2, 2, 2, 2),
  tl.col = 1,
  addCoef.col = "white",
  number.digits = 3
)

1.2 绘制displ, cyl, cty,hwy的矩阵散点图。

# Scatterplot matrix of the same four variables.
pairs(select(mpg, displ, cyl, cty, hwy))

# 习题2 数据文件: mtcars

要求:

2.1.建立mpg和wt的一元线性回归模型,报告估计结果

# Simple linear regression of mpg on wt; summary() prints the coefficient
# table and fit statistics. The data stays piped in as `.` so the printed
# Call line reads `data = .`.
mtcars %>%
  lm(formula = mpg ~ wt, data = .) %>%
  summary()

Call:
lm(formula = mpg ~ wt, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.5432 -2.3647 -0.1252  1.4096  6.8727 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
wt           -5.3445     0.5591  -9.559 1.29e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.046 on 30 degrees of freedom
Multiple R-squared:  0.7528,    Adjusted R-squared:  0.7446 
F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10

2.2.绘制mpg和hp的散点图,在散点图上添加估计的回归方程的表达式。

# Fit mpg on hp, the predictor actually plotted below, so the annotated
# equation agrees with the line geom_smooth() draws. (The previous version
# took the coefficients from mpg ~ wt but labelled them with "hp".)
coef <- mtcars %>% 
  lm(mpg ~ hp, .) %>% 
  coef() %>% 
  round(3)
coef
(Intercept)          hp 
     30.099      -0.068 
# Scatter of mpg against hp with the fitted line and its equation as text.
mtcars %>% 
  ggplot(aes(hp, mpg)) +
  geom_point() +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_smooth(method = lm, se = FALSE) +
  # coef[1] is the intercept and coef[2] the hp slope; the slope carries its
  # own minus sign, so no explicit operator is pasted between them.
  annotate("text", 200, 30,
           label = paste("mpg_hat = ",
                         coef[1], coef[2], "hp"))

2.3.根据vs将样本分为两组,对两个组别分别建立mpg和wt的一元线性回归模型,报告估计结果。

library(nlme)

# Fit mpg ~ wt separately for each level of vs; in an lmList formula the
# variable after `|` defines the groups.
model_vs <- mtcars %>%
  lmList(mpg ~ wt | vs, data = .)

# One row per vs level holding intercept and wt slope, rounded to 3 decimals.
coef <- round(coef(model_vs), digits = 3)
coef
  (Intercept)     wt
0      29.531 -3.501
1      41.298 -6.411
# R-squared of each group's fit, taken from the lmList summary and rounded.
rsq <- round(summary(model_vs)$r.squared, digits = 3)
rsq
[1] 0.672 0.726
# Scatter of mpg against wt coloured by vs, with one fitted line per group and
# each group's estimated equation and R-squared written on the panel.
mtcars %>% 
  ggplot(aes(wt, mpg, col = factor(vs))) +
  geom_point() +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_smooth(method = lm, se = FALSE) +
  scale_colour_discrete(name = "Engine",
                        labels = c("V-shaped", "Straight")) +
  # Row 1 of coef and rsq[1] belong to the first group (vs = 0).
  annotate("text", 3, 12,
           label = paste("mpg_hat = ",
                         coef[1, 1],
                         coef[1, 2],
                         "wt, Rsq =",
                         rsq[1]),
           col = 2) +
  # Row 2 of coef and rsq[2] belong to the second group (vs = 1).
  annotate("text", 3.5, 30,
           label = paste("mpg_hat = ",
                         coef[2, 1],
                         coef[2, 2],
                         "wt, Rsq =",
                         rsq[2]),
           col = "cyan4")

2.4.根据vs将样本分为两组,对两个组别分别建立mpg和hp的一元线性回归模型,报告估计结果。

library(nlme)

# Fit mpg ~ hp separately for each level of vs; in an lmList formula the
# variable after `|` defines the groups.
model_vs <- mtcars %>%
  lmList(mpg ~ hp | vs, data = .)

# One row per vs level holding intercept and hp slope, rounded to 3 decimals.
coef <- round(coef(model_vs), digits = 3)
coef
  (Intercept)     hp
0      24.496 -0.042
1      39.001 -0.158
# R-squared of each group's fit, taken from the lmList summary and rounded.
rsq <- round(summary(model_vs)$r.squared, digits = 3)
rsq
[1] 0.421 0.515
# Scatter of mpg against hp coloured by vs, with one fitted line per group and
# each group's estimated equation and R-squared written on the panel.
mtcars %>% 
  ggplot(aes(hp, mpg, col = factor(vs))) +
  geom_point() +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_smooth(method = lm, se = FALSE) +
  scale_colour_discrete(name = "Engine",
                        labels = c("V-shaped", "Straight")) +
  # Row 1 of coef and rsq[1] belong to the first group (vs = 0).
  annotate("text", 150, 12,
           label = paste("mpg_hat = ",
                         coef[1, 1],
                         coef[1, 2],
                         "hp, Rsq =",
                         rsq[1]),
           col = 2) +
  # Row 2 of coef and rsq[2] belong to the second group (vs = 1).
  annotate("text", 200, 27,
           label = paste("mpg_hat = ",
                         coef[2, 1],
                         coef[2, 2],
                         "hp, Rsq =",
                         rsq[2]),
           col = "cyan4")

# 统计学(第8版) P241 11.2

答题要求:完成11.2的(1)和(2),附上R代码及输出结果。

# Hand-entered data for exercise 11.2: ten paired observations of the on-time
# figure and the complaint count, one pair per position in the two vectors.
data <- data.frame(
  flight_on_time = c(
    81.8, 76.6, 76.6, 75.7, 73.8, 72.2, 71.2, 70.8, 91.4, 68.5
  ),
  number_of_complaints = c(
    21, 58, 85, 68, 74, 93, 72, 122, 18, 125
  )
)

# Part (1): scatter plot of complaints against the on-time figure.
ggplot(data, aes(x = flight_on_time, y = number_of_complaints)) +
  geom_point()

# Part (2): regress the complaint count on the on-time figure. The data stays
# piped in as `.` so the printed Call line reads `data = .`.
model <- data %>%
  lm(formula = number_of_complaints ~ flight_on_time, data = .)

# Coefficient table, residual summary and fit statistics.
summary(model)

Call:
lm(formula = number_of_complaints ~ flight_on_time, data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-24.678 -11.412  -2.078  16.322  24.615 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    430.1892    72.1548   5.962 0.000337 ***
flight_on_time  -4.7006     0.9479  -4.959 0.001108 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 18.89 on 8 degrees of freedom
Multiple R-squared:  0.7545,    Adjusted R-squared:  0.7239 
F-statistic: 24.59 on 1 and 8 DF,  p-value: 0.001108