习题1 数据集:mpg{ggplot2}
要求:
1.1 报告displ, cyl, cty,hwy的相关系数矩阵,并对相关系数矩阵进行可视化呈现。
提示:cor(), corrplot::corrplot()
# Correlation matrix (cor() defaults) of displ, cyl, cty and hwy from the mpg
# data set, rounded to three decimals.
# Assumes dplyr (select, %>%) and ggplot2 (mpg) are attached in a setup chunk
# that is not visible here.
mpg %>%
  select(displ, cyl, cty, hwy) %>%
  cor() %>%
  round(3)
displ cyl cty hwy
displ 1.000 0.930 -0.799 -0.766
cyl 0.930 1.000 -0.806 -0.762
cty -0.799 -0.806 1.000 0.956
hwy -0.766 -0.762 0.956 1.000
# Draw the rounded correlation matrix with corrplot, printing each
# coefficient (three digits, white text) inside its cell.
# Assumes corrplot, dplyr and ggplot2 are attached in a setup chunk that is
# not visible here.
mpg %>%
  select(displ, cyl, cty, hwy) %>%
  cor() %>%
  round(3) %>%
  corrplot(
    col = c(4, 5),
    title = "Coefficient of Correlation Matrix",
    mar = c(2, 2, 2, 2), # leave room so the title is not clipped
    tl.col = 1,
    addCoef.col = "white",
    number.digits = 3
  )
1.2 绘制displ, cyl, cty,hwy的矩阵散点图。
# Scatter-plot matrix of displ, cyl, cty and hwy.
# Assumes dplyr (select, %>%) and ggplot2 (mpg) are already attached.
mpg %>%
  select(displ, cyl, cty, hwy) %>%
  pairs()
#
习题2 数据文件: mtcars
要求:
2.1.建立mpg和wt的一元线性回归模型,报告估计结果
# Simple linear regression of mpg on wt using mtcars, followed by the full
# model summary. The `.` placeholder puts the piped data frame in lm()'s
# data argument, which is why the printed call shows `data = .`.
mtcars %>%
  lm(mpg ~ wt, .) %>%
  summary()
Call:
lm(formula = mpg ~ wt, data = .)
Residuals:
Min 1Q Median 3Q Max
-4.5432 -2.3647 -0.1252 1.4096 6.8727
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
wt -5.3445 0.5591 -9.559 1.29e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.046 on 30 degrees of freedom
Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
2.2.绘制mpg和hp的散点图,在散点图上添加估计的回归方程的表达式。
# Rounded coefficients of the regression of mpg on hp.
# The predictor must be hp (not wt): exercise 2.2 plots mpg against hp and
# the plot annotation that uses this vector labels the slope with "hp".
# The vector is named `coef`; calls to coef() still resolve to the function
# because R skips non-function objects when looking up a function name.
coef <- mtcars %>%
  lm(mpg ~ hp, .) %>%
  coef() %>%
  round(3)
coef
#> (Intercept)          hp
#>      30.099      -0.068
# Scatter plot of mpg against hp with a fitted straight line (no confidence
# band) and a text label at (200, 30) built from the rounded coefficient
# vector `coef` created in the preceding chunk.
mtcars %>%
  ggplot(aes(hp, mpg)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  annotate(
    "text", 200, 30,
    label = paste("mpg_hat = ", coef[1], coef[2], "hp")
  )
2.3.根据vs将样本分为两组,对两个组别分别建立mpg和wt的一元线性回归模型,报告估计结果。
library(nlme)

# Fit mpg ~ wt separately within each level of vs (one lm per group).
model_vs <- mtcars %>%
  lmList(mpg ~ wt | vs, .)

# Per-group coefficients, one row per vs level, rounded to three decimals.
coef <- model_vs %>%
  coef() %>%
  round(3)
coef
(Intercept) wt
0 29.531 -3.501
1 41.298 -6.411
# R-squared of each per-group fit in model_vs, rounded to three decimals.
rsq <- summary(model_vs)$r.squared %>% round(3)
rsq
[1] 0.672 0.726
# mpg against wt, coloured by vs, with one fitted line per group.
# Each group gets a text label built from its row of `coef` (intercept,
# slope) and its entry of `rsq`, both created in the preceding chunks.
mtcars %>%
  ggplot(aes(wt, mpg, col = factor(vs))) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  scale_colour_discrete(
    name = "Engine",
    labels = c("V-shaped", "Straight")
  ) +
  # Label for the first group (row 1 of coef, vs = 0).
  annotate(
    "text", 3, 12,
    label = paste(
      "mpg_hat = ",
      coef[1, 1],
      coef[1, 2],
      "wt, Rsq =",
      rsq[1]
    ),
    col = 2
  ) +
  # Label for the second group (row 2 of coef, vs = 1).
  annotate(
    "text", 3.5, 30,
    label = paste(
      "mpg_hat = ",
      coef[2, 1],
      coef[2, 2],
      "wt, Rsq =",
      rsq[2]
    ),
    col = "cyan4"
  )
2.4.根据vs将样本分为两组,对两个组别分别建立mpg和hp的一元线性回归模型,报告估计结果。
library(nlme)

# Fit mpg ~ hp separately within each level of vs (one lm per group).
model_vs <- mtcars %>%
  lmList(mpg ~ hp | vs, .)

# Per-group coefficients, one row per vs level, rounded to three decimals.
coef <- model_vs %>%
  coef() %>%
  round(3)
coef
(Intercept) hp
0 24.496 -0.042
1 39.001 -0.158
# R-squared of each per-group fit in model_vs, rounded to three decimals.
rsq <- summary(model_vs)$r.squared %>% round(3)
rsq
[1] 0.421 0.515
# mpg against hp, coloured by vs, with one fitted line per group.
# Each group gets a text label built from its row of `coef` (intercept,
# slope) and its entry of `rsq`, both created in the preceding chunks.
mtcars %>%
  ggplot(aes(hp, mpg, col = factor(vs))) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  scale_colour_discrete(
    name = "Engine",
    labels = c("V-shaped", "Straight")
  ) +
  # Label for the first group (row 1 of coef, vs = 0).
  annotate(
    "text", 150, 12,
    label = paste(
      "mpg_hat = ",
      coef[1, 1],
      coef[1, 2],
      "hp, Rsq =",
      rsq[1]
    ),
    col = 2
  ) +
  # Label for the second group (row 2 of coef, vs = 1).
  annotate(
    "text", 200, 27,
    label = paste(
      "mpg_hat = ",
      coef[2, 1],
      coef[2, 2],
      "hp, Rsq =",
      rsq[2]
    ),
    col = "cyan4"
  )
#
统计学(第8版) P241 11.2
答题要求:完成11.2的(1)和(2),附上R代码及输出结果。
# Ten paired observations for textbook exercise 11.2: one flight_on_time
# value and one number_of_complaints value per row.
data <- data.frame(
  flight_on_time = c(
    81.8, 76.6, 76.6, 75.7, 73.8,
    72.2, 71.2, 70.8, 91.4, 68.5
  ),
  number_of_complaints = c(
    21, 58, 85, 68, 74,
    93, 72, 122, 18, 125
  )
)
# Scatter plot of number_of_complaints against flight_on_time.
# Assumes ggplot2 and the %>% pipe are already attached.
data %>%
  ggplot(aes(flight_on_time, number_of_complaints)) +
  geom_point()
# Simple linear regression of number_of_complaints on flight_on_time, then
# the full model summary. The `.` placeholder puts the piped data frame in
# lm()'s data argument, which is why the printed call shows `data = .`.
model <- data %>%
  lm(number_of_complaints ~ flight_on_time, .)
summary(model)
Call:
lm(formula = number_of_complaints ~ flight_on_time, data = .)
Residuals:
Min 1Q Median 3Q Max
-24.678 -11.412 -2.078 16.322 24.615
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 430.1892 72.1548 5.962 0.000337 ***
flight_on_time -4.7006 0.9479 -4.959 0.001108 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 18.89 on 8 degrees of freedom
Multiple R-squared: 0.7545, Adjusted R-squared: 0.7239
F-statistic: 24.59 on 1 and 8 DF, p-value: 0.001108