注意需要先加载 tidyverse 包,才可运行本节代码。
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.3 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
mpg
第1题 条形图
1.任选一个定性变量,绘制条形图。
# 7.1 ggplot2 plotting: mpg
# Data: mpg {ggplot2}
# While building these charts, polish them as far as possible: axes, titles,
# theme, colours and so on.
ggplot(mpg, aes(x = fct_infreq(class))) +   # fct_infreq() orders bars by frequency
  geom_bar(fill = 5) +                      # fill = 5 sets the bar colour
  geom_text(
    aes(label = after_stat(count)),         # print the computed count on each bar
    stat = "count",
    size = 4,                               # label text size
    vjust = -0.5                            # lift the label just above the bar
  ) +
  guides(x = guide_axis(angle = 45)) +      # rotate the x tick labels
  labs(
    x = "Class",
    title = "Barplot of Class",
    subtitle = "38 Popular Models of Cars from 1999 to 2008"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_text(size = rel(1.2)),  # enlarge y tick labels
    axis.text.x = element_text(size = rel(1.2))   # enlarge x tick labels
  )
# Bar chart of drive-train type, bars ordered by descending frequency.
mpg %>%
  ggplot(aes(x = fct_infreq(drv))) +          # fct_infreq() orders bars by frequency
  geom_bar(fill = 6) +                        # fill = 6 sets the bar colour
  geom_text(
    stat = "count",                           # use the count statistic
    aes(label = after_stat(count)),           # print the computed count on each bar
    vjust = -0.5,                             # lift the label just above the bar
    size = 3                                  # label text size
  ) +
  labs(
    title = "Barplot of the Type of Drive Train",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Type of Drive Train"
  ) +
  # Labels are keyed by drv code, so each bar keeps the right name regardless of
  # the order fct_infreq() produces.
  scale_x_discrete(labels = c(
    "f" = "Front-wheel",
    "4" = "Four-wheel",
    "r" = "Rear-wheel"
  )) +
  theme_bw()
# Fuel type codes:
# r: Regular Gasoline 85~87
# p: Premium Gasoline 91~95
# e: E85, 85% ethanol and 15% gasoline
# d: Diesel
# c: meaning not stated here (presumably compressed natural gas — TODO confirm)
mpg %>%
  ggplot(aes(x = fct_infreq(fl))) +           # fct_infreq() orders bars by frequency
  geom_bar(fill = 6) +                        # fill = 6 sets the bar colour
  geom_text(
    stat = "count",                           # use the count statistic
    aes(label = after_stat(count)),           # print the computed count on each bar
    vjust = -0.5,                             # lift the label just above the bar
    size = 3                                  # label text size
  ) +
  labs(
    title = "Barplot of the Fuel Type",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Fuel Type"                           # the x axis shows fl, not the drive train
  ) +
  # Labels are keyed by fl code, so they do not depend on the frequency order.
  scale_x_discrete(labels = c(
    "r" = "Regular",
    "p" = "Premium",
    "e" = "E85",
    "d" = "Diesel",
    "c" = "c"
  )) +
  theme_bw()
第2题 分组条形图
2.任选两个定性变量,绘制分组条形图。
簇状条形图
# Clustered barplot: number of cars per class, one bar per drive type.
mpg %>%
  ggplot(aes(x = class, fill = drv)) +
  geom_bar(position = "dodge") +              # side-by-side bars within each class
  labs(
    title = "Grouped Barplot by Class and Drive",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Class"                               # the x axis shows class, not the drive train
  ) +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),           # print the computed count on each bar
    # geom_text has no width of its own; 0.9 matches the default bar width so
    # every label is centred over its bar.
    position = position_dodge(width = 0.9),
    vjust = -0.5                              # lift the label just above the bar
  ) +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_fill_discrete(labels = c(
    "Four-wheel",
    "Front-wheel",
    "Rear-Wheel"
  )) +
  theme(legend.position = "bottom")
# Horizontal clustered barplot: number of cars per manufacturer, one bar per
# drive type.
mpg %>%
  ggplot(aes(x = manufacturer, fill = drv)) +
  geom_bar(position = "dodge") +              # side-by-side bars within each manufacturer
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),           # print the computed count on each bar
    # 0.9 matches the default bar width so each label lines up with its bar.
    position = position_dodge(width = 0.9),
    hjust = -0.5                              # after coord_flip() this pushes the label past the bar end
  ) +
  scale_y_continuous(limits = c(0, 30)) +     # leave room for the labels
  coord_flip() +                              # swap the axes to get horizontal bars
  labs(
    title = "Grouped Barplot by Manufacturer and Drive",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Manufacturer"                        # x is mapped to manufacturer
  ) +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_fill_discrete(labels = c(
    "Four-wheel",
    "Front-wheel",
    "Rear-Wheel"
  )) +
  theme(legend.position = "bottom")           # legend below the panel
堆叠条形图
# Stacked barplot: number of cars per class, stacked by drive type.
mpg %>%
  ggplot(aes(x = class, fill = drv)) +
  geom_bar(position = "stack") +              # stack the drive types within each class
  labs(
    title = "Stacked Barplot by Class and Drive",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Class"                               # the x axis shows class, not the drive train
  ) +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),           # print the computed count in each segment
    position = position_stack(vjust = 0.5)    # centre each label inside its segment
  ) +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_fill_discrete(labels = c(
    "Four-wheel",
    "Front-wheel",
    "Rear-Wheel"
  )) +
  theme(legend.position = "bottom")           # legend below the panel
# Filled barplot: share of each drive type within a class; segment labels show
# the underlying counts.
mpg %>%
  ggplot(aes(x = class, fill = drv)) +
  geom_bar(position = "fill") +               # scale every bar to a total height of 1
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),           # label each segment with its count
    position = position_fill(vjust = 0.5)     # centre each label inside its segment
  ) +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_fill_discrete(labels = c(
    "Four-wheel",
    "Front-wheel",
    "Rear-Wheel"
  )) +
  labs(
    title = "Proportional Stacked Barplot by Class and Drive",
    x = "Class",                              # the x axis shows class, not the drive train
    y = "Proportion"                          # position = "fill" plots shares, not counts
  ) +
  theme(legend.position = "bottom")           # legend below the panel
百分比堆叠条形图
# Percentage Stacked Barplot
# Share of each drive type within every class.
mpg_percent <- mpg %>%
  count(class, drv) %>%
  group_by(class) %>%
  mutate(percent = n / sum(n)) %>%            # share within the class
  ungroup()

ggplot(mpg_percent, aes(x = class, y = percent, fill = drv)) +
  # geom_col() plots the precomputed values directly.
  geom_col(position = "fill") +
  geom_text(
    aes(label = scales::percent(percent, accuracy = 1)),
    position = position_fill(vjust = 0.5),    # centre each label inside its segment
    size = 4
  ) +
  scale_fill_discrete(
    name = "Drive Type",
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Percentage Stacked Barplot by Class and Drive",
    x = "Class",                              # the x axis shows class, not the drive train
    y = "Percentage"
  ) +
  theme(legend.position = "bottom")
第3题 直方图
3.任选一个定量变量,绘制直方图。
# Histogram of city mileage with the bin count printed above each bar.
ggplot(mpg, aes(x = cty)) +
  geom_histogram(binwidth = 2, colour = 1, fill = 5) +
  stat_bin(
    geom = "text",                            # draw the bin statistic as text
    binwidth = 2,                             # same bins as the histogram layer
    aes(label = ifelse(
      after_stat(count) == 0,                 # leave empty bins unlabelled
      "",
      after_stat(count)
    )),
    vjust = -0.5                              # lift the label just above the bar
  ) +
  scale_x_continuous(breaks = seq(9, 35, by = 2)) +      # x axis ticks
  scale_y_continuous(
    breaks = seq(0, 60, by = 10),             # y axis ticks
    limits = c(0, 60)                         # y axis range
  ) +
  labs(
    x = "City Miles per Gallon",
    title = "Histogram of City Miles per Gallon"
  )
# Histogram of highway mileage with the bin count printed above each bar.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(binwidth = 2, colour = 1, fill = 6) +
  stat_bin(
    geom = "text",                            # draw the bin statistic as text
    binwidth = 2,                             # same bins as the histogram layer
    aes(label = ifelse(
      after_stat(count) == 0,                 # leave empty bins unlabelled
      "",
      after_stat(count)
    )),
    vjust = -0.5
  ) +
  scale_x_continuous(breaks = seq(11, 45, by = 4)) +
  scale_y_continuous(
    breaks = seq(0, 60, by = 10),
    limits = c(0, 60)
  ) +
  labs(
    x = "Highway Miles per Gallon",
    title = "Histogram of Highway Miles per Gallon"
  )
# City mileage histogram again, this time with a shorter x axis title.
ggplot(mpg, aes(x = cty)) +
  geom_histogram(binwidth = 2, colour = 1, fill = 5) +
  stat_bin(
    geom = "text",                            # draw the bin statistic as text
    binwidth = 2,                             # same bins as the histogram layer
    aes(label = ifelse(
      after_stat(count) == 0,                 # leave empty bins unlabelled
      "",
      after_stat(count)
    )),
    vjust = -0.5
  ) +
  scale_x_continuous(breaks = seq(9, 35, by = 2)) +
  scale_y_continuous(
    breaks = seq(0, 60, by = 10),
    limits = c(0, 60)
  ) +
  labs(
    x = "City MPG",
    title = "Histogram of City Miles per Gallon"
  )
# Overlay the city and highway mileage histograms in one panel.
mpg %>%
  ggplot() +
  geom_histogram(
    aes(x = cty, fill = "City"),              # constant fill value creates the legend entry
    alpha = 0.5,
    colour = "black",
    binwidth = 2
  ) +
  geom_histogram(
    aes(x = hwy, fill = "Highway"),
    alpha = 0.5,
    colour = "black",
    binwidth = 2
  ) +
  scale_x_continuous(breaks = seq(5, 50, by = 2)) +
  scale_y_continuous(breaks = seq(0, 50, by = 10), limits = c(0, 50)) +
  scale_fill_manual(
    labels = c("City", "Highway"),
    values = c("cyan", "darkorchid")
  ) +
  labs(
    fill = "",                                # no legend title
    x = "Miles per Gallon",
    title = "Histogram of Miles per Gallon"
  ) +
  theme(legend.position = "bottom")
第4题 分组直方图
4.任选一个定量变量,两个定性变量,绘制分组直方图
# City mileage histograms, one panel per class, bars filled by drive type.
ggplot(mpg, aes(x = cty, fill = drv)) +
  geom_histogram(alpha = 0.5, colour = "black", binwidth = 2) +
  facet_wrap(vars(class), ncol = 2) +         # one panel per class
  scale_x_continuous(
    labels = seq(9, 35, by = 2),
    breaks = seq(9, 35, by = 2)
  ) +
  scale_fill_discrete(
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    ),
    name = "Drive Engine"
  ) +
  labs(
    fill = "Drive Engine",
    x = "City MPG",
    subtitle = "Grouped by the Type of Drive Engine and Class",
    title = "Histogram of City Miles per Gallon"
  ) +
  theme(legend.position = "bottom")
# City mileage histograms, one panel per drive type, bars filled by model year.
mpg %>%
  ggplot(aes(cty, fill = factor(year))) +     # factor() turns the numeric year into a discrete fill
  # alpha is a fixed setting, so it belongs outside aes(); inside aes() it is
  # treated as a mapped variable and adds a spurious "alpha" legend.
  geom_histogram(binwidth = 2, col = 1, alpha = 0.5) +
  facet_wrap(
    ~ drv,                                    # one panel per drive type
    labeller = labeller(drv = c(
      "4" = "four-wheel",
      "f" = "front-wheel",
      "r" = "rear-wheel"
    )),                                       # readable panel titles
    ncol = 1
  ) +
  theme_bw() +
  scale_fill_manual(values = c("cyan", "darkorchid")) +
  labs(
    title = "Histogram of City Miles per Gallon",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "City Miles per Gallon",
    fill = "Year"
  )
# City mileage histograms in a grid: rows are drive types, columns are
# cylinder counts.
mpg %>%
  ggplot(aes(cty, fill = drv)) +
  geom_histogram(binwidth = 2, col = 1) +
  facet_grid(
    rows = vars(drv),
    cols = vars(cyl)
  ) +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_fill_manual(
    values = c("cyan", "darkorchid", "firebrick1"),
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    )
  ) +
  theme(legend.position = "bottom") +
  labs(title = "Histogram of City Miles per Gallon")   # fixed the "Miels" typo
# Highway mileage histograms split by transmission family. The first four
# characters of `trans` are used as the grouping key ("auto" / "manu").
mpg %>%
  ggplot(aes(hwy, fill = substr(trans, 1, 4))) +
  # Breaks run to 44 because mpg$hwy reaches 44; stopping at 40 silently drops
  # the highest-mileage cars from the plot.
  geom_histogram(breaks = seq(10, 44, 2), color = 1) +
  facet_wrap(
    ~ substr(trans, 1, 4),
    labeller = as_labeller(c(auto = "Automatic", manu = "Manual"))
  ) +
  scale_x_continuous(
    breaks = seq(10, 44, 2),
    labels = seq(10, 44, 2)
  ) +
  # Colours and labels are keyed by the grouping value so they cannot be swapped.
  scale_fill_manual(
    values = c(auto = "cyan", manu = "darkorchid"),
    labels = c(auto = "automatic", manu = "manual")
  ) +
  labs(
    title = "Highway Mileage Distribution by Transmission Type",
    x = "Highway Mileage",
    y = "Frequency",
    fill = "Transmission"
  )
第5题 散点图 + 趋势线
5.任选两个定量变量,绘制散点图,在散点图上添加趋势线。
# Scatter plot of displacement against highway mileage with a linear trend line.
mpg %>%
  ggplot(aes(x = displ, y = hwy)) +
  geom_point(col = "deepskyblue3") +
  # method = "lm" fits a straight line; se = FALSE hides the confidence band.
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Scatter Diagram of Displacement and Highway MPG",
    x = "Displacement",
    y = "Highway MPG"
  ) +
  theme_bw()
`geom_smooth()` using formula = 'y ~ x'
# Scatter plot of city against highway mileage with a linear trend line.
mpg %>%
  ggplot(aes(x = cty, y = hwy)) +
  # The point colour is a fixed setting, so it is set on the layer instead of
  # being passed through aes() with I().
  geom_point(col = "deeppink3") +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE, col = "deepskyblue") +
  labs(
    title = "Scatter Diagram of City MPG and Highway MPG",
    x = "City MPG",
    y = "Highway MPG"
  ) +
  theme_bw()
`geom_smooth()` using formula = 'y ~ x'
第6题 散点图及映射
6.任选两个定量变量,绘制散点图,让散点的大小、颜色分别体现第3个、第4个变量的信息
# Displacement against highway mileage; point size encodes the cylinder count
# and point colour encodes the drive type.
ggplot(
  mpg,
  aes(x = displ, y = hwy, colour = drv, size = cyl)
) +
  geom_point() +
  scale_color_manual(
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    ),
    values = c(
      "cyan",
      "darkorchid",
      "deepskyblue3"
    )
  ) +
  labs(
    y = "Highway Miles per Gallon",
    x = "Displacement",
    subtitle = "Grouped by the Type of Drive Engine and Number of Cylinders",
    title = "Scatter Diagram of Displacement and Highway MPG"
  ) +
  theme_bw()
# City against highway mileage; point size encodes the cylinder count and
# point colour encodes the drive type.
mpg %>%
  ggplot(aes(cty, hwy, size = cyl, col = drv)) +
  # alpha is a fixed setting, so it belongs on the layer; inside aes() it is
  # treated as a mapped variable and adds a spurious "alpha" legend.
  geom_point(alpha = 0.5) +
  labs(
    title = "Scatter Diagram of City MPG and Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine and Number of Cylinders",
    x = "City MPG",
    y = "Highway MPG"
  ) +
  scale_color_manual(
    values = c(
      "cyan",
      "darkorchid",
      "deepskyblue3"
    ),
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    )
  ) +
  theme_bw()
第7题 分组散点图
7.任选两个定量变量,一个定性变量,绘制分组散点图,添加趋势线
# Displacement against highway mileage, coloured by drive type, with one linear
# trend line per drive type.
mpg %>%
  ggplot(aes(displ, hwy, col = drv)) +
  geom_point() +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  # `labels` is spelled out in full instead of relying on partial matching.
  scale_color_manual(
    values = c("cyan4", "darkorchid", "deepskyblue"),
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    )
  ) +
  labs(
    title = "Scatter Diagram of Displacement and Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "Displacement",
    y = "Highway MPG"
  ) +
  # theme_bw() is a complete theme and replaces every earlier theme() setting,
  # so it must come before the legend tweak.
  theme_bw() +
  theme(legend.position = "bottom")
`geom_smooth()` using formula = 'y ~ x'
# Displacement against highway mileage with a linear trend line, one panel per
# drive type.
mpg %>%
  ggplot(aes(displ, hwy, col = drv)) +
  geom_point() +
  # FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(
    ~ drv,                                    # one panel per drive type
    ncol = 1,
    labeller = labeller(drv = c(
      "4" = "four-wheel",
      "f" = "front-wheel",
      "r" = "rear-wheel"
    ))                                        # readable panel titles
  ) +
  scale_color_manual(
    values = c("cyan4", "darkorchid", "deepskyblue"),
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    )
  ) +
  labs(
    title = "Scatter Diagram of Displacement and Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "Displacement",
    y = "Highway MPG"
  ) +
  # theme_bw() is a complete theme and replaces every earlier theme() setting,
  # so it must come before the legend tweak.
  theme_bw() +
  theme(legend.position = "bottom")
`geom_smooth()` using formula = 'y ~ x'
第8题 分组箱线图
8.任选一个定量变量,一个定性变量,绘制分组箱线图
# Boxplots of city mileage per drive type. reorder() sorts the drive types by
# their cty values; the outline colour also follows drv.
ggplot(
  mpg,
  aes(x = reorder(drv, cty), y = cty, colour = drv)
) +
  geom_boxplot() +
  scale_color_manual(
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    ),
    values = c("cyan4", "darkorchid", "deepskyblue")
  ) +
  labs(
    y = "City MPG",
    x = "Drive Engine",
    subtitle = "Grouped by the Type of Drive Engine",
    title = "Boxplot of City MPG"
  ) +
  theme_bw()
# Horizontal boxplots of highway mileage per drive type. reorder() sorts the
# drive types by their hwy values; the outline colour also follows drv.
mpg %>%
  ggplot(aes(reorder(drv, hwy), hwy, col = drv)) +
  geom_boxplot() +
  coord_flip() +                              # swap the x and y axes
  labs(
    title = "Boxplot of Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "Drive Engine",
    y = "Highway MPG"                         # y is mapped to hwy, not cty
  ) +
  scale_color_manual(
    values = c("cyan4", "darkorchid", "deepskyblue"),
    labels = c(
      "Four-wheel",
      "Front-wheel",
      "Rear-Wheel"
    )
  ) +
  # theme_bw() is a complete theme and replaces every earlier theme() setting,
  # so it must come before the legend tweak.
  theme_bw() +
  theme(legend.position = "bottom")
cellphone
library(readxl)
# Share of each phone brand within each sex, read from the cellphone workbook.
cellphone <- read_excel("cellphone.xlsx")

percent <- cellphone %>%
  # Turn the 0/1 `male` indicator into a readable sex label.
  mutate(sex = recode(male,
                      "0" = "female",
                      "1" = "male")) %>%
  group_by(sex, brand) %>%
  tally() %>%
  # tally() leaves the data grouped by sex, so this is the share within a sex.
  mutate(percent = n / sum(n)) %>%
  ungroup()                                   # do not leak the grouping to later code

percent %>%
  ggplot(aes(sex, percent, fill = brand)) +
  # geom_col() plots the precomputed shares directly.
  geom_col(position = "fill") +
  geom_text(
    aes(label = sprintf("%1.1f%%", percent * 100)),
    # Same position as the bars, so the labels stay centred in their segments.
    position = position_fill(vjust = 0.5),
    colour = "white",
    size = 4
  ) +
  theme_grey() +
  scale_fill_brewer(palette = "PRGn") +
  scale_y_continuous(labels = scales::percent)