注意需要先加载 tidyverse 包,才可运行本节代码。

# install.packages("tidyverse")
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

mpg

第1题 条形图

1.任选一个定性变量,绘制条形图。

# 7.1 ggplot2 plotting: mpg
# Data: mpg {ggplot2}
# When completing the figures, polish them as far as possible: axes, titles,
# theme, colours and so on.
#
# Bar chart of vehicle class; fct_infreq() orders the bars by frequency.
ggplot(data = mpg, mapping = aes(x = fct_infreq(class))) +
  geom_bar(fill = 5) +
  # Re-run the count stat as a text layer so each bar shows its frequency
  # just above its top edge.
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    size = 4,
    vjust = -0.5
  ) +
  # Tilt the x tick labels by 45 degrees.
  guides(x = guide_axis(angle = 45)) +
  labs(
    x = "Class",
    title = "Barplot of Class",
    subtitle = "38 Popular Models of Cars from 1999 to 2008"
  ) +
  theme_bw() +
  # Enlarge the tick labels on both axes.
  theme(
    axis.text.y = element_text(size = rel(1.2)),
    axis.text.x = element_text(size = rel(1.2))
  )

# Bar chart of drive-train type; fct_infreq() orders the bars by frequency.
mpg %>%
  ggplot(aes(fct_infreq(drv))) +
  geom_bar(fill = 6) +
  # Count stat drawn as text: prints each bar's frequency above the bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    size = 3
  ) +
  labs(
    title = "Barplot of the Type of Drive Train",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Type of Drive Train"
  ) +
  # Labels are keyed by the raw drv code, so each one stays attached to the
  # right bar whatever order fct_infreq() produces (a positional vector would
  # silently mislabel the bars if the frequencies changed).
  scale_x_discrete(
    labels = c(
      "f" = "Front-wheel",
      "4" = "Four-wheel",
      "r" = "Rear-wheel"
    )
  ) +
  theme_bw()

# Fuel type codes used in mpg$fl:
#   r: regular gasoline, 85~87
#   p: premium gasoline, 91~95
#   e: E85 (85% ethanol and 15% gasoline)
#   d: diesel
#   c: presumably compressed natural gas (CNG) -- TODO confirm

# Bar chart of fuel type; fct_infreq() orders the bars by frequency.
mpg %>%
  ggplot(aes(fct_infreq(fl))) +
  geom_bar(fill = 6) +
  # Count stat drawn as text: prints each bar's frequency above the bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    size = 3
  ) +
  # The x axis shows fuel type, so it is titled accordingly.
  labs(
    title = "Barplot of the Fuel Type",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Fuel Type"
  ) +
  # Labels are keyed by the raw fl code so they cannot drift away from their
  # bars if the frequency order changes.
  scale_x_discrete(
    labels = c(
      "r" = "Regular",
      "p" = "Premium",
      "e" = "E85",
      "d" = "Diesel",
      "c" = "c"
    )
  ) +
  theme_bw()

第2题 分组条形图

  2. 任选两个定性变量,绘制分组条形图

簇状条形图

#  Clustered Barplot

# Clustered bar chart: number of models per class, one bar per drive train.
mpg %>%
  ggplot(aes(class, fill = drv)) +
  geom_bar(position = "dodge") +
  # Text has no width of its own, so the dodge width is given explicitly and
  # set to 0.9 (the default bar width) to centre each count over its bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_dodge(width = 0.9),
    vjust = -0.5
  ) +
  # The x axis shows vehicle class, so it is titled accordingly.
  labs(
    title = "Grouped Barplot by Class and Drive",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Class"
  ) +
  # Legend labels keyed by drv code; `labels` is spelled out in full rather
  # than relying on partial argument matching.
  scale_fill_discrete(
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  theme(legend.position = "bottom")

# Horizontal clustered bar chart: number of models per manufacturer, one bar
# per drive train.
mpg %>%
  ggplot(aes(manufacturer, fill = drv)) +
  geom_bar(position = "dodge") +
  # Dodge the text by 0.9 (the default bar width) so each count lines up with
  # its own bar; after coord_flip() the negative hjust pushes the label past
  # the end of the bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_dodge(width = 0.9),
    hjust = -0.5
  ) +
  # Fixed count range of 0 to 30.
  scale_y_continuous(limits = c(0, 30)) +
  # Swap the axes so the manufacturer names read horizontally.
  coord_flip() +
  # Title and axis title name the variable actually plotted (manufacturer).
  labs(
    title = "Grouped Barplot by Manufacturer and Drive",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Manufacturer"
  ) +
  # Legend labels keyed by drv code; `labels` is spelled out in full.
  scale_fill_discrete(
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  theme(legend.position = "bottom")

堆叠条形图

#  Stacked Barplot
# Stacked bar chart: models per class, segments coloured by drive train.
mpg %>%
  ggplot(aes(class, fill = drv)) +
  geom_bar(position = "stack") +
  # Stack the count labels the same way as the bars; vjust = 0.5 centres each
  # label inside its segment.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5)
  ) +
  # Title and axis title describe what is drawn: a stacked chart over class.
  labs(
    title = "Stacked Barplot by Class and Drive",
    subtitle = "38 Popular Models of Cars from 1999 to 2008",
    x = "Class"
  ) +
  # Legend labels keyed by drv code; `labels` is spelled out in full.
  scale_fill_discrete(
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  theme(legend.position = "bottom")

# Proportional (100%) stacked bar chart: share of each drive train within a
# class, with the raw counts printed inside the segments.
mpg %>%
  ggplot(aes(class, fill = drv)) +
  geom_bar(position = "fill") +
  # position_fill() rescales the labels exactly like the bars; vjust = 0.5
  # centres each count inside its segment.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_fill(vjust = 0.5)
  ) +
  # Legend labels keyed by drv code; `labels` is spelled out in full.
  scale_fill_discrete(
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  # The x axis shows class and the y axis shows proportions, not counts.
  labs(
    title = "Proportional Stacked Barplot by Class and Drive",
    x = "Class",
    y = "Proportion"
  ) +
  theme(legend.position = "bottom")

百分比堆叠条形图

# Percentage stacked barplot
# Share of each drive train (drv) within every class: n is the number of
# models, percent is n divided by the class total.
mpg_percent <- mpg %>%
  group_by(class) %>%
  count(drv) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup()

# Both layers share one y mapping (percent), so the bars and their labels are
# positioned from the same values.
ggplot(mpg_percent, aes(x = class, y = percent, fill = drv)) +
  geom_bar(stat = "identity", position = "fill") +
  # Whole-number percentage centred inside each segment.
  geom_text(
    aes(label = scales::percent(percent, accuracy = 1)),
    position = position_fill(vjust = 0.5),
    size = 4
  ) +
  scale_fill_discrete(
    name = "Drive Type",
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # Title and x title describe what is drawn: percentages over class.
  labs(
    title = "Percentage Stacked Barplot by Class and Drive",
    x = "Class",
    y = "Percentage"
  ) +
  theme(legend.position = "bottom")

第3题 直方图

  3. 任选一个定量变量,绘制直方图。
# Histogram of city mileage in bins of width 2, with the count printed above
# every non-empty bin.
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram(binwidth = 2, fill = 5, colour = 1) +
  # Same binning drawn as a text layer; empty bins get an empty label instead
  # of a "0".
  stat_bin(
    mapping = aes(
      label = ifelse(after_stat(count) == 0, "", after_stat(count))
    ),
    geom = "text",
    binwidth = 2,
    vjust = -0.5
  ) +
  # Ticks every 2 units on x; fixed 0-60 range with ticks every 10 on y.
  scale_x_continuous(breaks = seq(9, 35, 2)) +
  scale_y_continuous(
    breaks = seq(0, 60, 10),
    limits = c(0, 60)
  ) +
  labs(
    x = "City Miles per Gallon",
    title = "Histogram of City Miles per Gallon"
  )

# Histogram of highway mileage in bins of width 2, with the count printed
# above every non-empty bin.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram(binwidth = 2, fill = 6, colour = 1) +
  # Same binning drawn as a text layer; empty bins get an empty label.
  stat_bin(
    mapping = aes(
      label = ifelse(after_stat(count) == 0, "", after_stat(count))
    ),
    geom = "text",
    binwidth = 2,
    vjust = -0.5
  ) +
  # Ticks every 4 units on x; fixed 0-60 range with ticks every 10 on y.
  scale_x_continuous(breaks = seq(11, 45, 4)) +
  scale_y_continuous(
    breaks = seq(0, 60, 10),
    limits = c(0, 60)
  ) +
  labs(
    x = "Highway Miles per Gallon",
    title = "Histogram of Highway Miles per Gallon"
  )

# City-mileage histogram again, this time with the shorter "City MPG" axis
# title.
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram(binwidth = 2, fill = 5, colour = 1) +
  # Count labels above the bars; empty bins are left unlabelled.
  stat_bin(
    mapping = aes(
      label = ifelse(after_stat(count) == 0, "", after_stat(count))
    ),
    geom = "text",
    binwidth = 2,
    vjust = -0.5
  ) +
  scale_x_continuous(breaks = seq(9, 35, 2)) +
  scale_y_continuous(
    breaks = seq(0, 60, 10),
    limits = c(0, 60)
  ) +
  labs(
    x = "City MPG",
    title = "Histogram of City Miles per Gallon"
  )

# Overlay the city and highway mileage histograms on one set of axes.
# Each layer maps `fill` to a constant string so that both layers appear in a
# single legend.
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram(
    mapping = aes(fill = "City"),
    binwidth = 2,
    alpha = 0.5,
    colour = "black"
  ) +
  geom_histogram(
    mapping = aes(x = hwy, fill = "Highway"),
    binwidth = 2,
    alpha = 0.5,
    colour = "black"
  ) +
  scale_x_continuous(breaks = seq(5, 50, 2)) +
  scale_y_continuous(
    breaks = seq(0, 50, 10),
    limits = c(0, 50)
  ) +
  scale_fill_manual(
    labels = c("City", "Highway"),
    values = c("cyan", "darkorchid")
  ) +
  # Empty legend title: the two keys are self-explanatory.
  labs(
    fill = "",
    x = "Miles per Gallon",
    title = "Histogram of Miles per Gallon"
  ) +
  theme(legend.position = "bottom")

第4题 分组直方图

4.任选一个定量变量,两个定性变量,绘制分组直方图

# City-mileage histograms, one panel per vehicle class (two columns of
# panels), with bars coloured by drive train.
ggplot(data = mpg, mapping = aes(x = cty, fill = drv)) +
  geom_histogram(
    binwidth = 2,
    alpha = 0.5,
    colour = "black"
  ) +
  facet_wrap(~ class, ncol = 2) +
  scale_x_continuous(
    labels = seq(9, 35, 2),
    breaks = seq(9, 35, 2)
  ) +
  scale_fill_discrete(
    labels = c("Four-wheel", "Front-wheel", "Rear-Wheel"),
    name = "Drive Engine"
  ) +
  labs(
    fill = "Drive Engine",
    x = "City MPG",
    subtitle = "Grouped by the Type of Drive Engine and Class",
    title = "Histogram of City Miles per Gallon"
  ) +
  theme(legend.position = "bottom")

# City-mileage histograms coloured by model year, one panel per drive train.
mpg %>%
  # factor(year) turns the numeric year into a discrete fill variable.
  ggplot(aes(cty, fill = factor(year))) +
  # alpha is a fixed setting, so it is set on the layer rather than inside
  # aes(); mapping it would add a meaningless "alpha" legend.
  geom_histogram(binwidth = 2, col = 1, alpha = 0.5) +
  # One panel per drive train, stacked in a single column, with readable
  # strip labels.
  facet_wrap(
    ~ drv,
    labeller = labeller(
      drv = c(
        "4" = "four-wheel",
        "f" = "front-wheel",
        "r" = "rear-wheel"
      )
    ),
    ncol = 1
  ) +
  theme_bw() +
  scale_fill_manual(values = c("cyan", "darkorchid")) +
  labs(
    title = "Histogram of City Miles per Gallon",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "City Miles per Gallon",
    fill = "Year"
  )

# City-mileage histograms in a grid: rows are drive trains, columns are
# cylinder counts.
mpg %>%
  ggplot(aes(cty, fill = drv)) +
  geom_histogram(binwidth = 2, col = 1) +
  facet_grid(
    rows = vars(drv),
    cols = vars(cyl)
  ) +
  # Legend labels keyed by drv code; `labels` is spelled out in full rather
  # than relying on partial argument matching.
  scale_fill_manual(
    values = c("cyan", "darkorchid", "firebrick1"),
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  theme(legend.position = "bottom") +
  labs(title = "Histogram of City Miles per Gallon")

# Highway-mileage histograms split by transmission type. The first four
# characters of `trans` ("auto" / "manu") serve as both the fill variable
# and the facet variable.
ggplot(
  data = mpg,
  mapping = aes(x = hwy, fill = substr(trans, 1, 4))
) +
  geom_histogram(colour = 1, breaks = seq(10, 40, 2)) +
  facet_wrap(
    ~ substr(trans, 1, 4),
    labeller = as_labeller(c(auto = "Automatic", manu = "Manual"))
  ) +
  scale_x_continuous(
    labels = seq(10, 40, 2),
    breaks = seq(10, 40, 2)
  ) +
  scale_fill_manual(
    labels = c("automatic", "manual"),
    values = c("cyan", "darkorchid")
  ) +
  labs(
    fill = "Transmission",
    y = "Frequency",
    x = "Highway Mileage",
    title = "Highway Mileage Distribution by Transmission Type"
  )

第5题 散点图 + 趋势线

5.任选两个定量变量,绘制散点图,在散点图上添加趋势线。

# Scatter plot of highway mileage against engine displacement with a linear
# trend line.
mpg %>%
  ggplot(aes(displ, hwy)) +
  geom_point(col = "deepskyblue3") +
  # method = "lm" fits a straight line; se = FALSE hides the confidence band.
  # FALSE is spelled out because `F` is an ordinary variable that can be
  # reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Scatter Diagram of Displacement and Highway MPG",
    x = "Displacement",
    y = "Highway MPG"
  ) +
  theme_bw()
`geom_smooth()` using formula = 'y ~ x'

# Scatter plot of highway mileage against city mileage with a linear trend
# line.
mpg %>%
  ggplot(aes(cty, hwy)) +
  # The point colour is a fixed setting, so it is set on the layer instead of
  # being passed through aes() wrapped in I().
  geom_point(col = "deeppink3") +
  # method = "lm" fits a straight line; se = FALSE hides the confidence band.
  # FALSE is spelled out because `F` is an ordinary variable that can be
  # reassigned.
  geom_smooth(method = "lm", se = FALSE, col = "deepskyblue") +
  labs(
    title = "Scatter Diagram of City MPG and Highway MPG",
    x = "City MPG",
    y = "Highway MPG"
  ) +
  theme_bw()
`geom_smooth()` using formula = 'y ~ x'

第6题 散点图及映射

6.任选两个定量变量,绘制散点图,让散点的大小、颜色分别体现第3个、第4个变量的信息

# Scatter plot of highway mileage against displacement; point size encodes
# the number of cylinders and point colour encodes the drive train.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = drv, size = cyl)
) +
  geom_point() +
  scale_color_manual(
    labels = c("Four-wheel", "Front-wheel", "Rear-Wheel"),
    values = c("cyan", "darkorchid", "deepskyblue3")
  ) +
  labs(
    y = "Highway Miles per Gallon",
    x = "Displacement",
    subtitle = "Grouped by the Type of Drive Engine and Number of Cylinders",
    title = "Scatter Diagram of Displacement and Highway MPG"
  ) +
  theme_bw()

# Scatter plot of highway mileage against city mileage; point size encodes
# the number of cylinders and point colour encodes the drive train.
mpg %>%
  ggplot(aes(cty, hwy, size = cyl, col = drv)) +
  # Transparency is a fixed setting, so it is set on the layer rather than
  # inside aes(); mapping it would add a meaningless "alpha" legend.
  geom_point(alpha = 0.5) +
  labs(
    title = "Scatter Diagram of City MPG and Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine and Number of Cylinders",
    x = "City MPG",
    y = "Highway MPG"
  ) +
  scale_color_manual(
    values = c("cyan", "darkorchid", "deepskyblue3"),
    labels = c("Four-wheel", "Front-wheel", "Rear-Wheel")
  ) +
  theme_bw()

第7题 分组散点图

7.任选两个定量变量,一个定性变量,绘制分组散点图,添加趋势线

# Scatter plot of highway mileage against displacement, coloured by drive
# train, with one linear trend line per drive train.
mpg %>%
  ggplot(aes(displ, hwy, col = drv)) +
  geom_point() +
  # method = "lm" fits a straight line per colour group; se = FALSE hides the
  # confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # Legend labels keyed by drv code; `labels` is spelled out in full.
  scale_color_manual(
    values = c("cyan4", "darkorchid", "deepskyblue"),
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  labs(
    title = "Scatter Diagram of Displacement and Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "Displacement",
    y = "Highway MPG"
  ) +
  # theme_bw() is a complete theme and replaces every earlier theme setting,
  # so it has to come before the legend.position tweak, not after it.
  theme_bw() +
  theme(legend.position = "bottom")
`geom_smooth()` using formula = 'y ~ x'

# Scatter plot of highway mileage against displacement with a linear trend
# line, one panel per drive train.
mpg %>%
  ggplot(aes(displ, hwy, col = drv)) +
  geom_point() +
  # method = "lm" fits a straight line; se = FALSE hides the confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # One panel per drive train in a single column, with readable strip labels.
  facet_wrap(
    ~ drv,
    ncol = 1,
    labeller = labeller(
      drv = c(
        "4" = "four-wheel",
        "f" = "front-wheel",
        "r" = "rear-wheel"
      )
    )
  ) +
  scale_color_manual(
    values = c("cyan4", "darkorchid", "deepskyblue"),
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  labs(
    title = "Scatter Diagram of Displacement and Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "Displacement",
    y = "Highway MPG"
  ) +
  # theme_bw() is a complete theme and replaces every earlier theme setting,
  # so it has to come before the legend.position tweak, not after it.
  theme_bw() +
  theme(legend.position = "bottom")
`geom_smooth()` using formula = 'y ~ x'

第8题 分组箱线图

8.任选一个定量变量,一个定性变量,绘制分组箱线图

# Box plots of city mileage per drive train. reorder(drv, cty) orders the
# drv levels on the x axis by cty; the box outline colour also follows drv.
ggplot(
  data = mpg,
  mapping = aes(x = reorder(drv, cty), y = cty, colour = drv)
) +
  geom_boxplot() +
  scale_color_manual(
    labels = c("Four-wheel", "Front-wheel", "Rear-Wheel"),
    values = c("cyan4", "darkorchid", "deepskyblue")
  ) +
  labs(
    y = "City MPG",
    x = "Drive Engine",
    subtitle = "Grouped by the Type of Drive Engine",
    title = "Boxplot of City MPG"
  ) +
  theme_bw()

# Horizontal box plots of highway mileage per drive train. reorder(drv, hwy)
# orders the drv levels by hwy; the box outline colour also follows drv.
mpg %>%
  ggplot(aes(reorder(drv, hwy), hwy, col = drv)) +
  geom_boxplot() +
  # Swap the axes so the boxes lie horizontally.
  coord_flip() +
  # The y aesthetic is hwy, so its axis title is "Highway MPG".
  labs(
    title = "Boxplot of Highway MPG",
    subtitle = "Grouped by the Type of Drive Engine",
    x = "Drive Engine",
    y = "Highway MPG"
  ) +
  scale_color_manual(
    values = c("cyan4", "darkorchid", "deepskyblue"),
    labels = c(
      "4" = "Four-wheel",
      "f" = "Front-wheel",
      "r" = "Rear-Wheel"
    )
  ) +
  # theme_bw() is a complete theme and replaces every earlier theme setting,
  # so it has to come before the legend.position tweak, not after it.
  theme_bw() +
  theme(legend.position = "bottom")

cellphone

library(readxl)
# Read the survey workbook from the working directory.
cellphone <- read_excel("cellphone.xlsx")

# Count respondents per sex and brand, then turn the counts into each
# brand's share within its sex.
percent <- cellphone %>%
  # Recode the 0/1 `male` indicator into a readable label.
  mutate(sex = recode(male,
                      "0" = "female",
                      "1" = "male")) %>%
  group_by(sex, brand) %>%
  # tally() drops the innermost grouping (brand), so the result is still
  # grouped by sex and sum(n) below is the per-sex total.
  tally() %>%
  mutate(percent = n / sum(n)) %>%
  # Drop the leftover sex grouping so later verbs do not silently run
  # per group.
  ungroup()


# Filled (100%) bar chart of the `percent` column per sex, coloured by
# brand, with a white one-decimal percentage label centred in each segment.
ggplot(
  data = percent,
  mapping = aes(x = sex, y = percent, fill = brand)
) +
  geom_bar(position = "fill", stat = "identity") +
  geom_text(
    mapping = aes(label = paste0(sprintf("%1.1f", percent * 100), "%")),
    position = position_stack(vjust = 0.5),
    size = 4,
    colour = "white"
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "PRGn") +
  theme_grey()