数据文件:msleep{ggplot2}

请先查看msleep的帮助文件(加载ggplot2后运行 ?msleep),了解各变量的含义。

1 计算单个的描述性统计量

# Load msleep directly from ggplot2 so this works even when ggplot2 has not
# been attached with library().
data(msleep, package = "ggplot2")

# Copy the columns used below into ordinary variables instead of calling
# attach(): attach() leaves the whole data frame on the search path, where its
# columns can silently be masked by workspace objects or later-loaded packages.
sleep_total <- msleep$sleep_total
vore <- msleep$vore
genus <- msleep$genus

mean(sleep_total)
[1] 10.43373
median(sleep_total)
[1] 10.1
max(sleep_total)
[1] 19.9
min(sleep_total)
[1] 1.9
IQR(sleep_total)
[1] 5.9
var(sleep_total)
[1] 19.80568
sd(sleep_total)
[1] 4.450357
# Minimum, the three quartiles and the maximum of sleep_total.
quantile(sleep_total, probs = seq(0, 1, by = 0.25))
   0%   25%   50%   75%  100% 
 1.90  7.85 10.10 13.75 19.90 
# Deciles (10% through 90%) of sleep_total.
quantile(sleep_total, probs = seq(0.1, 0.9, by = 0.1))
  10%   20%   30%   40%   50%   60%   70%   80%   90% 
 3.92  6.24  8.52  9.48 10.10 11.14 12.80 14.40 15.88 
#计算偏度和峰度
#install.packages("e1071")
library(e1071)
skewness(sleep_total)
[1] 0.05230964
kurtosis(sleep_total)
[1] -0.7074466

注意:base R 的 mode() 返回的是对象的存储模式(例如 "numeric"、"character"),并不是统计学意义上的众数;求众数请使用下面的 DescTools::Mode() 或 table()。

mode(vore)
[1] "character"
mode(genus)
[1] "character"
mode(sleep_total)
[1] "numeric"
#定量数据的众数
#install.packages("DescTools")
library(DescTools)
Mode(sleep_total)
[1] 12.5
attr(,"freq")
[1] 4
#定性数据的众数
table(msleep$vore)

  carni   herbi insecti    omni 
     19      32       5      20 
# vore is stored as a character vector, not a factor, so it has no levels
# attribute and levels() returns NULL.
levels(msleep$vore)
NULL
# which.max() on the frequency table picks the most frequent category; the
# result is that category's position in the table, labelled with its name.
which.max(table(msleep$vore))
herbi 
    2 

2 批量计算描述性统计量

2.1 base::summary()

summary(msleep)
     name              genus               vore              order          
 Length:83          Length:83          Length:83          Length:83         
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
 conservation        sleep_total      sleep_rem      sleep_cycle    
 Length:83          Min.   : 1.90   Min.   :0.100   Min.   :0.1167  
 Class :character   1st Qu.: 7.85   1st Qu.:0.900   1st Qu.:0.1833  
 Mode  :character   Median :10.10   Median :1.500   Median :0.3333  
                    Mean   :10.43   Mean   :1.875   Mean   :0.4396  
                    3rd Qu.:13.75   3rd Qu.:2.400   3rd Qu.:0.5792  
                    Max.   :19.90   Max.   :6.600   Max.   :1.5000  
                                    NA's   :22      NA's   :51      
     awake          brainwt            bodywt        
 Min.   : 4.10   Min.   :0.00014   Min.   :   0.005  
 1st Qu.:10.25   1st Qu.:0.00290   1st Qu.:   0.174  
 Median :13.90   Median :0.01240   Median :   1.670  
 Mean   :13.57   Mean   :0.28158   Mean   : 166.136  
 3rd Qu.:16.15   3rd Qu.:0.12550   3rd Qu.:  41.750  
 Max.   :22.10   Max.   :5.71200   Max.   :6654.000  
                 NA's   :27                          

2.2 psych::describe()

#install.packages("psych")
library(psych)
describe(msleep)
              vars  n   mean     sd median trimmed   mad  min     max   range
name*            1 83  42.00  24.10  42.00   42.00 31.13 1.00   83.00   82.00
genus*           2 83  40.25  22.52  41.00   40.45 28.17 1.00   77.00   76.00
vore*            3 76   2.34   1.13   2.00    2.31  1.48 1.00    4.00    3.00
order*           4 83  11.28   6.15  15.00   11.54  4.45 1.00   19.00   18.00
conservation*    5 54   3.78   1.31   4.00    3.77  0.74 1.00    6.00    5.00
sleep_total      6 83  10.43   4.45  10.10   10.38  5.04 1.90   19.90   18.00
sleep_rem        7 61   1.88   1.30   1.50    1.71  1.19 0.10    6.60    6.50
sleep_cycle      8 32   0.44   0.36   0.33    0.38  0.23 0.12    1.50    1.38
awake            9 83  13.57   4.45  13.90   13.62  5.04 4.10   22.10   18.00
brainwt         10 56   0.28   0.98   0.01    0.07  0.02 0.00    5.71    5.71
bodywt          11 83 166.14 786.84   1.67   20.49  2.43 0.00 6654.00 6654.00
               skew kurtosis    se
name*          0.00    -1.24  2.65
genus*        -0.06    -1.25  2.47
vore*          0.42    -1.25  0.13
order*        -0.38    -1.55  0.68
conservation* -0.14    -0.50  0.18
sleep_total    0.05    -0.71  0.49
sleep_rem      1.46     2.73  0.17
sleep_cycle    1.49     1.57  0.06
awake         -0.05    -0.71  0.49
brainwt        4.63    20.96  0.13
bodywt         7.10    53.72 86.37

报告四分位数和IQR

# Same summary table as describe(msleep), with extra columns for the IQR and
# the 25% / 75% quantiles.
describe(msleep, quant = c(0.25, 0.75), IQR = TRUE)
              vars  n   mean     sd median trimmed   mad  min     max   range
name*            1 83  42.00  24.10  42.00   42.00 31.13 1.00   83.00   82.00
genus*           2 83  40.25  22.52  41.00   40.45 28.17 1.00   77.00   76.00
vore*            3 76   2.34   1.13   2.00    2.31  1.48 1.00    4.00    3.00
order*           4 83  11.28   6.15  15.00   11.54  4.45 1.00   19.00   18.00
conservation*    5 54   3.78   1.31   4.00    3.77  0.74 1.00    6.00    5.00
sleep_total      6 83  10.43   4.45  10.10   10.38  5.04 1.90   19.90   18.00
sleep_rem        7 61   1.88   1.30   1.50    1.71  1.19 0.10    6.60    6.50
sleep_cycle      8 32   0.44   0.36   0.33    0.38  0.23 0.12    1.50    1.38
awake            9 83  13.57   4.45  13.90   13.62  5.04 4.10   22.10   18.00
brainwt         10 56   0.28   0.98   0.01    0.07  0.02 0.00    5.71    5.71
bodywt          11 83 166.14 786.84   1.67   20.49  2.43 0.00 6654.00 6654.00
               skew kurtosis    se   IQR Q0.25 Q0.75
name*          0.00    -1.24  2.65 41.00 21.50 62.50
genus*        -0.06    -1.25  2.47 38.00 21.50 59.50
vore*          0.42    -1.25  0.13  2.25  1.75  4.00
order*        -0.38    -1.55  0.68 13.00  4.00 17.00
conservation* -0.14    -0.50  0.18  1.00  3.00  4.00
sleep_total    0.05    -0.71  0.49  5.90  7.85 13.75
sleep_rem      1.46     2.73  0.17  1.50  0.90  2.40
sleep_cycle    1.49     1.57  0.06  0.40  0.18  0.58
awake         -0.05    -0.71  0.49  5.90 10.25 16.15
brainwt        4.63    20.96  0.13  0.12  0.00  0.13
bodywt         7.10    53.72 86.37 41.58  0.17 41.75

3 计算分组数据的描述性统计量

# Formula interface: produce the same extended summary separately for each
# value of vore.
describe(msleep ~ vore, quant = c(0.25, 0.75), IQR = TRUE)

 Descriptive statistics by group 
vore: carni
             vars  n  mean     sd median trimmed   mad  min    max  range  skew
name            1 19 39.58  24.28  42.00   39.41 31.13 4.00  78.00  74.00  0.08
genus           2 19 41.11  23.05  45.00   41.35 20.76 1.00  77.00  76.00 -0.04
vore            3 19  1.00   0.00   1.00    1.00  0.00 1.00   1.00   0.00   NaN
order           4 19  4.89   4.08   3.00    4.29  0.00 3.00  17.00  14.00  2.12
conservation    5 14  4.07   1.64   4.00    4.17  2.22 1.00   6.00   5.00 -0.30
sleep_total     6 19 10.38   4.67  10.40   10.30  6.08 2.70  19.40  16.70  0.10
sleep_rem       7 10  2.29   1.86   1.95    2.02  1.56 0.10   6.60   6.50  0.96
sleep_cycle     8  5  0.37   0.03   0.38    0.37  0.05 0.33   0.42   0.08  0.04
awake           9 19 13.63   4.68  13.60   13.70  6.08 4.60  21.35  16.75 -0.10
brainwt        10  9  0.08   0.10   0.04    0.08  0.04 0.01   0.32   0.31  1.46
bodywt         11 19 90.75 182.07  20.49   54.37 30.34 0.03 800.00 799.97  3.07
             kurtosis    se   IQR Q0.25 Q0.75
name            -1.51  5.57 40.50 18.00 58.50
genus           -1.07  5.29 26.00 28.00 54.00
vore              NaN  0.00  0.00  1.00  1.00
order            3.09  0.94  1.00  3.00  4.00
conservation    -1.19  0.44  2.50  3.25  5.75
sleep_total     -1.02  1.07  6.75  6.25 13.00
sleep_rem        0.19  0.59  1.73  1.33  3.05
sleep_cycle     -1.85  0.01  0.03  0.35  0.38
awake           -1.02  1.07  6.75 11.00 17.75
brainwt          0.74  0.03  0.05  0.02  0.07
bodywt           9.15 41.77 89.66  3.34 93.00
------------------------------------------------------------ 
vore: herbi
             vars  n   mean      sd median trimmed   mad  min     max   range
name            1 32  41.72   25.01  38.50   41.50 32.62 1.00   83.00   82.00
genus           2 32  37.25   21.75  37.50   37.12 25.95 3.00   73.00   70.00
vore            3 32   2.00    0.00   2.00    2.00  0.00 2.00    2.00    0.00
order           4 32  13.16    5.48  16.50   14.00  0.74 2.00   17.00   15.00
conservation    5 26   3.62    1.42   4.00    3.59  1.48 1.00    6.00    5.00
sleep_total     6 32   9.51    4.88  10.30    9.55  6.23 1.90   16.60   14.70
sleep_rem       7 24   1.37    0.92   0.95    1.27  0.67 0.40    3.40    3.00
sleep_cycle     8 12   0.42    0.32   0.22    0.39  0.12 0.12    1.00    0.88
awake           9 32  14.49    4.88  13.70   14.45  6.23 7.40   22.10   14.70
brainwt        10 20   0.62    1.57   0.01    0.13  0.02 0.00    5.71    5.71
bodywt         11 32 366.88 1244.08   1.23   63.04  1.75 0.02 6654.00 6653.98
              skew kurtosis     se   IQR Q0.25 Q0.75
name          0.14    -1.29   4.42 42.75 22.00 64.75
genus         0.12    -1.25   3.84 31.00 20.50 51.50
vore           NaN      NaN   0.00  0.00  2.00  2.00
order        -1.18    -0.15   0.97  6.25 10.75 17.00
conservation  0.02    -1.06   0.28  2.00  2.00  4.00
sleep_total  -0.14    -1.63   0.86  9.92  4.30 14.22
sleep_rem     0.84    -0.68   0.19  1.38  0.60  1.97
sleep_cycle   0.65    -1.40   0.09  0.51  0.18  0.69
awake         0.14    -1.63   0.86  9.93  9.78 19.70
brainwt       2.47     4.57   0.35  0.23  0.01  0.24
bodywt        4.18    17.68 219.92 38.81  0.19 39.00
------------------------------------------------------------ 
vore: insecti
             vars n  mean    sd median trimmed   mad   min   max range  skew
name            1 5 35.60 23.50  29.00   35.60 25.20  8.00 70.00 62.00  0.30
genus           2 5 52.20 19.84  60.00   52.20 16.31 22.00 71.00 49.00 -0.49
vore            3 5  3.00  0.00   3.00    3.00  0.00  3.00  3.00  0.00   NaN
order           4 5  9.40  6.11   6.00    9.40  1.48  5.00 19.00 14.00  0.61
conservation    5 3  3.67  0.58   4.00    3.67  0.00  3.00  4.00  1.00 -0.38
sleep_total     6 5 14.94  5.92  18.10   14.94  2.67  8.40 19.90 11.50 -0.26
sleep_rem       7 4  3.52  1.93   3.00    3.52  1.41  2.00  6.10  4.10  0.37
sleep_cycle     8 3  0.16  0.04   0.17    0.16  0.05  0.12  0.20  0.08 -0.13
awake           9 5  9.06  5.92   5.90    9.06  2.67  4.10 15.60 11.50  0.26
brainwt        10 5  0.02  0.03   0.00    0.02  0.00  0.00  0.08  0.08  0.86
bodywt         11 5 12.92 26.39   0.07   12.92  0.10  0.01 60.00 59.99  1.06
             kurtosis    se   IQR Q0.25 Q0.75
name            -1.68 10.51 21.00 25.00 46.00
genus           -1.72  8.87 22.00 43.00 65.00
vore              NaN  0.00  0.00  3.00  3.00
order           -1.65  2.73  7.00  5.00 12.00
conservation    -2.33  0.33  0.50  3.50  4.00
sleep_total     -2.24  2.65 11.10  8.60 19.70
sleep_rem       -2.03  0.96  2.37  2.08  4.45
sleep_cycle     -2.33  0.02  0.04  0.14  0.18
awake           -2.24  2.65 11.10  4.30 15.40
brainwt         -1.23  0.02  0.02  0.00  0.03
bodywt          -0.94 11.80  4.48  0.02  4.50
------------------------------------------------------------ 
vore: omni
             vars  n  mean    sd median trimmed   mad  min   max range  skew
name            1 20 43.75 24.52  46.00   44.75 26.69 2.00 80.00 78.00 -0.30
genus           2 20 36.90 23.95  31.00   36.44 27.43 2.00 75.00 73.00  0.23
vore            3 20  4.00  0.00   4.00    4.00  0.00 4.00  4.00  0.00   NaN
order           4 20 13.90  5.16  15.00   14.81  1.48 1.00 19.00 18.00 -1.36
conservation    5  9  3.78  0.67   4.00    3.78  0.00 2.00  4.00  2.00 -2.07
sleep_total     6 20 10.93  2.95   9.90   10.45  1.33 8.00 18.00 10.00  1.28
sleep_rem       7 18  1.96  1.01   1.85    1.85  0.74 0.70  4.90  4.20  1.32
sleep_cycle     8 11  0.59  0.47   0.50    0.54  0.37 0.13  1.50  1.37  0.89
awake           9 20 13.07  2.95  14.10   13.55  1.33 6.00 16.00 10.00 -1.28
brainwt        10 17  0.15  0.32   0.01    0.08  0.01 0.00  1.32  1.32  2.78
bodywt         11 20 12.72 24.69   0.95    6.63  1.33 0.00 86.25 86.25  1.85
             kurtosis   se   IQR Q0.25 Q0.75
name            -1.21 5.48 34.00 26.75 60.75
genus           -1.47 5.36 39.50 16.75 56.25
vore              NaN 0.00  0.00  4.00  4.00
order            0.75 1.15  2.00 15.00 17.00
conservation     2.63 0.22  0.00  4.00  4.00
sleep_total      0.17 0.66  1.83  9.10 10.93
sleep_rem        1.54 0.24  1.05  1.25  2.30
sleep_cycle     -0.73 0.14  0.45  0.26  0.71
awake            0.17 0.66  1.83 13.07 14.90
brainwt          7.14 0.08  0.18  0.00  0.18
bodywt           2.09 5.52  7.42  0.18  7.60
# %>%, group_by(), summarize() and n() all come from dplyr, so load it first.
#install.packages("dplyr")
library(dplyr)
# Group-wise summary of bodywt for each vore category; rows with a missing
# vore form their own <NA> group. The count column is deliberately left
# unnamed, so it is labelled `n()` in the result.
msleep %>%
  group_by(vore) %>%
  summarize(
    n(),
    min = min(bodywt),
    q1 = quantile(bodywt, 0.25),
    median = median(bodywt),
    mean = mean(bodywt),
    q3 = quantile(bodywt, 0.75),
    max = max(bodywt),
    sd = sd(bodywt)
  )
# A tibble: 5 × 9
  vore    `n()`   min     q1 median    mean    q3    max      sd
  <chr>   <int> <dbl>  <dbl>  <dbl>   <dbl> <dbl>  <dbl>   <dbl>
1 carni      19 0.028 3.34   20.5    90.8   93     800    182.  
2 herbi      32 0.022 0.191   1.23  367.    39    6654   1244.  
3 insecti     5 0.01  0.023   0.075  12.9    4.5    60     26.4 
4 omni       20 0.005 0.176   0.95   12.7    7.6    86.2   24.7 
5 <NA>        7 0.021 0.0465  0.122   0.858  1.08    3.6    1.34

4 本章习题

数据:ggplot2::mpg

要求:将代码与输出结果转成图片后提交。

  1. 报告该数据集中所有定量变量的均值、中位数、最大值、最小值、四分位数、方差、标准差、偏度和峰度。

  2. 将mpg中的汽车按照drv的不同分为三组(front-wheel, four-wheel, rear-wheel), 计算各组的cty和hwy的均值、中位数、最大值、最小值和标准差。