library(tidyverse)
library(psych)
# Principal components analysis on the columns engine_s through mpg of
# car_sales: extract 2 components and apply a varimax rotation.
fa.pc.varimax <- car_sales %>%
  select(engine_s:mpg) %>%
  principal(r = ., nfactors = 2, rotate = "varimax")

# Auto-print the fit (loadings, communalities, variance accounted for, RMSR).
fa.pc.varimax
 
Principal Components Analysis
Call: principal(r = ., nfactors = 2, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
           RC1   RC2   h2   u2 com
engine_s  0.88  0.32 0.87 0.13 1.3
horsepow  0.89  0.09 0.80 0.20 1.0
wheelbas  0.19  0.94 0.91 0.09 1.1
width     0.52  0.69 0.75 0.25 1.9
length    0.23  0.89 0.84 0.16 1.1
curb_wgt  0.72  0.58 0.85 0.15 1.9
fuel_cap  0.64  0.59 0.76 0.24 2.0
mpg      -0.80 -0.37 0.78 0.22 1.4
                       RC1  RC2
SS loadings           3.49 3.06
Proportion Var        0.44 0.38
Cumulative Var        0.44 0.82
Proportion Explained  0.53 0.47
Cumulative Proportion 0.53 1.00
Mean item complexity =  1.5
Test of the hypothesis that 2 components are sufficient.
The root mean square of the residuals (RMSR) is  0.07 
 with the empirical chi square  38.95  with prob <  2e-04 
Fit based upon off diagonal values = 0.99
 
# Compact view of the rotated loadings: three decimals, loadings below 0.5 in
# absolute value are blanked, and variables are sorted by the component they
# load on. `TRUE` is spelled out because `T` is an ordinary, reassignable name.
print(fa.pc.varimax$loadings, digits = 3, cutoff = 0.5, sort = TRUE)
 
Loadings:
         RC1    RC2   
engine_s  0.876       
horsepow  0.888       
curb_wgt  0.722  0.577
fuel_cap  0.644  0.586
mpg      -0.803       
wheelbas         0.935
width     0.517  0.693
length           0.888
                 RC1   RC2
SS loadings    3.493 3.063
Proportion Var 0.437 0.383
Cumulative Var 0.437 0.819
 
# Refit the principal components analysis on the same columns
# (engine_s through mpg), this time extracting 3 varimax-rotated components.
# The result replaces the earlier 2-component fit stored under the same name.
fa.pc.varimax <- car_sales %>%
  select(engine_s:mpg) %>%
  principal(r = ., nfactors = 3, rotate = "varimax")

# Auto-print the fit (loadings, communalities, variance accounted for, RMSR).
fa.pc.varimax
 
Principal Components Analysis
Call: principal(r = ., nfactors = 3, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
           RC2   RC1   RC3   h2    u2 com
engine_s  0.31  0.43  0.80 0.92 0.084 1.9
horsepow  0.13  0.26  0.92 0.93 0.066 1.2
wheelbas  0.88  0.37  0.04 0.91 0.090 1.3
width     0.68  0.35  0.45 0.78 0.216 2.3
length    0.91  0.18  0.24 0.91 0.085 1.2
curb_wgt  0.43  0.75  0.39 0.90 0.099 2.2
fuel_cap  0.39  0.84  0.22 0.91 0.085 1.6
mpg      -0.19 -0.83 -0.41 0.89 0.107 1.6
                       RC2  RC1  RC3
SS loadings           2.55 2.50 2.12
Proportion Var        0.32 0.31 0.26
Cumulative Var        0.32 0.63 0.90
Proportion Explained  0.36 0.35 0.30
Cumulative Proportion 0.36 0.70 1.00
Mean item complexity =  1.7
Test of the hypothesis that 3 components are sufficient.
The root mean square of the residuals (RMSR) is  0.03 
 with the empirical chi square  8.66  with prob <  0.28 
Fit based upon off diagonal values = 1
 
# Compact view of the 3-component rotated loadings: three decimals, loadings
# below 0.5 in absolute value are blanked, and variables are sorted by the
# component they load on. `TRUE` is spelled out because `T` is an ordinary,
# reassignable name.
print(fa.pc.varimax$loadings, digits = 3, cutoff = 0.5, sort = TRUE)
 
Loadings:
         RC2    RC1    RC3   
wheelbas  0.879              
width     0.680              
length    0.908              
curb_wgt         0.748       
fuel_cap         0.842       
mpg             -0.829       
engine_s                0.797
horsepow                0.922
                 RC2   RC1   RC3
SS loadings    2.550 2.500 2.119
Proportion Var 0.319 0.312 0.265
Cumulative Var 0.319 0.631 0.896
 
 
library(tidyverse)
# Append the component scores from the fitted PCA to the original rows, then
# add a 0/1 indicator `suv` that is 1 where `type == 1`.
# NOTE(review): the author treats type == 1 as "SUV" - confirm against the
# dataset's codebook.
data <- car_sales %>%
  cbind(fa.pc.varimax$scores) %>%
  mutate(suv = if_else(type == 1, 1, 0))
# RC1: the larger its value, the heavier the car, the larger the fuel tank and
# the higher the fuel consumption (SUV-like).
# Histogram of RC1 scores, one stacked panel per value of `suv`.
ggplot(data, aes(x = RC1, fill = as.factor(suv))) +
  geom_histogram(colour = 1) +
  facet_wrap(vars(suv), ncol = 1)
 
# RC2: the larger its value, the larger the car's wheelbase, width and length.
# Histogram of RC2 scores, one stacked panel per value of `suv`.
ggplot(data, aes(x = RC2, fill = as.factor(suv))) +
  geom_histogram(colour = 1) +
  facet_wrap(vars(suv), ncol = 1)
 
# RC3: power performance (engine size and horsepower load on this component).
# Histogram of RC3 scores, one stacked panel per value of `suv`.
ggplot(data, aes(x = RC3, fill = as.factor(suv))) +
  geom_histogram(colour = 1) +
  facet_wrap(vars(suv), ncol = 1)
 
 
# Linear regression of price on the three rotated component scores.
eq1 <- lm(formula = price ~ RC1 + RC2 + RC3, data = data)

# Coefficient table, residual summary and overall fit statistics.
summary(eq1)
 
Call:
lm(formula = price ~ RC1 + RC2 + RC3, data = data)
Residuals:
    Min      1Q  Median      3Q     Max 
-21.468  -5.049  -0.936   2.972  36.978 
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  27.4646     0.7014  39.156  < 2e-16 ***
RC1           4.3130     0.6999   6.162 6.47e-09 ***
RC2          -1.0709     0.7001  -1.530    0.128    
RC3          10.6574     0.7036  15.148  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 8.643 on 148 degrees of freedom
  (5 observations deleted due to missingness)
Multiple R-squared:  0.6479,    Adjusted R-squared:  0.6407 
F-statistic: 90.76 on 3 and 148 DF,  p-value: < 2.2e-16
 
# Linear regression of resale on the three rotated component scores.
eq2 <- lm(formula = resale ~ RC1 + RC2 + RC3, data = data)

# Coefficient table, residual summary and overall fit statistics.
summary(eq2)
 
Call:
lm(formula = resale ~ RC1 + RC2 + RC3, data = data)
Residuals:
    Min      1Q  Median      3Q     Max 
-14.680  -4.848  -1.442   2.978  30.519 
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  18.5900     0.7066  26.310  < 2e-16 ***
RC1           3.0571     0.7242   4.222  4.9e-05 ***
RC2          -2.1020     0.6729  -3.124  0.00227 ** 
RC3           7.5295     0.6661  11.303  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.639 on 114 degrees of freedom
  (39 observations deleted due to missingness)
Multiple R-squared:  0.5742,    Adjusted R-squared:  0.563 
F-statistic: 51.25 on 3 and 114 DF,  p-value: < 2.2e-16
 
# Linear regression of sales on the three rotated component scores.
eq3 <- lm(formula = sales ~ RC1 + RC2 + RC3, data = data)

# Coefficient table, residual summary and overall fit statistics.
summary(eq3)
 
Call:
lm(formula = sales ~ RC1 + RC2 + RC3, data = data)
Residuals:
   Min     1Q Median     3Q    Max 
-88.46 -35.92 -18.67  20.10 397.87 
Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  53.30585    5.08555  10.482  < 2e-16 ***
RC1           0.05693    5.08494   0.011  0.99108    
RC2          25.16954    5.09264   4.942 2.05e-06 ***
RC3         -14.56111    5.11090  -2.849  0.00501 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 62.88 on 149 degrees of freedom
  (4 observations deleted due to missingness)
Multiple R-squared:  0.1808,    Adjusted R-squared:  0.1643 
F-statistic: 10.96 on 3 and 149 DF,  p-value: 1.508e-06