install.packages("tidyverse")
install.packages("MASS")
install.packages("klaR")
install.packages("devtools")
install.packages("psych")
install.packages("MVN")
install.packages("e1071")
5 SVM在R中的实现
本章介绍R中的SVM
安装包
加载包
library(tidyverse)
library(psych)
library(biotools)
library(MVN)
library(e1071)
1 SVM模型的建立
# Keep only two sepal measurements plus the class label, so that the fitted
# model can later be drawn in a two-dimensional plane.
iris_sub <- iris[c("Sepal.Length", "Sepal.Width", "Species")]
# Fit an SVM classifier with a radial basis function (RBF) kernel, using
# every remaining column of iris_sub as a predictor of Species.
svm_model <- svm(
  Species ~ .,
  data = iris_sub,
  kernel = "radial",
  cost = 1,
  gamma = 0.5
)
2 SVM模型的可视化
# Build a dense lattice of points to predict on: each axis spans the observed
# range padded by 0.5 on both sides and is sampled every 0.02 units.
xrange <- seq(
  from = min(iris_sub[["Sepal.Length"]]) - 0.5,
  to = max(iris_sub[["Sepal.Length"]]) + 0.5,
  by = 0.02
)
yrange <- seq(
  from = min(iris_sub[["Sepal.Width"]]) - 0.5,
  to = max(iris_sub[["Sepal.Width"]]) + 0.5,
  by = 0.02
)
# Column names must match the model's predictor names so predict() can use
# this data frame directly.
grid <- expand.grid(
  Sepal.Length = xrange,
  Sepal.Width = yrange
)
# Label every lattice point with the class predicted by the fitted model.
grid$Species <- predict(svm_model, newdata = grid)
# Shade the predicted class regions and overlay the observed samples.
# Both layers share the same x/y mapping, so it is declared once up front.
ggplot(mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_tile(data = grid, mapping = aes(fill = Species), alpha = 0.3) +
  geom_point(data = iris_sub, mapping = aes(colour = Species), size = 2) +
  labs(
    title = "SVM on Iris Dataset (3-class)",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme_minimal()
3 计算预测准确率
# Accuracy on the same rows the model is predicted on here (iris_sub):
# the share of rows whose predicted class equals the true label.
pred <- predict(svm_model, newdata = iris_sub)
accuracy <- mean(iris_sub$Species == pred)
print(paste("Accuracy:", round(100 * accuracy, 2), "%"))
[1] "Accuracy: 82 %"
4 改进SVM模型
4.1 调整参数
# Refit the RBF model with hand-picked larger values of cost and gamma,
# then report its accuracy on iris_sub.
svm_model_tuned <- svm(
  Species ~ .,
  data = iris_sub,
  kernel = "radial",
  cost = 10,
  gamma = 0.8
)
pred_tuned <- predict(svm_model_tuned, newdata = iris_sub)
accuracy_tuned <- mean(iris_sub$Species == pred_tuned)
print(paste("Tuned Accuracy:", round(100 * accuracy_tuned, 2), "%"))
[1] "Tuned Accuracy: 82 %"
4.2 使用不同核函数(线性核)
# Swap the RBF kernel for a linear one (cost = 10) and report its accuracy
# on iris_sub.
svm_model_linear <- svm(
  Species ~ .,
  data = iris_sub,
  kernel = "linear",
  cost = 10
)
pred_linear <- predict(svm_model_linear, newdata = iris_sub)
accuracy_linear <- mean(iris_sub$Species == pred_linear)
print(paste("Linear Kernel Accuracy:", round(100 * accuracy_linear, 2), "%"))
[1] "Linear Kernel Accuracy: 82 %"
4.3 数据预处理(标准化)
# Standardise the two numeric predictors with scale() on a copy of the data,
# then refit the RBF model with the original cost/gamma settings.
# NOTE(review): e1071::svm() appears to scale its inputs by default
# (scale = TRUE), which would make this manual step redundant and explain
# the unchanged accuracy; confirm against the e1071 documentation.
iris_sub_scaled <- iris_sub
iris_sub_scaled[, 1:2] <- scale(iris_sub[, 1:2])
svm_model_scaled <- svm(
  Species ~ .,
  data = iris_sub_scaled,
  kernel = "radial",
  cost = 1,
  gamma = 0.5
)
pred_scaled <- predict(svm_model_scaled, newdata = iris_sub_scaled)
accuracy_scaled <- mean(iris_sub_scaled$Species == pred_scaled)
print(paste("Scaled Data Accuracy:", round(100 * accuracy_scaled, 2), "%"))
[1] "Scaled Data Accuracy: 82 %"
4.4 交叉验证选择最佳参数
# Grid-search cost and gamma with tune(); the printed summary further down
# reports the sampling method as 10-fold cross-validation.
# Fix the RNG seed first: the folds are drawn at random, so without a seed
# the selected parameters and the reported error can change between runs.
# NOTE(review): the recorded output was produced without this seed, so a
# fresh run may print different values.
set.seed(123)
tune_result <- tune(
  svm,
  Species ~ .,
  data = iris_sub,
  ranges = list(
    cost = 10^(-1:2),
    gamma = c(0.1, 0.5, 1)
  )
)
# tune() keeps the model fitted with the best parameter combination.
best_model <- tune_result$best.model
pred_best <- predict(best_model, iris_sub)
accuracy_best <- mean(pred_best == iris_sub$Species)
print(paste("Best Model Accuracy:", round(accuracy_best * 100, 2), "%"))
[1] "Best Model Accuracy: 80 %"
# Show the winning cost/gamma combination.
print(tune_result[["best.parameters"]])
  cost gamma
5  0.1   0.5
# Show the full tuning summary.
print(tune_result)
Parameter tuning of 'svm':
- sampling method: 10-fold cross validation 
- best parameters:
 cost gamma
  0.1   0.5
- best performance: 0.2133333 
# Re-label the lattice with the cross-validated best model, then redraw the
# class regions with the observed samples on top.
grid$Species <- predict(best_model, newdata = grid)
ggplot(mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_tile(data = grid, mapping = aes(fill = Species), alpha = 0.3) +
  geom_point(data = iris_sub, mapping = aes(colour = Species), size = 2) +
  labs(
    title = "Tuned SVM on Iris Dataset (3-class)",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme_minimal()
4.5 使用更多特征
# The earlier sections used only two features so the result could be plotted;
# in practice every feature in iris can be used.
# A higher-dimensional decision boundary cannot be drawn directly, so the
# model is judged by its accuracy instead.
svm_model_full <- svm(
  Species ~ .,
  data = iris,
  kernel = "radial",
  cost = 1,
  gamma = 0.5
)
pred_full <- predict(svm_model_full, newdata = iris)
accuracy_full <- mean(iris$Species == pred_full)
print(paste(
  "Full Feature Set Accuracy:",
  round(100 * accuracy_full, 2),
  "%"
))
[1] "Full Feature Set Accuracy: 97.33 %"