16 min read

机器学习包mlr3和ChatGPT

最近一年发生的事情实在太多了，ChatGPT改变了我们学习的方式。再把自己当作一个知识存储器，已经变得毫无意义；换个角度看，我们已经不需要死记硬背，ChatGPT就是我们的第二大脑。我们要做的，是学会如何利用ChatGPT来实现自己的想法。创造性思维和批判性思维，将是未来衡量一个人是否优秀的重要标准。

下面看看如何用ChatGPT构建一段机器学习代码，利用SNP数据实现对性别的预测。

# Load the required packages (they must already be installed).
# NOTE: the script intentionally does not call `rm(list = ls())`. That call
# only deletes objects in the global environment -- attached packages and
# options survive -- so it does not give a clean session, yet it silently
# wipes the workspace of anyone who sources this code. Restart R instead.
library(mlr3)
library(mlr3learners)
library(mlr3filters)
library(mlr3tuning)
## Loading required package: paradox
library(data.table)
library(mlr3verse)
library(mlr3viz)

# Read the training file that holds the SNP columns and the sex label.
sex_snp_dt <- fread(
  file = "../datasets/BP_55K_SNP_Sex.csv",
  sep = ",",
  header = TRUE
)

# Wrap the table in a classification task whose target column is "Sex".
sex_task <- as_task_classif(sex_snp_dt, target = "Sex", id = "PreSex")

# Keep every feature except the "ID" column.
features_to_keep <- setdiff(sex_task$feature_names, "ID")
sex_task$select(features_to_keep)

# Random-forest learner (ranger) that predicts class probabilities and
# measures variable importance by impurity (Gini) decrease.
rf_learner <- lrn(
  "classif.ranger",
  predict_type = "prob",
  importance = "impurity",
  keep.inbag = TRUE,
  num.trees = 100,
  write.forest = TRUE
)

# Show the learner's hyperparameter set.
rf_learner$param_set
## <ParamSet(28)>
## Key: <id>
##                               id    class lower upper nlevels        default
##                           <char>   <char> <num> <num>   <num>         <list>
##  1:       always.split.variables ParamUty    NA    NA     Inf <NoDefault[0]>
##  2:                class.weights ParamUty    NA    NA     Inf         [NULL]
##  3:                      holdout ParamLgl    NA    NA       2          FALSE
##  4:                   importance ParamFct    NA    NA       4 <NoDefault[0]>
##  5:                   keep.inbag ParamLgl    NA    NA       2          FALSE
##  6:                    max.depth ParamInt     1   Inf     Inf         [NULL]
##  7:                   min.bucket ParamUty    NA    NA     Inf              1
##  8:                min.node.size ParamUty    NA    NA     Inf         [NULL]
##  9:                         mtry ParamInt     1   Inf     Inf <NoDefault[0]>
## 10:                   mtry.ratio ParamDbl     0     1     Inf <NoDefault[0]>
## 11:                    na.action ParamFct    NA    NA       3       na.learn
## 12:                   node.stats ParamLgl    NA    NA       2          FALSE
## 13:            num.random.splits ParamInt     1   Inf     Inf              1
## 14:                  num.threads ParamInt     1   Inf     Inf              1
## 15:                    num.trees ParamInt     1   Inf     Inf            500
## 16:                    oob.error ParamLgl    NA    NA       2           TRUE
## 17:        regularization.factor ParamUty    NA    NA     Inf              1
## 18:      regularization.usedepth ParamLgl    NA    NA       2          FALSE
## 19:                      replace ParamLgl    NA    NA       2           TRUE
## 20:    respect.unordered.factors ParamFct    NA    NA       3 <NoDefault[0]>
## 21:              sample.fraction ParamDbl     0     1     Inf <NoDefault[0]>
## 22:                  save.memory ParamLgl    NA    NA       2          FALSE
## 23: scale.permutation.importance ParamLgl    NA    NA       2          FALSE
## 24:                         seed ParamInt  -Inf   Inf     Inf         [NULL]
## 25:         split.select.weights ParamUty    NA    NA     Inf         [NULL]
## 26:                    splitrule ParamFct    NA    NA       3           gini
## 27:                      verbose ParamLgl    NA    NA       2           TRUE
## 28:                 write.forest ParamLgl    NA    NA       2           TRUE
##                               id    class lower upper nlevels        default
##        parents    value
##         <list>   <list>
##  1:     [NULL]   [NULL]
##  2:     [NULL]   [NULL]
##  3:     [NULL]   [NULL]
##  4:     [NULL] impurity
##  5:     [NULL]     TRUE
##  6:     [NULL]   [NULL]
##  7:     [NULL]   [NULL]
##  8:     [NULL]   [NULL]
##  9:     [NULL]   [NULL]
## 10:     [NULL]   [NULL]
## 11:     [NULL]   [NULL]
## 12:     [NULL]   [NULL]
## 13:  splitrule   [NULL]
## 14:     [NULL]        1
## 15:     [NULL]      100
## 16:     [NULL]   [NULL]
## 17:     [NULL]   [NULL]
## 18:     [NULL]   [NULL]
## 19:     [NULL]   [NULL]
## 20:     [NULL]   [NULL]
## 21:     [NULL]   [NULL]
## 22:     [NULL]   [NULL]
## 23: importance   [NULL]
## 24:     [NULL]   [NULL]
## 25:     [NULL]   [NULL]
## 26:     [NULL]   [NULL]
## 27:     [NULL]   [NULL]
## 28:     [NULL]     TRUE
##        parents    value
# Fix the RNG seed so that the random fold assignment and the random-forest
# fits give the same result on every run. Without a seed the reported
# accuracy/AUC change slightly from run to run.
set.seed(123)

# 5-fold cross-validation scheme.
resampling <- rsmp("cv", folds = 5)

# Run the cross-validation and keep the fitted model of every fold.
resample_result <- resample(
  sex_task,
  rf_learner,
  resampling,
  store_models = TRUE
)
## INFO  [13:57:21.196] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:21.713] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:21.759] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:21.796] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:21.834] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
# Measures used to evaluate predictive ability on the reference-population
# data:
#   - classification accuracy
measure_acc <- msr("classif.acc")
#   - area under the ROC curve
measure_auc <- msr("classif.auc")

# Report each measure aggregated over the resampling iterations.
resample_result$aggregate(measure_acc)
## classif.acc 
##   0.9806452
resample_result$aggregate(measure_auc)
## classif.auc 
##   0.9877493
# ROC curve for the resample result.
autoplot(resample_result, type = "roc")

# Hyperparameter search space.
#   1. mtry: number of variables drawn at random as split candidates.
#      Range: 1 .. number of features in the task. `sex_task$feature_names`
#      already excludes the target (and the dropped ID column), so no extra
#      "- 1" is applied; subtracting again would leave the largest valid
#      mtry out of the search.
#   2. min.node.size: minimum node size. Range: 1 .. 10.
search_space <- ps(
  mtry = p_int(1, length(sex_task$feature_names)),
  min.node.size = p_int(1, 10)
)

# Tuning setup: grid search with a resolution of 10 per parameter, scored by
# cross-validated accuracy. The "evals" terminator stops after 20 evaluated
# configurations, so only part of the 10 x 10 grid is actually visited.
tuner <- tnr("grid_search", resolution = 10)
instance <- ti(
  task = sex_task,
  learner = rf_learner,
  resampling = resampling,
  measures = measure_acc, # full argument name instead of partial `measure`
  search_space = search_space,
  terminator = trm("evals", n_evals = 20)
)

# Run the hyperparameter tuning.
tuner$optimize(instance)
## INFO  [13:57:22.322] [bbotk] Starting to optimize 2 parameter(s) with '<OptimizerBatchGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO  [13:57:22.339] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:22.345] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:22.353] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:22.420] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:22.491] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:22.563] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:22.636] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:22.702] [mlr3] Finished benchmark
## INFO  [13:57:22.723] [bbotk] Result of batch 1:
## INFO  [13:57:22.725] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:22.725] [bbotk]    18             2   0.9782258        0      0            0.309
## INFO  [13:57:22.725] [bbotk]                                 uhash
## INFO  [13:57:22.725] [bbotk]  6f5c36db-f8eb-4e1b-a5bd-78f7c4d1079c
## INFO  [13:57:22.727] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:22.731] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:22.738] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:22.804] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:22.867] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:22.935] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:23.003] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:23.064] [mlr3] Finished benchmark
## INFO  [13:57:23.085] [bbotk] Result of batch 2:
## INFO  [13:57:23.086] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:23.086] [bbotk]    21            10   0.9790323        0      0            0.285
## INFO  [13:57:23.086] [bbotk]                                 uhash
## INFO  [13:57:23.086] [bbotk]  5e3b651b-df2b-40f6-b109-9b235a934049
## INFO  [13:57:23.089] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:23.094] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:23.100] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:23.142] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:23.189] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:23.240] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:23.286] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:23.327] [mlr3] Finished benchmark
## INFO  [13:57:23.347] [bbotk] Result of batch 3:
## INFO  [13:57:23.349] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:23.349] [bbotk]     6             7   0.9806452        0      0            0.192
## INFO  [13:57:23.349] [bbotk]                                 uhash
## INFO  [13:57:23.349] [bbotk]  ff9baa41-f055-4924-8888-dbab343705d1
## INFO  [13:57:23.351] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:23.355] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:23.361] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:23.420] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:23.484] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:23.549] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:23.621] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:23.678] [mlr3] Finished benchmark
## INFO  [13:57:23.700] [bbotk] Result of batch 4:
## INFO  [13:57:23.701] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:23.701] [bbotk]    18             1   0.9798387        0      0            0.278
## INFO  [13:57:23.701] [bbotk]                                 uhash
## INFO  [13:57:23.701] [bbotk]  77fffd66-f218-4a07-98ee-f7d576ca85b1
## INFO  [13:57:23.703] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:23.707] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:23.713] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:23.748] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:23.785] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:23.824] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:23.868] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:23.903] [mlr3] Finished benchmark
## INFO  [13:57:23.926] [bbotk] Result of batch 5:
## INFO  [13:57:23.927] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:23.927] [bbotk]     3             4   0.9806452        0      0            0.156
## INFO  [13:57:23.927] [bbotk]                                 uhash
## INFO  [13:57:23.927] [bbotk]  b23b096c-5a1e-4941-b785-7ee55c947afa
## INFO  [13:57:23.930] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:23.934] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:23.941] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:23.997] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:24.055] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:24.115] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:24.183] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:24.238] [mlr3] Finished benchmark
## INFO  [13:57:24.260] [bbotk] Result of batch 6:
## INFO  [13:57:24.262] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:24.262] [bbotk]    13             9   0.9798387        0      0            0.261
## INFO  [13:57:24.262] [bbotk]                                 uhash
## INFO  [13:57:24.262] [bbotk]  6a2deb1a-e803-44c0-b51b-b5422e6336a4
## INFO  [13:57:24.264] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:24.268] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:24.275] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:24.326] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:24.381] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:24.451] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:24.507] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:24.559] [mlr3] Finished benchmark
## INFO  [13:57:24.579] [bbotk] Result of batch 7:
## INFO  [13:57:24.581] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:24.581] [bbotk]    13             6   0.9806452        0      0            0.249
## INFO  [13:57:24.581] [bbotk]                                 uhash
## INFO  [13:57:24.581] [bbotk]  c2545532-6e5a-411f-9f69-bfd75312902c
## INFO  [13:57:24.583] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:24.586] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:24.593] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:24.649] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:24.709] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:24.775] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:24.836] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:24.890] [mlr3] Finished benchmark
## INFO  [13:57:24.910] [bbotk] Result of batch 8:
## INFO  [13:57:24.911] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:24.911] [bbotk]    16             6   0.9806452        0      0            0.265
## INFO  [13:57:24.911] [bbotk]                                 uhash
## INFO  [13:57:24.911] [bbotk]  43e933ed-70a6-45c4-884a-c6dd78c58590
## INFO  [13:57:24.913] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:24.917] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:24.924] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:24.964] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:25.136] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:25.252] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:25.329] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:25.382] [mlr3] Finished benchmark
## INFO  [13:57:25.407] [bbotk] Result of batch 9:
## INFO  [13:57:25.412] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:25.412] [bbotk]     6            10   0.9806452        0      0             0.33
## INFO  [13:57:25.412] [bbotk]                                 uhash
## INFO  [13:57:25.412] [bbotk]  a0751c86-e072-46fd-9d50-1e856746a7c2
## INFO  [13:57:25.419] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:25.427] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:25.438] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:25.488] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:25.561] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:25.626] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:25.696] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:25.749] [mlr3] Finished benchmark
## INFO  [13:57:25.776] [bbotk] Result of batch 10:
## INFO  [13:57:25.778] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:25.778] [bbotk]     6             4   0.9806452        0      0            0.251
## INFO  [13:57:25.778] [bbotk]                                 uhash
## INFO  [13:57:25.778] [bbotk]  155bde19-489a-4481-8eea-84932578cd76
## INFO  [13:57:25.780] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:25.785] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:25.794] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:25.868] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:26.112] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:26.181] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:26.252] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:26.316] [mlr3] Finished benchmark
## INFO  [13:57:26.341] [bbotk] Result of batch 11:
## INFO  [13:57:26.343] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:26.343] [bbotk]    18             9   0.9798387        0      0            0.478
## INFO  [13:57:26.343] [bbotk]                                 uhash
## INFO  [13:57:26.343] [bbotk]  387509df-a737-4c69-8ec4-15b0412dd1bc
## INFO  [13:57:26.346] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:26.350] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:26.358] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:26.424] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:26.497] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:26.567] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:26.650] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:26.722] [mlr3] Finished benchmark
## INFO  [13:57:26.748] [bbotk] Result of batch 12:
## INFO  [13:57:26.750] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:26.750] [bbotk]    16             8   0.9790323        0      0            0.317
## INFO  [13:57:26.750] [bbotk]                                 uhash
## INFO  [13:57:26.750] [bbotk]  aa55c319-0281-4cb7-ba65-433f6381ad9e
## INFO  [13:57:26.752] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:26.757] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:26.765] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:26.831] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:26.923] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:26.994] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:27.063] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:27.135] [mlr3] Finished benchmark
## INFO  [13:57:27.163] [bbotk] Result of batch 13:
## INFO  [13:57:27.165] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:27.165] [bbotk]    16             9   0.9806452        0      0            0.323
## INFO  [13:57:27.165] [bbotk]                                 uhash
## INFO  [13:57:27.165] [bbotk]  586ee644-1594-4c28-8cfb-2dc6a9eb80eb
## INFO  [13:57:27.167] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:27.173] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:27.183] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:27.241] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:27.298] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:27.356] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:27.418] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:27.470] [mlr3] Finished benchmark
## INFO  [13:57:27.501] [bbotk] Result of batch 14:
## INFO  [13:57:27.503] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:27.503] [bbotk]     6             5   0.9806452        0      0            0.243
## INFO  [13:57:27.503] [bbotk]                                 uhash
## INFO  [13:57:27.503] [bbotk]  79ed527a-07ba-4d5e-989d-b51ce7f9f361
## INFO  [13:57:27.505] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:27.510] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:27.518] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:27.569] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:27.623] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:27.677] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:27.731] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:27.782] [mlr3] Finished benchmark
## INFO  [13:57:27.805] [bbotk] Result of batch 15:
## INFO  [13:57:27.806] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:27.806] [bbotk]     8             4   0.9806452        0      0            0.224
## INFO  [13:57:27.806] [bbotk]                                 uhash
## INFO  [13:57:27.806] [bbotk]  02a2e1f5-477b-492d-8eac-12d5ef45b240
## INFO  [13:57:27.808] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:27.813] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:27.821] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:27.880] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:27.945] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:28.010] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:28.085] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:28.146] [mlr3] Finished benchmark
## INFO  [13:57:28.169] [bbotk] Result of batch 16:
## INFO  [13:57:28.171] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:28.171] [bbotk]    11             7   0.9806452        0      0            0.282
## INFO  [13:57:28.171] [bbotk]                                 uhash
## INFO  [13:57:28.171] [bbotk]  e69688bf-d8d1-447c-9a9d-36fe594cc8dc
## INFO  [13:57:28.173] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:28.178] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:28.186] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:28.258] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:28.338] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:28.416] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:28.488] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:28.552] [mlr3] Finished benchmark
## INFO  [13:57:28.575] [bbotk] Result of batch 17:
## INFO  [13:57:28.577] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:28.577] [bbotk]    18             7   0.9798387        0      0            0.324
## INFO  [13:57:28.577] [bbotk]                                 uhash
## INFO  [13:57:28.577] [bbotk]  e2577f6e-e18c-47ca-a6bc-4faec78013f4
## INFO  [13:57:28.579] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:28.584] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:28.591] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:28.661] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:28.730] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:28.799] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:28.872] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:28.941] [mlr3] Finished benchmark
## INFO  [13:57:28.969] [bbotk] Result of batch 18:
## INFO  [13:57:28.972] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:28.972] [bbotk]    16             3   0.9814516        0      0            0.309
## INFO  [13:57:28.972] [bbotk]                                 uhash
## INFO  [13:57:28.972] [bbotk]  c3bec1f8-9cb9-4934-bdfa-f2092bbfa0db
## INFO  [13:57:28.974] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:28.980] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:28.988] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:29.052] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:29.119] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:29.184] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:29.253] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:29.312] [mlr3] Finished benchmark
## INFO  [13:57:29.335] [bbotk] Result of batch 19:
## INFO  [13:57:29.337] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:29.337] [bbotk]    16            10   0.9798387        0      0            0.283
## INFO  [13:57:29.337] [bbotk]                                 uhash
## INFO  [13:57:29.337] [bbotk]  875ad9a9-28b8-41aa-a04f-75bf752994ea
## INFO  [13:57:29.339] [bbotk] Evaluating 1 configuration(s)
## INFO  [13:57:29.343] [mlr3] Running benchmark with 5 resampling iterations
## INFO  [13:57:29.350] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO  [13:57:29.401] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO  [13:57:29.455] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO  [13:57:29.509] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO  [13:57:29.564] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO  [13:57:29.613] [mlr3] Finished benchmark
## INFO  [13:57:29.637] [bbotk] Result of batch 20:
## INFO  [13:57:29.639] [bbotk]  mtry min.node.size classif.acc warnings errors runtime_learners
## INFO  [13:57:29.639] [bbotk]     8             3   0.9806452        0      0            0.227
## INFO  [13:57:29.639] [bbotk]                                 uhash
## INFO  [13:57:29.639] [bbotk]  1489c57e-f32d-4f3c-b042-69e43941a7de
## INFO  [13:57:29.655] [bbotk] Finished optimizing after 20 evaluation(s)
## INFO  [13:57:29.655] [bbotk] Result:
## INFO  [13:57:29.657] [bbotk]   mtry min.node.size learner_param_vals  x_domain classif.acc
## INFO  [13:57:29.657] [bbotk]  <int>         <int>             <list>    <list>       <num>
## INFO  [13:57:29.657] [bbotk]     16             3          <list[7]> <list[2]>   0.9814516
##     mtry min.node.size learner_param_vals  x_domain classif.acc
##    <int>         <int>             <list>    <list>       <num>
## 1:    16             3          <list[7]> <list[2]>   0.9814516
# Learner parameter values of the best configuration in the tuning instance.
best_params <- instance$result_learner_param_vals
print(best_params)
## $importance
## [1] "impurity"
## 
## $keep.inbag
## [1] TRUE
## 
## $num.threads
## [1] 1
## 
## $num.trees
## [1] 100
## 
## $write.forest
## [1] TRUE
## 
## $mtry
## [1] 16
## 
## $min.node.size
## [1] 3
# Apply the best parameter values to the learner and train it on the task.
rf_learner$param_set$values <- best_params
rf_learner$train(sex_task)

# Collect the variable-importance scores into a two-column table
# (feature name, importance value).
importance <- setnames(
  as.data.table(rf_learner$importance(), keep.rownames = TRUE),
  c("Feature", "Importance")
)

# Show the importance table.
print(importance)
##                    Feature   Importance
##                     <char>        <num>
##  1:  NW_020869892.1_791056 2.982597e+02
##  2:   NW_020870605.1_27103 1.370551e+02
##  3: NW_020870091.1_1375545 6.884403e+01
##  4:    NW_020871184.1_1572 4.513733e+01
##  5:  NW_020872449.1_998527 2.290148e+01
##  6:  NW_020868494.1_348609 3.142147e+00
##  7: NW_020872622.1_1032359 3.057238e+00
##  8:   NW_020870605.1_26371 2.538398e+00
##  9:   NW_020869050.1_77499 2.477162e+00
## 10:  NW_020872535.1_937428 2.069124e+00
## 11:  NW_020870721.1_658002 1.797881e+00
## 12:  NW_020871094.1_359648 1.796081e+00
## 13:   NW_020872396.1_14640 1.770227e+00
## 14:  NW_020870605.1_296954 1.211137e+00
## 15:   NW_020870023.1_44774 1.094132e+00
## 16:  NW_020871839.1_399752 9.567355e-02
## 17:   NW_020870770.1_46798 7.761502e-02
## 18:  NW_020868464.1_423800 6.037652e-02
## 19:  NW_020870787.1_121276 3.886719e-02
## 20:  NW_020869892.1_778639 2.314966e-02
## 21:   NW_020872461.1_91390 2.182592e-02
## 22:  NW_020871839.1_399750 6.379509e-03
## 23:  NW_020869886.1_223326 2.461400e-03
## 24:   NW_020870770.1_64668 0.000000e+00
##                    Feature   Importance

可视化结果：任务数据概览、交叉验证的ROC曲线，以及一棵rpart决策树（单独训练的决策树，并非随机森林中的某一棵树）

library(ggparty)
## Loading required package: ggplot2
## Loading required package: partykit
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
# Plot the classification task itself.
autoplot(sex_task)

# ROC curve for the resample result.
autoplot(resample_result, type = "roc")

# Train a classif.rpart learner on the task (keeping the fitted model) and
# draw it with the "ggparty" plot type. This is a separately trained rpart
# model, not a tree taken from the random forest.
rp_learner <- lrn("classif.rpart", keep_model = TRUE)
rp_learner$train(sex_task)
autoplot(rp_learner, type = "ggparty")