过去这一年发生的事情实在太多了，ChatGPT改变了我们学习的方式。再把自己当成一个知识存储器，已经变得毫无意义。换个角度看，我们已经不需要去记忆，ChatGPT就是我们的第二大脑。我们要做的，是学会如何利用ChatGPT来实现自己的想法。创造性思维和批判性思维将是未来衡量一个人是否优秀的重要标准。
下面看看如何借助ChatGPT编写一段机器学习代码，利用SNP数据实现对性别的预测。
# Load the required packages (they must already be installed; nothing is
# installed here).
# The workspace is intentionally not cleared with rm(list = ls()): that call
# would delete the objects of whoever sources this script while leaving
# attached packages and options untouched. Start a fresh R session instead.
library(mlr3)
library(mlr3learners)
library(mlr3filters)
library(mlr3tuning)
## Loading required package: paradox
library(data.table)
library(mlr3verse)
library(mlr3viz)
# Read the training file that holds the SNP and sex information.
sex_snp_dt <- fread(
  file = "../datasets/BP_55K_SNP_Sex.csv",
  sep = ",",
  header = TRUE
)
# Build the classification task with "Sex" as the target column.
sex_task <- as_task_classif(
  sex_snp_dt,
  target = "Sex",
  id = "PreSex"
)
# Drop the "ID" column from the feature set and keep everything else.
features_to_keep <- sex_task$feature_names[!sex_task$feature_names %in% "ID"]
sex_task$select(features_to_keep)
# Random forest learner (ranger) returning class probabilities; variable
# importance is measured by the decrease in Gini impurity.
rf_learner <- lrn(
  "classif.ranger",
  predict_type = "prob",
  importance = "impurity",
  keep.inbag = TRUE,
  num.trees = 100,
  write.forest = TRUE
)
# Display the learner's parameter set.
rf_learner$param_set
## <ParamSet(28)>
## Key: <id>
## id class lower upper nlevels default
## <char> <char> <num> <num> <num> <list>
## 1: always.split.variables ParamUty NA NA Inf <NoDefault[0]>
## 2: class.weights ParamUty NA NA Inf [NULL]
## 3: holdout ParamLgl NA NA 2 FALSE
## 4: importance ParamFct NA NA 4 <NoDefault[0]>
## 5: keep.inbag ParamLgl NA NA 2 FALSE
## 6: max.depth ParamInt 1 Inf Inf [NULL]
## 7: min.bucket ParamUty NA NA Inf 1
## 8: min.node.size ParamUty NA NA Inf [NULL]
## 9: mtry ParamInt 1 Inf Inf <NoDefault[0]>
## 10: mtry.ratio ParamDbl 0 1 Inf <NoDefault[0]>
## 11: na.action ParamFct NA NA 3 na.learn
## 12: node.stats ParamLgl NA NA 2 FALSE
## 13: num.random.splits ParamInt 1 Inf Inf 1
## 14: num.threads ParamInt 1 Inf Inf 1
## 15: num.trees ParamInt 1 Inf Inf 500
## 16: oob.error ParamLgl NA NA 2 TRUE
## 17: regularization.factor ParamUty NA NA Inf 1
## 18: regularization.usedepth ParamLgl NA NA 2 FALSE
## 19: replace ParamLgl NA NA 2 TRUE
## 20: respect.unordered.factors ParamFct NA NA 3 <NoDefault[0]>
## 21: sample.fraction ParamDbl 0 1 Inf <NoDefault[0]>
## 22: save.memory ParamLgl NA NA 2 FALSE
## 23: scale.permutation.importance ParamLgl NA NA 2 FALSE
## 24: seed ParamInt -Inf Inf Inf [NULL]
## 25: split.select.weights ParamUty NA NA Inf [NULL]
## 26: splitrule ParamFct NA NA 3 gini
## 27: verbose ParamLgl NA NA 2 TRUE
## 28: write.forest ParamLgl NA NA 2 TRUE
## id class lower upper nlevels default
## parents value
## <list> <list>
## 1: [NULL] [NULL]
## 2: [NULL] [NULL]
## 3: [NULL] [NULL]
## 4: [NULL] impurity
## 5: [NULL] TRUE
## 6: [NULL] [NULL]
## 7: [NULL] [NULL]
## 8: [NULL] [NULL]
## 9: [NULL] [NULL]
## 10: [NULL] [NULL]
## 11: [NULL] [NULL]
## 12: [NULL] [NULL]
## 13: splitrule [NULL]
## 14: [NULL] 1
## 15: [NULL] 100
## 16: [NULL] [NULL]
## 17: [NULL] [NULL]
## 18: [NULL] [NULL]
## 19: [NULL] [NULL]
## 20: [NULL] [NULL]
## 21: [NULL] [NULL]
## 22: [NULL] [NULL]
## 23: importance [NULL]
## 24: [NULL] [NULL]
## 25: [NULL] [NULL]
## 26: [NULL] [NULL]
## 27: [NULL] [NULL]
## 28: [NULL] TRUE
## parents value
# Fix the RNG seed so that the fold assignment and the forests grown in each
# fold are the same on every run (ranger derives its seed from R's RNG when
# its own `seed` parameter is left unset, as it is for this learner).
set.seed(20240601)
# 5-fold cross-validation scheme
resampling <- rsmp("cv", folds = 5)
# Run the cross-validation and keep the model fitted in every fold
resample_result <- resample(
  sex_task,
  rf_learner,
  resampling,
  store_models = TRUE
)
## INFO [13:57:21.196] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:21.713] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:21.759] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:21.796] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:21.834] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
# Measures used to judge predictive ability on the reference population:
# area under the ROC curve and classification accuracy.
measure_auc <- msr("classif.auc")
measure_acc <- msr("classif.acc")
# Accuracy aggregated across the resampling iterations
resample_result$aggregate(measure_acc)
## classif.acc
## 0.9806452
# AUC aggregated across the resampling iterations
resample_result$aggregate(measure_auc)
## classif.auc
## 0.9877493
# ROC curve of the cross-validation result
autoplot(resample_result, type = "roc")

# Hyperparameter search space for the random forest:
#   mtry          - number of features drawn at random as split candidates at
#                   each node; searched from 1 to the number of features.
#   min.node.size - ranger's minimum node size; searched from 1 to 10.
# `sex_task$feature_names` holds only the features kept in the task (the
# target and the ID column are not part of it), so its length is already the
# largest valid mtry. Subtracting 1 would leave out the setting in which
# every feature is a split candidate.
search_space <- ps(
  mtry = p_int(1, length(sex_task$feature_names)),
  min.node.size = p_int(1, 10)
)
# Grid search with ten values per parameter.
# NOTE(review): the terminator below stops after 20 evaluations, so only part
# of the 10 x 10 grid is evaluated; raise n_evals (or use trm("none")) if an
# exhaustive search over the grid is intended.
tuner <- tnr("grid_search", resolution = 10)
# Tuning instance. The argument of ti() is `measures`; it is spelled out in
# full rather than relying on partial argument matching of `measure`.
instance <- ti(
  task = sex_task,
  learner = rf_learner,
  resampling = resampling,
  measures = measure_acc,
  search_space = search_space,
  terminator = trm("evals", n_evals = 20)
)
# Run the hyperparameter tuning
tuner$optimize(instance)
## INFO [13:57:22.322] [bbotk] Starting to optimize 2 parameter(s) with '<OptimizerBatchGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [13:57:22.339] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:22.345] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:22.353] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:22.420] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:22.491] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:22.563] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:22.636] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:22.702] [mlr3] Finished benchmark
## INFO [13:57:22.723] [bbotk] Result of batch 1:
## INFO [13:57:22.725] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:22.725] [bbotk] 18 2 0.9782258 0 0 0.309
## INFO [13:57:22.725] [bbotk] uhash
## INFO [13:57:22.725] [bbotk] 6f5c36db-f8eb-4e1b-a5bd-78f7c4d1079c
## INFO [13:57:22.727] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:22.731] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:22.738] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:22.804] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:22.867] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:22.935] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:23.003] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:23.064] [mlr3] Finished benchmark
## INFO [13:57:23.085] [bbotk] Result of batch 2:
## INFO [13:57:23.086] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:23.086] [bbotk] 21 10 0.9790323 0 0 0.285
## INFO [13:57:23.086] [bbotk] uhash
## INFO [13:57:23.086] [bbotk] 5e3b651b-df2b-40f6-b109-9b235a934049
## INFO [13:57:23.089] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:23.094] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:23.100] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:23.142] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:23.189] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:23.240] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:23.286] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:23.327] [mlr3] Finished benchmark
## INFO [13:57:23.347] [bbotk] Result of batch 3:
## INFO [13:57:23.349] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:23.349] [bbotk] 6 7 0.9806452 0 0 0.192
## INFO [13:57:23.349] [bbotk] uhash
## INFO [13:57:23.349] [bbotk] ff9baa41-f055-4924-8888-dbab343705d1
## INFO [13:57:23.351] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:23.355] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:23.361] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:23.420] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:23.484] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:23.549] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:23.621] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:23.678] [mlr3] Finished benchmark
## INFO [13:57:23.700] [bbotk] Result of batch 4:
## INFO [13:57:23.701] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:23.701] [bbotk] 18 1 0.9798387 0 0 0.278
## INFO [13:57:23.701] [bbotk] uhash
## INFO [13:57:23.701] [bbotk] 77fffd66-f218-4a07-98ee-f7d576ca85b1
## INFO [13:57:23.703] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:23.707] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:23.713] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:23.748] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:23.785] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:23.824] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:23.868] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:23.903] [mlr3] Finished benchmark
## INFO [13:57:23.926] [bbotk] Result of batch 5:
## INFO [13:57:23.927] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:23.927] [bbotk] 3 4 0.9806452 0 0 0.156
## INFO [13:57:23.927] [bbotk] uhash
## INFO [13:57:23.927] [bbotk] b23b096c-5a1e-4941-b785-7ee55c947afa
## INFO [13:57:23.930] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:23.934] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:23.941] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:23.997] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:24.055] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:24.115] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:24.183] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:24.238] [mlr3] Finished benchmark
## INFO [13:57:24.260] [bbotk] Result of batch 6:
## INFO [13:57:24.262] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:24.262] [bbotk] 13 9 0.9798387 0 0 0.261
## INFO [13:57:24.262] [bbotk] uhash
## INFO [13:57:24.262] [bbotk] 6a2deb1a-e803-44c0-b51b-b5422e6336a4
## INFO [13:57:24.264] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:24.268] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:24.275] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:24.326] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:24.381] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:24.451] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:24.507] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:24.559] [mlr3] Finished benchmark
## INFO [13:57:24.579] [bbotk] Result of batch 7:
## INFO [13:57:24.581] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:24.581] [bbotk] 13 6 0.9806452 0 0 0.249
## INFO [13:57:24.581] [bbotk] uhash
## INFO [13:57:24.581] [bbotk] c2545532-6e5a-411f-9f69-bfd75312902c
## INFO [13:57:24.583] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:24.586] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:24.593] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:24.649] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:24.709] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:24.775] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:24.836] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:24.890] [mlr3] Finished benchmark
## INFO [13:57:24.910] [bbotk] Result of batch 8:
## INFO [13:57:24.911] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:24.911] [bbotk] 16 6 0.9806452 0 0 0.265
## INFO [13:57:24.911] [bbotk] uhash
## INFO [13:57:24.911] [bbotk] 43e933ed-70a6-45c4-884a-c6dd78c58590
## INFO [13:57:24.913] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:24.917] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:24.924] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:24.964] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:25.136] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:25.252] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:25.329] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:25.382] [mlr3] Finished benchmark
## INFO [13:57:25.407] [bbotk] Result of batch 9:
## INFO [13:57:25.412] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:25.412] [bbotk] 6 10 0.9806452 0 0 0.33
## INFO [13:57:25.412] [bbotk] uhash
## INFO [13:57:25.412] [bbotk] a0751c86-e072-46fd-9d50-1e856746a7c2
## INFO [13:57:25.419] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:25.427] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:25.438] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:25.488] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:25.561] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:25.626] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:25.696] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:25.749] [mlr3] Finished benchmark
## INFO [13:57:25.776] [bbotk] Result of batch 10:
## INFO [13:57:25.778] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:25.778] [bbotk] 6 4 0.9806452 0 0 0.251
## INFO [13:57:25.778] [bbotk] uhash
## INFO [13:57:25.778] [bbotk] 155bde19-489a-4481-8eea-84932578cd76
## INFO [13:57:25.780] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:25.785] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:25.794] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:25.868] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:26.112] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:26.181] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:26.252] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:26.316] [mlr3] Finished benchmark
## INFO [13:57:26.341] [bbotk] Result of batch 11:
## INFO [13:57:26.343] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:26.343] [bbotk] 18 9 0.9798387 0 0 0.478
## INFO [13:57:26.343] [bbotk] uhash
## INFO [13:57:26.343] [bbotk] 387509df-a737-4c69-8ec4-15b0412dd1bc
## INFO [13:57:26.346] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:26.350] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:26.358] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:26.424] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:26.497] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:26.567] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:26.650] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:26.722] [mlr3] Finished benchmark
## INFO [13:57:26.748] [bbotk] Result of batch 12:
## INFO [13:57:26.750] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:26.750] [bbotk] 16 8 0.9790323 0 0 0.317
## INFO [13:57:26.750] [bbotk] uhash
## INFO [13:57:26.750] [bbotk] aa55c319-0281-4cb7-ba65-433f6381ad9e
## INFO [13:57:26.752] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:26.757] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:26.765] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:26.831] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:26.923] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:26.994] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:27.063] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:27.135] [mlr3] Finished benchmark
## INFO [13:57:27.163] [bbotk] Result of batch 13:
## INFO [13:57:27.165] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:27.165] [bbotk] 16 9 0.9806452 0 0 0.323
## INFO [13:57:27.165] [bbotk] uhash
## INFO [13:57:27.165] [bbotk] 586ee644-1594-4c28-8cfb-2dc6a9eb80eb
## INFO [13:57:27.167] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:27.173] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:27.183] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:27.241] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:27.298] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:27.356] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:27.418] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:27.470] [mlr3] Finished benchmark
## INFO [13:57:27.501] [bbotk] Result of batch 14:
## INFO [13:57:27.503] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:27.503] [bbotk] 6 5 0.9806452 0 0 0.243
## INFO [13:57:27.503] [bbotk] uhash
## INFO [13:57:27.503] [bbotk] 79ed527a-07ba-4d5e-989d-b51ce7f9f361
## INFO [13:57:27.505] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:27.510] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:27.518] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:27.569] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:27.623] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:27.677] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:27.731] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:27.782] [mlr3] Finished benchmark
## INFO [13:57:27.805] [bbotk] Result of batch 15:
## INFO [13:57:27.806] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:27.806] [bbotk] 8 4 0.9806452 0 0 0.224
## INFO [13:57:27.806] [bbotk] uhash
## INFO [13:57:27.806] [bbotk] 02a2e1f5-477b-492d-8eac-12d5ef45b240
## INFO [13:57:27.808] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:27.813] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:27.821] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:27.880] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:27.945] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:28.010] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:28.085] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:28.146] [mlr3] Finished benchmark
## INFO [13:57:28.169] [bbotk] Result of batch 16:
## INFO [13:57:28.171] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:28.171] [bbotk] 11 7 0.9806452 0 0 0.282
## INFO [13:57:28.171] [bbotk] uhash
## INFO [13:57:28.171] [bbotk] e69688bf-d8d1-447c-9a9d-36fe594cc8dc
## INFO [13:57:28.173] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:28.178] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:28.186] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:28.258] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:28.338] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:28.416] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:28.488] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:28.552] [mlr3] Finished benchmark
## INFO [13:57:28.575] [bbotk] Result of batch 17:
## INFO [13:57:28.577] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:28.577] [bbotk] 18 7 0.9798387 0 0 0.324
## INFO [13:57:28.577] [bbotk] uhash
## INFO [13:57:28.577] [bbotk] e2577f6e-e18c-47ca-a6bc-4faec78013f4
## INFO [13:57:28.579] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:28.584] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:28.591] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:28.661] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:28.730] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:28.799] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:28.872] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:28.941] [mlr3] Finished benchmark
## INFO [13:57:28.969] [bbotk] Result of batch 18:
## INFO [13:57:28.972] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:28.972] [bbotk] 16 3 0.9814516 0 0 0.309
## INFO [13:57:28.972] [bbotk] uhash
## INFO [13:57:28.972] [bbotk] c3bec1f8-9cb9-4934-bdfa-f2092bbfa0db
## INFO [13:57:28.974] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:28.980] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:28.988] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:29.052] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:29.119] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:29.184] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:29.253] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:29.312] [mlr3] Finished benchmark
## INFO [13:57:29.335] [bbotk] Result of batch 19:
## INFO [13:57:29.337] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:29.337] [bbotk] 16 10 0.9798387 0 0 0.283
## INFO [13:57:29.337] [bbotk] uhash
## INFO [13:57:29.337] [bbotk] 875ad9a9-28b8-41aa-a04f-75bf752994ea
## INFO [13:57:29.339] [bbotk] Evaluating 1 configuration(s)
## INFO [13:57:29.343] [mlr3] Running benchmark with 5 resampling iterations
## INFO [13:57:29.350] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 1/5)
## INFO [13:57:29.401] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 2/5)
## INFO [13:57:29.455] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 3/5)
## INFO [13:57:29.509] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 4/5)
## INFO [13:57:29.564] [mlr3] Applying learner 'classif.ranger' on task 'PreSex' (iter 5/5)
## INFO [13:57:29.613] [mlr3] Finished benchmark
## INFO [13:57:29.637] [bbotk] Result of batch 20:
## INFO [13:57:29.639] [bbotk] mtry min.node.size classif.acc warnings errors runtime_learners
## INFO [13:57:29.639] [bbotk] 8 3 0.9806452 0 0 0.227
## INFO [13:57:29.639] [bbotk] uhash
## INFO [13:57:29.639] [bbotk] 1489c57e-f32d-4f3c-b042-69e43941a7de
## INFO [13:57:29.655] [bbotk] Finished optimizing after 20 evaluation(s)
## INFO [13:57:29.655] [bbotk] Result:
## INFO [13:57:29.657] [bbotk] mtry min.node.size learner_param_vals x_domain classif.acc
## INFO [13:57:29.657] [bbotk] <int> <int> <list> <list> <num>
## INFO [13:57:29.657] [bbotk] 16 3 <list[7]> <list[2]> 0.9814516
## mtry min.node.size learner_param_vals x_domain classif.acc
## <int> <int> <list> <list> <num>
## 1: 16 3 <list[7]> <list[2]> 0.9814516
# Retrieve the learner parameter values of the best configuration found by
# the tuner: the values already set on the learner plus the tuned ones.
best_params <- instance$result_learner_param_vals
print(best_params)
## $importance
## [1] "impurity"
##
## $keep.inbag
## [1] TRUE
##
## $num.threads
## [1] 1
##
## $num.trees
## [1] 100
##
## $write.forest
## [1] TRUE
##
## $mtry
## [1] 16
##
## $min.node.size
## [1] 3
# Apply the best parameter values to the learner and train it on the task.
rf_learner$param_set$values <- best_params
rf_learner$train(sex_task)
# Turn the learner's importance scores into a table with one row per feature
# and the columns "Feature" and "Importance".
importance <- setnames(
  as.data.table(rf_learner$importance(), keep.rownames = TRUE),
  c("Feature", "Importance")
)
# Print the variable importance table
print(importance)
## Feature Importance
## <char> <num>
## 1: NW_020869892.1_791056 2.982597e+02
## 2: NW_020870605.1_27103 1.370551e+02
## 3: NW_020870091.1_1375545 6.884403e+01
## 4: NW_020871184.1_1572 4.513733e+01
## 5: NW_020872449.1_998527 2.290148e+01
## 6: NW_020868494.1_348609 3.142147e+00
## 7: NW_020872622.1_1032359 3.057238e+00
## 8: NW_020870605.1_26371 2.538398e+00
## 9: NW_020869050.1_77499 2.477162e+00
## 10: NW_020872535.1_937428 2.069124e+00
## 11: NW_020870721.1_658002 1.797881e+00
## 12: NW_020871094.1_359648 1.796081e+00
## 13: NW_020872396.1_14640 1.770227e+00
## 14: NW_020870605.1_296954 1.211137e+00
## 15: NW_020870023.1_44774 1.094132e+00
## 16: NW_020871839.1_399752 9.567355e-02
## 17: NW_020870770.1_46798 7.761502e-02
## 18: NW_020868464.1_423800 6.037652e-02
## 19: NW_020870787.1_121276 3.886719e-02
## 20: NW_020869892.1_778639 2.314966e-02
## 21: NW_020872461.1_91390 2.182592e-02
## 22: NW_020871839.1_399750 6.379509e-03
## 23: NW_020869886.1_223326 2.461400e-03
## 24: NW_020870770.1_64668 0.000000e+00
## Feature Importance
结果可视化：任务数据概览、交叉验证的ROC曲线，以及一棵便于直观解读的决策树（rpart）
library(ggparty)
## Loading required package: ggplot2
## Loading required package: partykit
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
# Default mlr3viz plot of the classification task itself (this does not
# extract or draw the trees of the random forest).
autoplot(sex_task)

# ROC curve for resample result
autoplot(resample_result, type = "roc")

# Train a single rpart decision tree on the task and draw it with ggparty.
# keep_model = TRUE is set on the learner so the fitted model is retained.
rp_learner <- lrn("classif.rpart", keep_model = TRUE)
rp_learner$train(sex_task)
autoplot(rp_learner, type = "ggparty")
