R语言决策树分类模型
# Decision-tree (rpart) classifier that separates healthy-disk samples from
# failed-disk samples using SMART attributes.
#
# Inputs (tab-separated, no header row, "None" marks a missing value), all
# read from `data_dir`:
#   smart_data_section_good_15.txt               healthy data
#   smart_data_section_failTrainSet_last24h.txt  fault data, training part
#   smart_data_section_failTestSet_last24h.txt   fault data, test part
# Outputs, written to `data_dir`:
#   smartDT_last24h.pdf                plot of the fitted tree
#   smartTestSetWithSerNO_last24h.txt  one row per test sample: predicted
#                                      label, true label, update_time,
#                                      serial_number (no header, tab-separated)
#
# The former `rm(list = ls())`, `gc()` and `memory.limit(4000)` preamble was
# dropped: clearing the workspace is a side effect a script should not have,
# and `memory.limit()` is Windows-only and no longer supported in R >= 4.2.

library(corrplot) # NOTE(review): nothing below uses corrplot; kept as-is
library(rpart)

# Configuration ----

data_dir <- "D:/smart_data0608"
health_train_fraction <- 0.7 # leading share of healthy rows used for training
health_per_fault <- 20L      # healthy rows drawn per fault training row

# Fixed seed so the random draw of healthy training rows is reproducible.
set.seed(42L)

# Column names ----

# The read/write/verify error-counter groups share the same seven suffixes.
error_counters <- c(
  "corrected_ecc_fast", "corrected_ecc_delayed", "corrected_re",
  "total_errors_corrected", "correction_algo_invocations",
  "gigabytes_processed", "total_uncorrected_errors"
)
with_prefix <- function(prefix) paste(prefix, error_counters, sep = "_")

# NOTE(review): "load_unload_count" appears twice and
# "num_commands_size_not_larger_than_segment_size " carries a trailing space.
# Both are reproduced exactly from the original script because the intended
# names cannot be determined here; neither column is used by the model or the
# output file. Confirm against the data dictionary.
smart_columns <- c(
  "id", "serial_number", "update_time", "smart_health_status",
  "current_drive_temperature", "drive_trip_temperature",
  "elements_in_grown_defect_list", "manufactured_time", "cycle_count",
  "load_unload_count", "load_unload_count", "load_unload_cycles",
  "blocks_sent_to_initiator", "blocks_received_from_initiator",
  "blocks_read_from_cache",
  "num_commands_size_not_larger_than_segment_size ",
  "num_commands_size_larger_than_segment_size",
  "num_hours_powered_up", "num_minutes_next_test",
  with_prefix("read"), with_prefix("write"), with_prefix("verify"),
  "non_medium_error_count"
)

# Predictors used by the tree (the verify_* counters are not included).
feature_columns <- c(
  "current_drive_temperature", "elements_in_grown_defect_list",
  with_prefix("read"), with_prefix("write")
)

# Read one SMART dump from `data_dir` and apply the shared column names.
# Stops early when the file's column count does not match `smart_columns`,
# instead of silently producing NA column names.
read_smart_data <- function(file_name) {
  smart_data <- read.csv(
    file.path(data_dir, file_name),
    header = FALSE, sep = "\t", na.strings = "None"
  )
  if (ncol(smart_data) != length(smart_columns)) {
    stop(
      file_name, " has ", ncol(smart_data), " columns, expected ",
      length(smart_columns),
      call. = FALSE
    )
  }
  colnames(smart_data) <- smart_columns
  smart_data
}

# Load and label ----

data_health <- read_smart_data("smart_data_section_good_15.txt")
data_fault <- read_smart_data("smart_data_section_failTrainSet_last24h.txt")
data_fault_test <- read_smart_data("smart_data_section_failTestSet_last24h.txt")

# Class label: 0 = healthy, 1 = fault.
data_health$label <- 0
data_fault$label <- 1
data_fault_test$label <- 1

# Train / test split ----

n <- nrow(data_fault)
n_health <- nrow(data_health)

# The leading `health_train_fraction` of healthy rows is the training pool;
# the remaining rows are reserved for testing.
n_health_pool <- floor(n_health * health_train_fraction)

# Draw `health_per_fault` healthy rows per fault row, capped at the pool size
# so `sample.int()` cannot fail with "cannot take a sample larger than the
# population".
n_health_train <- min(n * health_per_fault, n_health_pool)
if (n_health_train < n * health_per_fault) {
  warning(
    "Only ", n_health_pool, " healthy training rows available; requested ",
    n * health_per_fault,
    call. = FALSE
  )
}
health_train_rows <- sample.int(n_health_pool, n_health_train)

# A logical mask (rather than a negative index) stays correct even when the
# training pool is empty.
is_health_test <- seq_len(n_health) > n_health_pool

dataNewTraining <- rbind(data_fault, data_health[health_train_rows, ])
dataNewTest <- rbind(data_fault_test, data_health[is_health_test, ])

# Fit and plot the decision tree ----

dt <- rpart(
  reformulate(feature_columns, response = "label"),
  data = dataNewTraining,
  method = "class"
)

# The device is opened only after the fit succeeded and is always closed,
# even if plotting fails.
pdf(file = file.path(data_dir, "smartDT_last24h.pdf"), family = "GB1")
tryCatch(
  {
    plot(dt, main = "smartDT")
    text(dt)
  },
  finally = invisible(dev.off())
)

# Predict on the test set ----

# For a classification tree this returns one probability column per class;
# data.frame() renames the columns "0"/"1" to X0/X1.
rawPredictScore <- predict(dt, dataNewTest, type = "prob")
predictScore <- data.frame(rawPredictScore)

# Predict healthy (0) only when its probability is strictly larger; ties are
# resolved as fault (1).
predictScore$label <- ifelse(predictScore$X0 > predictScore$X1, 0, 1)

write.table(
  data.frame(
    predictScore$label,
    dataNewTest$label,
    dataNewTest$update_time,
    dataNewTest$serial_number
  ),
  file = file.path(data_dir, "smartTestSetWithSerNO_last24h.txt"),
  row.names = FALSE, col.names = FALSE, sep = "\t"
)
分类结果:
//smartTestSetWithSerNO_last24h
健康样本数/健康判为故障样本数:583670/978
健康磁盘数/健康判为故障磁盘数:4150/12
健康样本预测准确率(被正确判为健康的样本占比,即 1 - 978/583670)为:0.9983243956345195
健康盘预测准确率(被正确判为健康的磁盘占比,即 1 - 12/4150)为:0.9971084337349397
--------------------------------
故障样本数/故障判为故障样本数:170/169
故障磁盘数/故障判为故障磁盘数:11/11
故障样本预测准确率(被正确判为故障的样本占比,即 169/170)为:0.9941176470588236
故障盘预测准确率(被正确判为故障的磁盘占比,即 11/11)为:1.0
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· Ollama——大语言模型本地部署的极速利器
· DeepSeek如何颠覆传统软件测试?测试工程师会被淘汰吗?