KNN算法[分类算法]
kNN(k-近邻)分类算法的实现
(1) 简介:
(2)算法描述:
(3) PHP实现代码:
<?php
/*
 * KNN (k-nearest neighbours) classifier.
 *
 * Reads a labelled training table from train.txt and a table to classify from
 * test.txt, predicts a label for every test row, writes the completed table to
 * result.txt and prints it.
 *
 * Table layout as used by the code below: row 0 is skipped (treated as a
 * header row), column 0 is ignored when rows are compared (presumably a record
 * id - confirm against the data files) and the last column holds the label.
 */

/**
 * Load a space-separated text table into an array of rows.
 *
 * Lines may end in CRLF, LF or CR. Blank lines (including the empty "line"
 * after a trailing newline) are skipped so they never show up as bogus rows.
 *
 * @param string $filename Path of the text file to read.
 * @return array List of rows; each row is the list of fields obtained by
 *               splitting one line on single spaces.
 * @throws RuntimeException If the file cannot be read.
 */
function getFileContent($filename)
{
    if (!is_readable($filename)) {
        throw new RuntimeException("Cannot read input file: {$filename}");
    }

    $content = file_get_contents($filename);
    if ($content === false) {
        throw new RuntimeException("Cannot read input file: {$filename}");
    }

    $rows = [];
    foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
        if (trim($line) === '') {
            continue;
        }
        // Split on single spaces only, so an empty field (for example an
        // empty label placeholder) keeps its column position.
        $rows[] = explode(' ', $line);
    }

    return $rows;
}

/**
 * Shell sort of [label, score] pairs in descending order of score.
 *
 * A hand-written sort is kept on purpose: the relative order of rows with
 * equal scores decides which neighbours make it into the top k, so replacing
 * the algorithm could change classification results.
 *
 * @param array $array List (keys 0..n-1) of rows whose element [1] is the sort key.
 * @return array The same rows, ordered from highest to lowest element [1].
 */
function shell_sort($array)
{
    $size = count($array);

    for ($gap = (int) ($size / 2); $gap >= 1; $gap = (int) ($gap / 2)) {
        for ($i = $gap; $i < $size; $i++) {
            $current = $array[$i];
            $j = $i - $gap;

            // Shift rows with a smaller score towards the end. Whole rows are
            // moved so a row's label and score can never be separated.
            while ($j >= 0 && $array[$j][1] < $current[1]) {
                $array[$j + $gap] = $array[$j];
                $j -= $gap;
            }
            $array[$j + $gap] = $current;
        }
    }

    return $array;
}

/**
 * Classify one row with the k-nearest-neighbour rule.
 *
 * The distance between two rows is the square root of the number of compared
 * columns whose values differ; columns 1 .. count($test) - 2 are compared.
 * Each training row gets the similarity 1 / (1 + distance), the k most similar
 * rows vote, and k is the integer part of sqrt(number of training rows).
 *
 * @param array $test     The row to classify (its first and last columns are not compared).
 * @param array $train    Training table; row 0 is skipped, the last column of each row is its label.
 * @param mixed $flagsyes Label counted as a "yes" vote.
 * @param mixed $flagsno  Label returned when "yes" does not have a strict majority.
 * @return mixed $flagsyes if more than half of the k nearest rows carry it, otherwise $flagsno.
 */
function KNN($test, $train, $flagsyes, $flagsno)
{
    $featureEnd = count($test) - 1; // exclusive upper bound of the compared columns
    $trainSize = count($train);

    $neighbours = [];
    for ($i = 1; $i < $trainSize; $i++) {
        $row = $train[$i];

        $mismatches = 0;
        for ($j = 1; $j < $featureEnd; $j++) {
            // A column missing from a short training row counts as a mismatch.
            // Loose comparison is kept so numeric-looking fields such as "1"
            // and "1.0" still match, as they always have.
            if (!isset($row[$j]) || $test[$j] != $row[$j]) {
                $mismatches++;
            }
        }

        $distance = sqrt($mismatches);
        $neighbours[] = [$row[count($row) - 1], 1 / (1 + $distance)];
    }

    $ranked = shell_sort($neighbours);

    // k = floor(sqrt(N)); max() keeps sqrt() away from a negative argument
    // when the training table is completely empty.
    $k = (int) sqrt(max(0, $trainSize - 1));

    $count_yes = 0;
    for ($i = 0; $i < $k; $i++) {
        if ($ranked[$i][0] == $flagsyes) {
            $count_yes++;
        }
    }
    $count_no = $k - $count_yes;

    return $count_yes > $count_no ? $flagsyes : $flagsno;
}

/*
 * Script body: classify every test row, save the table, print it.
 */
try {
    $train = getFileContent('train.txt');
    $test = getFileContent('test.txt');

    // The labels are the strings 'Y' and 'N'. They must be quoted: bare Y / N
    // are undefined constants, which is a fatal Error on PHP 8.
    for ($i = 1; $i < count($test); $i++) {
        $test[$i][count($test[0]) - 1] = KNN($test[$i], $train, 'Y', 'N');
    }

    // Write the table to result.txt: every field is followed by a tab and
    // every row is terminated by CRLF.
    $fp = fopen('result.txt', 'wb');
    if ($fp === false) {
        throw new RuntimeException('Cannot open result.txt for writing');
    }
    foreach ($test as $row) {
        foreach ($row as $field) {
            fwrite($fp, $field . "\t");
        }
        fwrite($fp, "\r\n");
    }
    fclose($fp);

    // Print the classified table.
    echo "<pre>";
    print_r($test);
    echo "</pre>";
} catch (RuntimeException $e) {
    // Report input/output problems instead of continuing with empty data.
    echo 'KNN classification failed: ' . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
}