PHP实现图片的汉明码提取与降维
作者感言:数学不好,遇到算法问题分分钟狗带,毫无转圜的余地-_-|||
最近心血来潮,看了相似图片的搜索,最最最初级的方法即提取汉明码,之后匹配汉明距离。当然,在数以亿计的汉明码中,要筛出需要的图片,计算量太大了,满足不了生产要求。作为数学小白,此时完全没辙了。这时不小心看到“降维”一说,可以把降维后的哈希码存入数据库,加之分类的约束,基本满足小白初尝电商相似图片搜索的要求了:)
以下直接贴出代码(2017-8-17傍晚对降维做了调整):
/**
 * Image fingerprint (average-hash style) extractor with optional reduction.
 *
 * Pipeline implemented by CalPrint():
 *   1. shrink the source image to PRINT_WIDTH x PRINT_HEIGHT pixels,
 *   2. convert every pixel to a gray level,
 *   3. emit one bit per pixel: 1 when the gray level is >= the mean, else 0.
 *
 * DecendDimension() then shortens the resulting bit string by majority vote
 * over consecutive groups of bits, so that a compact key can be stored/indexed.
 *
 * All public methods return false on failure instead of throwing.
 */
class DFingerPrint
{
    /** @var resource|\GdImage|null Source image handle; null when loading failed. */
    private $hImg = null;

    /** @var resource|\GdImage|null Scratch image of PRINT_WIDTH x PRINT_HEIGHT pixels. */
    private $hTargetImg = null;

    /** @var array<int, array<int, int>> Per-pixel values indexed as [x][y]: gray levels, then 0/1 bits. */
    private $arrResult = [];

    /** @var string Full-length bit string produced by the last successful CalPrint(). */
    private $fullBinData = '';

    const PRINT_WIDTH = 8;
    const PRINT_HEIGHT = 8;

    public const DESC_DM_2 = 2;
    public const DESC_DM_4 = 4;
    public const DESC_DM_8 = 8;

    /**
     * Loads the image at $imgPath, choosing the GD loader from the file extension.
     *
     * Unknown extensions fall back to imagecreatefromgd2(). When the file is
     * missing, the loader is unavailable, or decoding fails, the object is left
     * without an image and CalPrint() will return false.
     *
     * @param string $imgPath Path of the image file.
     */
    public function __construct($imgPath)
    {
        if (file_exists($imgPath)) {
            $loaders = [
                'jpg'  => 'imagecreatefromjpeg',
                'jpeg' => 'imagecreatefromjpeg',
                'gif'  => 'imagecreatefromgif',
                'png'  => 'imagecreatefrompng',
                'bmp'  => 'imagecreatefrombmp',
            ];
            $ext = $this->GetExt($imgPath);
            $loader = $loaders[$ext] ?? 'imagecreatefromgd2';

            // Not every loader exists on every build (e.g. BMP support), so
            // avoid a fatal "undefined function" error and just fail softly.
            if (function_exists($loader)) {
                $img = $loader($imgPath);
                $this->hImg = ($img === false) ? null : $img;
            }
        }

        if ($this->hImg) {
            $target = imagecreatetruecolor(self::PRINT_WIDTH, self::PRINT_HEIGHT);
            $this->hTargetImg = ($target === false) ? null : $target;
        }
    }

    /**
     * Computes the full fingerprint of the loaded image.
     *
     * @return array{bin: string, hex: string}|false Bit string and its hex form,
     *                                               or false when any step fails.
     */
    public function CalPrint()
    {
        if (!$this->hImg) {
            return false;
        }
        if (!$this->SizeCompress()) {
            return false;
        }
        if (!$this->ToGray()) {
            return false;
        }

        $binData = $this->Binaryzation();
        if (!$binData) {
            return false;
        }
        $this->fullBinData = $binData;

        // base_convert() on the whole bit string is not used here: it can lose
        // precision on large numbers, which corrupts the low-order hex digits.
        // Bin2Hex() converts four bits at a time instead.
        return [
            'bin' => $binData,
            'hex' => $this->Bin2Hex($binData),
        ];
    }

    /**
     * Shortens the fingerprint from the last CalPrint() by majority vote.
     *
     * Every run of $dm consecutive bits collapses to a single bit: 1 when the
     * run holds more ones than zeros, otherwise 0 (ties give 0). A larger $dm
     * therefore yields a shorter, less precise fingerprint.
     *
     * @param int $dm Group size; one of DESC_DM_2, DESC_DM_4, DESC_DM_8.
     *
     * @return array{bin: string, hex: string}|false Reduced fingerprint, or false
     *                                               when CalPrint() has not succeeded
     *                                               yet or $dm is not supported.
     */
    public function DecendDimension($dm = 8)
    {
        if (!$this->fullBinData) {
            return false;
        }
        // Loose comparison is kept on purpose so numeric strings such as '8'
        // remain accepted, as before.
        if (!in_array($dm, [self::DESC_DM_2, self::DESC_DM_4, self::DESC_DM_8])) {
            return false;
        }
        $dm = (int) $dm;

        $len = strlen($this->fullBinData);
        $newBinData = '';
        $vote = 0;
        for ($i = 0; $i < $len; $i++) {
            // Square brackets: curly-brace string offsets no longer parse on PHP 8.
            $vote += ($this->fullBinData[$i] === '1') ? 1 : -1;

            // Last bit of the current group: emit the verdict and start over.
            if ($i % $dm === $dm - 1) {
                $newBinData .= ($vote > 0) ? '1' : '0';
                $vote = 0;
            }
        }

        return [
            'bin' => $newBinData,
            'hex' => $this->Bin2Hex($newBinData),
        ];
    }

    /**
     * Scales the source image down into the scratch image.
     *
     * imagecopyresized() is used deliberately (no resampling) so that hashes
     * stay identical to those produced by earlier versions of this class.
     *
     * @return bool True on success, false when an image handle is missing or
     *              the copy fails.
     */
    private function SizeCompress()
    {
        if (!$this->hImg || !$this->hTargetImg) {
            return false;
        }

        return imagecopyresized(
            $this->hTargetImg,
            $this->hImg,
            0,
            0,
            0,
            0,
            self::PRINT_WIDTH,
            self::PRINT_HEIGHT,
            imagesx($this->hImg),
            imagesy($this->hImg)
        );
    }

    /**
     * Fills $arrResult[x][y] with the gray level of each scratch-image pixel.
     *
     * Gray level = intval(0.3 * R + 0.59 * G + 0.11 * B).
     *
     * @return bool Always true.
     */
    private function ToGray()
    {
        for ($x = 0; $x < self::PRINT_WIDTH; $x++) {
            for ($y = 0; $y < self::PRINT_HEIGHT; $y++) {
                $color = imagecolorat($this->hTargetImg, $x, $y);
                $red = ($color >> 16) & 0xff;
                $green = ($color >> 8) & 0xff;
                $blue = $color & 0xff;
                $this->arrResult[$x][$y] = intval(0.3 * $red + 0.59 * $green + 0.11 * $blue);
            }
        }

        return true;
    }

    /**
     * Thresholds the gray levels against their mean and builds the bit string.
     *
     * $arrResult is overwritten in place with the 0/1 values. Bits are emitted
     * with x as the outer loop and y as the inner loop.
     *
     * @return string|false Bit string of PRINT_WIDTH * PRINT_HEIGHT characters,
     *                      or false when the gray levels are not available.
     */
    private function Binaryzation()
    {
        if (!$this->arrResult) {
            return false;
        }
        if (count($this->arrResult) != self::PRINT_WIDTH) {
            return false;
        }

        $totalVal = 0;
        for ($x = 0; $x < self::PRINT_WIDTH; $x++) {
            for ($y = 0; $y < self::PRINT_HEIGHT; $y++) {
                $totalVal += $this->arrResult[$x][$y];
            }
        }
        $avgVal = $totalVal / (self::PRINT_WIDTH * self::PRINT_HEIGHT);

        $binData = '';
        for ($x = 0; $x < self::PRINT_WIDTH; $x++) {
            for ($y = 0; $y < self::PRINT_HEIGHT; $y++) {
                $bit = ($this->arrResult[$x][$y] >= $avgVal) ? 1 : 0;
                $this->arrResult[$x][$y] = $bit;
                $binData .= $bit;
            }
        }

        return $binData;
    }

    /**
     * Releases the image handles.
     */
    public function __destruct()
    {
        // imagedestroy() only has an effect before PHP 8.0, where GD images are
        // resources; on newer versions dropping the references is enough.
        if (PHP_VERSION_ID < 80000) {
            if ($this->hImg) {
                imagedestroy($this->hImg);
            }
            if ($this->hTargetImg) {
                imagedestroy($this->hTargetImg);
            }
        }
        $this->hImg = null;
        $this->hTargetImg = null;
    }

    /**
     * Returns the lower-cased file extension of $path ('' when there is none).
     *
     * @param string $path File path.
     *
     * @return string
     */
    private function GetExt($path)
    {
        return strtolower(pathinfo($path, PATHINFO_EXTENSION));
    }

    /**
     * Converts a bit string to lowercase hex, four bits per digit, right-aligned.
     *
     * The loop is driven by the remaining length rather than the truthiness of
     * the remaining string, so a leftover "0" is still converted instead of
     * being silently dropped.
     *
     * @param string $bin String made of '0' and '1' characters.
     *
     * @return string Hex digits; '' for an empty input.
     */
    private function Bin2Hex($bin)
    {
        $bin = (string) $bin;
        $hex = '';
        for ($end = strlen($bin); $end > 0; $end -= 4) {
            $start = max(0, $end - 4);
            $nibble = substr($bin, $start, $end - $start);
            $hex = base_convert($nibble, 2, 16) . $hex;
        }

        return $hex;
    }
}
调用
// Load the image whose fingerprint is wanted.
$oFingerPrint = new DFingerPrint('1.jpg');

// Full fingerprint: array with 'bin' and 'hex' keys, or false on failure.
$arrPrint = $oFingerPrint->CalPrint();

// Reduced fingerprint built from the full one, collapsing every 8 bits into 1.
$arrDescendPrint = $oFingerPrint->DecendDimension(DFingerPrint::DESC_DM_8);
查看资料:
http://blog.csdn.net/lu597203933/article/details/45101859
http://blog.csdn.net/cshilin/article/details/52119682
附:从MySQL中查出指定汉明距离的SQL语句:
-- List stored fingerprints ordered by Hamming distance to the probe hash
-- 'ffffe229f9ffffff', keeping only rows whose distance is below 5.
-- Both hex strings are converted to unsigned integers, XOR-ed, and the set
-- bits of the result are counted.
SELECT
    uid,
    img_path,
    full_print2,
    BIT_COUNT(
        CONVERT(CONV(full_print2, 16, 10), UNSIGNED)
        ^ CONVERT(CONV('ffffe229f9ffffff', 16, 10), UNSIGNED)
    ) AS hamming_distance
FROM mvm_img_fingerprint
HAVING hamming_distance < 5
ORDER BY hamming_distance ASC;