数字验证码的识别

数字验证码很多地方都会用到,本文主要针对那些比较规范的验证码的识别.何谓规范?规范就是数字的大小几乎一致,颜色对比度较高,没什么干扰线.识别的依据是最基础的办法——比对:先取样,保存成字模,再用字模去和将要识别的图片进行比较,取最接近的那个结果.不过在比较之前,必须先把图片里面的数据提取出来,并适当地去除一些干扰.

下面就是识别部分的代码:

 

/*
 * ImageCode.java
 *
 * Created on 2007年1月18日, 下午10:00
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 
*/

package net.bccn.hadeslee.programfan;

import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.net.URL;
import javax.imageio.ImageIO;

/**
 * Recognises fixed-layout numeric verification-code (CAPTCHA) images by
 * template matching.
 *
 * <p>An image is treated as four digit cells laid out left to right, each
 * 16 pixels wide and 13 pixels high. Every cell is binarised (pure white
 * becomes 0, anything else becomes 1) and compared pixel by pixel against
 * stored templates. For each digit 0-9 there are five templates, one per
 * placement of the glyph inside the cell: center, left, right, up and down.
 * The digit whose best template differs in the fewest pixels wins; 6, 8 and 9
 * are then re-examined with a stroke test because their templates are close
 * to each other (and to 0).
 *
 * <p>Templates are read once, when the class is loaded, from the classpath
 * resources {@code /net/bccn/hadeslee/model/programfan_<digit>.mod}.
 *
 * @author hadeslee
 */
public class ImageCode {

    /** Number of digits read from one image. */
    private static final int DIGIT_COUNT = 4;

    /** Width in pixels of one digit cell. */
    private static final int CELL_WIDTH = 16;

    /** Height in pixels of one digit cell. */
    private static final int CELL_HEIGHT = 13;

    /** Number of pixels in one digit cell (16 * 13 = 208). */
    private static final int CELL_SIZE = CELL_WIDTH * CELL_HEIGHT;

    /** Number of glyph placements stored per digit (center, left, right, up, down). */
    private static final int POSITION_COUNT = 5;

    /** Number of distinct digits (0-9). */
    private static final int DIGIT_KINDS = 10;

    /** Value returned by {@code getRGB} for an opaque white pixel (0xFFFFFFFF). */
    private static final int WHITE = -1;

    /** Templates indexed as {@code model[position][digit][pixel]}; pixel index is row * 16 + column. */
    private static final int[][][] model = new int[POSITION_COUNT][DIGIT_KINDS][CELL_SIZE];

    // Load the templates exactly once, when the class is initialised.
    static {
        initNumModel();
    }

    /**
     * Creates a new instance of ImageCode.
     *
     * <p>The templates are shared static data that the static initialiser has
     * already loaded, so nothing needs to be read here.
     */
    public ImageCode() {
    }

    /**
     * Decodes an image from the given stream and recognises its four digits.
     *
     * <p>The stream is not closed by this method.
     *
     * @param is stream containing an encoded image; may be {@code null}
     * @return the recognised digits, or an empty string if the stream is
     *         {@code null}, cannot be decoded, or the image cannot be processed
     */
    public String getNumber(InputStream is) {
        if (is == null) {
            return "";
        }
        try {
            // ImageIO.read yields null when no decoder accepts the data;
            // the BufferedImage overload turns that into "".
            return getNumber(ImageIO.read(is));
        } catch (Exception exe) {
            // Best-effort API boundary: report the problem and return no result.
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Recognises the four digits in the given image.
     *
     * @param bi the image to read; it must be at least 64 pixels wide and
     *           13 pixels high
     * @return the recognised digits, or an empty string if the image is
     *         {@code null}, too small, or cannot be processed
     */
    public String getNumber(BufferedImage bi) {
        if (bi == null
                || bi.getWidth() < DIGIT_COUNT * CELL_WIDTH
                || bi.getHeight() < CELL_HEIGHT) {
            return "";
        }
        try {
            StringBuilder sb = new StringBuilder(DIGIT_COUNT);
            for (int i = 0; i < DIGIT_COUNT; i++) {
                sb.append(doCheck(getData(bi, i)));
            }
            return sb.toString();
        } catch (RuntimeException exe) {
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Loads the templates for every digit into {@link #model}.
     *
     * <p>Each digit is handled independently: a missing or unreadable resource
     * is reported on {@code System.err} and leaves that digit's templates at
     * their current values without affecting the other digits.
     */
    private static void initNumModel() {
        for (int digit = 0; digit < DIGIT_KINDS; digit++) {
            String resource = "/net/bccn/hadeslee/model/programfan_" + digit + ".mod";
            InputStream in = ImageCode.class.getResourceAsStream(resource);
            if (in == null) {
                System.err.println("ImageCode: template resource not found: " + resource);
                continue;
            }
            try {
                loadModel(digit, in);
            } catch (IOException exe) {
                exe.printStackTrace();
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only resource fails.
                }
            }
        }
    }

    /**
     * Parses one template file into {@link #model}.
     *
     * <p>The file consists of sections, each introduced by a name
     * ({@code #center}, {@code #left}, {@code #right}, {@code #up},
     * {@code #down}) and followed by comma-separated 0/1 values. {@code '#'}
     * and {@code ','} are treated as whitespace, so a section header is read as
     * a plain word and the values as numbers.
     *
     * @param digit the digit (0-9) whose templates are being loaded
     * @param in    the open template resource; the caller closes it
     * @throws IOException if reading the resource fails
     */
    private static void loadModel(int digit, InputStream in) throws IOException {
        StreamTokenizer st = new StreamTokenizer(new InputStreamReader(in, "UTF-8"));
        st.whitespaceChars('#', '#');
        st.whitespaceChars(',', ',');
        st.eolIsSignificant(false);

        int token;
        while ((token = st.nextToken()) != StreamTokenizer.TT_EOF) {
            if (token != StreamTokenizer.TT_WORD) {
                continue; // values outside any section and stray characters are ignored
            }
            int who = positionIndex(st.sval);
            int index = 0;
            while (st.nextToken() == StreamTokenizer.TT_NUMBER) {
                // Values beyond one cell's worth are dropped instead of overflowing the array.
                if (index < CELL_SIZE) {
                    model[who][digit][index++] = (int) st.nval;
                }
            }
            // The token that ended the number run is the next section name or EOF.
            st.pushBack();
        }
    }

    /**
     * Maps a section name to its index in the first dimension of {@link #model}.
     *
     * @param name the section name read from a template file
     * @return 0 for "center", 1 for "left", 2 for "right", 3 for "up",
     *         4 for "down"; any other name also maps to 0
     */
    private static int positionIndex(String name) {
        if (name.equals("left")) {
            return 1;
        }
        if (name.equals("right")) {
            return 2;
        }
        if (name.equals("up")) {
            return 3;
        }
        if (name.equals("down")) {
            return 4;
        }
        return 0;
    }

    /**
     * Downloads and decodes the image at the given URL.
     *
     * <p>This helper is not called from within this class.
     *
     * @param url the address of the image
     * @return the decoded image, or {@code null} if the URL is malformed or
     *         reading fails
     */
    private BufferedImage getBI(String url) {
        try {
            return ImageIO.read(new URL(url));
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Extracts one digit cell from the image as a binary pixel array.
     *
     * @param image the full image
     * @param index zero-based cell number, counted from the left
     * @return 208 values in row-major order (row * 16 + column): 0 where the
     *         pixel is pure opaque white, 1 otherwise
     */
    private int[] getData(BufferedImage image, int index) {
        int left = index * CELL_WIDTH;
        int[] demo = new int[CELL_SIZE];
        for (int row = 0; row < CELL_HEIGHT; row++) {
            for (int col = 0; col < CELL_WIDTH; col++) {
                demo[row * CELL_WIDTH + col] = (image.getRGB(left + col, row) == WHITE ? 0 : 1);
            }
        }
        return demo;
    }

    /**
     * Computes how closely a cell matches a digit.
     *
     * @param who  the digit (0-9) to compare against
     * @param demo the binary pixels of the cell
     * @return the smallest number of differing pixels over the five placement
     *         templates of the digit, capped at 208
     */
    private int getMin(int who, int[] demo) {
        int best = CELL_SIZE;
        for (int position = 0; position < POSITION_COUNT; position++) {
            int[] template = model[position][who];
            int diff = 0;
            for (int j = 0; j < demo.length; j++) {
                if (template[j] != demo[j]) {
                    diff++;
                }
            }
            if (diff < best) {
                best = diff;
            }
        }
        return best;
    }

    /**
     * Disambiguates 6, 8, 9 and 0 by testing for characteristic strokes.
     *
     * <p>Pixel indices are row * 16 + column. Three areas are probed:
     * <ul>
     *   <li>upper right (rows 1-5, columns 10-12): a short run of set pixels
     *       there counts as a right stroke;</li>
     *   <li>lower left (rows 8-9, columns 3-5): two vertically adjacent set
     *       pixels there count as a left stroke;</li>
     *   <li>middle (rows 5-6, columns 7-10): used to tell 8 from 0.</li>
     * </ul>
     * Both strokes give 8, only the left one gives 6, only the right one gives
     * 9, and neither keeps {@code origin}. A candidate 8 without a middle bar is
     * reported as 0.
     *
     * @param demo   the binary pixels of the cell
     * @param origin the digit chosen by template matching
     * @return the corrected digit
     */
    private int get689(int[] demo, int origin) {
        boolean isRight = allSet(demo, 75, 90) || allSet(demo, 76, 91)
                || allSet(demo, 58, 74, 90) || allSet(demo, 59, 75, 91)
                || allSet(demo, 60, 76, 92) || allSet(demo, 28, 44, 60)
                || allSet(demo, 27, 43, 59);
        boolean isLeft = allSet(demo, 131, 147) || allSet(demo, 132, 148)
                || allSet(demo, 133, 149);

        int result;
        if (isLeft && isRight) {
            result = 8;
        } else if (isLeft) {
            result = 6;
        } else if (isRight) {
            result = 9;
        } else {
            result = origin;
        }

        if (result == 8) {
            int middlePixels = demo[103] + demo[104] + demo[105] + demo[106]
                    + demo[87] + demo[88] + demo[89] + demo[90];
            boolean hasMiddleBar = allSet(demo, 103, 104, 105, 106)
                    || allSet(demo, 87, 88, 89, 90)
                    || middlePixels > 3;
            if (!hasMiddleBar) {
                // Closed outline on both sides but nothing in the middle: a zero.
                return 0;
            }
        }
        return result;
    }

    /**
     * Tells whether every listed pixel of the cell is set.
     *
     * @param demo    the binary pixels of the cell
     * @param indices pixel indices to test
     * @return {@code true} if all listed pixels equal 1
     */
    private static boolean allSet(int[] demo, int... indices) {
        for (int k = 0; k < indices.length; k++) {
            if (demo[indices[k]] != 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the digit whose templates are closest to the given cell.
     *
     * @param demo the binary pixels of the cell
     * @return the best-matching digit after 6/8/9/0 disambiguation, or -1 if
     *         every template of every digit differs from the cell in all 208
     *         pixels
     */
    private int doCheck(int[] demo) {
        int number = -1;
        int best = CELL_SIZE;
        for (int digit = 0; digit < DIGIT_KINDS; digit++) {
            int diff = getMin(digit, demo);
            if (diff < best) {
                best = diff;
                number = digit;
            }
        }
        if (number == 6 || number == 8 || number == 9) {
            number = get689(demo, number);
        }
        return number;
    }

}


下面是一些字模的内容,把它保存成相应的文件,并能让程序找到就可以了.

比如下面是数字0的字模,其中包含它在不同位置(居中、偏左、偏右、偏上、偏下)的字模,其它数字以此类推.这些字模都是先取到样本,然后再分类整理出来的.

#center
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
#left
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
#right
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
#up
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
#down
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,



在此算法的实现中,主要是针对比较规范的验证码,另外还要对外形比较相似的6、8、9、0进行分辨.实现识别的方式有很多种,大家仁者见仁,智者见智吧.不过,说句题外话,MOTO的识别就很牛,它对手写字体的支持都能达到很高的识别率,更不要说是正体了.这就是另外一个领域了,不是一两句代码就能搞得定的:)

posted @ 2014-01-23 23:53  brave-sailor  阅读(874)  评论(0)  编辑  收藏  举报