Patricia前缀树(Patricia Trie)及其基本操作

Patricia前缀树(Patricia Trie)及其基本操作

Trie是一种字典树,用于存储文本字符,并利用了单词之间共享前缀的特点,所以叫做前缀树。不像平衡BST,Trie的高度只与最长的文本串的长度s有关系,而与单词的数量n无关。Trie的节点分两种:内部结点和叶子结点,内部结点用来存储单词key的成分字母,如果设字母表大小为d,那么每个内部结点最多有d个孩子,叶子结点存储该单词作为key的数据内容(data)。注意内部结点和叶子结点并不是互斥的,一个内部结点本身可以有儿子结点,同时它也可以是一个叶子结点。例如:




这里ab结点本身就是叶子结点,因为它以#结束符标记了,同时它有两个儿子结点a和c,c是叶子结点。

如果一棵Trie中有很多结点只有一个儿子结点,可以用Patricia Trie(Linux内核中叫做Radix Tree)压缩存储。由于#结束符标记被看作是一个叶子结点,那么一棵Patricia Trie的任何内部结点都有2个或以上的孩子结点。

Patricia Trie的基本操作包括:插入、删除和查询。插入操作可能会涉及到split(拆分),删除操作可能会涉及到merge(合并)。基于Patricia Trie的基本性质,split和merge操作都是局部的,这样实现起来比较简单。详细内容请参考以下代码:

实现:

import java.util.LinkedList;
import java.util.List;
/**
 * Patricia Trie
 *
 * <p>A compressed prefix tree (radix tree) that maps non-empty string keys to
 * {@code Integer} values. Every edge carries a label of one or more
 * characters; a node flagged as <em>terminal</em> (printed with a trailing
 * {@code #}) marks the end of a stored key and holds that key's value.
 *
 * <p>Invariants maintained by {@link #insert(String, Integer)} and
 * {@link #delete(String)}:
 * <ul>
 *   <li>the children of a node are ordered by the first character of their
 *       label, and no two siblings share a first character;</li>
 *   <li>every non-terminal node other than the root has at least two
 *       children (single-child chains are merged);</li>
 *   <li>the root always keeps the empty label and is never merged.</li>
 * </ul>
 *
 * Copyright (c) 2011 ljs (http://blog.csdn.net/ljsspace/)
 * Licensed under GPL (http://www.opensource.org/licenses/gpl-license.php)
 *
 * @author ljs
 * 2011-06-27
 *
 */
public class PatriciaTrie {
    private static final String KEY_NOT_FOUND = "No such key is found for removal!";
    private static final String DUPLICATE_KEY = "Duplicate Key is found when insertion!";

    /**
     * A trie node. Declared static because it never needs the enclosing trie
     * instance.
     */
    private static class PatriciaTrieNode {
        /** Edge label leading to this node; empty only for the root. */
        private String key;
        /** Value of the stored key; meaningful only while {@link #terminal} is true. */
        private Integer data;
        /** Children ordered by the first character of their label. */
        private List<PatriciaTrieNode> children = new LinkedList<PatriciaTrieNode>();

        /** True when a stored key ends at this node (shown as "#"). */
        private boolean terminal;

        public PatriciaTrieNode() {
            this.key = "";
        }

        public PatriciaTrieNode(String key) {
            this.key = key;
        }

        @Override
        public String toString() {
            return this.key + (this.terminal ? "#" : "") + "(" + children.size() + ")";
        }
    }

    /** Created lazily by the first successful call to insert; null until then. */
    private PatriciaTrieNode root;

    /**
     * Looks up a key.
     *
     * @param key the key to search for; {@code null} and empty keys are never stored
     * @return the value stored under {@code key}, or {@code null} if the key is absent
     */
    public Integer find(String key) {
        if (key == null || key.length() == 0 || root == null) {
            return null;
        }
        return find(root, key);
    }

    /** Searches the children of {@code currNode} for the (non-empty) remaining {@code key}. */
    private Integer find(PatriciaTrieNode currNode, String key) {
        for (PatriciaTrieNode child : currNode.children) {
            int matched = commonPrefixLength(child.key, key);
            if (matched == 0) {
                if (key.charAt(0) < child.key.charAt(0)) {
                    // Children are sorted, so no later sibling can match either.
                    return null;
                }
                continue;
            }
            if (matched < child.key.length()) {
                // The key ends or diverges in the middle of this edge label.
                return null;
            }
            if (matched == key.length()) {
                // Exact node reached; it only counts if a key really ends here.
                return child.terminal ? child.data : null;
            }
            // Whole label consumed, more key characters left: descend.
            return find(child, key.substring(matched));
        }
        return null;
    }

    /**
     * Removes a key. {@code null} and empty keys, and calls on a trie that has
     * never been inserted into, are silently ignored.
     *
     * @param key the key to remove
     * @throws Exception if the trie has been populated but does not contain {@code key}
     */
    public void delete(String key) throws Exception {
        if (key == null || key.length() == 0 || root == null) {
            return;
        }
        delete(root, key);
    }

    /** Removes the (non-empty) remaining {@code key} from the subtree below {@code currNode}. */
    private void delete(PatriciaTrieNode currNode, String key) throws Exception {
        for (int i = 0; i < currNode.children.size(); i++) {
            PatriciaTrieNode child = currNode.children.get(i);
            int matched = commonPrefixLength(child.key, key);
            if (matched == 0) {
                if (key.charAt(0) < child.key.charAt(0)) {
                    // Sorted children: the key cannot appear further right.
                    throw new Exception(KEY_NOT_FOUND);
                }
                continue;
            }
            if (matched < child.key.length()) {
                // The key ends or diverges in the middle of this edge label.
                throw new Exception(KEY_NOT_FOUND);
            }
            if (matched < key.length()) {
                delete(child, key.substring(matched));
                return;
            }
            // Exact node reached.
            if (!child.terminal) {
                throw new Exception(KEY_NOT_FOUND);
            }
            if (child.children.isEmpty()) {
                currNode.children.remove(i);
                // Restore the ">= 2 children" invariant for the parent. The root
                // is exempt: merging it would give it a non-empty label and
                // make every remaining key unreachable.
                if (currNode != root && !currNode.terminal && currNode.children.size() == 1) {
                    absorbOnlyChild(currNode);
                }
            } else {
                child.terminal = false;
                child.data = null; // do not keep the removed key's value around
                if (child.children.size() == 1) {
                    absorbOnlyChild(child);
                }
            }
            return;
        }
        throw new Exception(KEY_NOT_FOUND);
    }

    /**
     * Inserts a new key. {@code null} and empty keys are silently ignored.
     *
     * @param key   the key to add
     * @param value the value to associate with {@code key}
     * @throws Exception if {@code key} is already present
     */
    public void insert(String key, Integer value) throws Exception {
        if (key == null || key.length() == 0) {
            return;
        }
        if (root == null) {
            root = new PatriciaTrieNode();
        }
        insert(root, key, value);
    }

    /** Inserts the (non-empty) remaining {@code key} into the subtree below {@code currNode}. */
    private void insert(PatriciaTrieNode currNode, String key, Integer value) throws Exception {
        for (int i = 0; i < currNode.children.size(); i++) {
            PatriciaTrieNode child = currNode.children.get(i);
            int matched = commonPrefixLength(child.key, key);
            if (matched == 0) {
                if (key.charAt(0) < child.key.charAt(0)) {
                    // e.g. children (e, f), insert "c"  ==>  (c#, e, f)
                    currNode.children.add(i, newLeaf(key, value));
                    return;
                }
                continue;
            }
            if (matched == child.key.length()) {
                if (matched == key.length()) {
                    // e.g. child "ab" with children (e, f), insert "ab"  ==>  "ab#"
                    if (child.terminal) {
                        throw new Exception(DUPLICATE_KEY);
                    }
                    child.terminal = true;
                    child.data = value;
                } else {
                    // e.g. child "ab#", insert "abc": continue below with "c"
                    insert(child, key.substring(matched), value);
                }
                return;
            }

            // The new key leaves the edge label part-way: split the label at
            // 'matched'. 'tail' takes over everything the child used to be.
            PatriciaTrieNode tail = new PatriciaTrieNode(child.key.substring(matched));
            tail.terminal = child.terminal;
            tail.data = child.data;
            tail.children = child.children;

            child.key = child.key.substring(0, matched);
            child.children = new LinkedList<PatriciaTrieNode>();
            if (matched == key.length()) {
                // e.g. child "abc#", insert "ab"  ==>  "ab#" -> "c#"
                child.terminal = true;
                child.data = value;
                child.children.add(tail);
            } else {
                // e.g. child "abc#", insert "abd"  ==>  "ab" -> ("c#", "d#")
                child.terminal = false;
                child.data = null; // the old value now lives on 'tail'
                PatriciaTrieNode leaf = newLeaf(key.substring(matched), value);
                if (leaf.key.charAt(0) < tail.key.charAt(0)) {
                    child.children.add(leaf);
                    child.children.add(tail);
                } else {
                    child.children.add(tail);
                    child.children.add(leaf);
                }
            }
            return;
        }
        // Larger than every existing child (or no children yet): append.
        currNode.children.add(newLeaf(key, value));
    }

    /** Returns the number of leading characters {@code a} and {@code b} have in common. */
    private static int commonPrefixLength(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int n = 0;
        while (n < limit && a.charAt(n) == b.charAt(n)) {
            n++;
        }
        return n;
    }

    /** Creates a terminal node with no children that holds {@code value}. */
    private static PatriciaTrieNode newLeaf(String key, Integer value) {
        PatriciaTrieNode leaf = new PatriciaTrieNode(key);
        leaf.terminal = true;
        leaf.data = value;
        return leaf;
    }

    /** Merges the single child of {@code node} into {@code node} (label concatenation). */
    private static void absorbOnlyChild(PatriciaTrieNode node) {
        PatriciaTrieNode only = node.children.get(0);
        node.key += only.key;
        node.data = only.data;
        node.terminal = only.terminal; // the child may itself be a non-terminal node
        node.children = only.children;
    }

    /**
     * Returns the root node.
     *
     * @return the root, or {@code null} if nothing has been inserted yet
     */
    public PatriciaTrieNode getRoot() {
        return root;
    }

    /**
     * Prints the trie to standard output, one node per line (for test purposes
     * only). Prints nothing if no key has ever been inserted.
     */
    public void printTree() {
        if (root == null) {
            return;
        }
        this.print(0, this.root);
    }

    /** Prints {@code node} indented by {@code level}, followed by its subtree. */
    private void print(int level, PatriciaTrieNode node) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < level; i++) {
            line.append(' ');
        }
        line.append('|');
        for (int i = 0; i < level; i++) {
            line.append('-');
        }
        line.append(node.key);
        if (node.terminal) {
            line.append('[').append(node.data).append("]#");
        }
        System.out.println(line);
        for (PatriciaTrieNode child : node.children) {
            print(level + 1, child);
        }
    }

    /** Looks up {@code key} and prints whether it was found (for test purposes only). */
    public void testFind(String key) {
        Integer val = this.find(key);
        if (val != null) {
            System.out.format("Found key \"%s\" at: %s%n", key, val);
        } else {
            System.out.format("Found no such key: \"%s\"%n", key);
        }
    }

    /** Demo driver: builds several tries, prints them and runs a few lookups and deletions. */
    public static void main(String[] args) throws Exception {
        // test insertion
        PatriciaTrie ptrie = new PatriciaTrie();
        ptrie.insert("ab", 1);
        ptrie.insert("abc", 2);
        ptrie.insert("abde", 3);
        ptrie.insert("abd", 4);

        // "dc" itself is deliberately not inserted, so looking it up below misses
        ptrie.insert("dce", 5);
        ptrie.insert("dceh", 6);
        ptrie.insert("dceg", 7);
        ptrie.insert("dca", 8);
        ptrie.insert("dcf", 9);

        ptrie.insert("ghk", 10);
        ptrie.insert("gh", 11);

        ptrie.insert("mns", 12);
        ptrie.insert("mnt", 13);
        ptrie.insert("mn", 14);
        ptrie.insert("mg", 15);
        ptrie.printTree();

        String key = "dc";
        ptrie.testFind(key);
        key = "d";
        ptrie.testFind(key);
        key = "ab";
        ptrie.testFind(key);
        key = "ef";
        ptrie.testFind(key);
        key = "zz";
        ptrie.testFind(key);
        key = "dk";
        ptrie.testFind(key);
        key = "dcf";
        ptrie.testFind(key);
        key = "dck";
        ptrie.testFind(key);

        key = "abd";
        ptrie.delete(key);
        System.out.format("After delete key: %s%n", key);
        ptrie.printTree();

        System.out.println("****************************");
        ptrie = new PatriciaTrie();
        ptrie.insert("bear", 1);
        ptrie.insert("bell", 2);
        ptrie.insert("bid", 3);
        ptrie.insert("bull", 4);
        ptrie.insert("buy", 5);
        ptrie.insert("sell", 6);
        ptrie.insert("stock", 7);
        ptrie.insert("stop", 8);
        ptrie.printTree();

        System.out.println("****************************");
        ptrie = new PatriciaTrie();
        ptrie.insert("allot", 1);
        ptrie.insert("alloy", 2);
        ptrie.insert("all", 3);
        ptrie.insert("aloe", 4);
        ptrie.insert("ant", 5);
        ptrie.insert("an", 6);
        ptrie.insert("are", 7);
        ptrie.insert("ate", 8);
        ptrie.insert("be", 9);
        ptrie.printTree();

        System.out.println("****************************");
        ptrie = new PatriciaTrie();
        ptrie.insert("minimize", 0);
        ptrie.insert("mize", 4);
        ptrie.insert("ze", 6);
        ptrie.insert("nimize", 2);
        ptrie.insert("ize", 5);
        ptrie.insert("inimize", 1);
        ptrie.insert("imize", 3);
        ptrie.insert("e", 7);
        ptrie.printTree();

        key = "ize";
        ptrie.testFind(key);
        key = "zee";
        ptrie.testFind(key);
        key = "mize";
        ptrie.testFind(key);

        key = "mize";
        ptrie.delete(key);
        System.out.format("After delete key: %s%n", key);
        ptrie.printTree();

    }
}

测试输出:

|
 |-ab[1]#
  |--c[2]#
  |--d[4]#
   |---e[3]#
 |-dc
  |--a[8]#
  |--e[5]#
   |---g[7]#
   |---h[6]#
  |--f[9]#
 |-gh[11]#
  |--k[10]#
 |-m
  |--g[15]#
  |--n[14]#
   |---s[12]#
   |---t[13]#
Found no such key: "dc"
Found no such key: "d"
Found key "ab" at: 1
Found no such key: "ef"
Found no such key: "zz"
Found no such key: "dk"
Found key "dcf" at: 9
Found no such key: "dck"
After delete key: abd
|
 |-ab[1]#
  |--c[2]#
  |--de[3]#
 |-dc
  |--a[8]#
  |--e[5]#
   |---g[7]#
   |---h[6]#
  |--f[9]#
 |-gh[11]#
  |--k[10]#
 |-m
  |--g[15]#
  |--n[14]#
   |---s[12]#
   |---t[13]#
****************************
|
 |-b
  |--e
   |---ar[1]#
   |---ll[2]#
  |--id[3]#
  |--u
   |---ll[4]#
   |---y[5]#
 |-s
  |--ell[6]#
  |--to
   |---ck[7]#
   |---p[8]#
****************************
|
 |-a
  |--l
   |---l[3]#
    |----o
     |-----t[1]#
     |-----y[2]#
   |---oe[4]#
  |--n[6]#
   |---t[5]#
  |--re[7]#
  |--te[8]#
 |-be[9]#
****************************
|
 |-e[7]#
 |-i
  |--mize[3]#
  |--nimize[1]#
  |--ze[5]#
 |-mi
  |--nimize[0]#
  |--ze[4]#
 |-nimize[2]#
 |-ze[6]#
Found key "ize" at: 5
Found no such key: "zee"
Found key "mize" at: 4
After delete key: mize
|
 |-e[7]#
 |-i
  |--mize[3]#
  |--nimize[1]#
  |--ze[5]#
 |-minimize[0]#
 |-nimize[2]#
 |-ze[6]#

posted @ 2011-06-27 23:59  ljsspace  阅读(4170)  评论(0编辑  收藏  举报