代码改变世界

支持正则或通配符的hashmap

2015-10-19 14:30  Loull  阅读(3080)  评论(0)  编辑  收藏  举报

RegexpKeyedMap

 http://wiki.apache.org/jakarta/RegexpKeyedMap

 

RegexHashMap

https://heideltime.googlecode.com/hg-history/a354341d349e75262884706b830f237fd9eeb269/src/de/unihd/dbs/uima/annotator/heideltime/resources/RegexHashMap.java

 

两者的原理基本相同:在 get 的时候遍历所有 key,逐个做正则匹配,查找开销随 key 的数量线性增长,效率不高。

 

nginx有支持通配符的实现,有时间可以了解下实现。

 

RegexHashMap

package de.unihd.dbs.uima.annotator.heideltime.resources;

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Implements a HashMap extended with regular expression keys and caching functionality.
 *
 * <p>Keys stored through {@link #put(String, Object)} live in the <em>container</em> and are
 * interpreted as regular expressions during lookup. When a lookup succeeds only through a
 * regex match, the looked-up string is stored together with the matched value in the
 * <em>cache</em>, so the next lookup of the same string is a plain hash lookup.
 *
 * <p>Every structural change to the container ({@code put}, {@code putAll}, {@code remove},
 * {@code clear}) discards the whole cache, because cached entries are derived from the regex
 * keys and could otherwise return values of regexes that were overwritten or removed.
 *
 * <p>If several regex keys match the same string, the one encountered first while iterating
 * the underlying {@link HashMap} wins; that order is unspecified. No synchronization is
 * performed by this class.
 *
 * @author Julian Zell
 *
 */
public class RegexHashMap<T> implements Map<String, T> {

    /** Entries as supplied by the caller; keys double as regular expressions. */
    private final HashMap<String, T> container = new HashMap<String, T>();
    /** Lookup strings that were resolved through a regex match, with the matched value. */
    private final HashMap<String, T> cache = new HashMap<String, T>();
    /** Lazily compiled patterns, keyed by their regex source, to avoid recompiling per lookup. */
    private final HashMap<String, Pattern> patterns = new HashMap<String, Pattern>();

    /**
     * Clears the container, the cache and the compiled patterns.
     */
    @Override
    public void clear() {
        container.clear();
        cache.clear();
        patterns.clear();
    }

    /**
     * Checks whether the cache or container contain the key directly, then evaluates the
     * container's keys as regexes and checks whether any of them matches the key.
     *
     * @param key key to look for; a null or non-String key can only be a direct hit
     * @return true if the key is a direct hit or is fully matched by a regex key
     */
    @Override
    public boolean containsKey(Object key) {
        // direct hit in either map
        if (cache.containsKey(key) || container.containsKey(key)) {
            return true;
        }
        // only strings can be matched against a regex; null / other types would otherwise
        // end in a NullPointerException / ClassCastException
        if (!(key instanceof String)) {
            return false;
        }
        return findMatch((String) key) != null;
    }

    /**
     * Checks whether a specific value is contained within either the cache or the container.
     */
    @Override
    public boolean containsValue(Object value) {
        return cache.containsValue(value) || container.containsValue(value);
    }

    /**
     * Returns a new set holding the entries of both the container and the cache.
     * The set is a snapshot; adding to or removing from it does not affect this map.
     */
    @Override
    public Set<Entry<String, T>> entrySet() {
        HashSet<Entry<String, T>> set = new HashSet<Entry<String, T>>();
        set.addAll(container.entrySet());
        set.addAll(cache.entrySet());
        return set;
    }

    /**
     * Looks the key up in the cache, then in the container, and finally evaluates the
     * container's keys as regexes against the key. A value found through regex evaluation
     * is stored in the cache under the requested key before it is returned.
     *
     * @param key key to look up
     * @return the associated value, or null if the key is null, is not a String without a
     *         direct hit, or nothing matches
     * @throws java.util.regex.PatternSyntaxException if a container key that has to be
     *         evaluated is not a valid regular expression
     */
    @Override
    public T get(Object key) {
        // output for requested key null is the value null; normal Map behavior
        if (key == null) {
            return null;
        }

        T result = cache.get(key);
        if (result != null) {
            return result;
        }
        result = container.get(key);
        if (result != null) {
            return result;
        }
        // a non-String key cannot be matched against a regex
        if (!(key instanceof String)) {
            return null;
        }

        Entry<String, T> match = findMatch((String) key);
        if (match == null) {
            // no value for the given key was found in any of container/cache/regex keys
            return null;
        }
        putCache((String) key, match.getValue());
        return match.getValue();
    }

    /**
     * Checks whether both container and cache are empty.
     */
    @Override
    public boolean isEmpty() {
        return container.isEmpty() && cache.isEmpty();
    }

    /**
     * Returns a new set holding the keys of both the container and the cache.
     * The set is a snapshot; changes to it do not affect this map.
     */
    @Override
    public Set<String> keySet() {
        HashSet<String> set = new HashSet<String>();
        set.addAll(container.keySet());
        set.addAll(cache.keySet());
        return set;
    }

    /**
     * Associates a key (regular expression) with a value in the container.
     * The cache is discarded because previously cached lookups may now resolve differently.
     *
     * @return previous value associated with the key in the container, or null
     */
    @Override
    public T put(String key, T value) {
        cache.clear();
        return container.put(key, value);
    }

    /**
     * Associates a key with a value in the cache hashmap.
     * Note that cache entries are dropped whenever the container is modified.
     *
     * @param key Key to map from
     * @param value Value to map to
     * @return previous value associated with the key, or null if unassociated before
     */
    public T putCache(String key, T value) {
        return cache.put(key, value);
    }

    /**
     * Adds all entries of a map to the container and discards the cache.
     */
    @Override
    public void putAll(Map<? extends String, ? extends T> m) {
        container.putAll(m);
        cache.clear();
    }

    /**
     * Removes a specific key's association from the container. The cache is discarded
     * as well, since it may hold lookups that were resolved through the removed key.
     *
     * @return value previously associated with the key in the container, or null
     */
    @Override
    public T remove(Object key) {
        cache.clear();
        patterns.remove(key);
        return container.remove(key);
    }

    /**
     * Returns the combined size of container and cache.
     */
    @Override
    public int size() {
        return container.size() + cache.size();
    }

    /**
     * Returns the combined collection of both the values of the container as well as
     * the cache. Duplicate values are collapsed because the result is built as a set.
     */
    @Override
    public Collection<T> values() {
        HashSet<T> set = new HashSet<T>();
        set.addAll(container.values());
        set.addAll(cache.values());
        return set;
    }

    /**
     * Returns the first container entry whose key, taken as a regex, fully matches the input,
     * or null if there is none. A null container key is skipped.
     */
    private Entry<String, T> findMatch(String input) {
        for (Entry<String, T> entry : container.entrySet()) {
            String regex = entry.getKey();
            if (regex != null && patternFor(regex).matcher(input).matches()) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Returns the compiled pattern for a regex, compiling and memoising it on first use.
     */
    private Pattern patternFor(String regex) {
        Pattern compiled = patterns.get(regex);
        if (compiled == null) {
            compiled = Pattern.compile(regex);
            patterns.put(regex, compiled);
        }
        return compiled;
    }
}

 

 

RegexpKeyedMap

package org.apache.regexp.collections;

import java.util.HashMap;
import java.util.Iterator;

import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;



/**
 * This map implementation uses a hashmap as the underlying storage.
 * Note that the keySet() method will return a set of regular expressions rather than actual keys.
 * The put() method uses a regexp as a key.
 * The get() method gets any value that matches one of the regexps.  If there is more than one matching regexp, the first one
 * encountered is returned - and hence could be indeterminate!
 *
 * @author Manik Surtani
 *
 */
public class RegexpKeyedMap extends HashMap
{
    /**
     * Stores a value under a regular expression key.
     *
     * @param key the regular expression; must be a String
     * @param value the value to associate with the regular expression
     * @return the value previously stored under the same key, as reported by HashMap.put()
     * @throws RuntimeException if the key is not a String
     */
    public Object put(Object key, Object value)
    {
        if (!(key instanceof String))
        {
            throw new RuntimeException("RegexpKeyedMap - only accepts Strings as keys.");
        }
        return super.put(key, value);
    }

    /**
     * The key passed in should always be a String.  The map will return the first element whose key, treated as a regular expression, matches the key passed in
     * NOTE: It is possible for this map to have more than one return value, for example, if a key is passed into get() which matches more than one regexp.
     *
     * E.g., consider the following keys in the map - '[A-Za-z]*' and 'Hello'.  Passing in 'Hello' as a key to the get() method would match either of the regexps,
     * and whichever appears first in the map (which is indeterminate) will be returned.
     *
     * @param key the string to match against the stored regular expressions; its toString() value is used
     * @return the value of the first matching regular expression, or null if the key is null or nothing matches
     */
    public Object get(Object key)
    {
        // a null key cannot match anything; return null instead of failing in cleanKey()
        if (key == null)
        {
            return null;
        }

        String stringToMatch = cleanKey( key );

        Iterator regexps = keySet().iterator();
        while (regexps.hasNext())
        {
            String keyString = regexps.next().toString();
            try
            {
                RE regexp = new RE(keyString);
                if (regexp.match(stringToMatch))
                {
                    return super.get(keyString);
                }
            }
            catch (RESyntaxException ignored)
            {
                // best effort: a stored key that is not a valid regexp can never match,
                // so it is skipped and the remaining keys are still tried
            }
        }
        return null;
    }

    /**
     * Strip any 'dirty' chars from the key we are searching for,
     * otherwise we end up with funny results from the RE
     *
     * @param obj the key to clean; must not be null
     * @return the key's string form without a leading '^'; an empty string is returned unchanged
     */
    private String cleanKey( Object obj )
    {
        String retVal = obj.toString();

        // remove any '^' from start of key - prevents the RE from matching !?!?
        // startsWith() is safe for the empty string, where charAt(0) would throw
        return retVal.startsWith("^") ? retVal.substring(1) : retVal;
    }

}