Bi-gram (k-gram with k = 2) index implementation for prefix suggestions:
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
 * Prefix-suggestion demo backed by a k-gram index.
 *
 * <p>Terms and their frequencies are loaded into {@link #terms}. {@link #buildIndex(int)} maps
 * every gram of every term (plus a one-character "starts with" marker) to the terms containing
 * it. {@link #searching(String)} intersects the posting lists of the query's grams,
 * {@link #sort()} orders the candidates by descending frequency and {@link #print()} prints the
 * candidates that really start with the query.
 *
 * <p>Instances are mutable and hold the state of the most recent search; nothing here is
 * synchronized.
 */
public class kGram {
    /** Term -> frequency, filled by {@link #buildTerms(String[])}. */
    HashMap<String, Integer> terms = new HashMap<String, Integer>();
    /** Gram (or one-character prefix marker) -> terms that produced it. */
    HashMap<String, ArrayList<String>> kindex = new HashMap<String, ArrayList<String>>();
    /** Candidate term -> frequency for the most recent search only. */
    HashMap<String, Integer> result = new HashMap<String, Integer>();
    /** Candidates of the most recent search; ordered once {@link #sort()} has run. */
    String[] resultArray = new String[0];
    /** True when the most recent search produced at least one candidate. */
    Boolean flag = Boolean.FALSE;
    /** Query string used by {@link #print()} as the required prefix. */
    String sKey;
    /** Default gram size (bi-grams). */
    static final int K = 2;
    /** Maximum number of suggestions printed per query. */
    private static final int MAX_SUGGESTIONS = 5;
    /** Gram size the current index was built with, so queries are segmented the same way. */
    private int indexK = K;

    /**
     * Loads {@code directory.csv}, builds the index and answers queries typed on standard input
     * until an empty line or end of input is read.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            kGram kg = new kGram();
            kg.read("directory.csv");
            kg.buildIndex(kGram.K);
            while (true) {
                System.out.println("Enter query (no '*' at the end): ");
                String line = in.readLine();
                // readLine() returns null at end of input; treat that like an empty query
                // instead of failing with a NullPointerException on trim().
                String queries = (line == null) ? "" : line.trim();
                if (queries.length() <= 0) {
                    System.out.println("No input! Exit");
                    return;
                } else if (queries.length() <= 1) {
                    System.out.println("Query is at least 2 characters: ");
                    continue;
                }
                System.out.println();
                kg.set_sKey(queries);
                kg.searching(queries);
                if (kg.flag) {
                    kg.sort();
                    kg.print();
                } else {
                    System.out.println("No suggestion!");
                }
            }
        } catch (Exception e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /**
     * Sets the prefix that {@link #print()} requires of every printed term.
     *
     * @param s the query string
     */
    public void set_sKey(String s) {
        this.sKey = s;
    }

    /**
     * @return the prefix last passed to {@link #set_sKey(String)}, or {@code null} if none
     */
    public String get_sKey() {
        return this.sKey;
    }

    /**
     * Reads a term file with one {@code term<TAB>frequency} record per line into {@link #terms}.
     *
     * <p>Blank and malformed lines are reported on standard error and skipped, so one bad record
     * does not discard the rest of the file. I/O failures are reported on standard error. The
     * file is decoded with the platform default charset, as before.
     *
     * @param input path of the file to read
     */
    public void read(String input) {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(input)));
            String strLine;
            int lineNo = 0;
            while ((strLine = br.readLine()) != null) {
                lineNo++;
                if (strLine.trim().length() == 0) {
                    continue;
                }
                // The separator is a tab character; the original code split on the two
                // characters "/t", which never matches a tab.
                String[] fields = strLine.split("\t");
                if (fields.length < 2) {
                    // Still accept the literal "/t" separator the old code looked for.
                    fields = strLine.split("/t");
                }
                if (fields.length < 2) {
                    System.err.println("Error: skipping line " + lineNo + " (no separator): " + strLine);
                    continue;
                }
                try {
                    this.buildTerms(fields);
                } catch (NumberFormatException e) {
                    System.err.println("Error: skipping line " + lineNo + " (bad frequency): " + strLine);
                }
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        } finally {
            // Close on every path, including when reading fails part-way through.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close fails after reading.
                }
            }
        }
    }

    /**
     * Stores one record in {@link #terms}.
     *
     * @param item {@code item[0]} is the term, {@code item[1]} its integer frequency
     * @throws ArrayIndexOutOfBoundsException if {@code item} has fewer than two elements
     * @throws NumberFormatException if {@code item[1]} is not an integer
     */
    public void buildTerms(String[] item) {
        this.terms.put(item[0], Integer.parseInt(item[1].trim()));
    }

    /**
     * Rebuilds {@link #kindex} from {@link #terms} using grams of length {@code k}.
     *
     * <p>Terms of length 0 or 1 are not indexed. Any previous index content is discarded, and
     * each term appears at most once in a posting list.
     *
     * @param k gram length, at least 1
     * @throws IllegalArgumentException if {@code k < 1}
     */
    public void buildIndex(int k) {
        if (k < 1) {
            throw new IllegalArgumentException("k must be >= 1, got " + k);
        }
        this.indexK = k;
        this.kindex.clear();
        for (String term : this.terms.keySet()) {
            if (term.length() <= 1) {
                continue;
            }
            // A term such as "aaa" yields the same gram twice; add it to each list only once.
            Set<String> seen = new HashSet<String>();
            for (String gram : this.segment(term, k)) {
                if (!seen.add(gram)) {
                    continue;
                }
                ArrayList<String> postings = this.kindex.get(gram);
                if (postings == null) {
                    postings = new ArrayList<String>();
                    this.kindex.put(gram, postings);
                }
                postings.add(term);
            }
        }
    }

    /**
     * Splits {@code s} into index keys: element 0 is the first character on its own (a
     * "starts with" marker), followed by every substring of length {@code k}.
     *
     * <p>For {@code k == 2} this yields exactly the keys the original implementation produced;
     * other values of {@code k} no longer run past the end of the string.
     *
     * @param s string to split
     * @param k gram length
     * @return the keys; empty when {@code s} is empty
     */
    private String[] segment(String s, int k) {
        int length = s.length();
        if (length == 0) {
            return new String[0];
        }
        int gramCount = Math.max(0, length - k + 1);
        String[] keys = new String[gramCount + 1];
        keys[0] = s.substring(0, 1);
        for (int start = 0; start < gramCount; start++) {
            keys[start + 1] = s.substring(start, start + k);
        }
        return keys;
    }

    /**
     * Prints every entry of {@code hm} as {@code key => value}, one per line.
     *
     * @param hm map to print
     */
    public void dumpMap(HashMap<?, ?> hm) {
        for (Map.Entry<?, ?> entry : hm.entrySet()) {
            System.out.println(entry.getKey() + " => " + entry.getValue());
        }
    }

    /**
     * Finds the terms that contain every key of {@code str} and stores them in
     * {@link #resultArray} and {@link #result}; {@link #flag} tells whether any were found.
     *
     * <p>A term that starts with the query necessarily contains all of the query's keys, so a
     * key missing from the index means there can be no suggestion and the search stops early.
     * State from earlier searches is always discarded first.
     *
     * @param str the query; {@code null} or empty yields no candidates
     */
    public void searching(String str) {
        this.flag = false;
        this.result.clear();
        this.resultArray = new String[0];
        if (str == null || str.length() == 0) {
            return;
        }

        Set<String> candidates = null;
        for (String gram : this.segment(str, this.indexK)) {
            ArrayList<String> postings = this.kindex.get(gram);
            if (postings == null) {
                return;
            }
            if (candidates == null) {
                candidates = new HashSet<String>(postings);
            } else {
                // Copy into a set so retainAll does hash lookups, not list scans.
                candidates.retainAll(new HashSet<String>(postings));
            }
            if (candidates.isEmpty()) {
                return;
            }
        }
        if (candidates == null) {
            return;
        }

        this.resultArray = candidates.toArray(new String[0]);
        for (String term : this.resultArray) {
            this.result.put(term, this.terms.get(term));
        }
        this.flag = this.resultArray.length > 0;
    }

    /**
     * Orders {@link #resultArray} by descending frequency; terms with equal frequency are
     * ordered alphabetically so the output is deterministic.
     */
    public void sort() {
        if (this.resultArray == null || this.result.isEmpty()) {
            return;
        }
        Arrays.sort(this.resultArray, new Comparator<String>() {
            @Override
            public int compare(String left, String right) {
                int byFrequency = frequencyOf(right).compareTo(frequencyOf(left));
                return byFrequency != 0 ? byFrequency : left.compareTo(right);
            }
        });
    }

    /** Returns the recorded frequency of {@code term}, or 0 when none is recorded. */
    private Integer frequencyOf(String term) {
        Integer frequency = this.result.get(term);
        return frequency == null ? Integer.valueOf(0) : frequency;
    }

    /**
     * Prints up to five candidates that start with the current query, each followed by its
     * frequency, then a blank line. Prints {@code Sorry, no matching!} when none qualifies.
     */
    public void print() {
        int count = 0;
        if (this.resultArray != null) {
            for (String term : this.resultArray) {
                if (!this.filter(term)) {
                    continue;
                }
                System.out.println(term + " (" + this.result.get(term) + ")");
                count++;
                // Limit on what was printed, not on the position in the candidate array.
                if (count >= MAX_SUGGESTIONS) {
                    break;
                }
            }
        }
        if (count == 0) {
            System.out.println("Sorry, no matching!");
        }
        System.out.println();
    }

    /**
     * Tells whether {@code s} starts with the current query, e.g. a search for "com" must not
     * return "income". Candidates shorter than the query are rejected rather than causing an
     * exception. When no query has been set, nothing is filtered out.
     */
    private boolean filter(String s) {
        String key = this.get_sKey();
        if (s == null) {
            return false;
        }
        return key == null || s.startsWith(key);
    }
}
一个很有用的小技巧:使用 java.util.Arrays 直接输出(dump)数组内容:
/** Demonstrates dumping array contents with the java.util.Arrays helpers. */
public class Test {
    /**
     * Prints a one-dimensional and a two-dimensional array, one per line.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        final String[] letters = new String[] {"a", "b", "c", "d"};
        final double[][] matrix = new double[][] {
            {0.50, 0.70, 0.40, 0.60},
            {0.50, 1.10, 0.50, 0.80}
        };

        // toString handles flat arrays; nested arrays need deepToString.
        final String flat = java.util.Arrays.toString(letters);
        final String nested = java.util.Arrays.deepToString(matrix);

        System.out.println(flat);   // [a, b, c, d]
        System.out.println(nested); // [[0.5, 0.7, 0.4, 0.6], [0.5, 1.1, 0.5, 0.8]]
    }
}
或者使用 for-each 循环逐个输出数组元素:
/** Demonstrates printing array elements one per line with a for-each loop. */
public class Test {
    /**
     * Prints each element of a small string array on its own line.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        final String[] letters = new String[] {"a", "b", "c", "d"};
        for (final String letter : letters) {
            System.out.println(letter);
        }
    }
}