剑指 Offer II 065. 最短的单词编码(820. 单词的压缩编码)
题目:给定单词数组 words,返回能够编码所有单词的最短助记字符串 s 的长度;s 以 '#' 结尾,每个单词都等于 s 中从某个下标开始、到下一个 '#' 之前(不含 '#')的子串。
思路:
【1】存储后缀
【2】利用字典树(将单词逆序插入,即从最后一个字符开始存储,使拥有公共后缀的单词共享同一条路径)
代码展示:
字典树优化的两个版本:
// Variant 1 -- runtime 6 ms (beats 99.24%), memory 47 MB (beats 29.35%).
/**
 * Short Encoding of Words (LeetCode 820): returns the length of the shortest
 * reference string of the form {@code w1#w2#...#} such that every input word
 * is a suffix of one of the {@code wi}.
 *
 * <p>Words are inserted into a trie back to front, so a word that is a suffix
 * of another word lies on the same root-to-leaf path. The running total
 * {@link #cnt} is adjusted while inserting, so no second pass is needed.
 */
class Solution {

    /** Trie node; the path from the root spells a word from its last character to its first. */
    class TrieNode {
        TrieNode[] childs = new TrieNode[26];
        /** True while this node ends a word whose {@code length + 1} is currently included in {@code cnt}. */
        boolean isEnd = false;

        /**
         * Inserts {@code str} reversed, starting at this node, and updates the
         * enclosing solution's {@code cnt}.
         *
         * @param str word made of the letters 'a'..'z'
         */
        public void insert(String str) {
            TrieNode node = this;
            int len = str.length();
            for (int i = len - 1; i >= 0; i--) {
                int index = str.charAt(i) - 'a';
                if (node.childs[index] == null) {
                    node.childs[index] = new TrieNode();
                    if (i == 0) {
                        // The word ends on a brand-new node, so it is not covered
                        // by anything inserted earlier: count "word#".
                        node.childs[index].isEnd = true;
                        cnt += len + 1;
                    }
                } else if (i != 0 && node.childs[index].isEnd) {
                    // A previously counted word of length (len - i) ends here and the
                    // current word continues past it, so that word is a proper suffix
                    // of this one: take its (len - i + 1) characters back out.
                    cnt -= len - i + 1;
                    node.childs[index].isEnd = false;
                }
                // When i == 0 and the child already exists, the word is a duplicate
                // or a suffix of an earlier word; nothing is added.
                node = node.childs[index];
            }
        }
    }

    /** Running encoded length for the call in progress. */
    int cnt = 0;

    /**
     * Computes the minimum encoded length for {@code words}.
     *
     * @param words words made of the letters 'a'..'z'
     * @return length of the shortest valid reference string
     */
    public int minimumLengthEncoding(String[] words) {
        // Start from zero on every call; otherwise reusing one Solution instance
        // would add the new total on top of the previous call's total.
        cnt = 0;
        TrieNode root = new TrieNode();
        for (String str : words) {
            root.insert(str);
        }
        return cnt;
    }
}

// Variant 2 -- runtime 7 ms (beats 98.86%), memory 47 MB (beats 29.35%).
/**
 * Same reversed-trie idea, but all state is local to the method.
 *
 * <p>Named {@code SolutionV2} only so that both variants can live in one
 * compilation unit; rename it to {@code Solution} when submitting it on its own.
 */
class SolutionV2 {

    /** Trie node; {@code isLeaf} marks the end of a word that is currently counted. */
    static class Trie {
        boolean isLeaf = false;
        Trie[] next = new Trie[26];
    }

    /**
     * Computes the minimum encoded length for {@code words}.
     *
     * @param words words made of the letters 'a'..'z'
     * @return length of the shortest valid reference string
     */
    public int minimumLengthEncoding(String[] words) {
        int minLen = 0;
        Trie root = new Trie();
        for (String word : words) {
            Trie cur = root;
            boolean hasNewBranch = false;
            for (int i = word.length() - 1; i >= 0; i--) {
                int idx = word.charAt(i) - 'a';
                if (cur.next[idx] == null) {
                    cur.next[idx] = new Trie();
                    hasNewBranch = true;
                    if (cur.isLeaf) {
                        // cur ended a counted word of length (word.length() - 1 - i)
                        // that this word now extends; remove its "word#" contribution.
                        cur.isLeaf = false;
                        minLen -= word.length() - i;
                    }
                }
                cur = cur.next[idx];
            }
            // Only a word that created at least one node is not already covered.
            if (hasNewBranch) {
                cur.isLeaf = true;
                minLen += word.length() + 1;
            }
        }
        return minLen;
    }
}
利用字典树(从后缀开始存储):
//时间17 ms击败34.71% //内存46.6 MB击败47.94% class Solution { public int minimumLengthEncoding(String[] words) { TrieNode trie = new TrieNode(); Map<TrieNode, Integer> nodes = new HashMap<TrieNode, Integer>(); for (int i = 0; i < words.length; ++i) { String word = words[i]; TrieNode cur = trie; for (int j = word.length() - 1; j >= 0; --j) { cur = cur.get(word.charAt(j)); } nodes.put(cur, i); } int ans = 0; for (TrieNode node: nodes.keySet()) { if (node.count == 0) { ans += words[nodes.get(node)].length() + 1; } } return ans; } } class TrieNode { TrieNode[] children; int count; TrieNode() { children = new TrieNode[26]; count = 0; } public TrieNode get(char c) { if (children[c - 'a'] == null) { children[c - 'a'] = new TrieNode(); count++; } return children[c - 'a']; } }
存储后缀的方式:
// Runtime 19 ms (beats 23.49%), memory 43 MB (beats 69.21%).
/**
 * Short Encoding of Words (LeetCode 820) by storing words in a hash set and
 * deleting every word that is a proper suffix of another word.
 *
 * <p>The set removes duplicates. For each word, all of its proper suffixes are
 * removed from the set; whatever remains must appear in the reference string,
 * each costing {@code length + 1} (the word plus '#').
 */
class Solution {

    /**
     * Computes the minimum encoded length for {@code words}.
     *
     * @param words words to encode
     * @return length of the shortest valid reference string
     */
    public int minimumLengthEncoding(String[] words) {
        Set<String> good = new HashSet<String>(Arrays.asList(words));

        for (String word : words) {
            // A word that has already been evicted is a proper suffix of some word
            // processed earlier, and that earlier pass removed every shorter suffix
            // too, so scanning it again would be wasted work. (A short word that is
            // visited before the longer word containing it still gets scanned; that
            // is harmless, only redundant.)
            if (!good.contains(word)) {
                continue;
            }
            // k starts at 1 so the word never removes itself.
            for (int k = 1; k < word.length(); ++k) {
                good.remove(word.substring(k));
            }
        }

        int ans = 0;
        for (String word : good) {
            ans += word.length() + 1;
        }
        return ans;
    }
}