Description
基于任给一串大写英文字母序列(例如MNOPPPOPMMPOPOPPOPNP),编程实现求解一套二进制编码,使得上述正文的编码最短。
Input
有多组输入数据,每组一串字符串,每个字符串长度不超过1000且只包含大写英文字母。
Output
每组数据输出若干行,第一行输出组数(格式为 Case #k:),接下来每行输出一个字母的编码,满足字典序小的字母的编码字典序也尽量小,最后一行输出编码后串的长度,若长度小于50,则在同一行空一格后输出编码后的字符串,格式见样例。
Sample Input
ABC
Sample Output
Case #1:
A: 0
B: 10
C: 11
5 01011
HINT
考察知识点:哈夫曼树, 时间复杂度O(nlogn),空间复杂度O(n),好多人都过不了,数据已经减少了,大家可以试试。
Append Code
析:就是先建树,再遍历树,然后去求每个叶子结点的哈夫曼编码。建树可以用优先队列,每次优先取出频率低的结点,频率相同时优先取出字典序大的;在遍历时,向左就加0,向右就加1,
到达叶子结点时就把当前编码存储起来。
代码如下:
#pragma comment(linker, "/STACK:1024000000,1024000000")
#include <cstdio>
#include <string>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <cstring>
#include <set>
#include <queue>
#include <algorithm>
#include <vector>
#include <map>
#include <cctype>
#include <cmath>
#include <stack>
//#include <tr1/unordered_map>
#define freopenr freopen("in.txt", "r", stdin)
#define freopenw freopen("out.txt", "w", stdout)
using namespace std;
//using namespace std :: tr1;

// ---------------------------------------------------------------------------
// Generic contest template: type aliases, constants and small helpers.
// Most of these are not used by the Huffman solution below.
// ---------------------------------------------------------------------------
typedef long long LL;
typedef pair<int, int> P;
const int INF = 0x3f3f3f3f;
const double inf = 0x3f3f3f3f3f3f;
const LL LNF = 0x3f3f3f3f3f3f;
const double PI = acos(-1.0);
const double eps = 1e-8;
const int maxn = 1000 + 5;
const LL mod = 10000000000007;
const int N = 1e6 + 5;
const int dr[] = {-1, 0, 1, 0, 1, 1, -1, -1};
const int dc[] = {0, 1, 0, -1, 1, -1, 1, -1};
const int hr[]= {-2, -2, -1, -1, 1, 1, 2, 2};
const int hc[]= {-1, 1, -2, 2, -2, 2, -1, 1};
const char *Hex[] = {"0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
                     "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"};
inline LL gcd(LL a, LL b){ return b == 0 ? a : gcd(b, a%b); }
int n, m;
const int mon[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
const int monn[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
inline int Min(int a, int b){ return a < b ? a : b; }
inline int Max(int a, int b){ return a > b ? a : b; }
inline LL Min(LL a, LL b){ return a < b ? a : b; }
inline LL Max(LL a, LL b){ return a > b ? a : b; }
inline bool is_in(int r, int c){ return r >= 0 && r < n && c >= 0 && c < m; }

// ---------------------------------------------------------------------------
// Huffman coding
// ---------------------------------------------------------------------------

// One node of the Huffman tree.
//   id     - index of this node inside the global array `a`
//   num    - occurrence count (sum of the children's counts for inner nodes)
//   ch     - letter index 0..25; an inner node takes the ch of its left child
//   lchild / rchild - children inside `a`, both 0 for a leaf
struct Node{
    int id, num, ch;
    Node *lchild, *rchild;

    // Ordering for priority_queue (a max-heap): a node compares "less" when
    // its count is larger, so the heap top is the node with the SMALLEST
    // count; among equal counts the top is the one with the LARGEST ch.
    bool operator < (const Node &p) const{
        return num > p.num || (num == p.num && ch < p.ch);
    }
};

Node a[120];     // node pool: at most 26 leaves + 25 inner nodes are used
char s[maxn];    // current input string
int num[30];     // num[c] = number of occurrences of letter 'A' + c
int cnt;         // number of nodes of `a` currently in use
bool ok;         // true when the input contains exactly one distinct letter
string str[30];  // str[c] = code assigned to letter 'A' + c ("" if absent)

// Builds the Huffman tree for the counts in `num`.
// Reads `num`; writes `a`, `cnt` and `ok`. Expects `cnt == 0` and the child
// pointers in `a` cleared on entry. When at least two distinct letters occur,
// the root ends up in a[cnt-1].
void solve(){
    for(int i = 0; i < 26; ++i){
        if(!num[i]) continue;
        a[cnt].ch = i;
        a[cnt].id = cnt;
        a[cnt].num = num[i];
        ++cnt;
    }

    priority_queue<Node> pq;
    for(int i = 0; i < cnt; ++i) pq.push(a[i]);

    // A single distinct letter produces no merge; main() assigns it "0".
    if(cnt == 1) ok = true;

    while(pq.size() > 1){
        // The queue holds copies, so remember ids to reach the real nodes.
        const int id1 = pq.top().id, num1 = pq.top().num, ch1 = pq.top().ch;
        pq.pop();
        const int id2 = pq.top().id, num2 = pq.top().num, ch2 = pq.top().ch;
        pq.pop();

        // On equal counts the subtree with the smaller letter goes left so
        // that it receives the lexicographically smaller code; otherwise the
        // first-popped (lighter) node goes left.
        const bool second_goes_left = (num1 == num2 && ch1 > ch2);
        Node &parent = a[cnt];
        parent.ch = second_goes_left ? ch2 : ch1;
        parent.num = num1 + num2;
        parent.id = cnt;
        parent.lchild = &a[second_goes_left ? id2 : id1];
        parent.rchild = &a[second_goes_left ? id1 : id2];

        pq.push(parent);
        ++cnt;
    }
}

// Walks the tree below `p`; `s` is the code accumulated on the path from the
// root ("0" appended for a left edge, "1" for a right edge). On reaching a
// leaf the code is stored in str[leaf->ch].
void dfs(Node *p, const string &s){
    if(p->lchild == 0 && p->rchild == 0) str[p->ch] = s;
    if(p->lchild != 0) dfs(p->lchild, s + "0");
    if(p->rchild != 0) dfs(p->rchild, s + "1");
}

// Reads whitespace-separated strings until EOF. For each one prints
// "Case #k:", one "X: code" line per letter that occurs (in alphabetical
// order), then the total encoded length, followed by the encoded string
// itself when that length is below 50.
int main(){
    int kase = 0;
    // Field width 1004 = maxn - 1 keeps scanf inside the 1005-byte buffer.
    while(scanf("%1004s", s) == 1){
        printf("Case #%d:\n", ++kase);

        // Reset all per-case state.
        for(int i = 0; i < 120; ++i){
            a[i].id = a[i].num = 0;
            a[i].lchild = a[i].rchild = 0;
            if(i < 30) str[i].clear();
        }
        n = strlen(s);
        memset(num, 0, sizeof num);
        for(int i = 0; i < n; ++i) ++num[s[i]-'A'];
        cnt = 0;
        ok = false;

        solve();
        if(ok) str[s[0]-'A'] = "0";
        else dfs(&a[cnt-1], "");

        for(int i = 0; i < 26; ++i)
            if(!str[i].empty()) printf("%c: %s\n", i+'A', str[i].c_str());

        string ans;
        for(int i = 0; i < n; ++i) ans += str[s[i]-'A'];

        // size() returns size_t; passing it to %d is undefined behaviour.
        // The length is at most 1004 * 25, so it always fits in an int.
        printf("%d", static_cast<int>(ans.size()));
        if(ans.size() < 50) printf(" %s", ans.c_str());
        printf("\n");
    }
    return 0;
}