OIIIIIIII

【hust数据结构】huffman树及编码/解码实验

 做了好久...

最后调出来还是蛮有成就感的,总结一个博客出来吧2333

要求

 一些细节

中文字符的处理

在头文件之后写:

#ifdef _WIN32
    #include<windows.h>//用来修控制台中文乱码
#endif

在main函数的最前面写:

#ifdef _WIN32
    SetConsoleOutputCP(65001);// switch the console output code page to 65001 (UTF-8) so Chinese text is not garbled
#endif

这样控制台的输出代码页就被改成65001(即UTF-8)了,中文字符就能正确显示。

在UTF-8编码下,常用汉字占三个char(三个字节),英文字符还是一个char。

map<struct,int>:需重载struct的运算符

以struct为下标(key)的map,需要为这个struct重载小于号运算符 operator<。比较时要用到所有成员:如果只比较其中一个成员,那么只在其他成员上不同的两个key会被map当成同一个key。

// One character of the input: a single byte padded with two spaces for an
// English character, or the three bytes of a Chinese character.
struct source_data{
    char ch1,ch2,ch3;
    // Ordering used when source_data is a std::map key. All three bytes take
    // part in the comparison: ordering on ch1 alone would make every pair of
    // three-byte characters that share a first byte compare equivalent, and
    // the map would merge them into a single entry. The order is descending,
    // so walking the map in reverse visits the keys in ascending order.
    bool operator < (const source_data &a) const{
        if(ch1!=a.ch1)return ch1>a.ch1;
        if(ch2!=a.ch2)return ch2>a.ch2;
        return ch3>a.ch3;
    }
};
map<source_data,int>nod;// maps each character to its numeric id

 freopen读入多个文件

在freopen读完之后,写:

    fclose(stdin);// close the stream that freopen attached to the file
    cin.clear();// reset the error/EOF state flags of cin

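之后就可以再用freopen继续读入下一个文件。注意:fclose(stdin)只是把stdin关掉,并不会让它重新指向控制台;而且严格来说,对已经fclose的流再调用freopen属于未定义行为,更稳妥的做法是用fopen/fclose(或ifstream)分别打开每个文件。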

vector:当任意长的数组用

「LuoguP3369」 【模板】普通平衡树 (用vector乱搞平衡树

还有vec.push_back(x),可以在vec末尾加数。

指针的优先队列

 1 struct NODE{
 2     NODE *lson,*rson;//lson:0 rson:1
 3     NODE *fa;//父节点
 4     int node=0,tim;//当前点的编号 & 出现次数之和
 5     source_data raw;
 6     int huff;//huffman编码
 7 };
 8 struct cmp{
 9     bool operator() (const NODE *a,const NODE *b){
10         return (a->tim) > (b->tim);
11     }
12 };
13 priority_queue<NODE *,vector<NODE*>,cmp>q;

遍历map:iterator的使用

指针置空:赋初值nullptr

1 map<source_data,int>::reverse_iterator iter;
2 for(iter = nod.rbegin();iter != nod.rend();iter++){
3         source_data c = iter->first;        
4         NODE *p = (NODE*)malloc(sizeof(NODE));
5         p->lson=p->rson=nullptr;//之后就能直接if(p->lson)了
6     }

 

总之感觉这个两百来行的代码,学到了很多东西!

代码

  1 /*
  2 author : qwerta
  3 date : 2021.11.04 - 11.11
  4 */
  5 #include<algorithm>
  6 #include<iostream>
  7 #include<cstdio>
  8 #include<cstring>
  9 #include<cstdlib>
 10 #include<queue>
 11 #include<stack>
 12 #include<map>
 13 #include<vector>
 14 #ifdef _WIN32
 15     #include<windows.h>//用来修控制台中文乱码
 16 #endif
 17 using namespace std;
 18 const int ALL_CHARACTER=1000+3;//总字符数
 19 struct source_data{
 20     char ch1,ch2,ch3;
 21         bool operator < (const source_data &a) const{
 22         return ch1>a.ch1;//升序 
 23     }
 24 };
 25 map<source_data,int>nod;//存储字符对应的编号
 26 int cont[ALL_CHARACTER];//记录编号为i的字符的出现次数
 27 int tot_char=0;//总字符数,从1开始存
 28 int tot_node=0;//总节点数
 29 void read_source_txt()
 30 {
 31     freopen("Sample.txt","r",stdin);
 32     char ch;
 33     while((ch=getchar())!=EOF)
 34     {
 35         if(ch>=0)//英文
 36         {
 37             if(nod.find((source_data){ch,' ',' '})==nod.end())
 38             {
 39                 nod[((source_data){ch,' ',' '})]=++tot_char;
 40                 cont[tot_char]=1;
 41             }
 42             else cont[nod[((source_data){ch,' ',' '})]]++;
 43             //putchar(ch);
 44         }
 45         else
 46         {
 47             //printf("汉字get\n");
 48             char ch0=getchar(),ch00=getchar();
 49             if(nod.find((source_data){ch,ch0,ch00})==nod.end())
 50             {
 51                 nod[((source_data){ch,ch0,ch00})]=++tot_char;
 52                 cont[tot_char]=1;
 53             }
 54             else cont[nod[((source_data){ch,ch0,ch00})]]++;
 55             //putchar(ch);putchar(ch0);putchar(ch00);
 56             //putchar(' ');
 57         }
 58     }
 59     fclose(stdin);
 60     cin.clear();
 61     //printf("\nread finished\n");
 62     return;
 63 }
 64 struct NODE{
 65     NODE *lson,*rson;//lson:0 rson:1
 66     NODE *fa;//父节点
 67     int node=0,tim;//当前点的编号 & 出现次数之和
 68     source_data raw;
 69     int huff;//huffman编码
 70 };
 71 struct cmp{
 72     bool operator() (const NODE *a,const NODE *b){
 73         return (a->tim) > (b->tim);
 74     }
 75 };
 76 priority_queue<NODE *,vector<NODE*>,cmp>q;
 77 NODE *s;//树根root
 78 void huffman_build()
 79 {
 80     map<source_data,int>::reverse_iterator iter;
 81     int i;
 82     for(iter = nod.rbegin(),i=1;iter != nod.rend();iter++,++i){
 83         source_data c = iter->first;        
 84         NODE *p = (NODE*)malloc(sizeof(NODE));
 85         p->node=i;
 86         p->tim=cont[nod[c]];
 87         p->lson=p->rson=nullptr;
 88         p->raw = c;
 89         q.push(p);
 90     }
 91     tot_node=tot_char;
 92     while(q.size()>1){
 93         NODE *p = (NODE*)malloc(sizeof(NODE));
 94         NODE *c1=q.top();q.pop();
 95         NODE *c2=q.top();q.pop();
 96         p->lson=c1;p->rson=c2;//注意
 97         c1->fa = c2->fa = p;
 98         p->tim=(c1->tim) + (c2->tim);
 99         p->node=++tot_node;
100         p->raw.ch1=0;
101         q.push(p);
102     }
103     s=q.top();q.pop();
104     //printf("huffman_build finished\n");
105     return;
106 }
107 int huffman_code[ALL_CHARACTER];//存编号为i的字符的huffman编码
108 void huffman_dfs(NODE *c,int now)//now:储存当前huffman编码(最高位之前,比实际huffman编码多加了一位1)
109 {
110     if(c->lson)huffman_dfs(c->lson,(now<<1));
111     //
112     if((c->raw).ch1)//到叶子节点了
113     {
114         huffman_code[nod[c->raw]]=c->huff=now;
115         //cout<<" "<<now<<endl;
116         return;
117     }
118     //
119     if(c->rson)huffman_dfs(c->rson,((now<<1)|1));
120     return;
121 }
122 stack<NODE *>st;
123 void huffman_with_stack()//非递归遍历huffman树
124 {
125     st.push(s);
126     s->huff=1;
127     while(!st.empty()){
128         NODE *c=st.top();st.pop();
129         if(c->lson){
130             (c->lson)->huff = (c->huff)<<1;
131             st.push(c->lson);
132         }
133         //
134         if((c->raw).ch1)huffman_code[nod[c->raw]]=c->huff;
135         if(c->rson){
136             (c->rson)->huff = ((c->huff)<<1)|1;
137             st.push(c->rson);
138         }
139     }
140     return;
141 }
142 void print_char(source_data c){
143     if(c.ch1>0)putchar(c.ch1);
144     else{putchar(c.ch1);putchar(c.ch2);putchar(c.ch3);}
145     return;
146 }
// Prints the Huffman code stored in x as a string of 0/1 digits.
// x carries one extra 1 bit in front of the real code; that marker bit is
// not printed. Values <= 1 hold no code bits and print nothing.
void print_huffman(int x){
    if(x<=1)return;
    // Locate the marker bit by shifting x down; shifting 1 up instead would
    // overflow int for codes that use the highest bits.
    int top=0;
    while((x>>(top+1))!=0)++top;
    for(int i=top-1;i>=0;--i)
        printf("%d",(x>>i)&1);
}
159 void print_tim(){
160     printf("\n------------------字符出现次数-------------------\n\n");
161     map<source_data,int>::reverse_iterator iter;
162     int tot=0;
163     for(iter = nod.rbegin();iter != nod.rend();iter++){
164         source_data c = iter->first;
165         printf("--%d--//",tot++);
166         print_char(c);
167         printf("//,次数:%d",cont[nod[c]]);
168         putchar('\n');
169     }
170     return;
171 }
172 void print_tree_dfs(NODE *c)//采用递归遍历该huffman树
173 {
174     if(c->lson)print_tree_dfs(c->lson);
175     if(c->rson)print_tree_dfs(c->rson);
176     printf("序号:%d--W%d--",c->node,c->tim);
177     if(c!=s)printf("//P%d",c->fa->node);else printf("//P0");
178     if(c->lson)printf("//L%d",c->lson->node);else printf("//L0");
179     if(c->lson)printf("//R%d//",c->rson->node);else printf("//R0//");
180     if((c->raw).ch1){
181         print_char(c->raw);
182         printf("==>");
183         print_huffman(c->huff);
184     }
185     putchar('\n');
186     return;
187 }
std::vector<char>txt;// raw bytes of the text to encode, filled by read_target_txt
std::vector<bool>vec;// the encoded text, one element per bit
190 void read_target_txt()
191 {
192     freopen("file.txt","r",stdin);
193     char ch;
194     while((ch=getchar())!=EOF)
195     {
196         txt.push_back(ch);
197     }
198     fclose(stdin);
199     cin.clear();
200     return;
201 }
202 void huffman_encode(){
203     for(int i=0;i<txt.size();++i){
204         int x;
205         if(txt[i]>0){
206             x = huffman_code[nod[(source_data){txt[i],' ',' '}]];
207         }
208         else{
209             x = huffman_code[nod[(source_data){txt[i],txt[i+1],txt[i+2]}]];
210             i+=2;
211         }
212         bool ans[103];
213         int tot=0;
214         for(;(1<<tot)<=x;++tot){
215             if((x>>tot)&1)ans[tot]=1;
216             else ans[tot]=0;
217         }
218         for(int i=tot-2;i>=0;--i)
219         vec.push_back(ans[i]);
220     }
221     //
222     printf("\n------------------对file.txt编码-------------------\n");
223     printf("\nvec.size : %d \n",vec.size());
224     printf("txt encoded : ");
225     for(int i=0;i<vec.size();++i)
226         cout<<vec[i];
227     printf("\n\n");
228     return;
229 }
230 void huffman_decode(){
231     printf("vec decoded : ");
232     NODE *p = s;
233     for(int i=0;i<vec.size();++i){
234         if(!vec[i]){
235             p = p->lson;
236         }
237         else{
238             p = p->rson;
239         }
240         //
241         if((p->raw).ch1){
242             print_char(p->raw);
243             p = s;
244         }
245     }
246     return;
247 }
248 void print_compress_rate(){
249     double rate=(double)(vec.size())/(8*txt.size());
250     printf("\nCompress rate for the txt : \n%.6f",rate);
251     return;
252 }
253 int main()
254 {
255     #ifdef _WIN32
256         SetConsoleOutputCP(65001);//用来修控制台的中文乱码
257     #endif
258     //
259     read_source_txt();
260     huffman_build();
261     //
262     bool need_dfs = 1;
263     if(need_dfs){
264         huffman_dfs(s,1);//递归遍历
265     }
266     else{
267         huffman_with_stack();//非递归遍历
268     }
269     //
270     print_tim();
271     putchar('\n');
272     printf("\n------------------递归输出字符huffman编码-------------------\n\n");
273     print_tree_dfs(s);
274     //
275     bool need_encode = 1;
276     if(need_encode){
277         read_target_txt();
278         huffman_encode();
279         huffman_decode();
280         print_compress_rate();
281     }
282     return 0;
283 }

 

posted @ 2021-11-11 15:27  qwertaya  阅读(154)  评论(0编辑  收藏  举报
MDZX
Changsha
Fulan