Tony's Log

Algorithms, Distributed System, Machine Learning

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::

This is about run-length encoding (https://en.wikipedia.org/wiki/Run-length_encoding). The trick is that each run of a character is compressed to at most 254 occurrences, because a count of 255 (0xFF) is reserved to mark the end of a string.

typedef unsigned char UCHAR;

// Run-length codec that packs a list of strings into one string and back.
//
// Wire format: every input string becomes a sequence of (count, byte) pairs
// with count in [1, 254], followed by one terminator byte 0xFF. The decoder
// consumes bytes pairwise, so a 0xFF *value* byte is never mistaken for the
// terminator; only a 0xFF found in count position ends a string.
class Codec {
    // Count value reserved as the end-of-string marker; real runs stay below it.
    const static int MAX_CNT = 255;
public:

    // Encodes a list of strings to a single string.
    //
    // strs: the strings to pack; they are only read, never modified.
    // Returns the concatenation of each string's run-length pairs, each
    // string closed by a 0xFF byte. An empty list yields an empty string.
    std::string encode(std::vector<std::string>& strs) {
        // Longest run a single pair may describe (255 is the terminator).
        const size_t maxRun = static_cast<size_t>(MAX_CNT - 1);

        std::string ret;
        for (const auto &s : strs)
        {
            const size_t len = s.length();
            size_t i = 0;

            while (i < len)
            {
                // Compare char against char: mixing char with unsigned char
                // would promote both to int and make bytes >= 0x80 never
                // match where char is signed, defeating the compression.
                const char c = s[i];
                size_t run = 1;
                while (i + run < len && s[i + run] == c && run < maxRun)
                {
                    ++run;
                }
                ret += static_cast<char>(static_cast<UCHAR>(run));
                ret += c;

                i += run;
            }
            ret += static_cast<char>(static_cast<UCHAR>(MAX_CNT)); // 0xFF: end
        }
        return ret;
    }

    // Decodes a single string to a list of strings.
    //
    // s: data in the format produced by encode().
    // Returns the strings in their original order. Bytes that follow the last
    // terminator (an unterminated string) are dropped, and a trailing count
    // byte without its value byte is ignored.
    std::vector<std::string> decode(std::string s)
    {
        std::vector<std::string> ret;

        const size_t len = s.length();
        std::string cur;
        size_t inx = 0;
        while (inx < len)
        {
            const UCHAR cnt = static_cast<UCHAR>(s[inx]);
            if (cnt == UCHAR(MAX_CNT))
            {
                // Terminator in count position: the current string is complete.
                ret.push_back(cur);
                cur.clear();
                ++inx;
                continue;
            }
            // A count with no value byte after it means the input was cut
            // short; there is nothing meaningful left to expand.
            if (inx + 1 >= len)
            {
                break;
            }
            cur.append(static_cast<size_t>(cnt), s[inx + 1]);
            inx += 2;
        }
        return ret;
    }
};
posted on 2015-08-29 01:34  Tonix  阅读(314)  评论(0编辑  收藏  举报