引用字符串ref_str, 更高效的字符串

C 有以 null 结尾的字符串(null-terminated string),C++ 有 std::string,各有千秋。

C字符串

     以 zero 作为终止符,所有字符串操作都需要判断这个终止符;这也导致每个字符串操作都要重新计算字符串长度,非常冗余。解决办法是手动保存字符串长度,并调用带长度参数的 C 字符串函数。

std::string

C++ 的 string 试图解决 C 的问题,并把内存管理也包揽了。在大多数情形下,这都不是问题。但当你不需要它管理内存时,就没有办法了。比如字符串常量(继承自 C 导致的问题),又比如一个请求包中的子串(包的生命周期很长)。

既然 C++ 的 string 不够好,那么可以定义一个不需要管理内存、只提供便利操作接口的 string。我叫它 ref_str(代码中的类名为 RefStr),因为它像是引用了一段字符串;缺点是不能修改(很多语言实现的字符串也都不能修改)。对于需要修改的字符串,实现一个 StringBuilder 就可以了。

 

RefStr的实现

ref_str.hpp

#ifndef __REF_STR_HPP_INCLUDE__
#define __REF_STR_HPP_INCLUDE__

#include <assert.h>
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include <algorithm>
#include <iterator>
#include <string>

/**
 * RefStr - a non-owning, read-only view over a contiguous run of chars.
 *
 * The object stores only a pointer and a length; it never allocates, copies
 * or frees the characters. The referenced buffer must therefore stay alive
 * (and unchanged in location) for as long as any RefStr pointing into it is
 * used. The viewed range does not need to be zero-terminated.
 */
class RefStr {
public:
    typedef char const* iterator;
    typedef char const* const_iterator;
    typedef std::reverse_iterator<iterator> reverse_iterator;
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;

    /** Creates an empty view (NULL data, zero length). */
    RefStr() : len_(0), data_(NULL) {
    }

// Builds a RefStr from a string literal (or char array) without calling
// strlen: sizeof includes the terminating zero, hence the -1.
#define REF_STR_LITERAL(str) RefStr(str, sizeof(str)-1)

    /**
     * Implicit conversion from a zero-terminated C string.
     * A NULL pointer yields an empty view.
     */
    RefStr(char const *str) : len_(str ? strlen(str) : 0), data_(str) {
    }

    /**
     * Views the range [begin, end).
     *
     * The iterators must refer to contiguous char storage (char pointers,
     * std::string iterators, ...). NOTE: begin is dereferenced to obtain the
     * address even when the range is empty, so it must be an iterator for
     * which &*begin is acceptable.
     */
    template<typename iter_type>
    RefStr(iter_type begin, iter_type end)
        : len_(std::distance(begin, end)), data_(&*begin) {
    }

    /**
     * Views len chars starting at str.
     * str may be NULL only when len is 0; len must not be negative.
     */
    RefStr(char const *str, int len) : len_(0), data_(str) {
        // A negative length would silently turn into a huge size_t.
        assert(len >= 0);
        assert(str || len == 0);
        len_ = len;
    }

    /**
     * Implicit conversion from std::string. The view is only valid while
     * str is alive and not modified.
     */
    RefStr(std::string const & str) : len_(str.size()), data_(str.data()) {
    }

    // No user-defined destructor, copy constructor or assignment operator:
    // the compiler-generated ones copy the pointer/length pair, which is all
    // a non-owning view needs.

    /** Pointer to the first viewed char (may be NULL for an empty view). */
    const char * data() const {
        return data_;
    }

    /** Returns the char at index pos; pos must be < size(). */
    char at(size_t pos) const {
        assert(pos < len_);
        return data_[pos];
    }

    /** Number of chars in the view. */
    size_t size() const {
        return len_;
    }

    /** Same as size(). */
    size_t length() const {
        return len_;
    }

    /** True when the view has no chars or no data pointer. */
    bool empty() const {
        return data_ == NULL || len_ == 0;
    }

    iterator begin() const {
        return data();
    }

    iterator end() const {
        return data() + size();
    }

    reverse_iterator rbegin() const {
        return reverse_iterator(end());
    }

    reverse_iterator rend() const {
        return reverse_iterator(begin());
    }

    /**
     * Returns the sub-view [pos, pos + len).
     * The requested range must lie inside this view; a range that ends
     * exactly at end() is allowed.
     */
    RefStr substr(size_t pos, size_t len) const {
        // Written without "pos + len" so the check cannot overflow.
        assert(pos <= len_ && len <= len_ - pos);
        return RefStr(data_ + pos, static_cast<int>(len));
    }

    /** Copies the viewed chars into an owning std::string. */
    std::string to_str() const {
        if (data_ == NULL) {
            return std::string();
        }
        return std::string(data_, len_);
    }

    /**
     * Returns an iterator to the first occurrence of str in this view,
     * or end() when there is none.
     */
    iterator find(RefStr const &str) const {
        return std::search(begin(), end(), str.begin(), str.end());
    }

    /** Same as find(str), but the search starts at pos. */
    iterator find(iterator pos, RefStr const &str) const {
        return std::search(pos, end(), str.begin(), str.end());
    }

private:
    size_t len_;
    const char * data_;
};

// Three-way comparison of two views: the result is negative, zero or positive
// when left sorts before, equal to, or after right. Declared here so the
// relational operators below can call it; the definition follows them.
int compare(RefStr const &left, RefStr const &right);

// Three-way comparison like compare(), but upper-case ASCII letters are
// treated as their lower-case counterparts. Defined further down in this
// header.
int ignore_case_compare(RefStr const &left, RefStr const &right);

/** True when compare(left, right) reports the two views as equal. */
inline bool operator==(RefStr const &left, RefStr const &right) {
    int const order = compare(left, right);
    return order == 0;
}

/** True when compare(left, right) reports the two views as different. */
inline bool operator!=(RefStr const &left, RefStr const &right) {
    int const order = compare(left, right);
    return order != 0;
}

/** True when compare(left, right) orders left strictly before right. */
inline bool operator<(RefStr const &left, RefStr const &right) {
    int const order = compare(left, right);
    return order < 0;
}

/** True when compare(left, right) orders left strictly after right. */
inline bool operator>(RefStr const &left, RefStr const &right) {
    int const order = compare(left, right);
    return order > 0;
}

/** True when compare(left, right) orders left before or equal to right. */
inline bool operator<=(RefStr const &left, RefStr const &right) {
    int const order = compare(left, right);
    return order <= 0;
}

/** True when compare(left, right) orders left after or equal to right. */
inline bool operator>=(RefStr const &left, RefStr const &right) {
    int const order = compare(left, right);
    return order >= 0;
}

/**
 * Three-way, byte-wise comparison of two views.
 *
 * The common prefix is compared with memcmp (bytes as unsigned char); when
 * it is identical the shorter view sorts first.
 *
 * @return a negative value, 0, or a positive value when left is less than,
 *         equal to, or greater than right. Only the sign is meaningful.
 */
inline
int compare(RefStr const &left, RefStr const &right) {
    size_t const left_len = left.size();
    size_t const right_len = right.size();
    size_t const common_len = std::min(left_len, right_len);

    // Skip memcmp for an empty prefix: data() may be NULL for empty views,
    // and memcmp must not be handed a NULL pointer even with length 0.
    if (common_len != 0) {
        int const prefix_order = memcmp(left.data(), right.data(), common_len);
        if (prefix_order != 0) {
            return prefix_order;
        }
    }

    // Compare lengths explicitly instead of subtracting size_t values into
    // an int, which could truncate or flip the sign for large differences.
    if (left_len == right_len) {
        return 0;
    }
    return left_len < right_len ? -1 : 1;
}

 

inline

int ignore_case_compare(RefStr const &left, RefStr const &right) {
    static const char ignore_case_map[] = { '\000', '\001', '\002', '\003',
            '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013',
            '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023',
            '\024', '\025', '\026', '\027', '\030', '\031', '\032', '\033',
            '\034', '\035', '\036', '\037', '\040', '\041', '\042', '\043',
            '\044', '\045', '\046', '\047', '\050', '\051', '\052', '\053',
            '\054', '\055', '\056', '\057', '\060', '\061', '\062', '\063',
            '\064', '\065', '\066', '\067', '\070', '\071', '\072', '\073',
            '\074', '\075', '\076', '\077', '\100', '\141', '\142', '\143',
            '\144', '\145', '\146', '\147', '\150', '\151', '\152', '\153',
            '\154', '\155', '\156', '\157', '\160', '\161', '\162', '\163',
            '\164', '\165', '\166', '\167', '\170', '\171', '\172', '\133',
            '\134', '\135', '\136', '\137', '\140', '\141', '\142', '\143',
            '\144', '\145', '\146', '\147', '\150', '\151', '\152', '\153',
            '\154', '\155', '\156', '\157', '\160', '\161', '\162', '\163',
            '\164', '\165', '\166', '\167', '\170', '\171', '\172', '\173',
            '\174', '\175', '\176', '\177', '\200', '\201', '\202', '\203',
            '\204', '\205', '\206', '\207', '\210', '\211', '\212', '\213',
            '\214', '\215', '\216', '\217', '\220', '\221', '\222', '\223',
            '\224', '\225', '\226', '\227', '\230', '\231', '\232', '\233',
            '\234', '\235', '\236', '\237', '\240', '\241', '\242', '\243',
            '\244', '\245', '\246', '\247', '\250', '\251', '\252', '\253',
            '\254', '\255', '\256', '\257', '\260', '\261', '\262', '\263',
            '\264', '\265', '\266', '\267', '\270', '\271', '\272', '\273',
            '\274', '\275', '\276', '\277', '\300', '\301', '\302', '\303',
            '\304', '\305', '\306', '\307', '\310', '\311', '\312', '\313',
            '\314', '\315', '\316', '\317', '\320', '\321', '\322', '\323',
            '\324', '\325', '\326', '\327', '\330', '\331', '\332', '\333',
            '\334', '\335', '\336', '\337', '\340', '\341', '\342', '\343',
            '\344', '\345', '\346', '\347', '\350', '\351', '\352', '\353',
            '\354', '\355', '\356', '\357', '\360', '\361', '\362', '\363',
            '\364', '\365', '\366', '\367', '\370', '\371', '\372', '\373',
            '\374', '\375', '\376', '\377', };

    size_t left_len = left.length();
    size_t right_len = right.length();
    size_t compare_len = std::min(left_len, right_len);

    for (size_t pos = 0; pos < compare_len; ++pos) {
        char left_char = ignore_case_map[(unsigned char) left.at(pos)];
        char right_char = ignore_case_map[(unsigned char) right.at(pos)];
        if (left_char != right_char) {
            return left_char - right_char;
        }
    }
    return left_len - right_len;
}

#endif

 

便利操作函数

str_tool.h

#include "ref_str.hpp"
#include <utility>
#include <vector>
#include <algorithm>
#include <utility>

/**
 * Splits val at the first occurrence of sep.
 *
 * @return a pair (text before sep, text after sep). When sep does not occur
 *         in val the pair is (val, empty RefStr).
 */
inline std::pair<RefStr, RefStr> split_once(RefStr const &val,
        RefStr const &sep) {
    RefStr::const_iterator const val_end = val.end();
    RefStr::const_iterator const hit = val.find(sep);
    RefStr const head(val.begin(), hit);

    // The pair type is spelled out instead of using
    // std::make_pair<RefStr>(...): explicit template arguments defeat
    // make_pair's deduction and behave differently between C++03 and C++11.
    if (hit == val_end) {
        return std::pair<RefStr, RefStr>(head, RefStr());
    }
    return std::pair<RefStr, RefStr>(head, RefStr(hit + sep.size(), val_end));
}

inline
int split(std::vector<RefStr> &result, RefStr const &str,
        RefStr const &seperator, RefStr const &null_str = "") {
    typedef RefStr::const_iterator iter_type;
    iter_type begin = str.begin();
    iter_type end = str.end();
    iter_type sep_begin = seperator.begin();
    iter_type sep_end = seperator.end();
    iter_type pos = begin;
    int sep_len = seperator.size();
    int cnt = 1;
    while ((pos = std::search(begin, end, sep_begin, sep_end)) != end) {
        if (pos == begin)
            result.push_back(null_str);
        else
            result.push_back(RefStr(begin, pos));
        ++cnt;
        begin = pos + sep_len;
    }
    result.push_back(RefStr(begin, pos));
    return cnt;
}

/**
 * Returns str without its leading whitespace (as classified by isspace).
 * An empty input yields a default-constructed RefStr; otherwise the result
 * points into str's buffer.
 */
inline RefStr left_trim(RefStr const &str) {
    if (str.empty())
        return RefStr();
    RefStr::iterator it = str.begin();
    RefStr::iterator const last = str.end();
    // Check the bound before dereferencing so an all-whitespace string does
    // not read past the end; isspace needs an unsigned char value.
    while (it != last && isspace(static_cast<unsigned char>(*it)))
        ++it;
    return RefStr(it, static_cast<int>(std::distance(it, last)));
}

/**
 * Returns str without its trailing whitespace (as classified by isspace).
 * An empty input yields a default-constructed RefStr; otherwise the result
 * points into str's buffer.
 */
inline RefStr right_trim(RefStr const &str) {
    if (str.empty())
        return RefStr();
    RefStr::iterator const first = str.begin();
    RefStr::iterator last = str.end();
    // Check the bound before looking at last[-1] so an all-whitespace
    // string does not read before the buffer; isspace needs an unsigned
    // char value.
    while (last != first && isspace(static_cast<unsigned char>(*(last - 1))))
        --last;
    return RefStr(first, static_cast<int>(std::distance(first, last)));
}

/**
 * Returns str without leading and trailing whitespace (as classified by
 * isspace). An empty input yields a default-constructed RefStr; otherwise
 * the result points into str's buffer.
 */
inline RefStr trim(RefStr const &str) {
    if (str.empty())
        return RefStr();
    RefStr::iterator begin = str.begin();
    RefStr::iterator end = str.end();
    // Advance past leading whitespace, stopping at the end of the string.
    while (begin != end && isspace(static_cast<unsigned char>(*begin)))
        ++begin;
    // Retreat over trailing whitespace, never crossing the new begin.
    while (end != begin && isspace(static_cast<unsigned char>(*(end - 1))))
        --end;

    return RefStr(begin, static_cast<int>(std::distance(begin, end)));
}

/**
 * Returns str with every leading char that occurs in blanks removed.
 * The result points into str's buffer.
 */
inline RefStr left_trim(RefStr const &str, RefStr const&blanks) {
    RefStr::iterator const blanks_first = blanks.begin();
    RefStr::iterator const blanks_last = blanks.end();
    RefStr::iterator const str_last = str.end();
    RefStr::iterator cursor = str.begin();

    // Step forward while the current char is one of the blanks.
    while (cursor != str_last
            && std::find(blanks_first, blanks_last, *cursor) != blanks_last) {
        ++cursor;
    }

    int const remaining = std::distance(cursor, str_last);
    return RefStr(cursor, remaining);
}

/**
 * Returns str with every trailing char that occurs in blanks removed.
 * The result starts at str.begin() and points into str's buffer.
 */
inline RefStr right_trim(RefStr const &str, RefStr const&blanks) {
    RefStr::iterator const blanks_first = blanks.begin();
    RefStr::iterator const blanks_last = blanks.end();
    RefStr::iterator const str_first = str.begin();
    RefStr::iterator tail = str.end();

    // Step backward while the char just before tail is one of the blanks.
    while (tail != str_first
            && std::find(blanks_first, blanks_last, *(tail - 1))
                    != blanks_last) {
        --tail;
    }

    int const kept = std::distance(str_first, tail);
    return RefStr(str_first, kept);
}

/**
 * Removes chars contained in blanks from both ends of str: first the
 * leading ones via left_trim, then the trailing ones via right_trim.
 */
inline RefStr trim(RefStr const &str, RefStr const &blanks) {
    RefStr const without_leading = left_trim(str, blanks);
    return right_trim(without_leading, blanks);
}

template<typename like_map_type>
inline
int parse_get(like_map_type & result, RefStr const&content) {
    int cnt = 0;
    RefStr key, value;
    typedef RefStr::iterator iter_type;
    iter_type start_key = content.begin();
    iter_type end = content.end();
    iter_type end_key = start_key;
    iter_type start_value = end;
    iter_type end_value = end;

    while ((end_key = std::find(start_key, end, '=')) != end) {
        start_value = end_key;
        ++start_value;
        end_value = std::find(start_value, end, '&');
        result.insert(std::make_pair<RefStr>(RefStr(start_key, std::distance(
                start_key, end_key)), RefStr(start_value, std::distance(
                start_value, end_value))));
        ++cnt;
        if (end_value == end)
            break;
        start_key = end_value;
        ++start_key;
    }

    return cnt;
}

posted @ 2010-12-06 21:10  napoleon_liu  阅读(1039)  评论(0编辑  收藏  举报