Sherlock的程序人生

常用字符串处理工具封装

这些年在C++下开发,习惯了使用大而全的库;突然有一天不能使用这些库了,才发现自己缺少一套常用的工具库。

基于std::string的字符串处理

使用.hpp文件,不需要加入项目中,只要#include文件即可使用

StringUtil.hpp

#ifndef STRING_UTIL_H
#define STRING_UTIL_H

#include <string.h>

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <memory>
#include <new>
#include <string>
#include <utility>
#include <vector>

namespace sh
{
/**
 * @brief:  Selects whether string matching distinguishes letter case.
 * @author: sherlock_lht
 */
enum CaseSensitivity
{
    CaseInsensitive = 0,// letter case is ignored
    CaseSensitive       // letter case is significant
};

/**
 * @brief:  Controls how split() treats empty fields.
 * @author: sherlock_lht
 */
enum SplitBehavior
{
    KeepEmptyParts = 0, // empty fields are kept in the result
    SkipEmptyParts      // empty fields are dropped from the result
};
/**
 * Common string helpers built on std::string, intended for ASCII text.
 * The interface follows the style of Qt's string API.
 * Header-only: just #include this file, nothing has to be added to the build.
 *
 * @author: sherlock_lht
 */
class StringUtil
{
public:
    /**
     * printf-style formatting into a std::string.
     *
     * @param format  printf format string.
     * @param args    values consumed by the format string.
     * @return the formatted text; an empty string if formatting fails or the
     *         temporary buffer cannot be allocated.
     */
    template<typename ... Args>
    static std::string format(const char *format, Args ... args)
    {
        // First pass only measures: the result excludes the terminating NUL
        // and is negative when the format cannot be processed.
        const int length = std::snprintf(nullptr, 0, format, args ...);
        if (length <= 0)
            return std::string();

        const std::size_t size_buf = static_cast<std::size_t>(length) + 1;
        std::unique_ptr<char[]> buf(new(std::nothrow) char[size_buf]);
        if (!buf)
            return std::string();

        if (std::snprintf(buf.get(), size_buf, format, args ...) < 0)
            return std::string();
        return std::string(buf.get(), buf.get() + length);
    }

    /**
     * Returns a lowercase copy of the string.
     */
    static std::string toLower(const std::string &origin_str)
    {
        std::string result = origin_str;
        // std::tolower requires a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        std::transform(result.begin(), result.end(), result.begin(),
                       [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
        return result;
    }

    /**
     * Returns an uppercase copy of the string.
     */
    static std::string toUpper(const std::string &origin_str)
    {
        std::string result = origin_str;
        // See toLower(): the unsigned char round-trip avoids undefined behaviour.
        std::transform(result.begin(), result.end(), result.begin(),
                       [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });
        return result;
    }

    /**
     * Returns true if origin_str starts with substr; otherwise returns false.
     * An empty substr is a prefix of every string.
     * If cs is sh::CaseSensitive (default), the check is case sensitive;
     * otherwise it is case insensitive.
     */
    static bool startsWith(const std::string &origin_str,
                           const std::string &substr,
                           CaseSensitivity cs = CaseSensitivity::CaseSensitive)
    {
        const std::size_t prefix_length = substr.length();
        if (prefix_length > origin_str.length())
        {
            return false;
        }
        // Only the leading prefix_length characters are inspected, so a miss
        // does not scan the whole string.
        if (CaseSensitivity::CaseSensitive == cs)
        {
            return origin_str.compare(0, prefix_length, substr) == 0;
        }
        return toLower(origin_str.substr(0, prefix_length)) == toLower(substr);
    }

    /**
     * Returns true if origin_str ends with substr; otherwise returns false.
     * An empty substr is a suffix of every string.
     * If cs is sh::CaseSensitive (default), the check is case sensitive;
     * otherwise it is case insensitive.
     */
    static bool endsWith(const std::string &origin_str,
                         const std::string &substr,
                         CaseSensitivity cs = CaseSensitivity::CaseSensitive)
    {
        // Guard first: the subtraction below would wrap around otherwise.
        if (substr.length() > origin_str.length())
        {
            return false;
        }
        const std::size_t offset = origin_str.length() - substr.length();
        if (CaseSensitivity::CaseSensitive == cs)
        {
            return origin_str.compare(offset, substr.length(), substr) == 0;
        }
        return toLower(origin_str.substr(offset)) == toLower(substr);
    }

    /**
     * Compares s1 with s2 and returns an integer less than, equal to,
     * or greater than zero if s1 is less than, equal to, or greater than s2.
     * The whole content of both strings is compared, including embedded NULs.
     *
     * If cs is sh::CaseSensitive, the comparison is case sensitive; otherwise the comparison is case insensitive.
     */
    static int compare(const std::string &s1, const std::string &s2,
                       CaseSensitivity cs = CaseSensitivity::CaseSensitive)
    {
        if (CaseSensitivity::CaseSensitive == cs)
        {
            return s1.compare(s2);
        }
        return toLower(s1).compare(toLower(s2));
    }

    /**
     * Returns true if str contains an occurrence of substr;
     * otherwise returns false.
     *
     * If cs is sh::CaseSensitive (default), the search is case sensitive;
     * otherwise the search is case insensitive.
     */
    static bool contains(const std::string &str, const std::string &substr,
                         CaseSensitivity cs = CaseSensitivity::CaseSensitive)
    {
        if (CaseSensitivity::CaseSensitive == cs)
        {
            return str.find(substr) != std::string::npos;
        }
        return toLower(str).find(toLower(substr)) != std::string::npos;
    }

    /**
     * Splits the string into substrings wherever sep occurs, and returns the list of those strings.
     * If sep does not match anywhere in the string, or sep is empty, split() returns a
     * single-element list containing this string.
     * cs specifies whether sep should be matched case sensitively or case insensitively.
     * If behavior is sh::SkipEmptyParts, empty entries don't appear in the result.
     * By default, empty entries are kept.
     *
     */
    static std::vector<std::string> split(const std::string &str, const std::string &sep,
                                          SplitBehavior behavior = SplitBehavior::KeepEmptyParts,
                                          CaseSensitivity cs = CaseSensitivity::CaseSensitive)
    {
        const bool keep_empty = SplitBehavior::KeepEmptyParts == behavior;
        std::vector<std::string> result;

        if (sep.empty())
        {
            // An empty separator matches at every offset without advancing,
            // so the scan below would never terminate; treat it as "no match".
            if (keep_empty || !str.empty())
            {
                result.emplace_back(str);
            }
            return result;
        }

        // Matching runs on (possibly lowercased) copies, while the returned
        // fields are always cut from the original string. Per-character ASCII
        // case folding keeps the offsets of both strings aligned.
        const bool case_sensitive = CaseSensitivity::CaseSensitive == cs;
        const std::string check_str = case_sensitive ? str : toLower(str);
        const std::string check_sep = case_sensitive ? sep : toLower(sep);

        std::size_t start_index = 0;
        while (true)
        {
            const std::size_t index = check_str.find(check_sep, start_index);
            // When index is npos the count is clamped, yielding the tail.
            std::string element = str.substr(start_index, index - start_index);
            if (keep_empty || !element.empty())
            {
                result.emplace_back(std::move(element));
            }
            if (std::string::npos == index)
            {
                break;
            }
            start_index = index + check_sep.length();
        }
        return result;
    }

    /**
     * Returns a copy of str with every leading character that appears in
     * token removed. token is treated as a set of characters, not as a
     * substring.
     */
    static std::string trimLeft(const std::string &str, const std::string &token = " ")
    {
        std::string result = str;
        // npos (nothing but token characters) erases the whole string.
        result.erase(0, result.find_first_not_of(token));
        return result;
    }

    /**
     * Returns a copy of str with every trailing character that appears in
     * token removed. token is treated as a set of characters, not as a
     * substring.
     */
    static std::string trimRight(const std::string &str, const std::string &token = " ")
    {
        std::string result = str;
        // npos + 1 wraps to 0, which erases the whole string as intended.
        result.erase(result.find_last_not_of(token) + 1);
        return result;
    }

    /**
     * Returns a copy of str with the characters of token removed from both
     * the start and the end. token is treated as a set of characters.
     */
    static std::string trim(const std::string &str, const std::string &token = " ")
    {
        return trimRight(trimLeft(str, token), token);
    }

    /**
     * Returns a string that has whitespace removed from the start and the end.
     * This includes the ASCII characters '\t', '\n', '\v', '\f', '\r', and ' '.
     */
    static std::string trimmed(const std::string &str)
    {
        return trim(str, "\t\n\v\f\r ");
    }
};
}

#endif//STRING_UTIL_H

posted @ 2021-07-04 17:50  sherlock_lin  阅读(217)  评论(0)  编辑  收藏  举报