Boost.Spirit之解析C++文件
环境:
Windows平台:win7_64旗舰版,VS2019、boost_1.55
场景:
解析C++文件,提取typedef、struct、class信息等等,可用于自动化编程,例如将struct自动生成格式化日志和二进制协议(类似FlatBuffers)
开始:
一、Boost.Spirit介绍
Boost.Spirit主要由4部分组成
Spirit.Classic:这个是Boost.Spirit V1.8版本以前的功能实现。
Spirit.Qi:这是一个解析器库,用于构建递归下降解析器。它提供的领域特定语言(DSL)可以用来描述要实现的语法,以及用于存储解析结果的规则。
Spirit.Lex:这个库用于创建词法分析器(lexer)。它提供的领域特定语言允许你定义用于匹配标记(token)的正则表达式,将这些正则表达式与匹配时要执行的代码关联起来,并将标记定义添加到词法分析器中。
Spirit.Karma:这是一个生成器库,允许你编写递归下降式的、由数据类型驱动的输出格式化代码。
这里我们用Spirit.Classic来提取c++文件。
二、C++类型提取
// Shorthands used by the grammar snippets below: the whitespace parser and the rule type.
// NOTE(review): `bs` is not declared in this snippet — presumably a namespace alias for
// Boost.Spirit Classic declared earlier in the header; confirm.
using bs::space_p; using rule = bs::rule<>;
2.1 匹配注释
// Matches a comment introduced by "//" or one delimited by "/*" and "*/".
const rule comment_p = bs::comment_p("//") | bs::comment_p("/*", "*/");
2.2 匹配字母
// Matches a single ASCII letter: 'a'..'z' or 'A'..'Z'.
const rule alpha_p = bs::range_p('a', 'z') | bs::range_p('A', 'Z');
2.3 匹配标识符(以字母或下划线开始,后面跟零个或多个字母、数字或下划线)
// Matches an identifier: a letter or '_' followed by zero or more letters, digits or '_'.
const rule identifier_p = (alpha_p | '_') >> (*(alpha_p | bs::digit_p | '_'));
2.4 匹配*或&
// Matches a single pointer or reference declarator character: '*' or '&'.
const rule ptr_ref_p = bs::ch_p('*') | '&';
2.5 匹配public、private、protected
// Matches one of the access-specifier keywords: "public", "private" or "protected".
const rule access_p = (bs::str_p("public") | "private" | "protected");
2.6 匹配括号,例如{}、{{}}、{{}{}}
// Matches a balanced group of curly braces, including nested groups: {}, {{}}, {{}{}}.
// Braces are counted purely by character; nothing here skips string literals or comments.

// Declared ahead of its definition because bracket_loop_p refers to it recursively.
extern rule bracket_p;
// One character that is neither '{' nor '}'.
rule backet_bound_p = ((~bs::ch_p('{')) & (~bs::ch_p('}')));
// Any run of non-brace characters followed by one nested brace group.
rule bracket_loop_p = (*backet_bound_p) >> bracket_p;
// '{', then non-brace characters and nested groups in any number, then the closing '}'.
rule bracket_p = bs::ch_p('{') >> (*backet_bound_p) >> (*bracket_loop_p) >> (*backet_bound_p) >> '}';
2.7 匹配类型
// Builds the parser for a type as written in a declaration:
//   [const] <type> [*|&...] [const]
// where <type> is "long long", "unsigned char" or a single identifier.
// The helper rules are function-local statics so that they outlive the returned rule,
// which refers to them.
rule type_rule()
{
    // Look-ahead guard: optional whitespace followed by "const".
    static const rule is_const_p = bs::eps_p(*space_p >> "const");
    // Look-ahead guard: optional whitespace followed by at least one '*' or '&'.
    static const rule is_ptr_ref_p = bs::eps_p(*space_p >> (+ptr_ref_p));
    // The two-word type "long long" (optional whitespace between the words).
    static const rule longlong_p = bs::eps_p(*space_p >> "long" >> *space_p >> "long") >> "long" >> *space_p >> "long";
    // The two-word type "unsigned char" (optional whitespace between the words).
    static const rule unsigned_char_p = bs::eps_p(*space_p >> "unsigned" >> *space_p >> "char") >> "unsigned" >> *space_p >> "char";
    // Bare type name; this local deliberately shadows the file-level type_p defined below.
    static const rule type_p = longlong_p | unsigned_char_p | identifier_p;
    // Optional leading "const ", the type name, optional pointer/reference run, optional trailing "const".
    return !(is_const_p >> "const" >> +space_p) >> type_p >> !(is_ptr_ref_p >> *space_p >> (+ptr_ref_p)) >> !(is_const_p >> *space_p >> "const");
}
// File-level type parser used by the rules that follow; initialized once from type_rule().
const rule type_p = type_rule();
2.8 匹配类型和名称
template<class action> class type_and_name_rule : public rule { public: template<class func> type_and_name_rule(action* act, func f) { _array = "-1"; auto on_type_f = [this](const char* start, const char* end) { _type.assign(start, end); }; auto on_array_f = [this](const char* start, const char* end) { _array.assign(start, end); }; auto on_name_f = [this](const char* start, const char* end) { _name.assign(start, end); }; auto on_end_f = [this, act, f](const char) { (act->*f)(_type.c_str(), atoi(_array.c_str()), _name.c_str()); _array = "-1"; }; static const rule array_p = (bs::ch_p('[') >> (+bs::digit_p)[on_array_f] >> ']'); static const rule end_p = bs::ch_p(';')[on_end_f]; rule::operator = (type_p[on_type_f] >> *space_p >> identifier_p[on_name_f] >> *space_p >> !array_p >> *space_p >> +end_p); } std::string _type; std::string _array; std::string _name; };
2.9 匹配typedef
template<class action> class typedef_rule : public rule { public: typedef_rule(action* act) : _type_and_name(act, &action::on_typedef) { static const rule prefix = bs::str_p("typedef"); rule::operator = (prefix >> *space_p >> _type_and_name); } type_and_name_rule<action> _type_and_name; };
2.10 匹配成员函数
// Grammar rule for a member function that is defined inline, i.e. followed by a
// brace-delimited body:
//   [virtual|static] <return type> <name> ( <type> [<name>] [,] ... ) { ... } [;...]
// Each recognised piece is forwarded to the matching on_func_* member of `action`.
template<class action> class member_func_rule : public rule
{
public:
    // act: object whose on_func_* members receive the parse events.
    member_func_rule(action* act)
    {
        // Bind the callbacks. Two placeholders: invoked with the matched [start, end) range;
        // one placeholder: invoked with the single matched character.
        auto on_func_return = std::bind(&action::on_func_return, act, std::placeholders::_1, std::placeholders::_2);
        auto on_func_name = std::bind(&action::on_func_name, act, std::placeholders::_1, std::placeholders::_2);
        auto on_func_param_begin = std::bind(&action::on_func_param_begin, act, std::placeholders::_1);
        auto on_func_param_end = std::bind(&action::on_func_param_end, act, std::placeholders::_1);
        auto on_func_param_type = std::bind(&action::on_func_param_type, act, std::placeholders::_1, std::placeholders::_2);
        auto on_func_param_name = std::bind(&action::on_func_param_name, act, std::placeholders::_1, std::placeholders::_2);
        // Optional leading specifier.
        _prefix_p = bs::str_p("virtual") | "static";
        _return_p = type_p[on_func_return];
        _name_p = identifier_p[on_func_name];
        _param_begin_p = bs::ch_p('(')[on_func_param_begin];
        _param_end_p = bs::ch_p(')')[on_func_param_end];
        // One parameter: a type, optionally followed by a parameter name.
        _param_p = type_p[on_func_param_type] >> *space_p >> !identifier_p[on_func_param_name];
        // Note: the ')' action fires before the body is matched, so on_func_param_end is
        // called even if the following bracket_p then fails to match.
        rule::operator = (!_prefix_p >> *space_p >> _return_p >> *space_p >> _name_p >> *space_p >> _param_begin_p >> *(*space_p >> _param_p >> *space_p >> !bs::ch_p(',')) >> _param_end_p >> *space_p >> bracket_p >> *space_p >> *bs::ch_p(';'));
    }
protected:
    // Sub-rules are members because the composed grammar refers to them.
    rule _prefix_p;
    rule _return_p;
    rule _name_p;
    rule _param_begin_p;
    rule _param_end_p;
    rule _param_p;
};
2.11 匹配成员变量
template<class action> class member_data_rule : public rule { public: member_data_rule(action* act) : _member_type_name(act, &action::on_member_type_name) { rule::operator =(!bs::str_p("struct") >> *space_p >> _member_type_name); } type_and_name_rule<action> _member_type_name; };
2.12 解析类
// Grammar rule for a whole class/struct definition:
//   class|struct [<macro>] <name> [: [access] <base>]... { <members> } ;
// Members may be comments, access labels, inline member functions or data members.
// Every recognised piece is forwarded to the matching on_* member of `action`.
template<class action> class class_rule : public rule
{
public:
    // act: object whose on_* members receive the parse events; also handed to the
    // member-function and member-data sub-rules.
    class_rule(action* act)
        : _member_func(act)
        , _member_data(act)
    {
        // Bind the callbacks. Two placeholders: invoked with the matched [start, end) range;
        // one placeholder: invoked with the single matched character.
        auto on_category_f = std::bind(&action::on_category, act, std::placeholders::_1, std::placeholders::_2);
        auto on_name_f = std::bind(&action::on_name, act, std::placeholders::_1, std::placeholders::_2);
        auto on_macro_f = std::bind(&action::on_macro, act, std::placeholders::_1, std::placeholders::_2);
        auto on_begin_f = std::bind(&action::on_begin, act, std::placeholders::_1);
        auto on_end_f = std::bind(&action::on_end, act, std::placeholders::_1);
        auto on_member_comment_f = std::bind(&action::on_member_comment, act, std::placeholders::_1, std::placeholders::_2);
        auto on_access_f = std::bind(&action::on_access, act, std::placeholders::_1, std::placeholders::_2);
        // The keyword that opens the definition.
        _category_p = (bs::str_p("class") | "struct")[on_category_f];
        _name_p = identifier_p[on_name_f];
        // Two identifiers separated by whitespace: the first is reported through on_macro,
        // the second is the class name (presumably an export/attribute macro before the name).
        _macro_name_p = bs::eps_p(identifier_p >> +space_p >> identifier_p) >> identifier_p[on_macro_f] >> +space_p >> _name_p;
        // ": [access] Base" — the base class is matched but not reported.
        _super_class = bs::ch_p(':') >> *space_p >> !access_p >> *space_p >> identifier_p;
        _begin_p = bs::ch_p('{')[on_begin_f];
        _end_p = bs::ch_p('}')[on_end_f];
        // Member alternatives are tried in order: comment, access label, function, data.
        rule::operator = (_category_p >> *space_p >> (_macro_name_p | _name_p) >> *space_p >> *_super_class >> *space_p >> _begin_p >> *(*space_p >> (comment_p[on_member_comment_f] | (access_p[on_access_f] >> *space_p >> ':') | _member_func | _member_data)) >> *space_p >> _end_p >> *space_p >> +bs::ch_p(';'));
    }
    // Sub-rules are members because the composed grammar refers to them.
    rule _category_p;
    rule _name_p;
    rule _macro_name_p;
    rule _super_class;
    rule _begin_p;
    rule _end_p;
    member_func_rule<action> _member_func;
    member_data_rule<action> _member_data;
};
2.13 解析cpp
template<class action> class cpp_rule : public rule { public: cpp_rule(action* act) : _class(act), _typedef(act) { rule::operator =(*(space_p | comment_p | _class | _typedef | bs::anychar_p)); } class_rule<action> _class; typedef_rule<action> _typedef; };
三、生成c++解析树
cpp_parse.h
#ifndef CPP_PARSE_H_
#define CPP_PARSE_H_

#include <list>
#include <string>

// Plain data model produced by cpp_parse::parse().
//
// All value types follow the Rule of Zero: the compiler-generated copy/move
// operations are the correct ones. The former hand-written forwarding
// constructors called std::forward<t>(value._member) with the wrong template
// argument and failed to compile as soon as an object was copied from a
// non-const lvalue.
namespace cpp_parse
{
    /// A typed name: a data member, a function parameter or a typedef.
    struct type_value
    {
        std::string _access;  ///< Access specifier text (left empty when not recorded).
        std::string _name;    ///< Declared identifier.
        std::string _type;    ///< Type text as written in the source.
        int _array{ -1 };     ///< Array bound, or -1 when the declaration has no "[N]".
    };
    using type_list = std::list<type_value>;
    using typedef_list = std::list<type_value>;

    /// A member function.
    struct func_value
    {
        std::string _access;  ///< Access specifier text (left empty when not recorded).
        std::string _name;    ///< Function name.
        std::string _return;  ///< Return type text.
        type_list _params;    ///< Parameters in declaration order.
    };
    using func_list = std::list<func_value>;

    /// A class or struct definition.
    struct class_value
    {
        std::string _category;  ///< "class" or "struct".
        std::string _macro;     ///< Identifier placed between the keyword and the name, if any.
        std::string _name;      ///< Class name.
        type_list _types;       ///< Data members in declaration order.
        func_list _funcs;       ///< Member functions in declaration order.
    };
    using class_list = std::list<class_value>;

    /// Everything extracted from the parsed files.
    struct cpp_value
    {
        typedef_list _typedefs;
        class_list _classes;
    };

    /// Parses the file at path `file` and appends what it finds to `*value`.
    /// @return true when the parser reports a match.
    bool parse(const char* file, cpp_value* value);
}

#endif // !CPP_PARSE_H_
cpp_parse.cpp
#include <fstream> #include <sstream> #include "cpp_rule.h" #include "cpp_parse.h" namespace cpp_parse { class class_parse { public: class_parse(class_list* list) : _class_list(list) { } public: void on_func_return(const char* start, const char* end) { _func_value._return.assign(start, end); } void on_func_name(const char* start, const char* end) { _func_value._name.assign(start, end); } void on_func_param_begin(const char) { } void on_func_param_type(const char* start, const char* end) { type_value value; value._type.assign(start, end); _func_value._params.push_back(std::move(value)); } void on_func_param_name(const char* start, const char* end) { _func_value._params.back()._name.assign(start, end); } void on_func_param_end(const char) { _class_value._funcs.push_back(std::move(_func_value)); } public: void on_member_type_name(const char* type, int array, const char* name) { _type_value._type = type; _type_value._array = array; _type_value._name = name; _class_value._types.push_back(std::move(_type_value)); } void on_member_data_type(const char* start, const char* end) { _type_value._type.assign(start, end); } void on_member_data_name(const char* start, const char* end) { _type_value._name.assign(start, end); _class_value._types.push_back(std::move(_type_value)); } public: void on_category(const char* start, const char* end) { _class_value._category.assign(start, end); } void on_macro(const char* start, const char* end) { _class_value._macro.assign(start, end); } void on_name(const char* start, const char* end) { _class_value._name.assign(start, end); } void on_begin(const char) {} void on_access(const char* start, const char* end) {} void on_member_comment(const char* start, const char* end) {} void on_end(const char) { _class_list->push_back(std::move(_class_value)); } protected: func_value _func_value; type_value _type_value; class_value _class_value; class_list* _class_list; }; class typedef_parse { public: typedef_parse(typedef_list* list) : 
_typedef_list(list) { } void on_typedef(const char* type, int array, const char* name) { type_value value; value._type = type; value._array = array; value._name = name; _typedef_list->push_back(std::move(value)); } typedef_list* _typedef_list; }; class cpp_parser : public class_parse, public typedef_parse { public: cpp_parser(cpp_value* value) : class_parse(&(value->_classes)) , typedef_parse(&(value->_typedefs)) { } }; static std::string read_file_all(const char* file) { std::string content; std::ifstream fs; fs.open(file); if (!fs.is_open()) { std::cout << "file open fail,path=[" << file << "]" << std::endl; } std::stringstream buffer; buffer << fs.rdbuf(); return buffer.str(); } bool parse(const char* file, cpp_value* value) { cpp_parser root(value); cpp_grammar::cpp_rule<cpp_parser> cpp(&root); std::string content = read_file_all(file); bs::parse_info<> pi = bs::parse(content.c_str(), cpp); return pi.hit; } }
四、生成struct格式化日志
struct_to_log.cpp
#include <io.h>
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <string>
#include "cpp_parse.h"

/// Appends the paths of all regular files directly inside `path` to `files`.
/// Sub-directories are skipped (no recursion).
void get_files(std::string path, std::list<std::string>& files)
{
    struct _finddata_t fileinfo;
    std::string p;

    // _findfirst returns an intptr_t handle. Storing it in a `long` truncates it on
    // 64-bit Windows and makes the later _findnext/_findclose calls use a bad handle.
    const intptr_t hFile = _findfirst(p.assign(path).append("\\*").c_str(), &fileinfo);
    if (hFile == -1)
    {
        return;
    }

    do
    {
        // Directories are ignored; everything else is collected.
        if (!(fileinfo.attrib & _A_SUBDIR))
        {
            files.push_back(p.assign(path).append("\\").append(fileinfo.name));
        }
    } while (_findnext(hFile, &fileinfo) == 0);
    _findclose(hFile);
}

/// Returns the file name of `path` without directory and without the last extension.
/// Both '/' and '\\' are accepted as separators, also when mixed in one path.
std::string get_file_name(const std::string& path)
{
    std::string filename = path;

    const size_t slash = path.find_last_of("/\\");
    if (slash != path.npos)
    {
        filename = path.substr(slash + 1);
    }

    const size_t dot = filename.rfind('.');
    if (dot != filename.npos)
    {
        filename = filename.substr(0, dot);
    }
    return filename;
}

/// Returns an upper-cased copy of `s`.
std::string to_upper(const std::string& s)
{
    std::string content = s;
    // toupper has undefined behaviour for negative char values, hence unsigned char.
    std::transform(content.begin(), content.end(), content.begin(),
        [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    return content;
}

/// Builds the beginning of the generated header: include guard `name` plus the
/// FMT_INT64 / FMT_LONG format macros.
std::string log_head(const char* name)
{
    std::string content;
    content = content + "#ifndef " + name + "\n";
    content = content + "#define " + name + "\n";
    content = content + "\n";
    // NOTE(review): FMT_LONG is emitted as "%d" / "%lld" although it is used for
    // `long` members, whose printf conversion is "%ld" — confirm before relying on it.
    content += "#ifdef _WIN32\n"
        " #ifndef FMT_INT64\n"
        " #define FMT_INT64 \"%I64d\"\n"
        " #endif\n"
        " #ifndef FMT_LONG\n"
        " #define FMT_LONG \"%d\"\n"
        " #endif\n"
        "#else\n"
        " #ifndef FMT_INT64\n"
        " #define FMT_INT64 \"%lld\"\n"
        " #endif\n"
        " #ifndef FMT_LONG\n"
        " #define FMT_LONG \"%lld\"\n"
        " #endif\n"
        "#endif\n";
    content += "\n";
    return content;
}

/// Builds the end of the generated header: the closing #endif of guard `name`.
std::string log_tail(const char* name)
{
    return std::string("\n#endif") + " // !" + name;
}

/// Appends one printf-style format fragment and one argument expression per
/// loggable member of `value`.
///
/// @param typedefs  Typedef name -> definition; used to resolve a member's type once.
/// @param structs   Struct name -> definition; members of these types are expanded
///                  recursively, with "<member>." appended to the access path.
/// @param value     Struct whose members are processed.
/// @param variant   Access path prefix for the arguments, e.g. "(data)->".
/// @param formats   Receives fragments such as "id=[%d]".
/// @param args      Receives expressions such as "(data)->id".
///
/// Members declared with array bound 0 and members of unsupported types are
/// skipped; the latter are reported with a "[WARN]" line on stdout.
void member_to_log(const std::map<std::string, cpp_parse::type_value>& typedefs,
    const std::map<std::string, cpp_parse::class_value>& structs,
    const cpp_parse::class_value& value,
    const std::string& variant,
    std::list<std::string>* formats,
    std::list<std::string>* args)
{
    for (const auto& member : value._types)
    {
        if (member._array == 0)
        {
            continue;
        }

        std::string type = member._type;

        const auto nested = structs.find(type);
        if (nested != structs.end())
        {
            member_to_log(typedefs, structs, nested->second, variant + member._name + ".", formats, args);
            continue;
        }

        const auto alias = typedefs.find(type);
        if (alias != typedefs.end())
        {
            type = alias->second._type;
        }

        if (type == "char" || type == "unsigned char")
        {
            if (member._array == -1 || member._array == 1)
            {
                formats->push_back(member._name + "=[%d]");
            }
            else if (member._array > 1)
            {
                formats->push_back(member._name + "=[%s]");
            }
            else
            {
                continue;
            }
        }
        else if (type == "int" || type == "short")
        {
            formats->push_back(member._name + "=[%d]");
        }
        else if (type == "double")
        {
            formats->push_back(member._name + "=[%.3f]");
        }
        // "long long" must be tested before "long": every "long long" also starts
        // with "long", so the former order made this branch unreachable and 64-bit
        // members were printed with FMT_LONG.
        else if (type.compare(0, 9, "long long") == 0)
        {
            formats->push_back(member._name + "=[\" FMT_INT64 \"]");
        }
        else if (type.compare(0, 4, "long") == 0)
        {
            formats->push_back(member._name + "=[\" FMT_LONG \"]");
        }
        else
        {
            std::cout << "[WARN] class=[" << value._name << "],member=[" << member._name << "],type=[" << type << "]" << std::endl;
            continue;
        }

        args->push_back(variant + member._name);
    }
}

/// Generates one `<Name>_LOG(data)` macro per struct in `cpp`.
///
/// Each macro expands to a format string followed by the matching argument list;
/// a line continuation is inserted after every four entries.
/// The typedef entries of `cpp` are moved from and must not be used afterwards.
std::string struct_to_log(cpp_parse::cpp_value& cpp)
{
    std::map<std::string, cpp_parse::type_value> typedefs;
    for (auto& def : cpp._typedefs)
    {
        typedefs[def._name] = std::move(def);
    }

    std::map<std::string, cpp_parse::class_value> structs;
    for (const auto& cls : cpp._classes)
    {
        if (cls._category == "struct")
        {
            structs[cls._name] = cls;
        }
    }

    std::cout << "typedef: [" << typedefs.size() << "]" << std::endl;
    std::cout << "struct: [" << structs.size() << "]" << std::endl;

    std::string contents;
    contents.reserve(8 * 1024);
    for (const auto& cls : cpp._classes)
    {
        if (cls._category != "struct")
        {
            continue;
        }

        std::list<std::string> formats;
        std::list<std::string> args;
        member_to_log(typedefs, structs, cls, "(data)->", &formats, &args);

        // Format string part.
        int index = 0;
        contents += "#define ";
        contents += cls._name;
        contents += "_LOG(data) \"";
        for (const auto& format : formats)
        {
            // Break the line before entries 4, 8, 12, ...
            if (index > 0 && index % 4 == 0)
            {
                contents += "\" \\\n \"";
            }
            ++index;
            contents += format;
            contents += ",";
        }
        if (!contents.empty() && contents.back() == ',')
        {
            // Replace the trailing separator by the end of the log line.
            contents.erase(contents.length() - 1);
            contents += ".\\n";
        }
        contents += "\",\\\n ";

        // Argument list part.
        index = 0;
        for (const auto& arg : args)
        {
            // Break the line before entries 4, 8, 12, ...
            if (index > 0 && index % 4 == 0)
            {
                contents += "\\\n ";
            }
            ++index;
            contents += arg;
            contents += ",";
        }
        if (!contents.empty() && contents.back() == ',')
        {
            contents.erase(contents.length() - 1);
        }
        contents += "\n";
    }
    return contents;
}

/// Prints the command-line synopsis.
void usage()
{
    std::cout << "usage:" << std::endl;
    std::cout << " struct_to_log [directory] [out_file]" << std::endl;
}

/// Entry point: parses every "*.h" file directly inside argv[1] and writes the
/// generated log macros to argv[2].
/// @return 0 on success, 1 on a usage error or when the output file cannot be opened.
int main(int argc, char* argv[])
{
    if (argc < 3)
    {
        usage();
        return 1;
    }

    cpp_parse::cpp_value cpp;
    std::list<std::string> files;
    get_files(argv[1], files);
    for (const auto& file : files)
    {
        const bool is_header = file.length() > 2 && file.compare(file.length() - 2, 2, ".h") == 0;
        if (!is_header)
        {
            continue;
        }
        if (!cpp_parse::parse(file.c_str(), &cpp))
        {
            std::cout << "parse file fail,path=[" << file << "]" << std::endl;
        }
    }

    const std::string content = struct_to_log(cpp);

    std::ofstream out_file;
    out_file.open(argv[2], std::ios::out | std::ios::trunc);
    if (!out_file.is_open())
    {
        std::cout << "file open fail,path=[" << argv[2] << "]" << std::endl;
        return 1;
    }

    // Include guard derived from the output file name, e.g. test_log.h -> TEST_LOG_H_.
    std::string macro = to_upper(get_file_name(argv[2]));
    macro += "_H_";

    out_file << log_head(macro.c_str());
    out_file << content;
    out_file << log_tail(macro.c_str());
    std::cout << "output: [" << argv[2] << "]" << std::endl;
    return 0;
}
五、输入输出
输入文件test.h
// Sample input for struct_to_log: a struct with one int member and one double member.
struct SubscribeField { int exchangeID; double securityID; };
输出文件test_log.h
#ifndef TEST_LOG_H_ #define TEST_LOG_H_ #ifdef _WIN32 #ifndef FMT_INT64 #define FMT_INT64 "%I64d" #endif #ifndef FMT_LONG #define FMT_LONG "%d" #endif #else #ifndef FMT_INT64 #define FMT_INT64 "%lld" #endif #ifndef FMT_LONG #define FMT_LONG "%lld" #endif #endif #define SubscribeField_LOG(data) "exchangeID=[%d],securityID=[%.3f].\n",\ (data)->exchangeID,(data)->securityID #endif // !TEST_LOG_H_