Linux企业级项目实践之网络爬虫(5)——处理配置文件


配置文件在Linux下使用得非常普遍,但是Linux下没有统一的配置文件标准。

我们把配置文件的规则制定如下:

1、把“#”视作注释开始

2、所有的配置项都是以键值对的形式出现

3、严格区分大小写

4、允许数据类型为整型的配置项

5、允许数据类型为字符串类型的配置项

6、允许数据类型为逻辑型的配置项,取值为yes或者no。

 

同时我们需要对配置文件做初始化和载入两个操作。

 

代码如下:

/* confparser.h */
 
#ifndef CONFPARSER_H
#define CONFPARSER_H
 
#include <vector>
using namespace std;
 
#define MAX_CONF_LEN  1024
#define CONF_FILE     "spider.conf"
 
/*
 * Runtime configuration of the spider.
 *
 * See spiderq.conf for the meaning of each member variable below.
 * Defaults are assigned by initconfig(); loadconfig() overwrites them with
 * the values of the identically named directives found in CONF_FILE.
 *
 * The struct contains std::vector members, so it is not a plain C struct:
 * an instance must be constructed (not merely malloc'ed) before the vectors
 * are used.
 */
typedef struct Config {
    int                  max_job_num;       /* "max_job_num" directive (integer); default 10 */
    char                *seeds;             /* "seeds" directive, strdup'ed string; default NULL */
    char                *include_prefixes;  /* "include_prefixes" directive, strdup'ed; default NULL */
    char                *exclude_prefixes;  /* "exclude_prefixes" directive, strdup'ed; default NULL */
    char                *logfile;           /* "logfile" directive, strdup'ed; default NULL */
    int                  log_level;         /* "log_level" directive (integer); default 0 */
    int                  max_depth;         /* "max_depth" directive (integer); default INF */
    int                  make_hostdir;      /* "make_hostdir" directive (yes/no converted by yesnotoi); default 0 */
    int                  stat_interval;     /* "stat_interval" directive (integer); default 0 */

    char                *module_path;       /* "module_path" directive, strdup'ed; default NULL */
    std::vector<char *>  modules;           /* one strdup'ed entry appended per "load_module" directive */
    std::vector<char *>  accept_types;      /* one strdup'ed entry appended per "accept_types" directive */
} Config;
 
/**
 * Allocate a Config and fill it with the built-in default values.
 *
 * @return pointer to the newly allocated configuration.
 */
Config *initconfig();

/**
 * Read CONF_FILE and store every recognised "key=value" directive in conf.
 *
 * @param conf configuration to update, as returned by initconfig().
 */
void loadconfig(Config *conf);
 
#endif

 
/* confparser.c*/
 
#include <new>

#include "spider.h"
#include "qstring.h"
#include "confparser.h"
 
#define INF 0x7FFFFFFF
 
/**
 * Allocate a Config and fill it with the built-in default values.
 *
 * Config contains std::vector members. Raw malloc'ed storage does not run
 * their constructors, and calling push_back() on such a vector (as
 * loadconfig() does) is undefined behaviour. The object is therefore
 * constructed in place with placement-new on the malloc'ed block, which keeps
 * the allocation compatible with callers that release it with free().
 *
 * To release the object completely, call conf->~Config() before free(conf);
 * a bare free() leaks whatever the vectors allocated.
 *
 * @return the initialised configuration, or NULL if memory allocation fails.
 */
Config * initconfig()
{
    void *mem = malloc(sizeof(Config));
    if (mem == NULL) {
        return NULL;
    }

    /* Value-initialise: zeroes the scalar members and constructs the vectors. */
    Config *conf = new (mem) Config();

    conf->max_job_num = 10;
    conf->seeds = NULL;
    conf->include_prefixes = NULL;
    conf->exclude_prefixes = NULL;
    conf->logfile = NULL;
    conf->log_level = 0;
    conf->max_depth = INF;          /* INF (0x7FFFFFFF) is the default when max_depth is not configured */
    conf->make_hostdir = 0;
    conf->module_path = NULL;
    conf->stat_interval = 0;
    /* conf->modules and conf->accept_types start out as empty vectors. */

    return conf;
}
 
/**
 * Load the directives found in CONF_FILE into an initialised Config.
 *
 * The file is read line by line (at most MAX_CONF_LEN characters per read).
 * Each line is passed through strim(); lines that are then empty or start
 * with '#' are skipped. Every other line must split on '=' into exactly two
 * parts, key and value. Keys are compared with strcasecmp(), i.e. without
 * regard to case. String values are copied with strdup(); integer values are
 * converted with atoi(); make_hostdir is converted with yesnotoi().
 * "load_module" and "accept_types" may appear several times; each occurrence
 * appends one entry to the corresponding vector.
 *
 * Parsing stops at the first malformed or unknown directive, which is
 * reported through SPIDER_LOG together with its line number. Directives read
 * before that line stay applied.
 *
 * If CONF_FILE cannot be opened the error is logged and conf is left
 * untouched.
 *
 * @param conf configuration to update; must be non-NULL and constructed
 *             (see initconfig()).
 */
void loadconfig(Config *conf)
{
    FILE *fp = NULL;
    char buf[MAX_CONF_LEN+1];
    int linenum = 0;
    const char *err = NULL;

    if ((fp = fopen(CONF_FILE, "r")) == NULL) {
        SPIDER_LOG(SPIDER_LEVEL_ERROR, "Can't load conf_file %s", CONF_FILE);
        /* Nothing to parse; continuing would hand a NULL stream to fgets(). */
        return;
    }

    while (fgets(buf, sizeof(buf), fp) != NULL) {
        int argc = 0;           /* reset per line so a stale count is never reused */
        char **argv = NULL;
        char *line = NULL;
        char *key = NULL;
        char *val = NULL;

        linenum++;
        line = strim(buf);

        /* Skip comments and blank lines. */
        if (line[0] == '#' || line[0] == '\0') {
            continue;
        }

        /* NOTE(review): the last argument (1) presumably limits the number of
         * splits so that values may contain '=' -- confirm against strsplit().
         * NOTE(review): argv is never released here; whether and how it must
         * be freed depends on strsplit()'s ownership contract -- confirm. */
        argv = strsplit(line, '=', &argc, 1);
        if (argv == NULL || argc != 2) {
            err = "directive must be 'key=value'";
            break;
        }

        key = argv[0];
        val = argv[1];

        if (strcasecmp(key, "max_job_num") == 0) {
            conf->max_job_num = atoi(val);
        } else if (strcasecmp(key, "logfile") == 0) {
            conf->logfile = strdup(val);
        } else if (strcasecmp(key, "include_prefixes") == 0) {
            conf->include_prefixes = strdup(val);
        } else if (strcasecmp(key, "exclude_prefixes") == 0) {
            conf->exclude_prefixes = strdup(val);
        } else if (strcasecmp(key, "seeds") == 0) {
            conf->seeds = strdup(val);
        } else if (strcasecmp(key, "module_path") == 0) {
            conf->module_path = strdup(val);
        } else if (strcasecmp(key, "load_module") == 0) {
            conf->modules.push_back(strdup(val));
        } else if (strcasecmp(key, "log_level") == 0) {
            conf->log_level = atoi(val);
        } else if (strcasecmp(key, "max_depth") == 0) {
            conf->max_depth = atoi(val);
        } else if (strcasecmp(key, "stat_interval") == 0) {
            conf->stat_interval = atoi(val);
        } else if (strcasecmp(key, "make_hostdir") == 0) {
            conf->make_hostdir = yesnotoi(val);
        } else if (strcasecmp(key, "accept_types") == 0) {
            conf->accept_types.push_back(strdup(val));
        } else {
            err = "Unknown directive";
            break;
        }
    }

    /* Close the stream on every path before reporting any error. */
    fclose(fp);

    if (err != NULL) {
        SPIDER_LOG(SPIDER_LEVEL_ERROR, "Bad directive in %s[line:%d] %s", CONF_FILE, linenum, err);
    }
}


 

 


posted on 2014-08-28 01:31  三少爷的剑123  阅读(342)  评论(0)  编辑  收藏  举报

导航