kafka 搭建与使用

消息量不是特别大时不建议使用 Kafka,可以改用 Redis 或 Beanstalkd,它们部署和使用都更简单。

Beanstalkd 客户端建议用:
composer require pda/pheanstalk

如果无JAVA JDK 先下载JAVA JDK 并安装

添加.profile变量:JAVA_HOME

export JAVA_HOME=/usr/java/jdk1.8.0_31

下载:
http://gradle.org/downloads
gradle-2.3-all.zip (binaries, sources and documentation)
解压到:
/usr/local/gradle-2.3/
添加.profile变量:GRADLE_HOME(在PATH=**下)

export GRADLE_HOME=/usr/local/gradle-2.3
PATH=$PATH:$GRADLE_HOME/bin

下载:
http://kafka.apache.org/downloads.html
Source download: kafka-0.8.2.0-src.tgz (asc, md5)
解压到:
/usr/local/kafka/
执行:

gradle
./gradlew

#如果无独立 zookeeper 执行 下载 zookeeper,如果想使用zookeeper集群,请独立安装zookeeper

 ./gradlew jar

#启动 zookeeper

bin/zookeeper-server-start.sh config/zookeeper.properties &

配置:config/server.properties
启动 kafka

bin/kafka-server-start.sh config/server.properties &

#测试

创建topic :test

bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test

--replication-factor 表示副本的数量(kafka服务的数量)
--partitions 分区的数量
备注:一个topic 可以有多个分区.产生消息可以随机或用指定算法写入到指定分区.

查看topic列表:

bin/kafka-topics.sh --list --zookeeper localhost:2181

查看topic状态:

bin/kafka-topics.sh --describe --zookeeper localhost:2181 --topic test

往topic添加消息(Leader kafka):

bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test 
this is message
^C

启动topic观察者:

bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic test --from-beginning
#这里会输出
^C

#集群.配置等其他资料查阅:http://kafka.apache.org/documentation.html#quickstart
#备注: zookeeper为外部服务提供统一接口,内部自动选择Leader等. kafka 也依赖zookeeper 来实现部分功能

 

安装管理软件:https://github.com/yahoo/kafka-manager

下载,解压,编译

修改 config/application.conf

./sbt clean dist

编译完成后进入构建输出目录(sbt dist 生成的压缩包一般位于 ./target/universal/ 下 —— 以实际输出为准)

解压 并移动到/usr/local/下

启动:

./bin/kafka-manager -Dconfig.file=./config/application.conf -Dhttp.port=9009

配置NGINX代理到此服务(提供用户验证)

在NGINX配置目录/conf 下执行

htpasswd ./pwd username

        proxy_buffering    off;
        proxy_set_header   X-Real-IP $remote_addr;
        proxy_set_header   X-Forwarded-Proto $scheme;
        proxy_set_header   X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header   Host $http_host;
        proxy_http_version 1.1;
        upstream my-backend {
            server 127.0.0.1:9009;
        }
        server {
             listen       192.168.4.200:8080;
             charset utf-8;
             auth_basic "kafka panel!!";
             auth_basic_user_file pwd;
             location / {
                proxy_pass http://my-backend;
             }
        }

 

PHP 接口:
https://github.com/nmred/kafka-php
#Produce.php

// Send 10 test messages to topic 'testp' through zookeeper at localhost:2181.
for ($i = 0; $i < 10; $i++) {
    $produce = \Kafka\Produce::getInstance('localhost:2181', 300);
    $partitions = $produce->getAvailablePartitions('testp');

    // Choose a target partition: the single one if only one exists,
    // otherwise a random pick. NOTE(review): rand() here yields an index
    // 0..n-1 — this assumes partition ids are contiguous from 0; verify.
    if (count($partitions) == 1) {
        $partition = array_pop($partitions);
    } else {
        $partition = rand(0, count($partitions) - 1);
    }

    // -1: wait for acknowledgement from all in-sync replicas.
    $produce->setRequireAck(-1);

    // setMessages(topic, partition, message array)
    $produce->setMessages('testp', $partition, array('test' . $i));
    $result = $produce->send();
    var_dump($result);
}

  

#Consumer.php

// Fetch messages from topic 'testp' via zookeeper at localhost:2181.
$consumer = \Kafka\Consumer::getInstance('localhost:2181');

// Consumer group: members of the same group consume each message once.
$group = 'testgroup';
$consumer->setGroup($group);

//$consumer->setPartition('testp', 0);// restrict to one specific partition
$consumer->setTopic('testp');// iterate over all partitions of the topic

$result = $consumer->fetch();
foreach ($result as $topicName => $partition) {
    foreach ($partition as $partId => $messageSet) {
        var_dump($partition->getHighOffset());    // highest available offset
        var_dump($partition->getMessageOffset()); // current offset for this group
        foreach ($messageSet as $k => $message) {
            var_dump($message);
            flush();
        }
    }
}

#其他example 参考即可 ,为 Produce Consumer 的分拆测试部分

 

关于其他hadoop 的参考:http://hadoop.apache.org/ 可以先看: HDFS MapReduce Hive 实现

 

PHP得到通知解决办法:

用C实现一个后台程序监听kafka,当有消息时候,启动PHP执行消息处理

C接口:

https://github.com/edenhill/librdkafka

简单实现:

#include <ctype.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <syslog.h>
#include <sys/time.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>


/* Typical include path would be <librdkafka/rdkafka.h>, but this program
 * is builtin from within the librdkafka source tree and thus differs. */
#include "rdkafka.h"  /* for Kafka driver */
#include "rdkafka_int.h"

#include <zookeeper.h>
#include <zookeeper.jute.h>
#include <jansson.h>

#define BROKER_PATH "/brokers/ids"

static int run = 1;        /* main consume loop runs while non-zero; cleared by SIGINT */
static rd_kafka_t *rk;     /* global kafka handle, shared with signal handlers */
static int exit_eof = 0;   /* -e flag: stop once partition EOF is reached */
static int quiet = 0;      /* set when stdin is not a tty, or via -q */

//signal
/* SIGINT handler: ask the main loop to stop and unblock any pending fgets(). */
static void sig_stop (int sig) {
    (void)sig;
    run = 0;
    fclose(stdin); /* abort fgets() */
}

/* SIGUSR1 handler: dump rdkafka internal state to stdout for debugging. */
static void sig_usr1 (int sig) {
    (void)sig;
    rd_kafka_dump(stdout, rk);
}

/* SIGCHLD handler: reap all exited PHP worker children without blocking.
 *
 * BUG FIX: the original read `while((t=waitpid(...)>0))`, which due to
 * operator precedence assigned the *comparison result* (0 or 1) to t,
 * so the printed pid was always 1 and the loop condition was wrong.
 */
static void sig_child_stop(int sig){
   (void)sig;
   pid_t t;
   while((t=waitpid(-1,NULL,WNOHANG))>0){
       /* NOTE(review): printing when `quiet` is set looks inverted, but it
        * may be intentional for daemon (non-tty) logging — confirm before
        * flipping the condition. */
       if(quiet)
           printf("stop child:%d \n",(int)t);
   }
}

/**
 * Kafka logger callback (optional)
 */
/**
 * Kafka logger callback (optional): prefix each rdkafka log line with a
 * seconds.milliseconds timestamp, the log level and facility.
 *
 * BUG FIX: the format uses %u but the original passed (int) casts —
 * the printf argument types must match the conversion specifiers.
 */
static void logger (const rd_kafka_t *rk, int level,
            const char *fac, const char *buf) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    fprintf(stderr, "%u.%03u RDKAFKA-%i-%s: %s: %s\n",
        (unsigned)tv.tv_sec, (unsigned)(tv.tv_usec / 1000),
        level, fac, rd_kafka_name(rk), buf);
}


/**
 * Hand a consumed Kafka message to PHP: fork a child that chdir()s into
 * the site directory and exec()s the PHP bootstrap script with
 * --task=tips --topic=... --partition=... . Error messages (consume
 * errors, partition EOF) are reported to stderr and not forwarded.
 *
 * Fixes vs. original: fork() failure is now detected, the child pid is
 * a pid_t, sprintf was replaced by bounded snprintf, the child uses
 * _exit() (avoids flushing the parent's stdio buffers twice), and the
 * execl() terminator is an explicit (char *)NULL as required for
 * variadic functions.
 */
static void notify_php(
        const char *site_dir,const char *php,const char *bootstarp,
        const char * topic,int partition,rd_kafka_message_t *rkmessage
        ){
    if (rkmessage->err) {
        if (rkmessage->err == RD_KAFKA_RESP_ERR__PARTITION_EOF) {
            fprintf(stderr,
                "%% Consumer reached end of %s [%"PRId32"] "
                   "message queue at offset %"PRId64"\n",
                   rd_kafka_topic_name(rkmessage->rkt),
                   rkmessage->partition, rkmessage->offset);

            if (exit_eof)
                run = 0;

            return;
        }

        fprintf(stderr, "%% Consume error for topic \"%s\" [%"PRId32"] "
               "offset %"PRId64": %s\n",
               rd_kafka_topic_name(rkmessage->rkt),
               rkmessage->partition,
               rkmessage->offset,
               rd_kafka_message_errstr(rkmessage));
        return;
    }

    pid_t pid = fork();
    if (pid < 0) {
        fprintf(stderr, "%% fork failed: %s\n", strerror(errno));
        return;
    }
    if (pid == 0) {
        if (chdir(site_dir) != 0)
            _exit(errno);
        char _topic[120];
        char _partition[20];
        snprintf(_topic, sizeof(_topic), "--topic=%s", topic);
        snprintf(_partition, sizeof(_partition), "--partition=%d", partition);
        execl(php, "php", bootstarp, "--task=tips", _topic, _partition,
              (char *)NULL);
        _exit(errno); /* only reached if execl() failed */
    }
    /* parent: child is reaped asynchronously by the SIGCHLD handler */
}


/**
 * Read the live broker registrations under /brokers/ids from zookeeper,
 * parse each node's JSON ("host", "port") and render a comma separated
 * "host:port,host:port,..." list into 'brokers'.
 *
 * NOTE(review): 'brokers' carries no size in the interface; all callers
 * pass a char[1024]. A very large cluster could still overflow it —
 * consider adding a length parameter.
 *
 * Fixes vs. original: zoo_get()'s return value is checked, the data
 * buffer reserves one byte for the NUL terminator (the original wrote
 * cfg[len] one past the end when the node data filled the buffer
 * exactly), and the path is built with bounded snprintf.
 */
static void set_brokerlist_from_zookeeper(zhandle_t *zzh, char *brokers)
{
    if (zzh)
    {
        struct String_vector brokerlist;
        if (zoo_get_children(zzh, BROKER_PATH, 1, &brokerlist) != ZOK)
        {
            fprintf(stderr, "No brokers found on path %s\n", BROKER_PATH);
            return;
        }

        int i;
        char *brokerptr = brokers;
        for (i = 0; i < brokerlist.count; i++)
        {
            char path[255], cfg[1024];
            snprintf(path, sizeof(path), "/brokers/ids/%s", brokerlist.data[i]);

            /* Reserve room for the terminating NUL written below. */
            int len = sizeof(cfg) - 1;
            if (zoo_get(zzh, path, 0, cfg, &len, NULL) != ZOK)
                continue;

            if (len > 0)
            {
                cfg[len] = '\0';
                json_error_t jerror;
                json_t *jobj = json_loads(cfg, 0, &jerror);
                if (jobj)
                {
                    json_t *jhost = json_object_get(jobj, "host");
                    json_t *jport = json_object_get(jobj, "port");

                    if (jhost && jport)
                    {
                        const char *host = json_string_value(jhost);
                        const int   port = json_integer_value(jport);
                        sprintf(brokerptr, "%s:%d", host, port);

                        brokerptr += strlen(brokerptr);
                        if (i < brokerlist.count - 1)
                        {
                            *brokerptr++ = ',';
                        }
                    }
                    json_decref(jobj);
                }
            }
        }
        deallocate_String_vector(&brokerlist);
        printf("Found brokers %s\n", brokers);
    }
}


/* Zookeeper watcher: when the broker registry membership changes, rebuild
 * the broker list and feed it to the existing rdkafka handle. */
static void watcher(zhandle_t *zh, int type, int state, const char *path, void *watcherCtx)
{
    char brokers[1024];

    /* Only child events under /brokers/ids are of interest. */
    if (type != ZOO_CHILD_EVENT ||
        strncmp(path, BROKER_PATH, sizeof(BROKER_PATH) - 1) != 0)
        return;

    brokers[0] = '\0';
    set_brokerlist_from_zookeeper(zh, brokers);
    if (rk != NULL && brokers[0] != '\0')
    {
        rd_kafka_brokers_add(rk, brokers);
        rd_kafka_poll(rk, 10);
    }
}


/**
 * Connect to zookeeper (10s session timeout) with 'watcher' registered
 * as the global watcher; exits the process on connection failure.
 * Fix vs. original: the error message was missing its newline.
 */
static zhandle_t* initialize_zookeeper(const char * zookeeper, const int debug)
{
    zhandle_t *zh;
    if (debug)
    {
        zoo_set_debug_level(ZOO_LOG_LEVEL_DEBUG);
    }
    zh = zookeeper_init(zookeeper, watcher, 10000, 0, 0, 0);
    if (zh == NULL)
    {
        fprintf(stderr, "Zookeeper connection not established.\n");
        exit(1);
    }
    return zh;
}


/**
 * Entry point: discover Kafka brokers through zookeeper, consume messages
 * from a single topic/partition, and spawn a PHP worker per message.
 *
 * Fixes vs. original:
 *  - getopt string: 'b' takes an argument (-b <bootstrap.php>), so it
 *    needs a trailing ':' — the original left optarg undefined for -b.
 *  - rd_kafka_conf_set("metadata.broker.list", ...): a misplaced closing
 *    parenthesis passed `sizeof(errstr) != RD_KAFKA_CONF_OK` (i.e. 1) as
 *    the error-buffer size and never compared the return code.
 *  - Usage text now matches the options this program actually accepts
 *    and consumes the RD_KAFKA_DEBUG_CONTEXTS argument (the original
 *    format string had no %s for it).
 *  - rd_kafka_topic_new() result is checked.
 *  - conf-dump loop index is size_t to match the count type.
 */
int main (int argc, char **argv) {
    rd_kafka_topic_t *rkt;

    /* Defaults for the PHP worker exec'd per message (see notify_php). */
    char *site_dir="/usr/local/nginx/html";
    char *php="/usr/local/php/bin/php";
    char *bootstarp="index.php";

    char *zookeeper = "localhost:2181";
    zhandle_t *zh = NULL;
    char brokers[1024];
    char *topic = NULL;
    int partition = RD_KAFKA_PARTITION_UA;
    int opt;
    rd_kafka_conf_t *conf;
    rd_kafka_topic_conf_t *topic_conf;
    char errstr[512];
    const char *debug = NULL;
    int64_t start_offset = RD_KAFKA_OFFSET_STORED;
    int do_conf_dump = 0;

    memset(brokers, 0, sizeof(brokers));
    quiet = !isatty(STDIN_FILENO); /* default to quiet when daemonized */

    /* Kafka configuration */
    conf = rd_kafka_conf_new();

    /* Topic configuration */
    topic_conf = rd_kafka_topic_conf_new();

    while ((opt = getopt(argc, argv, "t:p:k:z:qd:o:eX:s:r:b:")) != -1) {
        switch (opt) {
        case 's':
            site_dir = optarg;
            break;
        case 'r':
            php = optarg;
            break;
        case 'b':
            bootstarp = optarg;
            break;
        case 't':
            topic = optarg;
            break;
        case 'p':
            partition = atoi(optarg);
            break;
        case 'k':
            zookeeper = optarg;
            break;
        case 'z':
            if (rd_kafka_conf_set(conf, "compression.codec",
                          optarg,
                          errstr, sizeof(errstr)) !=
                RD_KAFKA_CONF_OK) {
                fprintf(stderr, "%% %s\n", errstr);
                exit(1);
            }
            break;
        case 'o':
            if (!strcmp(optarg, "end"))
                start_offset = RD_KAFKA_OFFSET_END;
            else if (!strcmp(optarg, "beginning"))
                start_offset = RD_KAFKA_OFFSET_BEGINNING;
            else if (!strcmp(optarg, "stored"))
                start_offset = RD_KAFKA_OFFSET_STORED;
            else
                start_offset = strtoll(optarg, NULL, 10);
            break;
        case 'e':
            exit_eof = 1;
            break;
        case 'd':
            debug = optarg;
            break;
        case 'q':
            quiet = 1;
            break;
        case 'X':
        {
            char *name, *val;
            rd_kafka_conf_res_t res;

            if (!strcmp(optarg, "list") ||
                !strcmp(optarg, "help")) {
                rd_kafka_conf_properties_show(stdout);
                exit(0);
            }

            if (!strcmp(optarg, "dump")) {
                do_conf_dump = 1;
                continue;
            }

            name = optarg;
            if (!(val = strchr(name, '='))) {
                fprintf(stderr, "%% Expected "
                    "-X property=value, not %s\n", name);
                exit(1);
            }

            *val = '\0';
            val++;

            res = RD_KAFKA_CONF_UNKNOWN;
            /* Try "topic." prefixed properties on topic
             * conf first, and then fall through to global if
             * it didnt match a topic configuration property. */
            if (!strncmp(name, "topic.", strlen("topic.")))
                res = rd_kafka_topic_conf_set(topic_conf,
                                  name+
                                  strlen("topic."),
                                  val,
                                  errstr,
                                  sizeof(errstr));

            if (res == RD_KAFKA_CONF_UNKNOWN)
                res = rd_kafka_conf_set(conf, name, val,
                            errstr, sizeof(errstr));

            if (res != RD_KAFKA_CONF_OK) {
                fprintf(stderr, "%% %s\n", errstr);
                exit(1);
            }
        }
        break;

        default:
            goto usage;
        }
    }


    /* -X dump: print the effective configuration and exit. */
    if (do_conf_dump) {
        const char **arr;
        size_t cnt;
        int pass;

        for (pass = 0 ; pass < 2 ; pass++) {
            size_t i;

            if (pass == 0) {
                arr = rd_kafka_conf_dump(conf, &cnt);
                printf("# Global config\n");
            } else {
                printf("# Topic config\n");
                arr = rd_kafka_topic_conf_dump(topic_conf,
                                   &cnt);
            }

            /* dump arrays are name/value pairs, hence i += 2 */
            for (i = 0 ; i < cnt ; i += 2)
                printf("%s = %s\n",
                       arr[i], arr[i+1]);

            printf("\n");

            rd_kafka_conf_dump_free(arr, cnt);
        }

        exit(0);
    }


    if (optind != argc || !topic) {
    usage:
        fprintf(stderr,
            "Usage: %s -t <topic> [options]\n"
            "\n"
            "librdkafka version %s (0x%08x)\n"
            "\n"
            " Options:\n"
            "  -t <topic>      Topic to consume\n"
            "  -p <num>        Partition (default: unassigned)\n"
            "  -k <zookeepers> Zookeeper address (localhost:2181)\n"
            "  -s <dir>        Site directory for the PHP worker\n"
            "  -r <php>        Path to the PHP binary\n"
            "  -b <file>       PHP bootstrap script (default index.php)\n"
            "  -z <codec>      Enable compression:\n"
            "                  none|gzip|snappy\n"
            "  -o <offset>     Start offset: beginning|end|stored|<n>\n"
            "  -e              Exit consumer when last message\n"
            "                  in partition has been received.\n"
            "  -d [facs..]     Enable debugging contexts:\n"
            "                  %s\n"
            "  -q              Be quiet\n"
            "  -X <prop=name>  Set arbitrary librdkafka "
            "configuration property\n"
            "                  Properties prefixed with \"topic.\" "
            "will be set on topic object.\n"
            "                  Use '-X list' to see the full list\n"
            "                  of supported properties.\n"
            "\n"
            " Each consumed message is handed to PHP via:\n"
            "  <php> <bootstrap> --task=tips --topic=... --partition=...\n"
            "\n",
            argv[0],
            rd_kafka_version_str(), rd_kafka_version(),
            RD_KAFKA_DEBUG_CONTEXTS);
        exit(1);
    }


    signal(SIGINT, sig_stop);
    signal(SIGUSR1, sig_usr1);
    signal(SIGCHLD, sig_child_stop);

    if (debug &&
        rd_kafka_conf_set(conf, "debug", debug, errstr, sizeof(errstr)) !=
        RD_KAFKA_CONF_OK) {
        fprintf(stderr, "%% Debug configuration failed: %s: %s\n",
            errstr, debug);
        exit(1);
    }

    /** Initialize zookeeper */
    zh = initialize_zookeeper(zookeeper, debug != NULL);

    /* Add brokers discovered through zookeeper. */
    set_brokerlist_from_zookeeper(zh, brokers);
    if (rd_kafka_conf_set(conf, "metadata.broker.list",
                          brokers, errstr, sizeof(errstr)) !=
                          RD_KAFKA_CONF_OK) {
            fprintf(stderr, "%% Failed to set brokers: %s\n", errstr);
            exit(1);
    }

    if (debug) {
        printf("Broker list from zookeeper cluster %s: %s\n", zookeeper, brokers);
    }

    /*
     * Consumer
     */

    /* Create Kafka handle */
    if (!(rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf,
                errstr, sizeof(errstr)))) {
        fprintf(stderr,
            "%% Failed to create new consumer: %s\n",
            errstr);
        exit(1);
    }

    /* Set logger */
    rd_kafka_set_logger(rk, logger);
    rd_kafka_set_log_level(rk, LOG_DEBUG);

    /* Create topic */
    rkt = rd_kafka_topic_new(rk, topic, topic_conf);
    if (!rkt) {
        fprintf(stderr, "%% Failed to create topic: %s\n",
            rd_kafka_err2str(rd_kafka_errno2err(errno)));
        exit(1);
    }

    /* Start consuming */
    if (rd_kafka_consume_start(rkt, partition, start_offset) == -1){
        fprintf(stderr, "%% Failed to start consuming: %s\n",
            rd_kafka_err2str(rd_kafka_errno2err(errno)));
        exit(1);
    }

    while (run) {
        rd_kafka_message_t *rkmessage;

        /* Consume single message.
         * See rdkafka_performance.c for high speed
         * consuming of messages. */
        rkmessage = rd_kafka_consume(rkt, partition, RD_POLL_INFINITE);
        if (!rkmessage) /* timeout */
            continue;

        notify_php(site_dir, php, bootstarp, topic, partition, rkmessage);

        /* Return message to rdkafka */
        rd_kafka_message_destroy(rkmessage);
    }

    /* Stop consuming */
    rd_kafka_consume_stop(rkt, partition);

    rd_kafka_topic_destroy(rkt);

    rd_kafka_destroy(rk);

    /* Let background threads clean up and terminate cleanly. */
    rd_kafka_wait_destroyed(2000);

    /** Free the zookeeper data. */
    zookeeper_close(zh);

    return 0;
}
View Code
posted @ 2015-02-27 17:19  liushan  阅读(4136)  评论(0编辑  收藏  举报