A recent project of ours uses Kafka, and along the way I found that the Kafka client libraries on the Node.js side are in rather poor shape compared to other languages.
For example, the producer side is written in Python, which has the popular and pleasant-to-use library pykafka:
from pykafka import KafkaClient

client = KafkaClient(zookeeper_hosts='192.168.99.100:2181')
topic = client.topics['Hello-Kafka']
with topic.get_sync_producer() as producer:
    producer.produce('msg')
As you can see, it even supports ZooKeeper directly, which is very convenient.
The consumer side has to be done in Node.js, and there the ecosystem is not as good. The most popular library, kafka-node, has a rather awkwardly designed API.
kafkajs, which looks better designed, has not caught on yet (3860 weekly downloads).
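For comparison, consuming with kafkajs reads roughly like this (a sketch from my reading of its docs; the client id and group id are placeholders I made up):

const { Kafka } = require('kafkajs');

async function run() {
  const kafka = new Kafka({ clientId: 'demo', brokers: ['192.168.99.100:32768'] });
  const consumer = kafka.consumer({ groupId: 'demo-group' });
  await consumer.connect();
  await consumer.subscribe({ topic: 'Hello-Kafka', fromBeginning: true });
  // each message is handed to an async callback, so it composes naturally with await
  await consumer.run({
    eachMessage: async ({ message }) => console.log('received', message.value.toString())
  });
}

run().catch(console.error);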
Below are some of the problems I ran into using kafka-node:
1. Fetching starts as soon as topics are provided
const kafka = require('kafka-node');
const client = new kafka.KafkaClient({ kafkaHost: '192.168.99.100:32768' });
const consumer = new kafka.Consumer(client, [
  { topic: 'Hello-Kafka', partition: 0 }
]);
consumer.on('message', function (message) {
  console.log('received', message.value);
});
This is problematic: the consumer starts fetching messages asynchronously on the very line it is declared, before on() has run, so messages that arrive in that window are lost. Insert a delay between the two statements and the effect becomes very obvious.
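For example, a quick way to see it (the 3-second delay is arbitrary, just to widen the window):

const consumer = new kafka.Consumer(client, [
  { topic: 'Hello-Kafka', partition: 0 }
]);
// Fetching is already running at this point. With no 'message' listener attached,
// anything delivered during the next 3 seconds is silently dropped.
setTimeout(() => {
  consumer.on('message', function (message) {
    console.log('received', message.value);
  });
}, 3000);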
It can be rewritten like this:
const kafka = require('kafka-node');
const client = new kafka.KafkaClient({ kafkaHost: '192.168.99.100:32768' });
const consumer = new kafka.Consumer(client, []);  // start with no topics
consumer.on('message', function (message) {
  console.log('received', message.value);
});
consumer.addTopics([{ topic: 'Hello-Kafka', partition: 0 }]);
This way fetching only starts after the addTopics call on the last line, by which time the listener is already attached.
2. Messages can only be retrieved through an EventEmitter
The only way to get messages out of the consumer is on('message', cb), which does not mesh well with modern JS code. Here is one way to wrap it:
function getMessages(consumer, num) {
  return new Promise((resolve) => {
    const messages = [];
    let n = 0;
    const listener = function (message) {
      messages.push(message);
      n++;
      if (n === num) {
        // stop fetching and detach ourselves before handing back the batch
        consumer.pause();
        consumer.removeListener('message', listener);
        resolve(messages);
      }
    };
    consumer.on('message', listener);
  });
}
const pending = getMessages(consumer, 10);                       // attach the listener first
consumer.addTopics([{ topic: 'Hello-Kafka', partition: 0 }]);    // then start fetching
const tenMsgs = await pending;
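As an aside, if only a single message is needed, Node's built-in events.once (available since Node 11.13) already turns one emit into a promise, without the manual listener bookkeeping; a minimal sketch:

const { once } = require('events');

const [message] = await once(consumer, 'message');  // resolves with the arguments of the first 'message' event
console.log('received', message.value);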
3. No ZooKeeper support
The broker addresses have to be obtained through a separate ZooKeeper library.
The most popular one is node-zookeeper-client, which is also rather awkward to use; the key code is below:
const zookeeper = require('node-zookeeper-client');

function getNodeInfo(client, id) {
  return new Promise((resolve, reject) => {
    const path = '/brokers/ids';
    client.getData(path + '/' + id, function (err, data) {
      if (err) return reject(err);
      resolve(JSON.parse(data.toString()));  // broker registration data is a JSON blob
    });
  });
}
function getBrokers(zookeeperUrl) {
  return new Promise((resolve, reject) => {
    const client = zookeeper.createClient(zookeeperUrl);
    const path = '/brokers/ids';
    client.once('connected', function () {
      client.getChildren(path, function (err, nodeIds) {
        if (err) {
          client.close();
          return reject(err);
        }
        if (nodeIds.length === 0) {
          client.close();
          return reject(new Error('GetChildren returned no nodes'));
        }
        // only close the connection once every broker's data has been read
        Promise.all(nodeIds.map(id => getNodeInfo(client, id)))
          .then(brokers => { client.close(); resolve(brokers); })
          .catch(err => { client.close(); reject(err); });
      });
    });
    client.connect();
  });
}
function getBrokersAddr(brokers) {
  return brokers.map(b => b.host + ':' + b.port).join(',');
}
const brokers = await getBrokers('192.168.99.100:2181');
const brokerStr = getBrokersAddr(brokers);
This yields 192.168.99.100:32768, which can be used directly as the kafkaHost for the kafka-node client above.
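Putting the pieces together, the whole flow could look like the sketch below, reusing getBrokers, getBrokersAddr and getMessages from above (consumeFromZookeeper is just a name made up for this example):

const kafka = require('kafka-node');

async function consumeFromZookeeper(zookeeperUrl, topic, num) {
  const brokers = await getBrokers(zookeeperUrl);
  const client = new kafka.KafkaClient({ kafkaHost: getBrokersAddr(brokers) });
  const consumer = new kafka.Consumer(client, []);   // no topics yet, so nothing is fetched
  const pending = getMessages(consumer, num);        // attach the listener first
  consumer.addTopics([{ topic, partition: 0 }]);     // then start fetching
  return pending;
}

consumeFromZookeeper('192.168.99.100:2181', 'Hello-Kafka', 10)
  .then(msgs => console.log('received', msgs.length, 'messages'));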