前言:
本文章适用于在Windows上使用Flume 监听Avro Client,模拟数据库表的增量同步到Kafka中。首先确保你的flume-ng可以启动,跳过个别步骤可自行百度。
1、MySQL创建表:
DROP TABLE IF EXISTS `avro`;
CREATE TABLE `avro` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`createdt` datetime DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=latin1;
INSERT INTO `avro` VALUES ('1', 'a', '2018-11-21 04:00:00');
INSERT INTO `avro` VALUES ('2', 'b', '2018-11-22 05:00:00');
INSERT INTO `avro` VALUES ('3', 'c', '2018-11-23 06:00:00');
INSERT INTO `avro` VALUES ('4', 'd', '2018-11-24 07:00:00');
INSERT INTO `avro` VALUES ('5', 'e', '2018-11-25 08:00:00');
INSERT INTO `avro` VALUES ('6', 'f', '2018-11-26 09:00:00');
INSERT INTO `avro` VALUES ('7', 'g', '2018-11-27 10:00:00');
INSERT INTO `avro` VALUES ('8', 'h', '2018-11-28 11:00:00');
INSERT INTO `avro` VALUES ('9', 'i', '2018-11-29 12:00:00');
INSERT INTO `avro` VALUES ('10', 'j', '2018-11-30 13:56:41');
avro表如图
2、Avro 的官网实例
2.1、创建Flume Avro Client :(Thrift 同理)
打开Eclipse 右击
src/main/java
新建一个package
org.flume.avro
新建Class MyApp.java
package org.flume.avro;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;
import java.nio.charset.Charset;
public class MyApp {
public static void main(String[] args) {
// TODO Auto-generated method stub
MyRpcClientFacade client = new MyRpcClientFacade();
// Initialize client with the remote Flume agent's host and port
//端口与avro.conf a1.sources.r1.port一致
client.init("localhost", 41414);
String sampleData = "Hello Flume!";
for (int i = 0; i < 5; i++) {
client.sendDataToFlume(sampleData+" " + i);
}
System.out.println("输出完毕");
client.cleanUp();
}
}
class MyRpcClientFacade {
private RpcClient client;
private String hostname;
private int port;
public void init(String hostname, int port) {
// Setup the RPC connection
this.hostname = hostname;
this.port = port;
this.client = RpcClientFactory.getDefaultInstance(hostname, port); //创建avro客户端
// Use the following method to create a thrift client (instead of the above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port); //创建Thrift客户端
}
public void sendDataToFlume(String data) {
// Create a Flume Event object that encapsulates the sample data
// 调用EventBuilder重载的withBody()方法。
Event event = EventBuilder.withBody(data, Charset.forName("UTF-8"));
try {
client.append(event); // Send the event 发送数据
} catch (EventDeliveryException e) {
// clean up and recreate the client 清理并重新创建客户端
client.close();
client = null;
client = RpcClientFactory.getDefaultInstance(hostname, port);
// Use the following method to create a thrift client (instead of the above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port);
}
}
public void cleanUp() {
// Close the RPC connection
client.close();
}
}
2.2、配置conf
Flume的conf
目录新建 avro.conf
a1.channels = c1
a1.sources = r1
a1.sinks = k1
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 41414 //端口与MyApp.java中的port一致
a1.channels.c1.type = memory
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = avrosrc
a1.sinks.k1.brokerList = localhost:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
a1.sinks.k1.channel = c1
a1.sources.r1.channels = c1
2.3、输出到Kafka
此处省略Kafka启动步骤,详见链接
新建Kafka Topic avrosrc
kafka-run-class.bat kafka.admin.TopicCommand --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic avrosrc
查看Topic avrosrc(此时为空)
kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic avrosrc --from-beginning
启动flume-ng
D:\com\apache-flume-1.8.0-bin>flume-ng agent -c conf -f conf/avro.conf -n a1 -property "flume.root.logger=INFO,console"
Eclipse 运行 MyApp.java(右键 → Run As → Java Application)
此时观察 Topic 有数据进入
3、Avro 自定义
每秒随机读取数据库avro表的一条数据,并输出到Kafka,模拟增量数据
修改 MyApp.java
package org.flume.avro;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;
import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
public class MyApp {
static final String DB_URL = "jdbc:mysql://localhost:3306/***"; //输入DB名称
static final String USER = "***"; //DB用户名
static final String PASS = "***"; //DB密码
public static void main(String[] args) {
// TODO Auto-generated method stub
MyRpcClientFacade client = new MyRpcClientFacade();
client.init("localhost", 41414);
Connection conn = null;
Statement stmt = null; //真实场景使用PreparedStatement防止SQL注入
try{
Class.forName("com.mysql.jdbc.Driver"); // 注册 JDBC 驱动
conn = DriverManager.getConnection(DB_URL,USER,PASS); // 打开链接
client.sendDataToFlume("Connect to db");
stmt = conn.createStatement(); // 执行查询
for(int i = 0;i < 10;i++){
int index = (int)(Math.random()*10) + 1;
String sql = "SELECT * FROM avro where id=" + index;
ResultSet rs = stmt.executeQuery(sql); // 保存到结果集
while(rs.next()){
int id = rs.getInt("id");
String name = rs.getString("name");
Timestamp createdt = rs.getTimestamp("createdt");
System.out.print("ID: " + id);
System.out.print(", 名称: " + name);
System.out.print(", 创建时间: " + createdt);
System.out.print("\n");
//client.sendDataToFlume 发送数据!
client.sendDataToFlume("id: " + id + ", name: " + name + ", createdt: " + createdt);
}
rs.close();
try {
Thread.sleep(1000); //等待一秒,模拟增量场景
} catch (InterruptedException e) {
e.printStackTrace();
}
}
stmt.close();
conn.close();
}catch(SQLException se){ // 处理 JDBC 错误
se.printStackTrace();
}catch(Exception e){ // 处理 Class.forName 错误
e.printStackTrace();
}finally{ // 关闭资源
try{
if(stmt!=null) stmt.close();
}catch(SQLException se2){
}
try{
if(conn!=null) conn.close();
}catch(SQLException se){
se.printStackTrace();
}
}
client.sendDataToFlume("avro结束"); //测试中文是否乱码:是
client.sendDataToFlume("avro over");
System.out.println("avro结束");
client.cleanUp();
}
}
class MyRpcClientFacade {
private RpcClient client;
private String hostname;
private int port;
public void init(String hostname, int port) {
// Setup the RPC connection
this.hostname = hostname;
this.port = port;
this.client = RpcClientFactory.getDefaultInstance(hostname, port); //创建avro客户端
// Use the following method to create a thrift client (instead of the above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port); //创建Thrift客户端
}
public void sendDataToFlume(String data) {
// Create a Flume Event object that encapsulates the sample data
// 调用EventBuilder重载的withBody()方法。
Event event = EventBuilder.withBody(data, Charset.forName("UTF-8"));
try {
client.append(event); // Send the event 发送数据
} catch (EventDeliveryException e) {
// clean up and recreate the client 清理并重新创建客户端
client.close();
client = null;
client = RpcClientFactory.getDefaultInstance(hostname, port);
// Use the following method to create a thrift client (instead of the above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port);
}
}
public void cleanUp() {
// Close the RPC connection
client.close();
}
}
再次运行 MyApp.java
随机读取表中10条数据(每秒一条),输出到Kafka