Checking Kafka offsets

Original article: http://blog.csdn.net/rongyongfeikai2/article/details/50727661

When reading Kafka data with Spark Streaming, to avoid data loss we save the offset of each partition of a Kafka topic in ZooKeeper (the offsets in ZooKeeper are only updated after a batch completes successfully). This guarantees that after a failed run, the next run can resume reading from the saved offsets.

The implementation is similar to the one shown in this article:

http://blog.csdn.net/rongyongfeikai2/article/details/49784785
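For concreteness, here is a minimal sketch of that idea using the plain ZooKeeper client. It is illustrative only, not the linked article's code: the ZkOffsetStore class name and the znode path layout are assumptions made for this example.

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;

/** Sketch of an offset store; the znode layout below is an assumption for the example. */
public class ZkOffsetStore {
    private final ZooKeeper zk;

    public ZkOffsetStore(ZooKeeper zk) {
        this.zk = zk;
    }

    private String path(String group, String topic, int partition) {
        // Assumed layout: /consumers/<group>/offsets/<topic>/<partition>
        return "/consumers/" + group + "/offsets/" + topic + "/" + partition;
    }

    // Called only after a batch has completed successfully.
    public void saveOffset(String group, String topic, int partition, long offset) throws Exception {
        String p = path(group, topic, partition);
        byte[] data = String.valueOf(offset).getBytes("UTF-8");
        if (zk.exists(p, false) == null) {
            // Parent znodes must already exist; creating them is omitted for brevity.
            zk.create(p, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        } else {
            zk.setData(p, data, -1);  // -1 = ignore the znode version
        }
    }

    // The next run resumes from the saved offset (or 0 if none was saved).
    public long readOffset(String group, String topic, int partition) throws Exception {
        String p = path(group, topic, partition);
        Stat stat = zk.exists(p, false);
        if (stat == null) {
            return 0L;
        }
        return Long.parseLong(new String(zk.getData(p, false, stat), "UTF-8"));
    }
}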

However, a Kafka topic can be deleted, and, worse still, a user may then create a new topic with the same name. When that happens, the offsets saved in ZooKeeper are no longer accurate, and they need to be compared against the offsets held by the Kafka brokers so that the values in ZooKeeper can be corrected.

The implementation is as follows:

1. Use a class to hold a topic's leader information and the offset of each of its partitions:

import java.io.Serializable;
import java.util.HashMap;

/**
 * Holds, for one topic, each partition's offset and the host of its leader.
 */
public class KafkaTopicOffset implements Serializable {
    private String topicName;
    private HashMap<Integer, Long> offsetList;    // partitionId -> offset
    private HashMap<Integer, String> leaderList;  // partitionId -> leader host

    public KafkaTopicOffset(String topicName) {
        this.topicName = topicName;
        this.offsetList = new HashMap<Integer, Long>();
        this.leaderList = new HashMap<Integer, String>();
    }

    public String getTopicName() {
        return topicName;
    }

    public void setTopicName(String topicName) {
        this.topicName = topicName;
    }

    public HashMap<Integer, Long> getOffsetList() {
        return offsetList;
    }

    public void setOffsetList(HashMap<Integer, Long> offsetList) {
        this.offsetList = offsetList;
    }

    public HashMap<Integer, String> getLeaderList() {
        return leaderList;
    }

    public void setLeaderList(HashMap<Integer, String> leaderList) {
        this.leaderList = leaderList;
    }

    @Override
    public String toString() {
        return "topic:" + topicName + ",offsetList:" + this.offsetList + ",leaderList:" + this.leaderList;
    }
}


2. Obtain the topic's per-partition offsets from the Kafka brokers (mainly by using SimpleConsumer to send the appropriate requests):

import java.io.Serializable;
import java.util.*;
import com.nsfocus.bsaips.common.Constant;
import com.nsfocus.bsaips.model.KafkaTopicOffset;
import kafka.javaapi.OffsetResponse;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.TopicAndPartition;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.PartitionMetadata;

/**
 * Kafka utility class.
 */
public class KafkaUtil implements Serializable {
    private static KafkaUtil kafkaUtil = null;

    private KafkaUtil() {}

    public static KafkaUtil getInstance() {
        if (kafkaUtil == null) {
            kafkaUtil = new KafkaUtil();
        }
        return kafkaUtil;
    }

    // "host1:port1,host2:port2" -> ["host1", "host2"]
    private String[] getIpsFromBrokerList(String brokerlist) {
        String[] brokers = brokerlist.split(",");
        for (int i = 0; i < brokers.length; i++) {
            brokers[i] = brokers[i].split(":")[0];
        }
        return brokers;
    }

    // "host1:port1,host2:port2" -> {host1: port1, host2: port2}
    private Map<String, Integer> getPortFromBrokerList(String brokerlist) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (String item : brokerlist.split(",")) {
            String[] itemArr = item.split(":");
            if (itemArr.length > 1) {
                map.put(itemArr[0], Integer.parseInt(itemArr[1]));
            }
        }
        return map;
    }

    // Ask the brokers for the topic's metadata and record each partition's leader host.
    public KafkaTopicOffset topicMetadataRequest(String brokerlist, String topic) {
        List<String> topics = Collections.singletonList(topic);
        TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(topics);

        KafkaTopicOffset kafkaTopicOffset = new KafkaTopicOffset(topic);
        String[] seeds = getIpsFromBrokerList(brokerlist);
        Map<String, Integer> portMap = getPortFromBrokerList(brokerlist);

        for (int i = 0; i < seeds.length; i++) {
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(seeds[i],
                        portMap.get(seeds[i]),
                        Constant.TIMEOUT,
                        Constant.BUFFERSIZE,
                        Constant.groupId);
                kafka.javaapi.TopicMetadataResponse resp = consumer.send(topicMetadataRequest);
                List<TopicMetadata> metaData = resp.topicsMetadata();
                for (TopicMetadata item : metaData) {
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        kafkaTopicOffset.getLeaderList().put(part.partitionId(), part.leader().host());
                        kafkaTopicOffset.getOffsetList().put(part.partitionId(), 0L);
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        }

        return kafkaTopicOffset;
    }

    // For each partition, ask its leader for the latest offset.
    public KafkaTopicOffset getLastOffsetByTopic(String brokerlist, String topic) {
        KafkaTopicOffset kafkaTopicOffset = topicMetadataRequest(brokerlist, topic);
        String[] seeds = getIpsFromBrokerList(brokerlist);
        Map<String, Integer> portMap = getPortFromBrokerList(brokerlist);

        for (int i = 0; i < seeds.length; i++) {
            SimpleConsumer consumer = null;
            Iterator<Map.Entry<Integer, Long>> iterator =
                    kafkaTopicOffset.getOffsetList().entrySet().iterator();

            try {
                consumer = new SimpleConsumer(seeds[i],
                        portMap.get(seeds[i]),
                        Constant.TIMEOUT,
                        Constant.BUFFERSIZE,
                        Constant.groupId);

                while (iterator.hasNext()) {
                    Map.Entry<Integer, Long> entry = iterator.next();
                    int partitionId = entry.getKey();

                    // Only the partition's leader can answer the offset request; this
                    // assumes the hosts in brokerlist match the leader hosts in the metadata.
                    if (!kafkaTopicOffset.getLeaderList().get(partitionId).equals(seeds[i])) {
                        continue;
                    }

                    TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
                    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
                            new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();

                    requestInfo.put(topicAndPartition,
                            new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.LatestTime(), 1)
                    );
                    kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
                            requestInfo, kafka.api.OffsetRequest.CurrentVersion(),
                            Constant.groupId);
                    OffsetResponse response = consumer.getOffsetsBefore(request);
                    long[] offsets = response.offsets(topic, partitionId);
                    if (offsets.length > 0) {
                        kafkaTopicOffset.getOffsetList().put(partitionId, offsets[0]);
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        }

        return kafkaTopicOffset;
    }

    public Map<String, KafkaTopicOffset> getKafkaOffsetByTopicList(String brokerList, List<String> topics) {
        Map<String, KafkaTopicOffset> map = new HashMap<String, KafkaTopicOffset>();
        for (int i = 0; i < topics.size(); i++) {
            map.put(topics.get(i), getLastOffsetByTopic(brokerList, topics.get(i)));
        }
        return map;
    }

    public static void main(String[] args) {
        try {
            System.out.println(KafkaUtil.getInstance().getKafkaOffsetByTopicList(
                    ConfigUtil.getInstance().getKafkaConf().get("brokerlist"),
                    Arrays.asList(new String[]{"pj_test_tmp", "test"})));
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
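As a usage note, the result of getKafkaOffsetByTopicList can be flattened into the Map<TopicAndPartition, Long> shape that Spark's direct Kafka stream takes as its fromOffsets argument. The snippet below is hypothetical glue code, not part of the original article; brokerlist and topics are assumed to be defined as above.

// Hypothetical glue code: offsetMap holds the return value of
// getKafkaOffsetByTopicList; flatten it into the Map<TopicAndPartition, Long>
// form used as the fromOffsets argument of Spark's direct Kafka stream.
Map<String, KafkaTopicOffset> offsetMap = KafkaUtil.getInstance()
        .getKafkaOffsetByTopicList(brokerlist, topics);  // brokerlist and topics as above

Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();
for (Map.Entry<String, KafkaTopicOffset> topicEntry : offsetMap.entrySet()) {
    for (Map.Entry<Integer, Long> partEntry : topicEntry.getValue().getOffsetList().entrySet()) {
        fromOffsets.put(
                new TopicAndPartition(topicEntry.getKey(), partEntry.getKey()),
                partEntry.getValue());
    }
}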

3. Then, when KafkaCluster fetches the offsets from ZooKeeper, compare them with the offsets obtained from the brokers (assume the return value of KafkaUtil's getKafkaOffsetByTopicList has been stored in offsetMap):
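A minimal sketch of that comparison is given below; readOffsetFromZk and writeOffsetToZk are hypothetical helpers standing in for the ZooKeeper-side code.

// Sketch: reconcile ZooKeeper offsets with the broker-side offsets in offsetMap.
// readOffsetFromZk / writeOffsetToZk are hypothetical helpers for the zk side.
public void fixZkOffsets(Map<String, KafkaTopicOffset> offsetMap) throws Exception {
    for (Map.Entry<String, KafkaTopicOffset> topicEntry : offsetMap.entrySet()) {
        String topic = topicEntry.getKey();
        for (Map.Entry<Integer, Long> partEntry : topicEntry.getValue().getOffsetList().entrySet()) {
            int partition = partEntry.getKey();
            long brokerOffset = partEntry.getValue();  // offset reported by the broker
            long zkOffset = readOffsetFromZk(topic, partition);

            // A saved offset beyond what the broker holds means the topic was
            // deleted and recreated; overwrite it with the broker's value.
            if (zkOffset > brokerOffset) {
                writeOffsetToZk(topic, partition, brokerOffset);
            }
        }
    }
}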



After some discussion it became clear that the earliest offset is a partition's current starting offset and the latest offset is its current ending offset, so the stale-offset case should be handled by consuming from the current earliest offset. For example, if a recreated topic's partition only holds offsets 0-99 while ZooKeeper still records offset 500, resuming at 500 would fail with an offset-out-of-range error, whereas the earliest offset is always valid. The offset request should therefore ask for EarliestTime() rather than LatestTime(). The revised code is as follows:

The revised class lives in package com.nsfocus.bsaips.util and is identical to the version above except for getLastOffsetByTopic, where the PartitionOffsetRequestInfo is now built with EarliestTime() instead of LatestTime():

    // For each partition, ask its leader for the earliest offset still available on the broker.
    public KafkaTopicOffset getLastOffsetByTopic(String brokerlist, String topic) {
        KafkaTopicOffset kafkaTopicOffset = topicMetadataRequest(brokerlist, topic);
        String[] seeds = getIpsFromBrokerList(brokerlist);
        Map<String, Integer> portMap = getPortFromBrokerList(brokerlist);

        for (int i = 0; i < seeds.length; i++) {
            SimpleConsumer consumer = null;
            Iterator<Map.Entry<Integer, Long>> iterator =
                    kafkaTopicOffset.getOffsetList().entrySet().iterator();

            try {
                consumer = new SimpleConsumer(seeds[i],
                        portMap.get(seeds[i]),
                        Constant.TIMEOUT,
                        Constant.BUFFERSIZE,
                        Constant.groupId);

                while (iterator.hasNext()) {
                    Map.Entry<Integer, Long> entry = iterator.next();
                    int partitionId = entry.getKey();

                    if (!kafkaTopicOffset.getLeaderList().get(partitionId).equals(seeds[i])) {
                        continue;
                    }

                    TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partitionId);
                    Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
                            new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();

                    // EarliestTime() instead of LatestTime(): resume from the oldest
                    // offset the broker still has, which is the correct starting point
                    // after a topic has been deleted and recreated.
                    requestInfo.put(topicAndPartition,
                            new PartitionOffsetRequestInfo(kafka.api.OffsetRequest.EarliestTime(), 1)
                    );
                    kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
                            requestInfo, kafka.api.OffsetRequest.CurrentVersion(),
                            Constant.groupId);
                    OffsetResponse response = consumer.getOffsetsBefore(request);
                    long[] offsets = response.offsets(topic, partitionId);
                    if (offsets.length > 0) {
                        kafkaTopicOffset.getOffsetList().put(partitionId, offsets[0]);
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        }

        return kafkaTopicOffset;
    }