今天分析RabbitMQ消息的持久化,即客户端发送一条持久化的MQ消息后,服务端做了哪些事情。
下面是客户端的发送代码:
$client = new Client('127.0.0.1', 5672, 'guest', 'guest');
//设置正常交换机、队列 $type = 'topic'; $routingKey = 'hello'; $exchangeName = 'hello_exchange'; $exchange = new Exchange($client, $exchangeName, $type); $exchange->setDurable(true); //队列 $queue = new Queue( $client, $this->queueName, [ new Consumer( function (AMQPMessage $msg) { var_dump($msg); } ), ] ); $binding = new Binding($exchange, $queue); $binding->setRoutingKey($routingKey); $client->register($binding); $message = new Message("hello" . str_repeat('123456789', 13)); $res = $exchange->publish($message, $routingKey);
分析下网络包,发送消息的时候,其实是往服务端发送basic.publish命令。
调用链分析
入口在rabbit_channel文件:
handle_method(#'basic.publish'{exchange = ExchangeNameBin,
routing_key = RoutingKey, mandatory = Mandatory}, Content, State = #ch{virtual_host = VHostPath, tx = Tx, channel = ChannelNum, confirm_enabled = ConfirmEnabled, trace_state = TraceState, user = #user{username = Username}, conn_name = ConnName, delivery_flow = Flow}) ->
……
case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of {ok, Message} -> Delivery = rabbit_basic:delivery( Mandatory, DoConfirm, Message, MsgSeqNo), QNames = rabbit_exchange:route(Exchange, Delivery), DQ = {Delivery#delivery{flow = Flow}, QNames}, {noreply, case Tx of none -> deliver_to_queues(DQ, State1); {Msgs, Acks} -> Msgs1 = queue:in(DQ, Msgs), State1#ch{tx = {Msgs1, Acks}} end}; end;
上面删除了一些非关键代码,这里看是否有事务,如果没有事务则通过 deliver_to_queues 发送,有事务则先进队列,今天主要分析无事务的处理过程。
deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{
exchange_name = XName}, mandatory = Mandatory, confirm = Confirm, msg_seq_no = MsgSeqNo}, DelQNames}, State = #ch{queue_names = QNames, queue_monitors = QMons}) -> Qs = rabbit_amqqueue:lookup(DelQNames), DeliveredQPids = rabbit_amqqueue:deliver(Qs, Delivery),
后者调用 rabbit_amqqueue:deliver来处理:
%% Fan a delivery out to the processes backing the queues in Qs.
%% qpids/1 splits the queue pids into masters (MPids) and mirrors (SPids);
%% both groups receive a {deliver, Delivery, SlaveWhenPublished} cast via
%% delegate, with the flag false for masters and true for mirrors.
%% Returns the combined list of all pids that were sent the message.
%% NOTE(review): Flow is bound but unused in this excerpt — the full
%% upstream source presumably uses it for credit-flow accounting; confirm
%% against the RabbitMQ sources.
deliver(Qs, Delivery = #delivery{flow = Flow}) ->
{MPids, SPids} = qpids(Qs), QPids = MPids ++ SPids, MMsg = {deliver, Delivery, false}, SMsg = {deliver, Delivery, true}, delegate:cast(MPids, MMsg), delegate:cast(SPids, SMsg), QPids.
deliver的逻辑比较简单,分主、从进程ID,如果没有开启镜像队列,从进程ID是空的,今天先不分析镜像队列。
发送deliver消息到主进程,这个进程是rabbit_amqqueue_process。
再来看rabbit_amqqueue_process是如何处理的:
handle_cast({deliver, Delivery = #delivery{sender = Sender,
flow = Flow}, SlaveWhenPublished}, State = #q{senders = Senders}) ->
%% SlaveWhenPublished 只有在从的时候才为true
noreply(deliver_or_enqueue(Delivery, SlaveWhenPublished, State1));
中间的代码还比较多,就不一一贴了,大概说下:deliver_or_enqueue会调用attempt_delivery,然后调用到rabbit_variable_queue:publish
publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
MsgProps = #message_properties { needs_confirming = NeedsConfirming }, IsDelivered, _ChPid, _Flow, State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4, qi_embed_msgs_below = IndexMaxSize, next_seq_id = SeqId, in_counter = InCount, durable = IsDurable, unconfirmed = UC }) -> IsPersistent1 = IsDurable andalso IsPersistent, MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps, IndexMaxSize), {MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
调用maybe_write_to_disk 进行消息的持久化:
maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
maybe_write_msg_to_disk用来将消息持久化,maybe_write_index_to_disk用来将索引持久化。
%% Persist MsgStatus in two steps: first the message body (when ForceMsg is
%% set or the message is persistent), then the queue-index entry (same rule
%% with ForceIndex). Returns the updated {MsgStatus, State} from the index
%% write.
maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
{MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State), maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1).
如果消息大小小于配置文件中的queue_index_embed_msgs_below,
则persist_to返回queue_index,反之返回 msg_store。这个参数默认是4096,即如果消息体大小小于4096,不会将消息写到消息持久化文件,而是写到索引文件中。
消息的持久化由文件rabbit_msg_store负责,msg_store_write会调用write_message进行消息的保存:
%% Write the message body to disk when Force is set or the message is
%% persistent. persist_to/1 decides the destination:
%%   msg_store   -> the body goes to the message store via msg_store_write;
%%                  msg_in_store is set and disk_write_count is incremented.
%%   queue_index -> nothing is written here; the body will be embedded in
%%                  the queue-index entry instead.
%% Trailing ';' — further clauses of this function are elided in this excerpt.
maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
msg = Msg, msg_id = MsgId, is_persistent = IsPersistent }, State = #vqstate{ msg_store_clients = MSCState, disk_write_count = Count})
when Force orelse IsPersistent ->
case persist_to(MsgStatus) of msg_store -> ok = msg_store_write(MSCState, IsPersistent, MsgId, prepare_to_store(Msg)), {MsgStatus#msg_status{msg_in_store = true}, State#vqstate{disk_write_count = Count + 1}}; queue_index -> {MsgStatus, State} end;
这里的逻辑就比较简单了,将消息内容追加写入到当前文件,再判断当前文件的大小,如果需要,则创建一个新的持久化文件。
这里讲一下segment,每个segment对应一个文件(所在的目录在mnesia数据目录下的msg_store_persistent)。每个文件最多可以保存SEGMENT_ENTRY_COUNT(16384)个消息索引信息。
这些文件是以整数来命名的。某条消息对应哪个segment文件呢?用消息的序号对SEGMENT_ENTRY_COUNT整除,相关代码可以看下
rabbit_queue_index:add_to_journal。
最后再看索引的持久化
%% NOTE(review): this excerpt is a byte-for-byte repeat of
%% maybe_write_msg_to_disk shown earlier, although the surrounding text
%% introduces "index persistence" — the intended function here is
%% presumably maybe_write_index_to_disk; confirm against the RabbitMQ
%% sources. As quoted, the clause writes the message BODY: when Force is
%% set or the message is persistent, persist_to/1 routes it either to the
%% message store (msg_store_write, msg_in_store set, disk write counted)
%% or leaves it to be embedded in the queue index (no-op here).
maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
msg = Msg, msg_id = MsgId, is_persistent = IsPersistent }, State = #vqstate{ msg_store_clients = MSCState, disk_write_count = Count})
when Force orelse IsPersistent ->
case persist_to(MsgStatus) of msg_store -> ok = msg_store_write(MSCState, IsPersistent, MsgId, prepare_to_store(Msg)), {MsgStatus#msg_status{msg_in_store = true}, State#vqstate{disk_write_count = Count + 1}}; queue_index -> {MsgStatus, State} end;
索引通过rabbit_queue_index:publish 来落盘:
%% Append a publish record for MsgOrId (a #basic_message{} record or a bare
%% message-id binary) to the queue-index journal.
%% Confirm tracking: when needs_confirming is true, the id is added to the
%% unconfirmed (bare id) or unconfirmed_msg (full message) set.
%% The record — persistence prefix, SeqId, the pub-record body and the
%% (possibly empty) embedded message binary — is appended to the journal
%% handle, then maybe_flush_journal/2 decides (driven by JournalSizeHint)
%% whether the in-memory journal must be flushed to the segment files.
publish(MsgOrId, SeqId, MsgProps, IsPersistent, JournalSizeHint,
State = #qistate{unconfirmed = UC, unconfirmed_msg = UCM}) -> MsgId = case MsgOrId of #basic_message{id = Id} -> Id; Id when is_binary(Id) -> Id end, ?MSG_ID_BYTES = size(MsgId),
%% JournalHdl is the file handle of the journal file (journal.jif)
{JournalHdl, State1} = get_journal_handle( case {MsgProps#message_properties.needs_confirming, MsgOrId} of {true, MsgId} -> UC1 = gb_sets:add_element(MsgId, UC), State#qistate{unconfirmed = UC1}; {true, _} -> UCM1 = gb_sets:add_element(MsgId, UCM), State#qistate{unconfirmed_msg = UCM1}; {false, _} -> State end), file_handle_cache_stats:update(queue_index_journal_write), {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps), ok = file_handle_cache:append( JournalHdl, [<<(case IsPersistent of true -> ?PUB_PERSIST_JPREFIX; false -> ?PUB_TRANS_JPREFIX end):?JPREFIX_BITS, SeqId:?SEQ_BITS, Bin/binary, (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin]), maybe_flush_journal( JournalSizeHint, add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1)).
索引文件会先写到 journal缓存中,再定期刷到磁盘中,相关参数为
queue_index_max_journal_entries,
判断当前写入次数是否达到queue_index_max_journal_entries,是则进行刷盘到索引持久化文件。
实际刷盘是在 rabbit_variable_queue:handle_pre_hibernate中异步去刷的,这里不详述。
索引持久化文件在mnesia目录的queues目录下,文件扩展名为idx。
如何保证消息不丢呢?即如果写入journal文件成功了,但没有刷新到索引的持久化文件中,如何恢复?可以看下代码 rabbit_variable_queue:init,RabbitMQ启动的时候,启动每个队列之前会调用它来从journal中恢复索引和消息。
最后总结
持久化分消息体和索引的持久化。如果消息体小于queue_index_embed_msgs_below,则将消息写入到索引文件中,只进行1次磁盘操作;反之要写2次磁盘:消息体+索引,消息体写入到消息持久化文件(msg_store)中,一个索引segment文件可以保存16384条消息的索引。
为了加快写入的性能,写入消息体时是追加方式进行的;索引的持久化则是先追加到journal文件中,再异步刷新到索引文件中。
RabbitMQ网络框架代码分析二:命令分发
RabbitMQ网络框架代码分析
从RabbitMQ Channel设计看链接复用