解读MCE(Machine Check Exception)和Intel CSR寄存器(configuration Registers)的代码含义工具

近期一直在处理客户服务器宕机问题。对于系统报出的MCE代码,或者客服收集的CPU底层寄存器信息,都要人工排查哪些寄存器需要分析,然后把这些寄存器的值做16进制高低位转换,再转为二进制,最后对照Intel开发手册或CPU Datasheet去解读,非常麻烦,费时费力,更不用说让客户运维自己去查了。其实这一系列查询并不复杂,只是耗费时间。在这个AI时代,何不自己做一个工具,一键抓取有用的寄存器,并通过读取数据库里的数据立即解读出来?说做就做,用Excel VBA就可以轻松实现,后期也方便添加寄存器信息;客户使用也方便,不用安装任何APP,不需要记住Linux命令,点点鼠标就可以查看。


一种用带外工具收集到的CPU寄存器信息如下(各个厂商的格式差不多)。CSR寄存器为32位,例如下面第一行末尾的 00 00 b8 00 即为寄存器的值:

(Bus:1  Dev:30 Fun:2  Reg:0xEC  ) CPU0 MCA_ERR_SRC_LOG      57 01 00 40 00 00 b8 00
(Bus:0  Dev:5  Fun:2  Reg:0xA0  ) CPU0 viral                57 01 00 40 00 00 00 00

(Bus:0  Dev:5  Fun:2  Reg:0x1C4 ) CPU0 gferrst              57 01 00 40 00 00 00 00
(Bus:0  Dev:5  Fun:2  Reg:0x1CC ) CPU0 gsysst               57 01 00 40 00 00 00 00
(Bus:0  Dev:5  Fun:2  Reg:0x1DC ) CPU0 gfferrst             57 01 00 40 00 00 00 00


另外,MCE/MSR寄存器为64位,读取每行末尾8个字节的寄存器值,再对照Intel文档,就可以知道机器发生过什么问题。

CPU0_Proc14 IA32_MC2_STATUS              57 01 00 40 00 00 00 00 00 00 00 00
CPU0_Proc15 IA32_MC2_STATUS              57 01 00 40 00 00 00 00 00 00 00 00
CPU0_Proc16 IA32_MC2_STATUS              57 01 00 40 00 00 00 00 00 00 00 00
CPU0_Proc17 IA32_MC2_STATUS              57 01 00 40 00 00 00 00 00 00 00 00


系统也会报出MCE日志,格式如下,直接贴到Excel就可以解读了。

MC1_STATUS: 0xf200000000020151




一.双击底层寄存器解释

 



二.选择底层寄存器文件

 

解释CSR寄存器(范例定位PCIE错误)

 

解释MCE信息(范例定位内存Channel B出错)

 


维护CSR 表格数据,自己可以根据CPU换代,增加相应数据到CSR页面 


上代码:




Function bin_d(content As String) As String
    ' Formats a binary-digit string for display.
    '
    ' This is currently a pass-through: the text is returned exactly as it
    ' was received, with no digit grouping applied.
    '
    ' content: string to format.
    ' Returns: the same string, unchanged.
    Dim formatted As String

    formatted = content
    bin_d = formatted
End Function




Function mem_c(content As String) As String
    ' Decodes the 4-bit CCCC (memory channel) sub-field of a memory
    ' controller MCA error code into a bracketed channel label.
    '
    ' content: the four CCCC bits as a string of "0"/"1" characters,
    '          most significant bit first.
    ' Returns: "[Channel A]" .. "[Channel H]" for 0000..0111,
    '          "[Channel not specified]" for 1111 (the Intel SDM encoding
    '          for an unspecified channel), and an empty string for any
    '          other input.
    Dim codes As Variant
    Dim labels As Variant
    Dim idx As Long

    codes = Array("0000", "0001", "0010", "0011", _
                  "0100", "0101", "0110", "0111", "1111")
    labels = Array("[Channel A]", "[Channel B]", "[Channel C]", "[Channel D]", _
                   "[Channel E]", "[Channel F]", "[Channel G]", "[Channel H]", _
                   "[Channel not specified]")

    ' Both arrays share the same bounds, so one index addresses both.
    For idx = LBound(codes) To UBound(codes)
        If StrComp(content, codes(idx), vbTextCompare) = 0 Then
            mem_c = labels(idx)
            Exit Function
        End If
    Next idx
End Function


Function rrrr(content As String) As String
    ' Decodes the 4-bit RRRR (request type) sub-field of an MCA error code.
    '
    ' content: the four RRRR bits as a "0"/"1" string, MSB first.
    ' Returns: the bracketed request description for 0000..1000, or an
    '          empty string when the pattern is not in the table.
    Dim patterns() As String
    Dim meanings() As String
    Dim n As Long

    patterns = Split("0000|0001|0010|0011|0100|0101|0110|0111|1000", "|")
    meanings = Split("[Generic error]|[Generic read]|[Generic write]|" & _
                     "[Data read]|[Data write]|[Instruction fetch]|" & _
                     "[Prefetch]|[Evict]|[Snoop (probe)]", "|")

    For n = LBound(patterns) To UBound(patterns)
        If StrComp(content, patterns(n), vbTextCompare) = 0 Then
            rrrr = meanings(n)
            Exit Function
        End If
    Next n
End Function




Function ll(content As String) As String
    ' Decodes the 2-bit LL (memory hierarchy level) sub-field of an MCA
    ' error code.
    '
    ' content: the two LL bits as a "0"/"1" string, MSB first.
    ' Returns: the bracketed level label, or an empty string when the
    '          input is not one of the four 2-bit patterns.
    Select Case True
        Case StrComp(content, "00", vbTextCompare) = 0
            ll = "[Level 0]"
        Case StrComp(content, "01", vbTextCompare) = 0
            ll = "[level 1]"
        Case StrComp(content, "10", vbTextCompare) = 0
            ll = "[level 2]"
        Case StrComp(content, "11", vbTextCompare) = 0
            ll = "[Generic]"
    End Select
End Function


Function tt(content As String) As String
    ' Decodes the 2-bit TT (transaction type) sub-field of an MCA error code.
    '
    ' content: the two TT bits as a "0"/"1" string, MSB first.
    ' Returns: the bracketed transaction type, or an empty string when the
    '          input is not one of the four 2-bit patterns.
    Dim patterns As Variant
    Dim meanings As Variant
    Dim n As Long

    patterns = Array("00", "01", "10", "11")
    meanings = Array("[Instruction]", "[Data]", "[Generic]", "[Reserved]")

    For n = LBound(patterns) To UBound(patterns)
        If StrComp(content, patterns(n), vbTextCompare) = 0 Then
            tt = meanings(n)
            Exit Function
        End If
    Next n
End Function


Function pp(content As String) As String
    ' Decodes the 2-bit PP (participation) sub-field of a bus/interconnect
    ' MCA error code.
    '
    ' content: the two PP bits as a "0"/"1" string, MSB first.
    ' Returns: the bracketed participation description, or an empty string
    '          when the input is not one of the four 2-bit patterns.
    If StrComp(content, "00", vbTextCompare) = 0 Then
        pp = "[Local node originated the request.]"
        Exit Function
    End If

    If StrComp(content, "01", vbTextCompare) = 0 Then
        pp = "[Local node responded to the request.]"
        Exit Function
    End If

    If StrComp(content, "10", vbTextCompare) = 0 Then
        pp = "[Local node observed error as third-party.]"
        Exit Function
    End If

    If StrComp(content, "11", vbTextCompare) = 0 Then
        pp = "[Generic]"
    End If
End Function




Function ii(content As String) As String
    ' Decodes the 2-bit II (memory or I/O) sub-field of a bus/interconnect
    ' MCA error code.
    '
    ' content: the two II bits as a "0"/"1" string, MSB first.
    ' Returns: the bracketed access-type label, or an empty string when
    '          the input is not one of the four 2-bit patterns.
    Dim table As Variant
    Dim entry As Variant

    ' Each entry is a (bit pattern, label) pair.
    table = Array( _
        Array("00", "[Memory access]"), _
        Array("01", "[Reserved]"), _
        Array("10", "[I/O]"), _
        Array("11", "[Other]"))

    For Each entry In table
        If StrComp(content, entry(LBound(entry)), vbTextCompare) = 0 Then
            ii = entry(UBound(entry))
            Exit Function
        End If
    Next entry
End Function


Function t(content As String) As String
    ' Decodes the 1-bit T (timeout) sub-field of a bus/interconnect MCA
    ' error code.
    '
    ' content: "0" or "1".
    ' Returns: the bracketed timeout description, or an empty string for
    '          any other input.
    Select Case True
        Case StrComp(content, "0", vbTextCompare) = 0
            t = "[Request did not timeout.]"
        Case StrComp(content, "1", vbTextCompare) = 0
            t = "[Request did timeout.]"
    End Select
End Function




Function decode_c(title As String, content As String) As String
    ' Decodes a CSR register value using the lookup table on the "CSR" sheet.
    '
    ' Sheet layout as read by this function:
    '   - row 1 holds register names; the first column whose header contains
    '     Trim(title) is the register's column;
    '   - rows 3..21 of that column hold bit positions (0 = least
    '     significant bit) and the column to its right holds the text to
    '     report when that bit is 1.
    '
    ' title:   register name to look up (surrounding spaces are ignored).
    ' content: register value as hex digits; conversion is done by Hex2Bin.
    ' Returns: "二进制值为<bits>" + CRLF + "解释为:" followed by one CRLF-prefixed
    '          line per set bit that is listed on the sheet, or the
    '          "register not added" message when no header matches.
    '
    ' Changes from the earlier behaviour:
    '   - a register that is found but has no listed bit set now returns the
    '     binary header with an empty explanation, instead of the stale
    '     "register not added" message (or an empty string);
    '   - empty, non-numeric or negative bit-position cells are skipped
    '     (an empty cell used to be evaluated as bit 0);
    '   - an empty title no longer matches the first non-empty header;
    '   - the column scan covers the whole used range even when that range
    '     does not start in column A.
    Const FIRST_FIELD_ROW As Long = 3
    Const LAST_FIELD_ROW As Long = 21
    Const MSG_NOT_FOUND As String = "寄存器数据暂未添加,请在CSR页面添加"

    Dim ws As Object            ' the "CSR" worksheet
    Dim wanted As String
    Dim lastCol As Long
    Dim col As Long
    Dim fieldRow As Long
    Dim header As Variant
    Dim bitCell As Variant
    Dim bits As String
    Dim lsbFirst As String
    Dim details As String

    decode_c = MSG_NOT_FOUND

    wanted = Trim$(title)
    If Len(wanted) = 0 Then Exit Function

    Set ws = Sheets("CSR")
    With ws.UsedRange
        lastCol = .Column + .Columns.Count - 1
    End With

    For col = 1 To lastCol
        header = ws.Cells(1, col).Value
        If Not IsError(header) Then
            If InStr(1, CStr(header), wanted) > 0 Then
                bits = Hex2Bin(content)
                ' Reverse so that character (n + 1) is bit n.
                lsbFirst = StrReverse(bits)

                For fieldRow = FIRST_FIELD_ROW To LAST_FIELD_ROW
                    bitCell = ws.Cells(fieldRow, col).Value
                    ' IsNumeric(Empty) is True, so emptiness is tested separately.
                    If Not IsEmpty(bitCell) And IsNumeric(bitCell) Then
                        If CDbl(bitCell) >= 0 And CDbl(bitCell) < Len(lsbFirst) Then
                            If Mid$(lsbFirst, CLng(bitCell) + 1, 1) = "1" Then
                                details = details & vbCrLf & ws.Cells(fieldRow, col + 1).Value
                            End If
                        End If
                    End If
                Next fieldRow

                decode_c = "二进制值为" & bin_d(bits) & vbCrLf & "解释为:" & details
                Exit Function
            End If
        End If
    Next col
End Function




Function decode_m(title As String, content As String) As String
    ' Decodes a 64-bit IA32_MCi_STATUS value into readable text.
    '
    ' title:   register name; decoding only happens when it contains
    '          "STATUS" (case-insensitive) somewhere after its first character.
    ' content: register value as hex digits, most significant digit first;
    '          conversion is done by Hex2Bin.
    ' Returns: "暂无数据" for non-STATUS titles. Otherwise:
    '            1. if any of the top 7 bits (63..57) is set, the header
    '               "二进制值为<bits>" + CRLF + "解释为:" followed by one line per
    '               set bit, taken from sheet "MCE", column 1, rows 21..27
    '               (row 21 = bit 63); the sheet text itself is not visible
    '               here, presumably the VAL/OVER/UC/EN/MISCV/ADDRV/PCC
    '               descriptions - confirm against the workbook;
    '            2. CRLF-prefixed lines describing the MCA error code held
    '               in the low 12 bits.
    '
    ' Fixes relative to the earlier implementation:
    '   - the bit string is normalised to exactly 64 bits (left-padded with
    '     zeros, or the right-most 64 bits kept), so a "0x" prefix or dropped
    '     leading zeros no longer shift every field;
    '   - bus/interconnect codes: the PP field was read with an
    '     uninitialised loop variable, which raised a runtime error;
    '   - TLB codes: the TT field was appended to the LL buffer and so was
    '     never recognised;
    '   - "Internal timer error" is reported only for exactly 0100 0000 0000;
    '     any other 01xx xxxx xxxx pattern is an internal unclassified error;
    '   - 0000 0000 0110 is reported as SMM handler code access violation
    '     rather than FRC error;
    '   - the signal name in the external-error text is spelled BINIT#.
    Const STATUS_WIDTH As Long = 64
    Const FLAG_COUNT As Long = 7
    Const FLAG_ROW_OFFSET As Long = 20
    Const CODE_WIDTH As Long = 12

    Dim bits As String          ' 64 characters, bit 63 first
    Dim code As String          ' low 12 bits, bit 11 first
    Dim flagText As String
    Dim anyFlag As Boolean
    Dim report As String
    Dim k As Long

    If InStr(1, UCase$(title), "STATUS", vbBinaryCompare) <= 1 Then
        decode_m = "暂无数据"
        Exit Function
    End If

    bits = Right$(String$(STATUS_WIDTH, "0") & Hex2Bin(content), STATUS_WIDTH)

    ' Status flags: character k of bits is bit (64 - k).
    For k = 1 To FLAG_COUNT
        If Mid$(bits, k, 1) = "1" Then
            flagText = flagText & vbCrLf & Sheets("MCE").Cells(k + FLAG_ROW_OFFSET, 1).Value
            anyFlag = True
        End If
    Next k
    If anyFlag Then
        report = "二进制值为" & bin_d(bits) & vbCrLf & "解释为:" & flagText
    End If

    ' MCA error code: character i of code is bit (12 - i).
    ' The position of the first "1" selects the error class.
    code = Right$(bits, CODE_WIDTH)

    Select Case InStr(1, code, "1", vbBinaryCompare)

        Case 0      ' 0000 0000 0000
            report = report & vbCrLf & "No error has been reported to this bank."

        Case 12     ' 0000 0000 0001
            report = report & vbCrLf & "Unclassified. This error has not been classified into the MCA error classes. The additional information section may have meaning."

        Case 11     ' 0000 0000 001x
            If Mid$(code, 12, 1) = "0" Then
                report = report & vbCrLf & "Parity error in internal microcode ROM"
            Else
                report = report & vbCrLf & " The BINIT# from another processor caused this processor to enter machine-check."
            End If

        Case 10     ' 0000 0000 01xx
            If Mid$(code, 11, 2) = "10" Then
                report = report & vbCrLf & "SMM handler code access violation."
            ElseIf Mid$(code, 12, 1) = "0" Then
                report = report & vbCrLf & "Functional redundancy check (FRC) master/slave error."
            Else
                report = report & vbCrLf & "Internal parity error."
            End If

        Case 9      ' 0000 0000 1xLL (generic cache hierarchy is 11LL)
            If Mid$(code, 10, 1) = "1" Then
                report = report & vbCrLf & " Generic cache hierarchy errors."
            End If
            report = report & vbCrLf & ll(Mid$(code, 11, 2))

        Case 8      ' 0000 0001 TTLL
            report = report & vbCrLf & " TLB errors."
            report = report & vbCrLf & ll(Mid$(code, 11, 2))
            report = report & vbCrLf & tt(Mid$(code, 9, 2))

        Case 5      ' 0000 1MMM CCCC
            report = report & vbCrLf & "Memory controller errors (Intel-only)."
            Select Case Mid$(code, 6, 3)
                Case "000"
                    report = report & vbCrLf & "[ Generic undefined request]"
                Case "001"
                    report = report & vbCrLf & " [ memory read error]"
                Case "010"
                    report = report & vbCrLf & " [Memory write error.]"
                Case "011"
                    report = report & vbCrLf & " [Address or command error.]"
                Case "100"
                    report = report & vbCrLf & "[ Memory scrubbing error.]"
            End Select
            report = report & vbCrLf & mem_c(Mid$(code, 9, 4))

        Case 4      ' 0001 RRRR TTLL
            report = report & vbCrLf & "Memory errors in the cache hierarchy."
            report = report & vbCrLf & rrrr(Mid$(code, 5, 4))
            report = report & vbCrLf & ll(Mid$(code, 11, 2))
            report = report & vbCrLf & tt(Mid$(code, 9, 2))

        Case 2      ' 01xx xxxx xxxx
            If Mid$(code, 3) = String$(CODE_WIDTH - 2, "0") Then
                report = report & vbCrLf & "Internal timer error."
            Else
                report = report & vbCrLf & " Internal unclassified error. At least one x equals 1"
            End If

        Case 1      ' 1PPT RRRR IILL
            report = report & vbCrLf & "Bus and interconnect errors."
            report = report & vbCrLf & pp(Mid$(code, 2, 2))
            report = report & vbCrLf & t(Mid$(code, 4, 1))
            report = report & vbCrLf & rrrr(Mid$(code, 5, 4))
            report = report & vbCrLf & ii(Mid$(code, 9, 2))
            report = report & vbCrLf & ll(Mid$(code, 11, 2))

    End Select

    decode_m = report
End Function






Function Hex2Bin(TP As String) As String
    ' Converts a string of hexadecimal digits to a string of binary digits,
    ' four bits per hex digit, preserving digit order.
    '
    ' TP: hex text. Surrounding spaces and one leading "0x"/"0X" prefix are
    '     ignored. Any character that Hex2BinSingleByte does not recognise
    '     (for example the spaces in "00 b8") contributes nothing.
    ' Returns: the concatenated 4-bit groups; an empty string for empty input.
    '
    ' Changes from the earlier behaviour: empty input used to raise a
    ' runtime error (ReDim 1 To 0), and a "0x" prefix used to add a spurious
    ' leading "0000" group.
    Dim digits As String
    Dim result As String
    Dim idx As Long

    digits = Trim$(TP)
    If LCase$(Left$(digits, 2)) = "0x" Then
        digits = Mid$(digits, 3)
    End If

    For idx = 1 To Len(digits)
        result = result & Hex2BinSingleByte(Mid$(digits, idx, 1))
    Next idx

    Hex2Bin = result
End Function


Function Hex2BinSingleByte(H As String) As String
    ' Converts one hexadecimal digit to its 4-character binary form.
    ' (Despite the name, the unit handled is a single hex digit, not a byte.)
    '
    ' H: a single character "0".."9", "A".."F" or "a".."f".
    ' Returns: "0000".."1111", or an empty string for anything else.
    '
    ' H is received ByRef (the VBA default), so the upper-cased copy is kept
    ' in a local variable; assigning it back to H would silently modify the
    ' caller's variable.
    Dim digit As String
    Dim bits As String

    digit = UCase$(H)

    Select Case digit
        Case "0": bits = "0000"
        Case "1": bits = "0001"
        Case "2": bits = "0010"
        Case "3": bits = "0011"
        Case "4": bits = "0100"
        Case "5": bits = "0101"
        Case "6": bits = "0110"
        Case "7": bits = "0111"
        Case "8": bits = "1000"
        Case "9": bits = "1001"
        Case "A": bits = "1010"
        Case "B": bits = "1011"
        Case "C": bits = "1100"
        Case "D": bits = "1101"
        Case "E": bits = "1110"
        Case "F": bits = "1111"
    End Select

    Hex2BinSingleByte = bits
End Function






Private Sub Worksheet_SelectionChange(ByVal Target As Range)
    ' Decodes a register row when the user selects a single cell in
    ' column A below the header row.
    '
    ' Behaviour:
    '   - if the cell text already ends with the marker "P", the marker is
    '     removed and A1 is selected (this clears the "decoded" state);
    '   - otherwise the user is asked to confirm; on OK, the last
    '     space-separated token of the cell is taken as the register name,
    '     the cell to its right as the raw value, and the decoded text is
    '     written two columns to the right. Names containing "IA32" go to
    '     decode_m, all others to decode_c. The cell is then tagged with a
    '     trailing "P" formatted in the Wingdings 2 font.
    '
    ' Changes from the earlier behaviour: multi-cell selections, error
    ' values and whitespace-only cells are rejected explicitly (a
    ' whitespace-only cell used to index an empty token array and was still
    ' marked as decoded), and the last token is taken with UBound instead
    ' of an empty counting loop.
    Const DONE_MARK As String = "P"

    Dim cellText As String
    Dim tokens() As String
    Dim regName As String
    Dim rawValue As String
    Dim decoded As String

    If Target.Areas.Count > 1 Then Exit Sub
    If Target.Rows.Count > 1 Or Target.Columns.Count > 1 Then Exit Sub
    If Target.Column <> 1 Or Target.Row <= 1 Then Exit Sub
    If IsError(Target.Value) Then Exit Sub

    ' Best-effort from here on, as before: a failure while decoding or
    ' formatting must not interrupt the user with a runtime error dialog.
    On Error Resume Next

    cellText = CStr(Target.Value)
    If Len(Trim$(cellText)) = 0 Then Exit Sub

    If Right$(cellText, 1) = DONE_MARK Then
        Target.Value = Left$(cellText, Len(cellText) - 1)
        ActiveSheet.Range("A1").Select
        Exit Sub
    End If

    If MsgBox("是否查询文档", vbOKCancel, "提示") <> vbOK Then Exit Sub

    ' The register name is the last space-separated token of the cell.
    tokens = Split(Trim$(cellText), " ")
    regName = Trim$(tokens(UBound(tokens)))
    rawValue = CStr(Target.Offset(0, 1).Value)

    If InStr(1, regName, "IA32", vbTextCompare) > 0 Then
        decoded = decode_m(regName, rawValue)
    Else
        decoded = decode_c(regName, rawValue)
    End If
    Target.Offset(0, 2).Value = decoded

    ' Tag the row as decoded; only the appended character is reformatted.
    Target.Value = Target.Value & DONE_MARK
    With Target.Characters(Start:=Len(Target.Value), Length:=1).Font
        .Name = "Wingdings 2"               ' symbol font used for the marker
        .Size = 20                          ' font size
        .Strikethrough = False
        .Superscript = False
        .Subscript = False
        .OutlineFont = False
        .Shadow = False
        .Underline = xlUnderlineStyleNone   ' no underline
        .ColorIndex = 23
        .Bold = True                        ' bold
    End With
End Sub


MCE和CSR数值参考文档:

项目

参考文档

出处

MCE

64-ia-32-architectures-software-developer-system-programming-manual-325384

https://software.intel.com/en-us/articles/intel-sdm

CSR

xeon-e5-v4-datasheet-vol-2

https://www.intel.cn/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v4-datasheet-vol-2.pdf