字符串的模式匹配

1. 模式匹配方法

假如给定长度为 n 的字符串 A,以及长度为 m 的字符串 B,求 B 在 A 中首次出现的位置,这个位置也称为起始位置。本文假设:

var strA = "ABBACC"
var strB = "ACC"

设 strA 的长度为 n,strB 的长度为 m,则一共有 (n-m+1) 个可能的起始位置:如果下标从 0 开始,起始位置的范围为 0 到 (n-m);如果下标从 1 开始,起始位置的范围为 1 到 (n-m+1)。

2. 朴素模式匹配 (BF算法)

BF 算法是最简单的一种匹配方法,它利用双指针从每个起始位置开始逐字符比较,最坏情况下需要约 (n × m) 次比较,相当于暴力枚举,其时间复杂度为 O(n*m),迭代步骤如下:

  • 设B在A中的位置为i
  • 检测 i = 0 时,strB 与 strA 是否匹配成功
  • 检测 i = 1 时,strB 与 strA 是否匹配成功
  • 检测 i = 2 时,strB 与 strA 是否匹配成功
  • ......
// JS第一种写法
/**
 * Brute-force search: index of the first occurrence of `pattern` in `str`.
 *
 * Every start offset in `str` is tried in turn; at each offset the pattern
 * is compared character by character until it is exhausted or a mismatch
 * is found.
 *
 * @param {string} str - Text to search in.
 * @param {string} pattern - Text to search for.
 * @returns {number} Zero-based start index of the first match, or -1 when
 *   there is none. An empty pattern matches at index 0.
 */
function getStrLoc(str, pattern) {
  const textLength = str.length;
  const patternLength = pattern.length;

  for (let start = 0; start < textLength; start++) {
    // Count how many leading pattern characters agree at this offset.
    // Reading past the end of `str` yields undefined, which never equals
    // a pattern character, so the comparison stops on its own.
    let matched = 0;
    while (matched < patternLength && str[start + matched] === pattern[matched]) {
      matched++;
    }
    if (matched === patternLength) {
      return start;
    }
  }

  // Only reached when the text is empty or no offset matched.
  return patternLength === 0 ? 0 : -1;
}

// Demo: print the match index for each [text, pattern] sample.
for (const [text, wanted] of [
  ["ababcababd", "ababd"], // 5
  ["aabaacaaa", "aac"], // 3
  ["aabb", "bb"], // 2
  ["aaa", "aa"], // 0
]) {
  console.log(getStrLoc(text, wanted));
}

// JS 第二种写法(逻辑更清晰)
/**
 * Brute-force search with two cursors: index of the first occurrence of
 * `pattern` in `str`.
 *
 * Both cursors advance together while characters agree. On a mismatch the
 * text cursor is rewound to one past the start of the current attempt and
 * the pattern cursor is reset to 0.
 *
 * @param {string} str - Text to search in.
 * @param {string} pattern - Text to search for.
 * @returns {number} Zero-based start index of the first match, or -1 when
 *   there is none. An empty pattern matches at index 0.
 */
function getStrLoc(str, pattern) {
  const textLength = str.length;
  const patternLength = pattern.length;
  let textPos = 0;
  let patternPos = 0;

  while (textPos < textLength && patternPos < patternLength) {
    if (str[textPos] !== pattern[patternPos]) {
      // The attempt began at (textPos - patternPos); retry one position later.
      textPos = textPos - patternPos + 1;
      patternPos = 0;
      continue;
    }
    textPos += 1;
    patternPos += 1;
  }

  if (patternPos !== patternLength) {
    return -1;
  }
  // The whole pattern was consumed, so the match ends just before textPos.
  return textPos - patternLength;
}

// Demo: print the match index for each [text, pattern] sample.
for (const [text, wanted] of [
  ["ababcababd", "ababd"], // 5
  ["aabaacaaa", "aac"], // 3
  ["aabb", "bb"], // 2
]) {
  console.log(getStrLoc(text, wanted));
}

3. KMP模式匹配

KMP 算法的核心在于不回溯 str 指针,只将模式字符串指针回退到合适的位置:

第一种写法:

<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <title></title>
    <link rel="stylesheet" href="">
</head>
<body>
    
</body>
<script>
  /**
   * Fill `next` with the KMP failure table of `str`, computed by brute force.
   *
   * For each prefix str[0..i], every proper prefix is compared against the
   * proper suffixes of that same prefix. next[i] is (length of the longest
   * string that is both) - 1, i.e. the index of that border's last
   * character, or -1 when there is no such string.
   *
   * @param {string} str - Pattern to analyse.
   * @param {number[]} next - Array to write into; entries 0..str.length-1
   *   are overwritten.
   * @returns {number[]} The same `next` array.
   */
  function BuildMatch(str, next) {
    for (let end = 0; end < str.length; end++) {
      const head = str.slice(0, end + 1);

      // Collect every proper prefix and proper suffix of `head`
      // (lengths 1 .. head.length - 1). Both stay empty for a single char.
      const properPrefixes = [];
      const properSuffixes = new Set();
      for (let len = 1; len < head.length; len++) {
        properPrefixes.push(head.slice(0, len));
        properSuffixes.add(head.slice(head.length - len));
      }

      // Longest prefix that also occurs as a suffix; 0 means none.
      let longest = 0;
      for (const candidate of properPrefixes) {
        if (properSuffixes.has(candidate) && candidate.length > longest) {
          longest = candidate.length;
        }
      }

      next[end] = longest - 1;
    }
    return next;
  }

/**
 * Knuth-Morris-Pratt search: index of the first occurrence of `pattern`
 * in `string`.
 *
 * The text cursor never moves backwards; on a mismatch only the pattern
 * cursor is moved, using the table filled in by BuildMatch. The table is
 * also written to the console.
 *
 * @param {string} string - Text to search in.
 * @param {string} pattern - Text to search for.
 * @returns {number} Zero-based start index of the first match, or -1.
 */
function KMP(string, pattern) {
    const textLength = string.length;
    const patternLength = pattern.length;
    const match = [];

    BuildMatch(pattern, match);
    console.log(match)

    let textPos = 0;
    let patternPos = 0;
    while (textPos < textLength && patternPos < patternLength) {
        if (string[textPos] === pattern[patternPos]) {
            textPos += 1;
            patternPos += 1;
            continue;
        }
        if (patternPos === 0) {
            // Mismatch on the first pattern character: move on in the text.
            textPos += 1;
        } else {
            // Resume right after the longest border of the matched part.
            patternPos = match[patternPos - 1] + 1;
        }
    }

    if (patternPos !== patternLength) {
        return -1;
    }
    return textPos - patternLength;
}

  // Demo: search the sample text and print the index KMP returns.
  const sampleText = 'aaabbbcccaaabbbccc';
  const samplePattern = 'cccaaa';
  console.log(KMP(sampleText, samplePattern));
</script>
</html>

第二种写法:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Document</title>
 
</head>
<body>
    <script>
/**
 * Fill `match` with the KMP failure table of `pattern` in linear time.
 *
 * match[j] is the index of the last character of the longest proper prefix
 * of pattern[0..j] that is also a suffix of it, or -1 when no such prefix
 * exists.
 *
 * @param {string} pattern - Pattern to analyse.
 * @param {number[]} match - Array to write into; entries
 *   0..pattern.length-1 are overwritten. Left untouched for an empty pattern.
 * @returns {number[]} The same `match` array, so the result can be used
 *   directly by the caller.
 */
function BuildMatch(pattern, match) {
  const m = pattern.length;
  if (m === 0) {
    // Nothing to describe: do not invent an entry for a missing character.
    return match;
  }

  match[0] = -1;

  for (let j = 1; j < m; j++) {
    // Start from the border of the previous prefix and shrink it until it
    // can be extended by pattern[j] or no border is left.
    let i = match[j - 1];
    while (i >= 0 && pattern[i + 1] !== pattern[j]) {
      i = match[i];
    }
    match[j] = pattern[i + 1] === pattern[j] ? i + 1 : -1;
  }

  return match;
}

/**
 * Knuth-Morris-Pratt search: index of the first occurrence of `pattern`
 * in `string`.
 *
 * The table filled in by BuildMatch decides where the pattern cursor
 * resumes after a mismatch, so the text cursor is never moved backwards.
 * The table is also written to the console.
 *
 * @param {string} string - Text to search in.
 * @param {string} pattern - Text to search for.
 * @returns {number} Zero-based start index of the first match, or -1.
 */
function KMP(string, pattern) {
    const textLength = string.length;
    const patternLength = pattern.length;
    const match = [];

    BuildMatch(pattern, match);
    console.log(match)

    let textPos = 0;
    let patternPos = 0;
    while (textPos < textLength && patternPos < patternLength) {
        const sameChar = string[textPos] === pattern[patternPos];
        if (sameChar) {
            textPos++;
            patternPos++;
        } else if (patternPos === 0) {
            // Nothing matched yet at this text position: advance the text.
            textPos++;
        } else {
            // Fall back to just after the longest border of the matched part.
            patternPos = match[patternPos - 1] + 1;
        }
    }

    return patternPos === patternLength ? textPos - patternLength : -1;
}

/**
 * Demo entry point: search a fixed text for a fixed pattern with KMP and
 * print either the match index or a "Not Found." message.
 */
function main() {
  const text = "ababcababe";
  const wanted = "ababe";
  const position = KMP(text, wanted);

  if (position !== -1) {
    console.log("%s\n", position);
    return;
  }
  console.log("Not Found.\n");
}

main();
    </script>
</body>
</html>
相关文章
相关标签/搜索