String.prototype.codePointAt()和String.fromCodePoint()

String.prototype.codePointAt()

codePointAt() 方法返回一个 Unicode 编码点值的非负整数。

这是一个 ES6 方法。

str.codePointAt(pos)

pos

pos 参数是字符串中字符的位置。

如果在指定的位置没有元素,则返回 undefined。如果从该索引处开始的不是一个 UTF-16 代理对,则直接返回该索引处的编码单元。

Surrogate Pair(代理对)是 UTF-16 中用于扩展字符的编码方式,它采用四个字节(两个 UTF-16 编码单元)来表示一个字符。

// Index 1 holds "B" (U+0042), which is a single UTF-16 code unit.
'ABC'.codePointAt(1);          // 66
// A high/low surrogate pair starting at index 0 is combined into one code point (0x10000).
'\uD800\uDC00'.codePointAt(0); // 65536

// Position 42 is past the end of the string.
'XYZ'.codePointAt(42); // undefined

polyfill

if (!String.prototype.codePointAt) {
  (function() {
    // Strict mode keeps `this` as `undefined`/`null` when the method is invoked
    // through `apply`/`call`, so the null check below can detect it.
    'use strict';

    /**
     * Polyfill for ES2015 `String.prototype.codePointAt`.
     *
     * @param {*} position Index of the UTF-16 code unit to start reading at.
     *   Falsy values and NaN are treated as 0; fractional values are
     *   truncated toward zero.
     * @returns {number|undefined} The code point starting at `position`
     *   (combining a high/low surrogate pair into a single value), or
     *   `undefined` when `position` is outside the string.
     * @throws {TypeError} When called with `this` set to `null` or `undefined`.
     */
    var codePointAt = function(position) {
      if (this == null) {
        throw new TypeError('String.prototype.codePointAt called on null or undefined');
      }
      var string = String(this); // coerce the receiver to a string
      var size = string.length;
      // Convert the position to a number.
      var index = position ? Number(position) : 0;
      if (index !== index) { // NaN is the only value not equal to itself
        index = 0;
      }
      // Truncate toward zero so that e.g. -0.5 maps to index 0 instead of
      // being rejected by the bounds check below.
      index = index < 0 ? Math.ceil(index) : Math.floor(index);
      // Out-of-range positions yield `undefined`.
      if (index < 0 || index >= size) {
        return undefined;
      }
      // First code unit at the requested position.
      var first = string.charCodeAt(index);
      var second;
      if (
        first >= 0xD800 && first <= 0xDBFF && // high surrogate
        size > index + 1 // there is a following code unit
      ) {
        second = string.charCodeAt(index + 1);
        if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
          // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          return (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
        }
      }
      // Not the start of a valid surrogate pair: return the code unit itself.
      return first;
    };

    // Install as a non-enumerable property when `Object.defineProperty` exists,
    // otherwise fall back to plain assignment.
    if (Object.defineProperty) {
      Object.defineProperty(String.prototype, 'codePointAt', {
        'value': codePointAt,
        'configurable': true,
        'writable': true
      });
    } else {
      String.prototype.codePointAt = codePointAt;
    }
  }());
}

String.fromCodePoint()

String.fromCodePoint() 静态方法返回使用指定的代码点序列创建的字符串。

这是一个ES6方法。

参数是一串unicode编码,返回unicode对应的字符串。

如果传入无效的 Unicode 编码,将会抛出一个 RangeError(例如:"RangeError: NaN is not a valid code point")。

// Valid code points: the arguments are converted to characters and concatenated.
// Values above 0xFFFF come back as a surrogate pair (two UTF-16 code units).
String.fromCodePoint(42);       // "*"
String.fromCodePoint(65, 90);   // "AZ"
String.fromCodePoint(0x404);    // "\u0404"
String.fromCodePoint(0x2F804);  // "\uD87E\uDC04"
String.fromCodePoint(194564);   // "\uD87E\uDC04"
String.fromCodePoint(0x1D306, 0x61, 0x1D307) // "\uD834\uDF06a\uD834\uDF07"

// Invalid arguments (non-numeric, non-finite, negative or non-integer):
// each of these calls throws, so they are independent illustrations.
String.fromCodePoint('_');      // RangeError
String.fromCodePoint(Infinity); // RangeError
String.fromCodePoint(-1);       // RangeError
String.fromCodePoint(3.14);     // RangeError
String.fromCodePoint(3e-2);     // RangeError
String.fromCodePoint(NaN);      // RangeError

// String.fromCharCode() alone cannot produce a character at such a high code point.
// String.fromCodePoint(), on the other hand, can return 4-byte characters as well as 2-byte ones
// (i.e. it can return a single character whose string length is 2 instead of 1).
console.log(String.fromCodePoint(0x2F804)); // or 194564 in decimal

polyfill

if (!String.fromCodePoint) {
  (function() {
    // Resolve a usable `Object.defineProperty`, or `undefined` when the call
    // throws (IE 8 only supports `Object.defineProperty` on DOM elements).
    var defineProperty = (function() {
      var usable;
      try {
        var probe = {};
        var nativeDefine = Object.defineProperty;
        usable = nativeDefine(probe, probe, probe) && nativeDefine;
      } catch (error) {}
      return usable;
    }());
    var stringFromCharCode = String.fromCharCode;
    var floor = Math.floor;

    /**
     * Polyfill for ES2015 `String.fromCodePoint`.
     *
     * Accepts any number of arguments, converts each to a number and returns
     * the string made of the corresponding code points. Code points above
     * 0xFFFF are emitted as a high/low surrogate pair.
     *
     * @returns {string} The resulting string ('' when called with no arguments).
     * @throws {RangeError} When an argument is not an integer in [0, 0x10FFFF].
     */
    var fromCodePoint = function() {
      // Code units are flushed through `String.fromCharCode.apply` in batches
      // so the argument list handed to `apply` stays bounded.
      var CHUNK_LIMIT = 0x4000;
      var total = arguments.length;
      var output = '';
      var pending = [];
      var position;
      var value;
      var offset;
      for (position = 0; position < total; position++) {
        value = Number(arguments[position]);
        var isValidCodePoint =
          isFinite(value) &&     // rejects `NaN`, `+Infinity` and `-Infinity`
          value >= 0 &&
          value <= 0x10FFFF &&
          floor(value) === value; // must be an integer
        if (!isValidCodePoint) {
          throw RangeError('Invalid code point: ' + value);
        }
        if (value > 0xFFFF) {
          // Astral code point: split into surrogate halves.
          // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          offset = value - 0x10000;
          pending.push((offset >> 10) + 0xD800, (offset % 0x400) + 0xDC00);
        } else {
          // BMP code point: a single code unit.
          pending.push(value);
        }
        // Flush on the last argument, or once the batch has grown past the limit.
        if (position + 1 === total || pending.length > CHUNK_LIMIT) {
          output += stringFromCharCode.apply(null, pending);
          pending.length = 0;
        }
      }
      return output;
    };

    // Install as a non-enumerable property where possible, otherwise assign.
    if (defineProperty) {
      defineProperty(String, 'fromCodePoint', {
        'value': fromCodePoint,
        'configurable': true,
        'writable': true
      });
    } else {
      String.fromCodePoint = fromCodePoint;
    }
  }());
}
相关文章
相关标签/搜索