385 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
		
		
			
		
	
	
			385 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
/*
 * text-segmentation 1.0.3 <https://github.com/niklasvh/text-segmentation>
 * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
 * Released under MIT License
 */
|  | var base64 = 'AAAAAAAAAAAAEA4AGBkAAFAaAAACAAAAAAAIABAAGAAwADgACAAQAAgAEAAIABAACAAQAAgAEAAIABAACAAQAAgAEAAIABAAQABIAEQATAAIABAACAAQAAgAEAAIABAAVABcAAgAEAAIABAACAAQAGAAaABwAHgAgACIAI4AlgAIABAAmwCjAKgAsAC2AL4AvQDFAMoA0gBPAVYBWgEIAAgACACMANoAYgFkAWwBdAF8AX0BhQGNAZUBlgGeAaMBlQGWAasBswF8AbsBwwF0AcsBYwHTAQgA2wG/AOMBdAF8AekB8QF0AfkB+wHiAHQBfAEIAAMC5gQIAAsCEgIIAAgAFgIeAggAIgIpAggAMQI5AkACygEIAAgASAJQAlgCYAIIAAgACAAKBQoFCgUTBRMFGQUrBSsFCAAIAAgACAAIAAgACAAIAAgACABdAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACABoAmgCrwGvAQgAbgJ2AggAHgEIAAgACADnAXsCCAAIAAgAgwIIAAgACAAIAAgACACKAggAkQKZAggAPADJAAgAoQKkAqwCsgK6AsICCADJAggA0AIIAAgACAAIANYC3gIIAAgACAAIAAgACABAAOYCCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAkASoB+QIEAAgACAA8AEMCCABCBQgACABJBVAFCAAIAAgACAAIAAgACAAIAAgACABTBVoFCAAIAFoFCABfBWUFCAAIAAgACAAIAAgAbQUIAAgACAAIAAgACABzBXsFfQWFBYoFigWKBZEFigWKBYoFmAWfBaYFrgWxBbkFCAAIAAgACAAIAAgACAAIAAgACAAIAMEFCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAMgFCADQBQgACAAIAAgACAAIAAgACAAIAAgACAAIAO4CCAAIAAgAiQAIAAgACABAAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAD0AggACAD8AggACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIANYFCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIA
AgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAMDvwAIAAgAJAIIAAgACAAIAAgACAAIAAgACwMTAwgACAB9BOsEGwMjAwgAKwMyAwsFYgE3A/MEPwMIAEUDTQNRAwgAWQOsAGEDCAAIAAgACAAIAAgACABpAzQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6B
TQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFN | |||
|  | 
 | |||
/*
 * utrie 1.0.2 <https://github.com/niklasvh/utrie>
 * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
 * Released under MIT License
 */
// The 64-character base64 alphabet; a character's position is its 6-bit value.
var chars$1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
// Reverse lookup table: character code -> 6-bit value.
// A plain array is used where Uint8Array is unavailable; in that case
// characters outside the alphabet read back as undefined instead of 0.
var lookup$1 = typeof Uint8Array === 'undefined' ? [] : new Uint8Array(256);
for (var i$1 = 0; i$1 < chars$1.length; i$1++) {
    lookup$1[chars$1.charCodeAt(i$1)] = i$1;
}
/**
 * Decode a base64 string into raw bytes.
 *
 * Characters are translated through the module-level `lookup$1` table and
 * every group of four characters is packed into three bytes. One or two
 * trailing '=' padding characters shorten the output by one or two bytes.
 *
 * @param base64 the base64-encoded text
 * @return an ArrayBuffer holding the decoded bytes, or a plain Array of byte
 *         values when ArrayBuffer / Uint8Array (with `slice`) are unavailable
 */
var decode = function (base64) {
    var len = base64.length;
    // Floored so the plain-Array fallback gets an integer length, matching
    // what `new ArrayBuffer()` does with a fractional size.
    var bufferLength = Math.floor(len * 0.75);
    var i, p = 0, encoded1, encoded2, encoded3, encoded4;
    if (base64[len - 1] === '=') {
        bufferLength--;
        if (base64[len - 2] === '=') {
            bufferLength--;
        }
    }
    var hasTypedArrays = typeof ArrayBuffer !== 'undefined' &&
        typeof Uint8Array !== 'undefined' &&
        typeof Uint8Array.prototype.slice !== 'undefined';
    var buffer = hasTypedArrays ? new ArrayBuffer(bufferLength) : new Array(bufferLength);
    var bytes = Array.isArray(buffer) ? buffer : new Uint8Array(buffer);
    for (i = 0; i < len; i += 4) {
        encoded1 = lookup$1[base64.charCodeAt(i)];
        encoded2 = lookup$1[base64.charCodeAt(i + 1)];
        encoded3 = lookup$1[base64.charCodeAt(i + 2)];
        encoded4 = lookup$1[base64.charCodeAt(i + 3)];
        bytes[p++] = (encoded1 << 2) | (encoded2 >> 4);
        bytes[p++] = ((encoded2 & 15) << 4) | (encoded3 >> 2);
        bytes[p++] = ((encoded3 & 3) << 6) | (encoded4 & 63);
    }
    // A typed array silently drops the writes made for padding characters;
    // a plain array grows instead, so trim it back to the decoded length.
    if (Array.isArray(bytes) && bytes.length > bufferLength) {
        bytes.length = bufferLength;
    }
    return buffer;
};
/**
 * Plain-array stand-in for `new Uint16Array(buffer)`.
 *
 * Combines consecutive byte pairs into 16-bit values, low byte first.
 *
 * @param buffer array of byte values
 * @return array of 16-bit values, one per byte pair
 */
var polyUint16Array = function (buffer) {
    var length = buffer.length;
    var words = [];
    for (var i = 0; i < length; i += 2) {
        words.push((buffer[i + 1] << 8) | buffer[i]);
    }
    return words;
};
/**
 * Plain-array stand-in for `new Uint32Array(buffer)`.
 *
 * Combines consecutive groups of four bytes into 32-bit values, low byte
 * first.
 *
 * @param buffer array of byte values
 * @return array of unsigned 32-bit values, one per four-byte group
 */
var polyUint32Array = function (buffer) {
    var length = buffer.length;
    var words = [];
    for (var i = 0; i < length; i += 4) {
        // Bitwise operators produce signed 32-bit results; `>>> 0` converts
        // back to unsigned so values with the top bit set match what a real
        // Uint32Array would hold.
        words.push(((buffer[i + 3] << 24) | (buffer[i + 2] << 16) | (buffer[i + 1] << 8) | buffer[i]) >>> 0);
    }
    return words;
};
|  | 
 | |||
/** Shift size for getting the index-2 table offset. */
var UTRIE2_SHIFT_2 = 5;
/** Shift size for getting the index-1 table offset. */
var UTRIE2_SHIFT_1 = 6 + 5;
/**
 * Shift size for shifting left the index array values.
 * Increases possible data size with 16-bit index values at the cost
 * of compactability.
 * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
 */
var UTRIE2_INDEX_SHIFT = 2;
/**
 * Difference between the two shift sizes,
 * for getting an index-1 offset from an index-2 offset. 6=11-5
 */
var UTRIE2_SHIFT_1_2 = UTRIE2_SHIFT_1 - UTRIE2_SHIFT_2;
/**
 * The part of the index-2 table for U+D800..U+DBFF stores values for
 * lead surrogate code _units_ not code _points_.
 * Values for lead surrogate code _points_ are indexed with this portion of the table,
 * which starts at offset 2048=0x800=0x10000>>UTRIE2_SHIFT_2.
 */
var UTRIE2_LSCP_INDEX_2_OFFSET = 0x10000 >> UTRIE2_SHIFT_2;
/** Number of entries in a data block. 32=0x20 */
var UTRIE2_DATA_BLOCK_LENGTH = 1 << UTRIE2_SHIFT_2;
/** Mask for getting the lower bits for the in-data-block offset. */
var UTRIE2_DATA_MASK = UTRIE2_DATA_BLOCK_LENGTH - 1;
/**
 * Length of the lead-surrogate code point portion of the index-2 table.
 * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
 */
var UTRIE2_LSCP_INDEX_2_LENGTH = 0x400 >> UTRIE2_SHIFT_2;
/** Count the lengths of both BMP pieces. 2080=0x820 */
var UTRIE2_INDEX_2_BMP_LENGTH = UTRIE2_LSCP_INDEX_2_OFFSET + UTRIE2_LSCP_INDEX_2_LENGTH;
/**
 * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
 * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
 */
var UTRIE2_UTF8_2B_INDEX_2_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH;
/** Length of the 2-byte UTF-8 index-2 part. 32=0x20=0x800>>6 (U+0800 is the first code point after 2-byte UTF-8). */
var UTRIE2_UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6;
/**
 * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
 * Variable length, for code points up to highStart, where the last single-value range starts.
 * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
 * (For 0x100000 supplementary code points U+10000..U+10ffff.)
 *
 * The part of the index-2 table for supplementary code points starts
 * after this index-1 table.
 *
 * Both the index-1 table and the following part of the index-2 table
 * are omitted completely if there is only BMP data.
 */
var UTRIE2_INDEX_1_OFFSET = UTRIE2_UTF8_2B_INDEX_2_OFFSET + UTRIE2_UTF8_2B_INDEX_2_LENGTH;
/**
 * Number of index-1 entries for the BMP. 32=0x20
 * This part of the index-1 table is omitted from the serialized form.
 */
var UTRIE2_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UTRIE2_SHIFT_1;
/** Number of entries in an index-2 block. 64=0x40 */
var UTRIE2_INDEX_2_BLOCK_LENGTH = 1 << UTRIE2_SHIFT_1_2;
/** Mask for getting the lower bits for the in-index-2-block offset. */
var UTRIE2_INDEX_2_MASK = UTRIE2_INDEX_2_BLOCK_LENGTH - 1;
/**
 * Copy the range [start, end) out of a sequence of 16-bit values.
 *
 * Uses the view's own `slice` when it has one (typed arrays and plain
 * arrays); otherwise copies through Array.prototype.slice into a new
 * Uint16Array.
 *
 * @param view typed array, array or array-like of 16-bit values
 * @param start first element to copy
 * @param end one past the last element to copy; omitted means "to the end"
 * @return the copied range
 */
var slice16 = function (view, start, end) {
    if (view.slice) {
        return view.slice(start, end);
    }
    var copied = Array.prototype.slice.call(view, start, end);
    return new Uint16Array(copied);
};
/**
 * Copy the range [start, end) out of a sequence of 32-bit values.
 *
 * Uses the view's own `slice` when it has one (typed arrays and plain
 * arrays); otherwise copies through Array.prototype.slice into a new
 * Uint32Array.
 *
 * @param view typed array, array or array-like of 32-bit values
 * @param start first element to copy
 * @param end one past the last element to copy; omitted means "to the end"
 * @return the copied range
 */
var slice32 = function (view, start, end) {
    if (view.slice) {
        return view.slice(start, end);
    }
    var copied = Array.prototype.slice.call(view, start, end);
    return new Uint32Array(copied);
};
/**
 * Build a Trie from its base64-serialized form.
 *
 * The decoded bytes start with a 24-byte header read as six 32-bit words:
 * words 0..3 are passed to the Trie constructor as initialValue, errorValue,
 * highStart and highValueIndex; word 4 is the byte length of the index
 * table; word 5 is the width of a data element in bytes (2 selects 16-bit
 * data, anything else is read as 32-bit data). The index table follows the
 * header and the data table follows the index table.
 *
 * @param base64 the serialized trie
 * @param _byteLength unused; kept so existing callers keep working
 * @return the reconstructed Trie
 */
var createTrieFromBase64 = function (base64, _byteLength) {
    var buffer = decode(base64);
    // decode() returns a plain array when typed arrays are unavailable.
    var isPlainArray = Array.isArray(buffer);
    var view32 = isPlainArray ? polyUint32Array(buffer) : new Uint32Array(buffer);
    var view16 = isPlainArray ? polyUint16Array(buffer) : new Uint16Array(buffer);
    var headerLength = 24;
    var indexByteLength = view32[4];
    // Byte offset where the index table ends and the data table begins.
    var dataByteOffset = headerLength + indexByteLength;
    // The index runs right up to the start of the data. Ending the slice at
    // indexByteLength / 2 (without the header offset) would drop its last
    // headerLength / 2 entries.
    var index = slice16(view16, headerLength / 2, dataByteOffset / 2);
    var data = view32[5] === 2
        ? slice16(view16, dataByteOffset / 2)
        : slice32(view32, Math.ceil(dataByteOffset / 4));
    return new Trie(view32[0], view32[1], view32[2], view32[3], index, data);
};
/**
 * Read-only code point lookup structure.
 *
 * `index` holds the index tables and `data` the values; `get` walks them
 * using the UTRIE2_* layout constants defined in this file.
 */
var Trie = /** @class */ (function () {
    /**
     * @param initialValue stored as-is; not consulted by `get`
     * @param errorValue value returned for arguments outside 0..0x10ffff
     * @param highStart first code point of the trailing single-value range
     * @param highValueIndex position in `data` of the value for that range
     * @param index the index table
     * @param data the data table
     */
    function Trie(initialValue, errorValue, highStart, highValueIndex, index, data) {
        this.initialValue = initialValue;
        this.errorValue = errorValue;
        this.highStart = highStart;
        this.highValueIndex = highValueIndex;
        this.index = index;
        this.data = data;
    }
    /**
     * Get the value for a code point as stored in the Trie.
     *
     * @param codePoint the code point
     * @return the value, or `errorValue` when codePoint is not in 0..0x10ffff
     */
    Trie.prototype.get = function (codePoint) {
        var ix;
        // Negative numbers and NaN fail this test and fall through to errorValue.
        if (codePoint >= 0) {
            if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
                // Ordinary BMP code point, excluding leading surrogates.
                // BMP uses a single level lookup. The BMP index starts at offset 0:
                // the index entry, shifted left, gives the start of the data block
                // and the low bits of the code point select the entry inside it.
                ix = this.index[codePoint >> UTRIE2_SHIFT_2];
                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
                return this.data[ix];
            }
            if (codePoint <= 0xffff) {
                // Lead surrogate code point. A separate index section is stored for
                // lead surrogate code units and code points.
                //   The main index has the code unit data.
                //   For this function, we need the code point data.
                // Note: this expression could be refactored for slightly improved efficiency, but
                //       surrogate code points will be so rare in practice that it's not worth it.
                ix = this.index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
                return this.data[ix];
            }
            if (codePoint < this.highStart) {
                // Supplemental code point, use two-level lookup:
                // index-1 entry -> index-2 block -> data block.
                ix = UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH + (codePoint >> UTRIE2_SHIFT_1);
                ix = this.index[ix];
                ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
                ix = this.index[ix];
                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
                return this.data[ix];
            }
            if (codePoint <= 0x10ffff) {
                // Everything from highStart upwards shares a single value.
                return this.data[this.highValueIndex];
            }
        }
        // Fall through. The code point is outside of the legal range of 0..0x10ffff.
        return this.errorValue;
    };
    return Trie;
}());
|  | 
 | |||
/*
 * base64-arraybuffer 1.0.2 <https://github.com/niklasvh/base64-arraybuffer>
 * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
 * Released under MIT License
 */
// The 64-character base64 alphabet; a character's position is its 6-bit value.
var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
// Reverse lookup table: character code -> 6-bit value.
// A plain array is used where Uint8Array is unavailable.
// NOTE(review): nothing in the visible part of this file reads `lookup`;
// kept because the rest of the bundle may rely on it.
var lookup = typeof Uint8Array === 'undefined' ? [] : new Uint8Array(256);
for (var i = 0; i < chars.length; i++) {
    lookup[chars.charCodeAt(i)] = i;
}
|  | 
 | |||
// Numeric grapheme-break class ids. These are the values that
// codePointToClass() is compared against in _graphemeBreakAtIndex().
// The ids 0 and 6 have no named constant in this file.
var Prepend = 1;
var CR = 2;
var LF = 3;
var Control = 4;
var Extend = 5;
var SpacingMark = 7;
// Hangul syllable types (leading consonant, vowel, trailing consonant,
// LV syllable, LVT syllable).
var L = 8;
var V = 9;
var T = 10;
var LV = 11;
var LVT = 12;
var ZWJ = 13;
var Extended_Pictographic = 14;
// Regional indicator.
var RI = 15;
/**
 * Convert a string into an array of Unicode code points.
 *
 * A lead surrogate followed by a trail surrogate is combined into a single
 * supplementary code point. Unpaired surrogates are kept as their own
 * code unit values.
 *
 * @param str the string to convert
 * @return array of code point numbers
 */
var toCodePoints = function (str) {
    var codePoints = [];
    var length = str.length;
    var i = 0;
    while (i < length) {
        var unit = str.charCodeAt(i++);
        var isLeadSurrogate = unit >= 0xd800 && unit <= 0xdbff;
        if (isLeadSurrogate && i < length) {
            var following = str.charCodeAt(i);
            if ((following & 0xfc00) === 0xdc00) {
                // Valid pair: consume the trail surrogate as well.
                i++;
                codePoints.push(((unit & 0x3ff) << 10) + (following & 0x3ff) + 0x10000);
                continue;
            }
        }
        // BMP character, or a surrogate without a partner.
        codePoints.push(unit);
    }
    return codePoints;
};
/**
 * Build a string from code points given as individual arguments.
 *
 * Delegates to the native String.fromCodePoint when it exists. Otherwise
 * code points above 0xffff are split into surrogate pairs and the string is
 * assembled with String.fromCharCode.
 *
 * @param ...codePoints the code points, in order
 * @return the resulting string ('' when called without arguments)
 */
var fromCodePoint = function () {
    var codePoints = Array.prototype.slice.call(arguments);
    if (String.fromCodePoint) {
        return String.fromCodePoint.apply(String, codePoints);
    }
    var length = codePoints.length;
    if (!length) {
        return '';
    }
    var codeUnits = [];
    var result = '';
    for (var index = 0; index < length; index++) {
        var codePoint = codePoints[index];
        if (codePoint <= 0xffff) {
            codeUnits.push(codePoint);
        }
        else {
            // Split into a lead/trail surrogate pair.
            codePoint -= 0x10000;
            codeUnits.push((codePoint >> 10) + 0xd800, (codePoint % 0x400) + 0xdc00);
        }
        // Flush in chunks so a single fromCharCode call never receives an
        // excessive number of arguments; always flush on the last code point.
        if (index + 1 === length || codeUnits.length > 0x4000) {
            result += String.fromCharCode.apply(String, codeUnits);
            codeUnits.length = 0;
        }
    }
    return result;
};
// Trie decoded once, at module load, from the embedded `base64` table.
var UnicodeTrie = createTrieFromBase64(base64);
// Markers returned by _graphemeBreakAtIndex().
var BREAK_NOT_ALLOWED = '×';
var BREAK_ALLOWED = '÷';
/**
 * Look up the class id stored in UnicodeTrie for a code point.
 *
 * @param codePoint the code point
 * @return the stored class id
 */
var codePointToClass = function (codePoint) {
    return UnicodeTrie.get(codePoint);
};
/**
 * Decide whether a grapheme cluster break is allowed immediately before
 * position `index`, i.e. between classTypes[index - 1] and classTypes[index].
 *
 * @param _codePoints unused; kept for signature compatibility
 * @param classTypes class id of every code point in the text
 * @param index position of the code point that follows the candidate break
 * @return BREAK_NOT_ALLOWED or BREAK_ALLOWED
 */
var _graphemeBreakAtIndex = function (_codePoints, classTypes, index) {
    var prevIndex = index - 2;
    var prev = classTypes[prevIndex];
    var current = classTypes[index - 1];
    var next = classTypes[index];
    // GB3 Do not break between a CR and LF.
    if (current === CR && next === LF) {
        return BREAK_NOT_ALLOWED;
    }
    // GB4 Otherwise, break after controls.
    if (current === CR || current === LF || current === Control) {
        return BREAK_ALLOWED;
    }
    // GB5 Break before controls.
    if (next === CR || next === LF || next === Control) {
        return BREAK_ALLOWED;
    }
    // Do not break Hangul syllable sequences.
    // GB6
    if (current === L && [L, V, LV, LVT].indexOf(next) !== -1) {
        return BREAK_NOT_ALLOWED;
    }
    // GB7
    if ((current === LV || current === V) && (next === V || next === T)) {
        return BREAK_NOT_ALLOWED;
    }
    // GB8
    if ((current === LVT || current === T) && next === T) {
        return BREAK_NOT_ALLOWED;
    }
    // GB9 Do not break before extending characters or ZWJ.
    if (next === ZWJ || next === Extend) {
        return BREAK_NOT_ALLOWED;
    }
    // Do not break before SpacingMarks, or after Prepend characters.
    // GB9a
    if (next === SpacingMark) {
        return BREAK_NOT_ALLOWED;
    }
    // GB9b
    if (current === Prepend) {
        return BREAK_NOT_ALLOWED;
    }
    // GB11 Do not break within emoji modifier sequences or emoji zwj sequences.
    if (current === ZWJ && next === Extended_Pictographic) {
        // Skip back over any Extend characters sitting before the ZWJ.
        // Reading before the start yields undefined, which ends the loop.
        while (prev === Extend) {
            prev = classTypes[--prevIndex];
        }
        if (prev === Extended_Pictographic) {
            return BREAK_NOT_ALLOWED;
        }
    }
    // GB12/GB13 Do not break within emoji flag sequences.
    // That is, do not break between regional indicator (RI) symbols
    // if there is an odd number of RI characters before the break point.
    if (current === RI && next === RI) {
        // Count the RIs that precede `current`; an even count means `current`
        // opens a new pair and must stay attached to `next`.
        var countRI = 0;
        while (prev === RI) {
            countRI++;
            prev = classTypes[--prevIndex];
        }
        if (countRI % 2 === 0) {
            return BREAK_NOT_ALLOWED;
        }
    }
    return BREAK_ALLOWED;
};
/**
 * Create a stepper that yields the grapheme clusters of a string one by one.
 *
 * @param str the text to segment
 * @return an object whose `next()` returns `{ value, done: false }` for each
 *         cluster in order and `{ done: true, value: null }` once the text
 *         is exhausted
 */
var GraphemeBreaker = function (str) {
    var codePoints = toCodePoints(str);
    var length = codePoints.length;
    var classTypes = codePoints.map(codePointToClass);
    // `index` is the scan position, `lastEnd` the start of the next cluster.
    var index = 0;
    var lastEnd = 0;
    return {
        next: function () {
            if (index >= length) {
                return { done: true, value: null };
            }
            // Advance at least one code point, then keep going while breaking
            // is forbidden. The scan ends at an allowed break or at the end
            // of the text, so a cluster is always produced here.
            var graphemeBreak;
            do {
                index++;
                graphemeBreak = _graphemeBreakAtIndex(codePoints, classTypes, index);
            } while (index < length && graphemeBreak === BREAK_NOT_ALLOWED);
            var value = fromCodePoint.apply(null, codePoints.slice(lastEnd, index));
            lastEnd = index;
            return { value: value, done: false };
        },
    };
};
/**
 * Split a string into its grapheme clusters.
 *
 * @param str the text to split
 * @return array of cluster strings in order; empty values reported by the
 *         breaker are skipped
 */
var splitGraphemes = function (str) {
    var breaker = GraphemeBreaker(str);
    var graphemes = [];
    for (var bk = breaker.next(); !bk.done; bk = breaker.next()) {
        if (bk.value) {
            graphemes.push(bk.value.slice());
        }
    }
    return graphemes;
};
|  | 
 | |||
|  | export { GraphemeBreaker, fromCodePoint, splitGraphemes, toCodePoints }; | |||
|  | //# sourceMappingURL=text-segmentation.es5.js.map
 |