385 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
		
		
			
		
	
	
			385 lines
		
	
	
		
			40 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
| 
								 | 
							
								/*
							 | 
						|||
| 
								 | 
							
								 * text-segmentation 1.0.3 <https://github.com/niklasvh/text-segmentation>
							 | 
						|||
| 
								 | 
							
								 * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
							 | 
						|||
| 
								 | 
							
								 * Released under MIT License
							 | 
						|||
| 
								 | 
							
								 */
							 | 
						|||
| 
								 | 
							
								var base64 = 'AAAAAAAAAAAAEA4AGBkAAFAaAAACAAAAAAAIABAAGAAwADgACAAQAAgAEAAIABAACAAQAAgAEAAIABAACAAQAAgAEAAIABAAQABIAEQATAAIABAACAAQAAgAEAAIABAAVABcAAgAEAAIABAACAAQAGAAaABwAHgAgACIAI4AlgAIABAAmwCjAKgAsAC2AL4AvQDFAMoA0gBPAVYBWgEIAAgACACMANoAYgFkAWwBdAF8AX0BhQGNAZUBlgGeAaMBlQGWAasBswF8AbsBwwF0AcsBYwHTAQgA2wG/AOMBdAF8AekB8QF0AfkB+wHiAHQBfAEIAAMC5gQIAAsCEgIIAAgAFgIeAggAIgIpAggAMQI5AkACygEIAAgASAJQAlgCYAIIAAgACAAKBQoFCgUTBRMFGQUrBSsFCAAIAAgACAAIAAgACAAIAAgACABdAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACABoAmgCrwGvAQgAbgJ2AggAHgEIAAgACADnAXsCCAAIAAgAgwIIAAgACAAIAAgACACKAggAkQKZAggAPADJAAgAoQKkAqwCsgK6AsICCADJAggA0AIIAAgACAAIANYC3gIIAAgACAAIAAgACABAAOYCCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAkASoB+QIEAAgACAA8AEMCCABCBQgACABJBVAFCAAIAAgACAAIAAgACAAIAAgACABTBVoFCAAIAFoFCABfBWUFCAAIAAgACAAIAAgAbQUIAAgACAAIAAgACABzBXsFfQWFBYoFigWKBZEFigWKBYoFmAWfBaYFrgWxBbkFCAAIAAgACAAIAAgACAAIAAgACAAIAMEFCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAMgFCADQBQgACAAIAAgACAAIAAgACAAIAAgACAAIAO4CCAAIAAgAiQAIAAgACABAAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAD0AggACAD8AggACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIANYFCAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACA
AIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAgACAAIAAMDvwAIAAgAJAIIAAgACAAIAAgACAAIAAgACwMTAwgACAB9BOsEGwMjAwgAKwMyAwsFYgE3A/MEPwMIAEUDTQNRAwgAWQOsAGEDCAAIAAgACAAIAAgACABpAzQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQ
U6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFNgU3BTgFOQU6BTQFNQU2BTcFOAU5BToFNAU1BTYFNwU4BTkFOgU0BTUFN
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								/*
							 | 
						|||
| 
								 | 
							
								 * utrie 1.0.2 <https://github.com/niklasvh/utrie>
							 | 
						|||
| 
								 | 
							
								 * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
							 | 
						|||
| 
								 | 
							
								 * Released under MIT License
							 | 
						|||
| 
								 | 
							
								 */
							 | 
						|||
| 
								 | 
							
								var chars$1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
								// Reverse lookup table: character code -> 6-bit base64 value.
								// A plain array stands in when typed arrays are unavailable.
								var lookup$1 = typeof Uint8Array === 'undefined' ? [] : new Uint8Array(256);
								for (var i$1 = 0; i$1 < chars$1.length; i$1++) {
								    lookup$1[chars$1.charCodeAt(i$1)] = i$1;
								}
								/**
								 * Decode a base64 string into raw bytes.
								 *
								 * @param base64 the base64 text; one or two trailing '=' shorten the result accordingly
								 * @return an ArrayBuffer when ArrayBuffer, Uint8Array and Uint8Array.prototype.slice
								 *         are all available, otherwise a plain Array holding one byte value per element
								 */
								var decode = function (base64) {
								    var bufferLength = base64.length * 0.75, len = base64.length, i, p = 0, encoded1, encoded2, encoded3, encoded4;
								    // Each trailing '=' stands for one byte that is not part of the payload.
								    if (base64[base64.length - 1] === '=') {
								        bufferLength--;
								        if (base64[base64.length - 2] === '=') {
								            bufferLength--;
								        }
								    }
								    var buffer = typeof ArrayBuffer !== 'undefined' &&
								        typeof Uint8Array !== 'undefined' &&
								        typeof Uint8Array.prototype.slice !== 'undefined'
								        ? new ArrayBuffer(bufferLength)
								        : new Array(bufferLength);
								    var bytes = Array.isArray(buffer) ? buffer : new Uint8Array(buffer);
								    // Every group of four characters carries 4 x 6 bits = three bytes.
								    for (i = 0; i < len; i += 4) {
								        encoded1 = lookup$1[base64.charCodeAt(i)];
								        encoded2 = lookup$1[base64.charCodeAt(i + 1)];
								        encoded3 = lookup$1[base64.charCodeAt(i + 2)];
								        encoded4 = lookup$1[base64.charCodeAt(i + 3)];
								        bytes[p++] = (encoded1 << 2) | (encoded2 >> 4);
								        bytes[p++] = ((encoded2 & 15) << 4) | (encoded3 >> 2);
								        bytes[p++] = ((encoded3 & 3) << 6) | (encoded4 & 63);
								    }
								    // The loop always writes three bytes per group. A typed array drops the
								    // writes that land past its end, but a plain array grows instead, so the
								    // fallback result is trimmed back to the payload length.
								    if (Array.isArray(bytes) && bytes.length > bufferLength) {
								        bytes.length = bufferLength;
								    }
								    return buffer;
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Array-based stand-in for a 16-bit view: combines each pair of
								 * consecutive byte values into one number, low byte first.
								 *
								 * @param buffer array of byte values
								 * @return array with one combined value per byte pair
								 */
								var polyUint16Array = function (buffer) {
								    var words = [];
								    var total = buffer.length;
								    var offset = 0;
								    while (offset < total) {
								        var low = buffer[offset];
								        var high = buffer[offset + 1];
								        // A missing trailing byte is undefined, which the shift treats as 0.
								        words.push((high << 8) | low);
								        offset += 2;
								    }
								    return words;
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Array-based stand-in for a 32-bit unsigned view: combines each group
								 * of four consecutive byte values into one number, low byte first.
								 *
								 * @param buffer array of byte values
								 * @return array with one unsigned 32-bit value per group of four bytes
								 */
								var polyUint32Array = function (buffer) {
								    var length = buffer.length;
								    var bytes = [];
								    for (var i = 0; i < length; i += 4) {
								        // Bitwise operators produce signed 32-bit results, so a word whose
								        // top bit is set would come out negative; ">>> 0" reinterprets it
								        // as unsigned, which is what a Uint32Array element would hold.
								        bytes.push(((buffer[i + 3] << 24) | (buffer[i + 2] << 16) | (buffer[i + 1] << 8) | buffer[i]) >>> 0);
								    }
								    return bytes;
								};
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								/** Right-shift applied to a code point to get its index-2 table offset. */
								var UTRIE2_SHIFT_2 = 5;
								/** Right-shift applied to a code point to get its index-1 table offset. 11=6+5 */
								var UTRIE2_SHIFT_1 = UTRIE2_SHIFT_2 + 6;
								/**
								 * Left-shift applied to values read from the index array.
								 * Allows a larger data array to be addressed with 16-bit index values,
								 * at the cost of compactness; data blocks must then be aligned by
								 * UTRIE2_DATA_GRANULARITY.
								 */
								var UTRIE2_INDEX_SHIFT = 2;
								/**
								 * Gap between the two shift sizes, used to derive an index-1 offset
								 * from an index-2 offset. 6=11-5
								 */
								var UTRIE2_SHIFT_1_2 = UTRIE2_SHIFT_1 - UTRIE2_SHIFT_2;
								/**
								 * In the index-2 table, the entries for U+D800..U+DBFF hold values for
								 * lead surrogate code _units_, not code _points_.
								 * Values for lead surrogate code _points_ are found through the portion
								 * of the table that starts at this offset.
								 * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
								 */
								var UTRIE2_LSCP_INDEX_2_OFFSET = 0x10000 >> UTRIE2_SHIFT_2;
								/** Entries per data block. 32=0x20 */
								var UTRIE2_DATA_BLOCK_LENGTH = 1 << UTRIE2_SHIFT_2;
								/** Mask selecting the low bits of a code point: its offset inside a data block. */
								var UTRIE2_DATA_MASK = UTRIE2_DATA_BLOCK_LENGTH - 1;
								/** Number of index-2 entries covering the lead surrogates. 32=0x20 */
								var UTRIE2_LSCP_INDEX_2_LENGTH = 0x400 >> UTRIE2_SHIFT_2;
								/** Combined length of both BMP pieces of the index-2 table. 2080=0x820 */
								var UTRIE2_INDEX_2_BMP_LENGTH = UTRIE2_LSCP_INDEX_2_OFFSET + UTRIE2_LSCP_INDEX_2_LENGTH;
								/**
								 * Offset of the 2-byte UTF-8 version of the index-2 table: 2080=0x820.
								 * Its length is 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
								 */
								var UTRIE2_UTF8_2B_INDEX_2_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH;
								var UTRIE2_UTF8_2B_INDEX_2_LENGTH = 0x800 >> 6; /* U+0800 is the first code point after 2-byte UTF-8 */
								/**
								 * Offset of the index-1 table, which is only used for supplementary
								 * code points: 2112=0x840.
								 * Its length varies: it covers code points up to highStart, where the
								 * last single-value range starts.
								 * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
								 * (For 0x100000 supplementary code points U+10000..U+10ffff.)
								 *
								 * The supplementary part of the index-2 table follows this index-1 table.
								 *
								 * When there is only BMP data, both the index-1 table and that
								 * following part of the index-2 table are left out entirely.
								 */
								var UTRIE2_INDEX_1_OFFSET = UTRIE2_UTF8_2B_INDEX_2_OFFSET + UTRIE2_UTF8_2B_INDEX_2_LENGTH;
								/**
								 * Count of index-1 entries that would cover the BMP. 32=0x20
								 * The serialized form does not contain this part of the index-1 table.
								 */
								var UTRIE2_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UTRIE2_SHIFT_1;
								/** Entries per index-2 block. 64=0x40 */
								var UTRIE2_INDEX_2_BLOCK_LENGTH = 1 << UTRIE2_SHIFT_1_2;
								/** Mask selecting the low bits that form the offset inside an index-2 block. */
								var UTRIE2_INDEX_2_MASK = UTRIE2_INDEX_2_BLOCK_LENGTH - 1;
							 | 
						|||
| 
								 | 
							
								/**
								 * Copy a range out of a 16-bit view.
								 * Uses the view's own slice() when it has one; otherwise the elements
								 * are copied generically and wrapped in a new Uint16Array.
								 *
								 * @param view  the source view or array
								 * @param start first element to include
								 * @param end   element to stop before (optional)
								 * @return the copied range
								 */
								var slice16 = function (view, start, end) {
								    return view.slice
								        ? view.slice(start, end)
								        : new Uint16Array(Array.prototype.slice.call(view, start, end));
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Copy a range out of a 32-bit view.
								 * Uses the view's own slice() when it has one; otherwise the elements
								 * are copied generically and wrapped in a new Uint32Array.
								 *
								 * @param view  the source view or array
								 * @param start first element to include
								 * @param end   element to stop before (optional)
								 * @return the copied range
								 */
								var slice32 = function (view, start, end) {
								    return view.slice
								        ? view.slice(start, end)
								        : new Uint32Array(Array.prototype.slice.call(view, start, end));
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Build a Trie from its base64-serialized form.
								 *
								 * The decoded bytes start with a 24-byte header read as 32-bit words:
								 * words 0-3 are handed to the Trie constructor as initialValue,
								 * errorValue, highStart and highValueIndex; word 4 bounds the index
								 * table and locates the data table; word 5 equal to 2 selects 16-bit
								 * data, any other value 32-bit data.
								 *
								 * @param base64      the serialized trie
								 * @param _byteLength unused
								 * @return the constructed Trie
								 */
								var createTrieFromBase64 = function (base64, _byteLength) {
								    var headerBytes = 24;
								    var raw = decode(base64);
								    var words32;
								    var words16;
								    // decode() yields a plain array when typed arrays are unavailable.
								    if (Array.isArray(raw)) {
								        words32 = polyUint32Array(raw);
								        words16 = polyUint16Array(raw);
								    }
								    else {
								        words32 = new Uint32Array(raw);
								        words16 = new Uint16Array(raw);
								    }
								    var indexBound = words32[4];
								    var indexTable = slice16(words16, headerBytes / 2, indexBound / 2);
								    var dataStartByte = headerBytes + indexBound;
								    var dataTable;
								    if (words32[5] === 2) {
								        dataTable = slice16(words16, dataStartByte / 2);
								    }
								    else {
								        dataTable = slice32(words32, Math.ceil(dataStartByte / 4));
								    }
								    return new Trie(words32[0], words32[1], words32[2], words32[3], indexTable, dataTable);
								};
							 | 
						|||
| 
								 | 
							
								var Trie = /** @class */ (function () {
								    /**
								     * Lookup structure mapping code points to stored values.
								     *
								     * @param initialValue   stored as-is
								     * @param errorValue     returned by get() for code points outside 0..0x10ffff
								     * @param highStart      first code point served by the single high value
								     * @param highValueIndex position in data of that high value
								     * @param index          the index table
								     * @param data           the data table
								     */
								    function Trie(initialValue, errorValue, highStart, highValueIndex, index, data) {
								        this.initialValue = initialValue;
								        this.errorValue = errorValue;
								        this.highStart = highStart;
								        this.highValueIndex = highValueIndex;
								        this.index = index;
								        this.data = data;
								    }
								    /** Turn an index-table entry plus a code point into a position in the data table. */
								    function dataPosition(indexEntry, codePoint) {
								        return (indexEntry << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
								    }
								    /**
								     * Look up the value stored in the Trie for a code point.
								     *
								     * @param codePoint the code point
								     * @return the stored value, or errorValue when the code point is not within 0..0x10ffff
								     */
								    Trie.prototype.get = function (codePoint) {
								        // Negative values and anything that does not compare >= 0 are illegal.
								        if (!(codePoint >= 0)) {
								            return this.errorValue;
								        }
								        if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
								            // Regular BMP code point (lead surrogates excluded): one-level
								            // lookup through the BMP index, which starts at offset 0.
								            return this.data[dataPosition(this.index[codePoint >> UTRIE2_SHIFT_2], codePoint)];
								        }
								        if (codePoint <= 0xffff) {
								            // Lead surrogate code point. The main index holds the data for
								            // lead surrogate code units; code point data sits in a separate
								            // index section, which is the one needed here.
								            var surrogateSlot = UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2);
								            return this.data[dataPosition(this.index[surrogateSlot], codePoint)];
								        }
								        if (codePoint < this.highStart) {
								            // Supplementary code point: two-level lookup, index-1 then index-2.
								            var index1Slot = UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH + (codePoint >> UTRIE2_SHIFT_1);
								            var index2Slot = this.index[index1Slot];
								            index2Slot += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
								            return this.data[dataPosition(this.index[index2Slot], codePoint)];
								        }
								        if (codePoint <= 0x10ffff) {
								            // Everything from highStart upwards shares a single value.
								            return this.data[this.highValueIndex];
								        }
								        // Beyond 0x10ffff: outside of the legal range.
								        return this.errorValue;
								    };
								    return Trie;
								}());
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								/*
							 | 
						|||
| 
								 | 
							
								 * base64-arraybuffer 1.0.2 <https://github.com/niklasvh/base64-arraybuffer>
							 | 
						|||
| 
								 | 
							
								 * Copyright (c) 2022 Niklas von Hertzen <https://hertzen.com>
							 | 
						|||
| 
								 | 
							
								 * Released under MIT License
							 | 
						|||
| 
								 | 
							
								 */
							 | 
						|||
| 
								 | 
							
								// The 64 characters of the base64 alphabet, in value order.
								var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
								// Reverse table: character code -> position in the alphabet.
								// A plain array is used when typed arrays are unavailable.
								var lookup = typeof Uint8Array !== 'undefined' ? new Uint8Array(256) : [];
								for (var i = 0; i < chars.length; i++) {
								    var charCode = chars.charCodeAt(i);
								    lookup[charCode] = i;
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// Numeric identifiers for the grapheme break classes.
								// The value 6 is not assigned to any name in this list.
								var Prepend = 1;
								var CR = 2; // carriage return
								var LF = 3; // line feed
								var Control = 4;
								var Extend = 5;
								var SpacingMark = 7;
								// Hangul syllable classes.
								var L = 8;
								var V = 9;
								var T = 10;
								var LV = 11;
								var LVT = 12;
								var ZWJ = 13; // zero width joiner
								var Extended_Pictographic = 14;
								var RI = 15; // regional indicator
							 | 
						|||
| 
								 | 
							
								/**
								 * Split a string into its code points.
								 * A lead surrogate directly followed by a trail surrogate becomes one
								 * supplementary code point; any other code unit is emitted unchanged.
								 *
								 * @param str the string to split
								 * @return array of code point numbers
								 */
								var toCodePoints = function (str) {
								    var result = [];
								    var total = str.length;
								    var pos = 0;
								    while (pos < total) {
								        var unit = str.charCodeAt(pos);
								        pos += 1;
								        var isLead = unit >= 0xd800 && unit <= 0xdbff;
								        if (!isLead || pos >= total) {
								            // Ordinary code unit, or a lead surrogate at the very end.
								            result.push(unit);
								            continue;
								        }
								        var trail = str.charCodeAt(pos);
								        if ((trail & 0xfc00) === 0xdc00) {
								            // Valid pair: consume the trail unit as well.
								            result.push(((unit & 0x3ff) << 10) + (trail & 0x3ff) + 0x10000);
								            pos += 1;
								        }
								        else {
								            // Unpaired lead surrogate: the next unit is handled on its own.
								            result.push(unit);
								        }
								    }
								    return result;
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Build a string from code points passed as individual arguments.
								 * Delegates to String.fromCodePoint when it exists; otherwise the code
								 * points are converted to UTF-16 code units by hand.
								 *
								 * @return the resulting string ('' when called without arguments)
								 */
								var fromCodePoint = function () {
								    var codePoints = Array.prototype.slice.call(arguments);
								    if (String.fromCodePoint) {
								        return String.fromCodePoint.apply(String, codePoints);
								    }
								    var total = codePoints.length;
								    if (!total) {
								        return '';
								    }
								    var pending = [];
								    var text = '';
								    for (var at = 0; at < total; at++) {
								        var value = codePoints[at];
								        if (value <= 0xffff) {
								            pending.push(value);
								        }
								        else {
								            // Supplementary code point: emit a surrogate pair.
								            value -= 0x10000;
								            pending.push((value >> 10) + 0xd800, (value % 0x400) + 0xdc00);
								        }
								        // Convert in batches so apply() never receives an oversized list.
								        if (at + 1 === total || pending.length > 0x4000) {
								            text += String.fromCharCode.apply(String, pending);
								            pending.length = 0;
								        }
								    }
								    return text;
								};
							 | 
						|||
| 
								 | 
							
								// Markers for the two possible outcomes at a candidate boundary.
								var BREAK_NOT_ALLOWED = '×';
								var BREAK_ALLOWED = '÷';
								// Trie built once, at load time, from the embedded base64 table.
								var UnicodeTrie = createTrieFromBase64(base64);
								/**
								 * Look up the class stored in the trie for a code point.
								 *
								 * @param codePoint the code point
								 * @return the value the trie holds for it
								 */
								var codePointToClass = function (codePoint) {
								    return UnicodeTrie.get(codePoint);
								};
							 | 
						|||
| 
								 | 
							
								var _graphemeBreakAtIndex = function (_codePoints, classTypes, index) {
							 | 
						|||
| 
								 | 
							
								    var prevIndex = index - 2;
							 | 
						|||
| 
								 | 
							
								    var prev = classTypes[prevIndex];
							 | 
						|||
| 
								 | 
							
								    var current = classTypes[index - 1];
							 | 
						|||
| 
								 | 
							
								    var next = classTypes[index];
							 | 
						|||
| 
								 | 
							
								    // GB3 Do not break between a CR and LF
							 | 
						|||
| 
								 | 
							
								    if (current === CR && next === LF) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB4 Otherwise, break before and after controls.
							 | 
						|||
| 
								 | 
							
								    if (current === CR || current === LF || current === Control) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB5
							 | 
						|||
| 
								 | 
							
								    if (next === CR || next === LF || next === Control) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // Do not break Hangul syllable sequences.
							 | 
						|||
| 
								 | 
							
								    // GB6
							 | 
						|||
| 
								 | 
							
								    if (current === L && [L, V, LV, LVT].indexOf(next) !== -1) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB7
							 | 
						|||
| 
								 | 
							
								    if ((current === LV || current === V) && (next === V || next === T)) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB8
							 | 
						|||
| 
								 | 
							
								    if ((current === LVT || current === T) && next === T) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB9 Do not break before extending characters or ZWJ.
							 | 
						|||
| 
								 | 
							
								    if (next === ZWJ || next === Extend) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // Do not break before SpacingMarks, or after Prepend characters.
							 | 
						|||
| 
								 | 
							
								    // GB9a
							 | 
						|||
| 
								 | 
							
								    if (next === SpacingMark) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB9b
							 | 
						|||
| 
								 | 
							
								    if (current === Prepend) {
							 | 
						|||
| 
								 | 
							
								        return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB11 Do not break within emoji modifier sequences or emoji zwj sequences.
							 | 
						|||
| 
								 | 
							
								    if (current === ZWJ && next === Extended_Pictographic) {
							 | 
						|||
| 
								 | 
							
								        while (prev === Extend) {
							 | 
						|||
| 
								 | 
							
								            prev = classTypes[--prevIndex];
							 | 
						|||
| 
								 | 
							
								        }
							 | 
						|||
| 
								 | 
							
								        if (prev === Extended_Pictographic) {
							 | 
						|||
| 
								 | 
							
								            return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								        }
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    // GB12 Do not break within emoji flag sequences.
							 | 
						|||
| 
								 | 
							
								    // That is, do not break between regional indicator (RI) symbols
							 | 
						|||
| 
								 | 
							
								    // if there is an odd number of RI characters before the break point.
							 | 
						|||
| 
								 | 
							
								    if (current === RI && next === RI) {
							 | 
						|||
| 
								 | 
							
								        var countRI = 0;
							 | 
						|||
| 
								 | 
							
								        while (prev === RI) {
							 | 
						|||
| 
								 | 
							
								            countRI++;
							 | 
						|||
| 
								 | 
							
								            prev = classTypes[--prevIndex];
							 | 
						|||
| 
								 | 
							
								        }
							 | 
						|||
| 
								 | 
							
								        if (countRI % 2 === 0) {
							 | 
						|||
| 
								 | 
							
								            return BREAK_NOT_ALLOWED;
							 | 
						|||
| 
								 | 
							
								        }
							 | 
						|||
| 
								 | 
							
								    }
							 | 
						|||
| 
								 | 
							
								    return BREAK_ALLOWED;
							 | 
						|||
| 
								 | 
							
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Creates a pull-style iterator over the grapheme clusters of a string.
								 *
								 * The input is converted to code points with toCodePoints and each code
								 * point is mapped to a break class with codePointToClass once, up front.
								 * Every call to next() then scans forward until _graphemeBreakAtIndex
								 * reports something other than BREAK_NOT_ALLOWED, or the input ends.
								 *
								 * @param {string} str - Text to segment.
								 * @returns {{next: function(): {done: boolean, value: (string|null)}}}
								 *     Object whose next() yields { value, done: false } per cluster and
								 *     { done: true, value: null } once the input is exhausted.
								 */
								var GraphemeBreaker = function (str) {
								    var codePoints = toCodePoints(str);
								    var classTypes = codePoints.map(codePointToClass);
								    var length = codePoints.length;
								    // Position of the boundary candidate currently being examined.
								    var index = 0;
								    // Start offset (in code points) of the cluster not yet emitted.
								    var lastEnd = 0;
								    var advance = function () {
								        if (index >= length) {
								            return { done: true, value: null };
								        }
								        var graphemeBreak = BREAK_NOT_ALLOWED;
								        // Step one code point at a time until a break is permitted.
								        while (index < length) {
								            index += 1;
								            graphemeBreak = _graphemeBreakAtIndex(codePoints, classTypes, index);
								            if (graphemeBreak !== BREAK_NOT_ALLOWED) {
								                break;
								            }
								        }
								        // No break found and input not fully consumed: nothing to emit.
								        if (graphemeBreak === BREAK_NOT_ALLOWED && index !== length) {
								            return { done: true, value: null };
								        }
								        var cluster = codePoints.slice(lastEnd, index);
								        var value = fromCodePoint.apply(null, cluster);
								        lastEnd = index;
								        return { value: value, done: false };
								    };
								    return {
								        next: advance,
								    };
								};
							 | 
						|||
| 
								 | 
							
								/**
								 * Splits a string into an array of grapheme clusters.
								 *
								 * Drains a GraphemeBreaker for the given string and collects every
								 * truthy value it yields, in order.
								 *
								 * @param {string} str - Text to segment.
								 * @returns {Array} The collected cluster values, in input order.
								 */
								var splitGraphemes = function (str) {
								    var graphemes = [];
								    var breaker = GraphemeBreaker(str);
								    for (var step = breaker.next(); !step.done; step = breaker.next()) {
								        // Falsy values (e.g. an empty string) are skipped.
								        if (!step.value) {
								            continue;
								        }
								        graphemes.push(step.value.slice());
								    }
								    return graphemes;
								};
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								export { GraphemeBreaker, fromCodePoint, splitGraphemes, toCodePoints };
							 | 
						|||
| 
								 | 
							
								//# sourceMappingURL=text-segmentation.es5.js.map
							 |