8
8
#error "TEXTDECODER must be either 1 or 2"
9
9
#endif
10
10
11
+ #if TEXTENCODER != 1 && TEXTENCODER != 2
12
+ #error "TEXTENCODER must be either 1 or 2"
13
+ #endif
14
+
11
15
addToLibrary ( {
12
16
// TextDecoder constructor defaults to UTF-8
13
17
#if TEXTDECODER == 2
@@ -16,6 +20,13 @@ addToLibrary({
16
20
$UTF8Decoder : "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined" ,
17
21
#endif
18
22
23
+ // TextEncoder constructor defaults to UTF-8
24
+ #if TEXTENCODER == 2
25
+ $UTF8Encoder : "new TextEncoder()" ,
26
+ #else
27
+ $UTF8Encoder : "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined" ,
28
+ #endif
29
+
19
30
$findStringEnd : ( heapOrArray , idx , maxBytesToRead , ignoreNul ) => {
20
31
var maxIdx = idx + maxBytesToRead ;
21
32
if ( ignoreNul ) return maxIdx ;
@@ -133,7 +144,7 @@ addToLibrary({
133
144
* terminator) that this function will write.
134
145
*
135
146
* @param {string } str - The Javascript string to copy.
136
- * @param {ArrayBufferView|Array<number> } heap - The array to copy to. Each
147
+ * @param {ArrayBufferView } heap - The array to copy to. Each
137
148
* index in this array is assumed
138
149
* to be one 8-bit element.
139
150
* @param {number } outIdx - The starting offset in the array to begin the copying.
@@ -147,9 +158,12 @@ addToLibrary({
147
158
* terminator.
148
159
* @return {number } The number of bytes written, EXCLUDING the null terminator.
149
160
*/
161
+ $stringToUTF8Array__deps : [
162
+ '$UTF8Encoder' ,
150
163
#if ASSERTIONS
151
- $stringToUTF8Array__deps : [ '$warnOnce' ] ,
164
+ '$warnOnce' ,
152
165
#endif
166
+ ] ,
153
167
$stringToUTF8Array : ( str , heap , outIdx , maxBytesToWrite ) => {
154
168
#if CAN_ADDRESS_2GB
155
169
outIdx >>>= 0 ;
@@ -162,6 +176,26 @@ addToLibrary({
162
176
if ( ! ( maxBytesToWrite > 0 ) )
163
177
return 0 ;
164
178
179
+ #if TEXTENCODER == 2
180
+ // Always use TextEncoder when TEXTENCODER == 2
181
+ var encoded = UTF8Encoder . encode ( str ) ;
182
+ var bytesToWrite = Math . min ( encoded . length , maxBytesToWrite - 1 ) ; // -1 for null terminator
183
+ encoded = encoded . subarray ( 0 , bytesToWrite )
184
+ heap . set ( encoded , outIdx )
185
+ heap [ outIdx + bytesToWrite ] = 0 ;
186
+ return bytesToWrite ;
187
+ #else
188
+ // When using conditional TextEncoder, use it for longer strings if available
189
+ if ( str . length > 16 && UTF8Encoder ) {
190
+ var encoded = UTF8Encoder . encode ( str ) ;
191
+ var bytesToWrite = Math . min ( encoded . length , maxBytesToWrite - 1 ) ; // -1 for null terminator
192
+ encoded = encoded . subarray ( 0 , bytesToWrite )
193
+ heap . set ( encoded , outIdx )
194
+ heap [ outIdx + bytesToWrite ] = 0 ;
195
+ return bytesToWrite ;
196
+ }
197
+
198
+ // Fallback: manual UTF-8 encoding
165
199
var startIdx = outIdx ;
166
200
var endIdx = outIdx + maxBytesToWrite - 1 ; // -1 for string null terminator.
167
201
for ( var i = 0 ; i < str . length ; ++ i ) {
@@ -198,6 +232,7 @@ addToLibrary({
198
232
// Null-terminate the pointer to the buffer.
199
233
heap [ outIdx ] = 0 ;
200
234
return outIdx - startIdx ;
235
+ #endif // TEXTENCODER == 2
201
236
} ,
202
237
203
238
/**
@@ -218,24 +253,54 @@ addToLibrary({
218
253
} ,
219
254
220
255
/**
221
- * Returns the number of bytes the given JavaScript string takes if encoded as a
256
+ * Returns the number of bytes the given JavaScript string takes if encoded as a
222
257
* UTF8 byte array, EXCLUDING the null terminator byte.
223
258
*
224
- * @param {string } str - The JavaScript string to operate on.
225
- * @return {number } The length , in bytes, of the UTF-8 encoded string.
259
+ * @param {string } str - JavaScript string to operate on
260
+ * @return {number } Length , in bytes, of the UTF8 encoded string.
226
261
*/
262
+ $lengthBytesUTF8__deps : [ '$UTF8Encoder' ] ,
227
263
$lengthBytesUTF8 : ( str ) => {
228
- return UTF8Decoder . encode ( str ) . length ;
264
+ #if TEXTENCODER == 2
265
+ // Always use TextEncoder when TEXTENCODER == 2
266
+ return UTF8Encoder . encode ( str ) . length ;
267
+ #else
268
+ // When using conditional TextEncoder, use it whenever it is available
// NOTE(review): unlike stringToUTF8Array, no str.length threshold is applied
// here before choosing the TextEncoder path - confirm this is intended.
269
+ if ( UTF8Encoder ) {
270
+ return UTF8Encoder . encode ( str ) . length ;
271
+ }
272
+
273
+ // Fallback: manual calculation
274
+ var len = 0 ;
275
+ for ( var i = 0 ; i < str . length ; ++ i ) {
276
+ // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
277
+ // unit, not a Unicode code point of the character! So decode
278
+ // UTF16->UTF32->UTF8.
279
+ // See http://unicode.org/faq/utf_bom.html#utf16-3
280
+ var c = str . charCodeAt ( i ) ; // possibly a lead surrogate
281
+ if ( c <= 0x7F ) {
282
+ len ++ ;
283
+ } else if ( c <= 0x7FF ) {
284
+ len += 2 ;
285
+ } else if ( c >= 0xD800 && c <= 0xDFFF ) {
// Any surrogate code unit is counted as a 4-byte sequence, and the
// following code unit is skipped (++i) as the other half of the pair.
286
+ len += 4 ; ++ i ;
287
+ } else {
288
+ len += 3 ;
289
+ }
290
+ }
291
+ return len ;
292
+ #endif // TEXTENCODER == 2
229
293
} ,
230
294
231
295
// Encodes `stringy` as UTF-8 through stringToUTF8Array and returns the bytes
// as a plain JavaScript Array.
$intArrayFromString__docs : '/** @type {function(string, boolean=, number=)} */' ,
232
296
$intArrayFromString__deps : [ '$lengthBytesUTF8' , '$stringToUTF8Array' ] ,
233
297
$intArrayFromString : ( stringy , dontAddNull , length ) => {
234
298
// A positive `length` fixes the buffer size; otherwise the buffer is sized to
// the encoded byte length plus one slot for the null terminator.
var len = length > 0 ? length : lengthBytesUTF8 ( stringy ) + 1 ;
235
- var u8array = new Array ( len ) ;
299
// The typed array is what stringToUTF8Array writes into.
+ var u8array = new Uint8Array ( len ) ;
236
300
var numBytesWritten = stringToUTF8Array ( stringy , u8array , 0 , u8array . length ) ;
237
- if ( dontAddNull ) u8array . length = numBytesWritten ;
238
- return u8array ;
301
// With dontAddNull, keep only the bytes actually written.
+ if ( dontAddNull )
302
+ u8array = u8array . subarray ( 0 , numBytesWritten ) ;
303
// Copy the typed array into a plain Array for the return value.
+ return Array . from ( u8array ) ;
239
304
} ,
240
305
241
306
$intArrayToString : ( array ) => {
0 commit comments