| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295 | 
							- "use strict";
 
- const whatwgEncoding = require("whatwg-encoding");
 
- // https://html.spec.whatwg.org/#encoding-sniffing-algorithm
 
- module.exports = (buffer, { transportLayerEncodingLabel, defaultEncoding = "windows-1252" } = {}) => {
 
-   let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910
 
-   if (encoding === null && transportLayerEncodingLabel !== undefined) {
 
-     encoding = whatwgEncoding.labelToName(transportLayerEncodingLabel);
 
-   }
 
-   if (encoding === null) {
 
-     encoding = prescanMetaCharset(buffer);
 
-   }
 
-   if (encoding === null) {
 
-     encoding = defaultEncoding;
 
-   }
 
-   return encoding;
 
- };
 
- // https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
 
/**
 * Prescans the beginning of an HTML byte stream for a `<meta>` element that
 * declares a character encoding.
 *
 * At most the first 1024 bytes are scanned. Comments, other tags (start and
 * end tags alike) and `<!…>` / `<?…>` constructs are skipped so that text
 * inside them cannot be mistaken for a `<meta>` element.
 *
 * A `<meta>` is honoured when it has a `charset` attribute, or a `content`
 * attribute containing a charset declaration together with
 * `http-equiv="content-type"`. A declared UTF-16LE/UTF-16BE is reported as
 * UTF-8 and x-user-defined as windows-1252.
 *
 * @param {Uint8Array} buffer - Indexable byte sequence with a `length`
 *   (presumably a Node.js Buffer or Uint8Array; confirm against callers).
 * @returns {string|null} The declared encoding, or `null` when no usable
 *   declaration was found in the scanned window.
 */
function prescanMetaCharset(buffer) {
  const isAsciiLetter = (byte) => (byte >= 0x41 && byte <= 0x5A) || (byte >= 0x61 && byte <= 0x7A);

  // Never look past the first 1024 bytes.
  const l = Math.min(buffer.length, 1024);

  for (let i = 0; i < l; i++) {
    // Only "<" can start anything of interest.
    if (buffer[i] !== 0x3C) {
      continue;
    }

    const c1 = buffer[i + 1];
    const c2 = buffer[i + 2];
    const c3 = buffer[i + 3];
    const c4 = buffer[i + 4];
    const c5 = buffer[i + 5];

    if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) {
      // "<!--": skip to the closing "-->". Scanning starts right after the
      // opener, so its two dashes may double as the closer's ("<!-->").
      for (i += 4; i < l; i++) {
        if (buffer[i] === 0x3E && buffer[i - 1] === 0x2D && buffer[i - 2] === 0x2D) {
          break;
        }
      }
    } else if ((c1 === 0x4D || c1 === 0x6D) &&
               (c2 === 0x45 || c2 === 0x65) &&
               (c3 === 0x54 || c3 === 0x74) &&
               (c4 === 0x41 || c4 === 0x61) &&
               (isSpaceCharacter(c5) || c5 === 0x2F)) {
      // "<meta" (any case) followed by a space or "/".
      i += 6;

      const attributeList = new Set();
      let gotPragma = false;
      let needPragma = null;
      let charset = null;

      let attrRes;
      do {
        attrRes = getAttribute(buffer, i, l);
        // Only the first occurrence of each attribute name counts.
        if (attrRes.attr && !attributeList.has(attrRes.attr.name)) {
          const { name, value } = attrRes.attr;
          attributeList.add(name);
          if (name === "http-equiv") {
            gotPragma = value === "content-type";
          } else if (name === "content" && !charset) {
            charset = extractCharacterEncodingFromMeta(value);
            if (charset !== null) {
              needPragma = true;
            }
          } else if (name === "charset") {
            charset = whatwgEncoding.labelToName(value);
            needPragma = false;
          }
        }
        i = attrRes.i;
      } while (attrRes.attr);

      // Not a usable declaration: no charset/content attribute at all, a
      // content-based charset without the http-equiv pragma, or an
      // unrecognised label. Keep scanning.
      if (needPragma === null || (needPragma === true && gotPragma === false) || charset === null) {
        continue;
      }

      if (charset === "UTF-16LE" || charset === "UTF-16BE") {
        charset = "UTF-8";
      }
      if (charset === "x-user-defined") {
        charset = "windows-1252";
      }
      return charset;
    } else if (isAsciiLetter(c1) || (c1 === 0x2F && isAsciiLetter(c2))) {
      // Start tag "<x…" or end tag "</x…". Both are skipped the same way:
      // move past the tag name, then consume attributes so that a ">" inside
      // a quoted attribute value does not end the tag early.
      for (i += 2; i < l; i++) {
        const c = buffer[i];
        // space or >
        if (isSpaceCharacter(c) || c === 0x3E) {
          break;
        }
      }
      let attrRes;
      do {
        attrRes = getAttribute(buffer, i, l);
        i = attrRes.i;
      } while (attrRes.attr);
    } else if (c1 === 0x21 || c1 === 0x2F || c1 === 0x3F) {
      // "<!", "</" (not followed by a letter) or "<?": skip to the first ">".
      for (i += 2; i < l; i++) {
        if (buffer[i] === 0x3E) {
          break;
        }
      }
    }
  }

  return null;
}
 
- // https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing
 
/**
 * Reads one attribute from `buffer`, starting at index `i` and never
 * inspecting a loop position at or beyond `l`.
 *
 * Leading whitespace and "/" bytes are skipped. Bytes A-Z in the name and the
 * value are lowercased; every other byte is copied through
 * `String.fromCharCode` unchanged.
 *
 * @param {Uint8Array} buffer - Indexable byte sequence being scanned.
 * @param {number} i - Index at which to start.
 * @param {number} l - Exclusive upper bound for the scan.
 * @returns {{attr?: {name: string, value: string}, i: number}} When an
 *   attribute was read, `attr` holds it and `i` is where scanning should
 *   resume. When `attr` is absent, either a ">" was reached (`i` points at
 *   it) or the bytes ran out (`i` is at or beyond `l`).
 */
function getAttribute(buffer, i, l) {
  const toLowerChar = (byte) => String.fromCharCode(byte >= 0x41 && byte <= 0x5A ? byte + 0x20 : byte);

  // Skip whitespace and "/" in front of the attribute.
  while (i < l && (isSpaceCharacter(buffer[i]) || buffer[i] === 0x2F)) {
    i++;
  }
  // End of the tag (">") or end of the window: nothing to read.
  if (i >= l || buffer[i] === 0x3E) {
    return { i };
  }

  let name = "";
  let value = "";
  const found = (resumeAt) => ({ attr: { name, value }, i: resumeAt });

  // --- attribute name ---
  while (i < l) {
    const nameByte = buffer[i];

    // "=" ends the name, unless the name is still empty.
    if (nameByte === 0x3D && name !== "") {
      i++;
      break;
    }

    if (isSpaceCharacter(nameByte)) {
      // Whitespace after the name: an "=" may still follow it.
      i++;
      while (i < l && isSpaceCharacter(buffer[i])) {
        i++;
      }
      if (i < l) {
        if (buffer[i] !== 0x3D) {
          // Attribute without a value.
          return found(i);
        }
        i++;
      }
      break;
    }

    // "/" or ">" ends a value-less attribute.
    if (nameByte === 0x2F || nameByte === 0x3E) {
      return found(i);
    }

    name += toLowerChar(nameByte);
    i++;
  }

  // --- whitespace between "=" and the value ---
  let byte = buffer[i];
  if (isSpaceCharacter(byte)) {
    do {
      i++;
      if (i >= l) {
        break;
      }
      byte = buffer[i];
    } while (isSpaceCharacter(byte));
  }

  // --- quoted value ---
  if (byte === 0x22 || byte === 0x27) {
    const quote = byte;
    for (i++; i < l; i++) {
      byte = buffer[i];
      if (byte === quote) {
        return found(i + 1);
      }
      value += toLowerChar(byte);
    }
  }

  // --- unquoted value ---
  if (byte === 0x3E) {
    return found(i);
  }
  value += toLowerChar(byte);
  for (i++; i < l; i++) {
    byte = buffer[i];
    // space or ">" terminates the value
    if (isSpaceCharacter(byte) || byte === 0x3E) {
      return found(i);
    }
    value += toLowerChar(byte);
  }

  // Ran out of bytes in the middle of an attribute: report none, with the
  // index moved one past the final position.
  return { i: i + 1 };
}
 
/**
 * Extracts a character encoding from the value of a `<meta content="…">`
 * attribute, e.g. `"text/html; charset=utf-8"`.
 *
 * The first occurrence of "charset" (case-insensitive) that is followed by
 * optional whitespace and "=" is used. The label after the "=" may be wrapped
 * in single or double quotes; an unquoted label ends at the first ASCII
 * whitespace or ";".
 *
 * @param {string} string - The attribute value to parse.
 * @returns {string|null} The encoding name resolved through
 *   `whatwgEncoding.labelToName`, or `null` when there is no charset
 *   declaration, the string ends before a label, or a quote is unmatched.
 */
function extractCharacterEncodingFromMeta(string) {
  // Advances past ASCII whitespace. `charCodeAt` yields NaN beyond the end of
  // the string, so running off the end stops the loop instead of throwing.
  const skipWhitespace = (index) => {
    while (index < string.length && isSpaceCharacter(string.charCodeAt(index))) {
      ++index;
    }
    return index;
  };

  let position = 0;

  while (true) {
    const indexOfCharset = string.substring(position).search(/charset/i);
    if (indexOfCharset === -1) {
      return null;
    }

    const subPosition = skipWhitespace(position + indexOfCharset + "charset".length);

    if (string[subPosition] !== "=") {
      // Not a declaration (this includes hitting the end of the string);
      // look for a later "charset".
      position = subPosition - 1;
      continue;
    }

    position = skipWhitespace(subPosition + 1);
    break;
  }

  // Nothing follows the "=": there is no label to read.
  if (position >= string.length) {
    return null;
  }

  if (string[position] === "\"" || string[position] === "'") {
    const nextIndex = string.indexOf(string[position], position + 1);
    if (nextIndex !== -1) {
      return whatwgEncoding.labelToName(string.substring(position + 1, nextIndex));
    }

    // It is an unmatched quotation mark
    return null;
  }

  const indexOfASCIIWhitespaceOrSemicolon = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
  const end = indexOfASCIIWhitespaceOrSemicolon === -1 ?
    string.length :
    position + indexOfASCIIWhitespaceOrSemicolon + 1;

  return whatwgEncoding.labelToName(string.substring(position, end));
}
 
/**
 * Tells whether a byte value is one of tab (0x09), line feed (0x0A),
 * form feed (0x0C), carriage return (0x0D) or space (0x20).
 *
 * @param {number} c - Byte / character code to test.
 * @returns {boolean} `true` for the five values above, `false` for anything else.
 */
function isSpaceCharacter(c) {
  switch (c) {
    case 0x09: // tab
    case 0x0A: // line feed
    case 0x0C: // form feed
    case 0x0D: // carriage return
    case 0x20: // space
      return true;
    default:
      return false;
  }
}
 
 
  |