internal.js 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. "use strict";
  2. var Buffer = require("buffer").Buffer;
  3. // Export Node.js internal encodings.
  4. module.exports = {
  5. // Encodings
  6. utf8: { type: "_internal", bomAware: true},
  7. cesu8: { type: "_internal", bomAware: true},
  8. unicode11utf8: "utf8",
  9. ucs2: { type: "_internal", bomAware: true},
  10. utf16le: "ucs2",
  11. binary: { type: "_internal" },
  12. iso88591: "binary",
  13. base64: { type: "_internal" },
  14. hex: { type: "_internal" },
  15. // Codec.
  16. _internal: InternalCodec,
  17. };
  18. //------------------------------------------------------------------------------
  19. function InternalCodec(codecOptions, iconv) {
  20. this.enc = codecOptions.encodingName;
  21. this.bomAware = codecOptions.bomAware;
  22. if (this.enc === "base64")
  23. this.encoder = InternalEncoderBase64;
  24. else if (this.enc === "cesu8") {
  25. this.enc = "utf8"; // Use utf8 for decoding.
  26. this.encoder = InternalEncoderCesu8;
  27. // Add decoder for versions of Node not supporting CESU-8
  28. if (new Buffer("eda080", 'hex').toString().length == 3) {
  29. this.decoder = InternalDecoderCesu8;
  30. this.defaultCharUnicode = iconv.defaultCharUnicode;
  31. }
  32. }
  33. }
  34. InternalCodec.prototype.encoder = InternalEncoder;
  35. InternalCodec.prototype.decoder = InternalDecoder;
  36. //------------------------------------------------------------------------------
  37. // We use node.js internal decoder. Its signature is the same as ours.
  38. var StringDecoder = require('string_decoder').StringDecoder;
  39. if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
  40. StringDecoder.prototype.end = function() {};
  41. function InternalDecoder(options, codec) {
  42. StringDecoder.call(this, codec.enc);
  43. }
  44. InternalDecoder.prototype = StringDecoder.prototype;
  45. //------------------------------------------------------------------------------
  46. // Encoder is mostly trivial
  47. function InternalEncoder(options, codec) {
  48. this.enc = codec.enc;
  49. }
  50. InternalEncoder.prototype.write = function(str) {
  51. return new Buffer(str, this.enc);
  52. }
  53. InternalEncoder.prototype.end = function() {
  54. }
  55. //------------------------------------------------------------------------------
  56. // Except base64 encoder, which must keep its state.
  57. function InternalEncoderBase64(options, codec) {
  58. this.prevStr = '';
  59. }
  60. InternalEncoderBase64.prototype.write = function(str) {
  61. str = this.prevStr + str;
  62. var completeQuads = str.length - (str.length % 4);
  63. this.prevStr = str.slice(completeQuads);
  64. str = str.slice(0, completeQuads);
  65. return new Buffer(str, "base64");
  66. }
  67. InternalEncoderBase64.prototype.end = function() {
  68. return new Buffer(this.prevStr, "base64");
  69. }
  70. //------------------------------------------------------------------------------
  71. // CESU-8 encoder is also special.
  72. function InternalEncoderCesu8(options, codec) {
  73. }
  74. InternalEncoderCesu8.prototype.write = function(str) {
  75. var buf = new Buffer(str.length * 3), bufIdx = 0;
  76. for (var i = 0; i < str.length; i++) {
  77. var charCode = str.charCodeAt(i);
  78. // Naive implementation, but it works because CESU-8 is especially easy
  79. // to convert from UTF-16 (which all JS strings are encoded in).
  80. if (charCode < 0x80)
  81. buf[bufIdx++] = charCode;
  82. else if (charCode < 0x800) {
  83. buf[bufIdx++] = 0xC0 + (charCode >>> 6);
  84. buf[bufIdx++] = 0x80 + (charCode & 0x3f);
  85. }
  86. else { // charCode will always be < 0x10000 in javascript.
  87. buf[bufIdx++] = 0xE0 + (charCode >>> 12);
  88. buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
  89. buf[bufIdx++] = 0x80 + (charCode & 0x3f);
  90. }
  91. }
  92. return buf.slice(0, bufIdx);
  93. }
  94. InternalEncoderCesu8.prototype.end = function() {
  95. }
  96. //------------------------------------------------------------------------------
  97. // CESU-8 decoder is not implemented in Node v4.0+
  98. function InternalDecoderCesu8(options, codec) {
  99. this.acc = 0;
  100. this.contBytes = 0;
  101. this.accBytes = 0;
  102. this.defaultCharUnicode = codec.defaultCharUnicode;
  103. }
  104. InternalDecoderCesu8.prototype.write = function(buf) {
  105. var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
  106. res = '';
  107. for (var i = 0; i < buf.length; i++) {
  108. var curByte = buf[i];
  109. if ((curByte & 0xC0) !== 0x80) { // Leading byte
  110. if (contBytes > 0) { // Previous code is invalid
  111. res += this.defaultCharUnicode;
  112. contBytes = 0;
  113. }
  114. if (curByte < 0x80) { // Single-byte code
  115. res += String.fromCharCode(curByte);
  116. } else if (curByte < 0xE0) { // Two-byte code
  117. acc = curByte & 0x1F;
  118. contBytes = 1; accBytes = 1;
  119. } else if (curByte < 0xF0) { // Three-byte code
  120. acc = curByte & 0x0F;
  121. contBytes = 2; accBytes = 1;
  122. } else { // Four or more are not supported for CESU-8.
  123. res += this.defaultCharUnicode;
  124. }
  125. } else { // Continuation byte
  126. if (contBytes > 0) { // We're waiting for it.
  127. acc = (acc << 6) | (curByte & 0x3f);
  128. contBytes--; accBytes++;
  129. if (contBytes === 0) {
  130. // Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
  131. if (accBytes === 2 && acc < 0x80 && acc > 0)
  132. res += this.defaultCharUnicode;
  133. else if (accBytes === 3 && acc < 0x800)
  134. res += this.defaultCharUnicode;
  135. else
  136. // Actually add character.
  137. res += String.fromCharCode(acc);
  138. }
  139. } else { // Unexpected continuation byte
  140. res += this.defaultCharUnicode;
  141. }
  142. }
  143. }
  144. this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
  145. return res;
  146. }
  147. InternalDecoderCesu8.prototype.end = function() {
  148. var res = 0;
  149. if (this.contBytes > 0)
  150. res += this.defaultCharUnicode;
  151. return res;
  152. }