protocolbuffers
diff --git a/‎binary/decoder.js
Lines changed: 36 additions & 65 deletions b/‎binary/decoder.js
Lines changed: 36 additions & 65 deletions
diff --git a/‎binary/decoder_test.js
Lines changed: 5 additions & 5 deletions b/‎binary/decoder_test.js
Lines changed: 5 additions & 5 deletions
diff --git a/‎binary/reader.js
Lines changed: 40 additions & 1 deletion b/‎binary/reader.js
Lines changed: 40 additions & 1 deletion
@@ -47,7 +47,7 @@
 goog.provide('jspb.BinaryDecoder');
 
 goog.require('jspb.asserts');
-goog.require('goog.crypt');
+goog.require('jspb.binary.utf8');
 goog.require('jspb.utils');
 
 
@@ -256,7 +256,7 @@ jspb.BinaryDecoder.prototype.setCursor = function(cursor) {
  */
 jspb.BinaryDecoder.prototype.advance = function(count) {
   this.cursor_ += count;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
 };
 
 
@@ -397,6 +397,17 @@ jspb.BinaryDecoder.prototype.readSplitFixed64 = function(convert) {
   return convert(lowBits, highBits);
 };
 
+/**
+  * Asserts that our cursor is in bounds.
+  *
+  * @private
+  * @return {void}
+  */
+jspb.BinaryDecoder.prototype.checkCursor = function () {
+  if (this.cursor_ > this.end_) {
+    asserts.fail('Read past the end ' + this.cursor_ + ' > ' + this.end_);
+  }
+}
 
 /**
  * Skips over a varint in the block without decoding it.
@@ -452,31 +463,31 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32 = function() {
   var x = (temp & 0x7F);
   if (temp < 128) {
     this.cursor_ += 1;
-    jspb.asserts.assert(this.cursor_ <= this.end_);
+    this.checkCursor();
     return x;
   }
 
   temp = bytes[this.cursor_ + 1];
   x |= (temp & 0x7F) << 7;
   if (temp < 128) {
     this.cursor_ += 2;
-    jspb.asserts.assert(this.cursor_ <= this.end_);
+    this.checkCursor();
     return x;
   }
 
   temp = bytes[this.cursor_ + 2];
   x |= (temp & 0x7F) << 14;
   if (temp < 128) {
     this.cursor_ += 3;
-    jspb.asserts.assert(this.cursor_ <= this.end_);
+    this.checkCursor();
     return x;
   }
 
   temp = bytes[this.cursor_ + 3];
   x |= (temp & 0x7F) << 21;
   if (temp < 128) {
     this.cursor_ += 4;
-    jspb.asserts.assert(this.cursor_ <= this.end_);
+    this.checkCursor();
     return x;
   }
 
@@ -486,7 +497,7 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32 = function() {
     // We're reading the high bits of an unsigned varint. The byte we just read
     // also contains bits 33 through 35, which we're going to discard.
     this.cursor_ += 5;
-    jspb.asserts.assert(this.cursor_ <= this.end_);
+    this.checkCursor();
     return x >>> 0;
   }
 
@@ -500,7 +511,7 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32 = function() {
     jspb.asserts.assert(false);
   }
 
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return x;
 };
 
@@ -679,7 +690,7 @@ jspb.BinaryDecoder.prototype.readZigzagVarint64String = function() {
 jspb.BinaryDecoder.prototype.readUint8 = function() {
   var a = this.bytes_[this.cursor_ + 0];
   this.cursor_ += 1;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return a;
 };
 
@@ -694,7 +705,7 @@ jspb.BinaryDecoder.prototype.readUint16 = function() {
   var a = this.bytes_[this.cursor_ + 0];
   var b = this.bytes_[this.cursor_ + 1];
   this.cursor_ += 2;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return (a << 0) | (b << 8);
 };
 
@@ -711,7 +722,7 @@ jspb.BinaryDecoder.prototype.readUint32 = function() {
   var c = this.bytes_[this.cursor_ + 2];
   var d = this.bytes_[this.cursor_ + 3];
   this.cursor_ += 4;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return ((a << 0) | (b << 8) | (c << 16) | (d << 24)) >>> 0;
 };
 
@@ -756,7 +767,7 @@ jspb.BinaryDecoder.prototype.readUint64String = function() {
 jspb.BinaryDecoder.prototype.readInt8 = function() {
   var a = this.bytes_[this.cursor_ + 0];
   this.cursor_ += 1;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return (a << 24) >> 24;
 };
 
@@ -771,7 +782,7 @@ jspb.BinaryDecoder.prototype.readInt16 = function() {
   var a = this.bytes_[this.cursor_ + 0];
   var b = this.bytes_[this.cursor_ + 1];
   this.cursor_ += 2;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return (((a << 0) | (b << 8)) << 16) >> 16;
 };
 
@@ -788,7 +799,7 @@ jspb.BinaryDecoder.prototype.readInt32 = function() {
   var c = this.bytes_[this.cursor_ + 2];
   var d = this.bytes_[this.cursor_ + 3];
   this.cursor_ += 4;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return (a << 0) | (b << 8) | (c << 16) | (d << 24);
 };
 
@@ -858,7 +869,9 @@ jspb.BinaryDecoder.prototype.readDouble = function() {
  * @export
  */
 jspb.BinaryDecoder.prototype.readBool = function() {
-  return !!this.bytes_[this.cursor_++];
+  const b = !!this.bytes_[this.cursor_++];
+  this.checkCursor();
+  return b;
 };
 
 
@@ -879,59 +892,17 @@ jspb.BinaryDecoder.prototype.readEnum = function() {
  * Supports codepoints from U+0000 up to U+10FFFF.
  * (http://en.wikipedia.org/wiki/UTF-8).
  * @param {number} length The length of the string to read.
+ * @param {boolean} requireUtf8 Whether to throw when invalid utf8 is found.
  * @return {string} The decoded string.
  * @export
  */
-jspb.BinaryDecoder.prototype.readString = function(length) {
-  var bytes = this.bytes_;
-  var cursor = this.cursor_;
-  var end = cursor + length;
-  var codeUnits = [];
-
-  var result = '';
-  while (cursor < end) {
-    var c = bytes[cursor++];
-    if (c < 128) {  // Regular 7-bit ASCII.
-      codeUnits.push(c);
-    } else if (c < 192) {
-      // UTF-8 continuation mark. We are out of sync. This
-      // might happen if we attempted to read a character
-      // with more than four bytes.
-      continue;
-    } else if (c < 224) {  // UTF-8 with two bytes.
-      var c2 = bytes[cursor++];
-      codeUnits.push(((c & 31) << 6) | (c2 & 63));
-    } else if (c < 240) {  // UTF-8 with three bytes.
-      var c2 = bytes[cursor++];
-      var c3 = bytes[cursor++];
-      codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
-    } else if (c < 248) {  // UTF-8 with 4 bytes.
-      var c2 = bytes[cursor++];
-      var c3 = bytes[cursor++];
-      var c4 = bytes[cursor++];
-      // Characters written on 4 bytes have 21 bits for a codepoint.
-      // We can't fit that on 16bit characters, so we use surrogates.
-      var codepoint =
-          ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
-      // Surrogates formula from wikipedia.
-      // 1. Subtract 0x10000 from codepoint
-      codepoint -= 0x10000;
-      // 2. Split this into the high 10-bit value and the low 10-bit value
-      // 3. Add 0xD800 to the high value to form the high surrogate
-      // 4. Add 0xDC00 to the low value to form the low surrogate:
-      var low = (codepoint & 1023) + 0xDC00;
-      var high = ((codepoint >> 10) & 1023) + 0xD800;
-      codeUnits.push(high, low);
-    }
 
-    // Avoid exceeding the maximum stack size when calling `apply`.
-    if (codeUnits.length >= 8192) {
-      result += String.fromCharCode.apply(null, codeUnits);
-      codeUnits.length = 0;
-    }
-  }
-  result += goog.crypt.byteArrayToString(codeUnits);
-  this.cursor_ = cursor;
+jspb.BinaryDecoder.prototype.readString = function (length, requireUtf8) {
+  const cursor = this.cursor_;
+  this.cursor_ += length;
+  this.checkCursor();
+  const result =
+    jspb.binary.utf8.decodeUtf8(jspb.asserts.assert(this.bytes_), cursor, length, requireUtf8);
   return result;
 };
 
@@ -966,7 +937,7 @@ jspb.BinaryDecoder.prototype.readBytes = function(length) {
   var result = this.bytes_.subarray(this.cursor_, this.cursor_ + length);
 
   this.cursor_ += length;
-  jspb.asserts.assert(this.cursor_ <= this.end_);
+  this.checkCursor();
   return result;
 };
 
 
@@ -354,7 +354,7 @@ describe('binaryDecoderTest', () => {
 
     const decoder = jspb.BinaryDecoder.alloc(encoder.end());
 
-    expect(decoder.readString(len)).toEqual(long_string);
+    expect(decoder.readString(len, true)).toEqual(long_string);
   });
 
   /**
@@ -375,11 +375,11 @@ describe('binaryDecoderTest', () => {
 
     const decoder = jspb.BinaryDecoder.alloc(encoder.end());
 
-    expect(decoder.readString(ascii.length)).toEqual(ascii);
-    expect(utf8_two_bytes).toEqual(decoder.readString(utf8_two_bytes.length));
+    expect(decoder.readString(ascii.length, true)).toEqual(ascii);
+    expect(utf8_two_bytes).toEqual(decoder.readString(2, true));
     expect(utf8_three_bytes)
-        .toEqual(decoder.readString(utf8_three_bytes.length));
-    expect(utf8_four_bytes).toEqual(decoder.readString(utf8_four_bytes.length));
+      .toEqual(decoder.readString(3, true));
+    expect(utf8_four_bytes).toEqual(decoder.readString(4, true));
   });
 
   /**
 
@@ -52,6 +52,26 @@ goog.require('jspb.BinaryConstants');
 goog.require('jspb.BinaryDecoder');
 goog.require('jspb.utils');
 
+/**
+ * Whether to enforce that string fields are valid utf8.
+ *
+ * <p>Currently set to `ALWAYS`, can be set to `DEPRECATED_PROTO3_ONLY` to only
+ * enforce utf8 for proto3 string fields, for proto2 string fields it will use
+ * replacement characters when encoding errors are found.
+ *
+ * <p>TODO: Remove the flag, simplify BinaryReader to remove
+ * readStringRequireUtf8 and related support in the code generator et. al.
+ *
+ * @define {string}
+ */
+const ENFORCE_UTF8 = goog.define('jspb.binary.ENFORCE_UTF8', 'ALWAYS');
+
+// Constrain the set of values to only these two.
+jspb.asserts.assert(
+  ENFORCE_UTF8 === 'DEPRECATED_PROTO3_ONLY' || ENFORCE_UTF8 === 'ALWAYS');
+
+const /** boolean */ UTF8_PARSING_ERRORS_ARE_FATAL = ENFORCE_UTF8 === 'ALWAYS';
+
 
 
 /**
@@ -996,10 +1016,29 @@ jspb.BinaryReader.prototype.readEnum = function() {
  * @export
  */
 jspb.BinaryReader.prototype.readString = function() {
+  // delegate to the other reader so that inlining can eliminate this method
+  // in the common case.
+  if (UTF8_PARSING_ERRORS_ARE_FATAL) {
+    return this.readStringRequireUtf8();
+  }
+
   jspb.asserts.assert(
       this.nextWireType_ == jspb.BinaryConstants.WireType.DELIMITED);
   var length = this.decoder_.readUnsignedVarint32();
-  return this.decoder_.readString(length);
+  return this.decoder_.readString(length, /*requireUtf8=*/ false);
+};
+
+/**
+ * Reads a string field from the binary stream, or throws an error if the next
+ * field in the stream is not of the correct wire type, or if the string is
+ * not valid utf8.
+ *
+ * @return {string} The value of the string field.
+ */
+jspb.BinaryReader.prototype.readStringRequireUtf8 = function () {
+  jspb.asserts.assert(this.nextWireType_ == jspb.BinaryConstants.WireType.DELIMITED);
+  const length = this.decoder_.readUnsignedVarint32();
+  return this.decoder_.readString(length, /*requireUtf8=*/ true);
 };