Skip to content

Commit 42e47e5

Browse files
Refactoring Java parsing (3.16.x) (#10668)
* Update changelog * Porting java cleanup * Extension patch * Remove extra allocations * Fixing merge issues * More merge fixes * Fix TextFormat parser
1 parent 98884a8 commit 42e47e5

40 files changed

+1923
-1122
lines changed

CHANGES.txt

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,21 @@
1-
2022-09-13 version 16.2 (C++/Java/Python/PHP/Objective-C/C#/Ruby)
1+
2022-09-27 version 3.16.3 (C++/Java/Python/PHP/Objective-C/C#/Ruby)
2+
3+
Java
4+
* Refactoring java full runtime to reuse sub-message builders and prepare to
5+
migrate parsing logic from parse constructor to builder.
6+
* Move proto wireformat parsing functionality from the private "parsing
7+
constructor" to the Builder class.
8+
* Change the Lite runtime to prefer merging from the wireformat into mutable
9+
messages rather than building up a new immutable object before merging. This
10+
way results in fewer allocations and copy operations.
11+
* Make message-type extensions merge from wire-format instead of building up
12+
instances and merging afterwards. This has much better performance.
13+
* Fix TextFormat parser to build up recurring (but supposedly not repeated)
14+
sub-messages directly from text rather than building a new sub-message and
15+
merging the fully formed message into the existing field.
16+
17+
18+
2022-09-13 version 3.16.2 (C++/Java/Python/PHP/Objective-C/C#/Ruby)
219

320
C++
421
* Reduce memory consumption of MessageSet parsing

java/core/src/main/java/com/google/protobuf/AbstractMessage.java

+11-16
Original file line numberDiff line numberDiff line change
@@ -426,27 +426,22 @@ public BuilderType mergeFrom(
426426
throws IOException {
427427
boolean discardUnknown = input.shouldDiscardUnknownFields();
428428
final UnknownFieldSet.Builder unknownFields =
429-
discardUnknown ? null : UnknownFieldSet.newBuilder(getUnknownFields());
430-
while (true) {
431-
final int tag = input.readTag();
432-
if (tag == 0) {
433-
break;
434-
}
435-
436-
MessageReflection.BuilderAdapter builderAdapter =
437-
new MessageReflection.BuilderAdapter(this);
438-
if (!MessageReflection.mergeFieldFrom(
439-
input, unknownFields, extensionRegistry, getDescriptorForType(), builderAdapter, tag)) {
440-
// end group tag
441-
break;
442-
}
443-
}
429+
discardUnknown ? null : getUnknownFieldSetBuilder();
430+
MessageReflection.mergeMessageFrom(this, unknownFields, input, extensionRegistry);
444431
if (unknownFields != null) {
445-
setUnknownFields(unknownFields.build());
432+
setUnknownFieldSetBuilder(unknownFields);
446433
}
447434
return (BuilderType) this;
448435
}
449436

437+
protected UnknownFieldSet.Builder getUnknownFieldSetBuilder() {
438+
return UnknownFieldSet.newBuilder(getUnknownFields());
439+
}
440+
441+
protected void setUnknownFieldSetBuilder(final UnknownFieldSet.Builder builder) {
442+
setUnknownFields(builder.build());
443+
}
444+
450445
@Override
451446
public BuilderType mergeUnknownFields(final UnknownFieldSet unknownFields) {
452447
setUnknownFields(

java/core/src/main/java/com/google/protobuf/ArrayDecoders.java

+86-60
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,29 @@ static int decodeBytes(byte[] data, int position, Registers registers)
234234
@SuppressWarnings({"unchecked", "rawtypes"})
235235
static int decodeMessageField(
236236
Schema schema, byte[] data, int position, int limit, Registers registers) throws IOException {
237+
Object msg = schema.newInstance();
238+
int offset = mergeMessageField(msg, schema, data, position, limit, registers);
239+
schema.makeImmutable(msg);
240+
registers.object1 = msg;
241+
return offset;
242+
}
243+
244+
/** Decodes a group value. */
245+
@SuppressWarnings({"unchecked", "rawtypes"})
246+
static int decodeGroupField(
247+
Schema schema, byte[] data, int position, int limit, int endGroup, Registers registers)
248+
throws IOException {
249+
Object msg = schema.newInstance();
250+
int offset = mergeGroupField(msg, schema, data, position, limit, endGroup, registers);
251+
schema.makeImmutable(msg);
252+
registers.object1 = msg;
253+
return offset;
254+
}
255+
256+
@SuppressWarnings({"unchecked", "rawtypes"})
257+
static int mergeMessageField(
258+
Object msg, Schema schema, byte[] data, int position, int limit, Registers registers)
259+
throws IOException {
237260
int length = data[position++];
238261
if (length < 0) {
239262
position = decodeVarint32(length, data, position, registers);
@@ -242,27 +265,28 @@ static int decodeMessageField(
242265
if (length < 0 || length > limit - position) {
243266
throw InvalidProtocolBufferException.truncatedMessage();
244267
}
245-
Object result = schema.newInstance();
246-
schema.mergeFrom(result, data, position, position + length, registers);
247-
schema.makeImmutable(result);
248-
registers.object1 = result;
268+
schema.mergeFrom(msg, data, position, position + length, registers);
269+
registers.object1 = msg;
249270
return position + length;
250271
}
251272

252-
/** Decodes a group value. */
253273
@SuppressWarnings({"unchecked", "rawtypes"})
254-
static int decodeGroupField(
255-
Schema schema, byte[] data, int position, int limit, int endGroup, Registers registers)
274+
static int mergeGroupField(
275+
Object msg,
276+
Schema schema,
277+
byte[] data,
278+
int position,
279+
int limit,
280+
int endGroup,
281+
Registers registers)
256282
throws IOException {
257283
// A group field must has a MessageSchema (the only other subclass of Schema is MessageSetSchema
258284
// and it can't be used in group fields).
259285
final MessageSchema messageSchema = (MessageSchema) schema;
260-
Object result = messageSchema.newInstance();
261286
// It's OK to directly use parseProto2Message since proto3 doesn't have group.
262287
final int endPosition =
263-
messageSchema.parseProto2Message(result, data, position, limit, endGroup, registers);
264-
messageSchema.makeImmutable(result);
265-
registers.object1 = result;
288+
messageSchema.parseProto2Message(msg, data, position, limit, endGroup, registers);
289+
registers.object1 = msg;
266290
return endPosition;
267291
}
268292

@@ -847,26 +871,19 @@ static int decodeExtension(
847871
break;
848872
}
849873
case ENUM:
850-
{
851-
IntArrayList list = new IntArrayList();
852-
position = decodePackedVarint32List(data, position, list, registers);
853-
UnknownFieldSetLite unknownFields = message.unknownFields;
854-
if (unknownFields == UnknownFieldSetLite.getDefaultInstance()) {
855-
unknownFields = null;
856-
}
857-
unknownFields =
858-
SchemaUtil.filterUnknownEnumList(
859-
fieldNumber,
860-
list,
861-
extension.descriptor.getEnumType(),
862-
unknownFields,
863-
unknownFieldSchema);
864-
if (unknownFields != null) {
865-
message.unknownFields = unknownFields;
874+
{
875+
IntArrayList list = new IntArrayList();
876+
position = decodePackedVarint32List(data, position, list, registers);
877+
SchemaUtil.filterUnknownEnumList(
878+
message,
879+
fieldNumber,
880+
list,
881+
extension.descriptor.getEnumType(),
882+
null,
883+
unknownFieldSchema);
884+
extensions.setField(extension.descriptor, list);
885+
break;
866886
}
867-
extensions.setField(extension.descriptor, list);
868-
break;
869-
}
870887
default:
871888
throw new IllegalStateException(
872889
"Type cannot be packed: " + extension.descriptor.getLiteType());
@@ -878,13 +895,8 @@ static int decodeExtension(
878895
position = decodeVarint32(data, position, registers);
879896
Object enumValue = extension.descriptor.getEnumType().findValueByNumber(registers.int1);
880897
if (enumValue == null) {
881-
UnknownFieldSetLite unknownFields = ((GeneratedMessageLite) message).unknownFields;
882-
if (unknownFields == UnknownFieldSetLite.getDefaultInstance()) {
883-
unknownFields = UnknownFieldSetLite.newInstance();
884-
((GeneratedMessageLite) message).unknownFields = unknownFields;
885-
}
886898
SchemaUtil.storeUnknownEnum(
887-
fieldNumber, registers.int1, unknownFields, unknownFieldSchema);
899+
message, fieldNumber, registers.int1, null, unknownFieldSchema);
888900
return position;
889901
}
890902
// Note, we store the integer value instead of the actual enum object in FieldSet.
@@ -941,38 +953,52 @@ static int decodeExtension(
941953
value = registers.object1;
942954
break;
943955
case GROUP:
944-
final int endTag = (fieldNumber << 3) | WireFormat.WIRETYPE_END_GROUP;
945-
position = decodeGroupField(
946-
Protobuf.getInstance().schemaFor(extension.getMessageDefaultInstance().getClass()),
947-
data, position, limit, endTag, registers);
948-
value = registers.object1;
949-
break;
950-
956+
{
957+
final int endTag = (fieldNumber << 3) | WireFormat.WIRETYPE_END_GROUP;
958+
final Schema fieldSchema =
959+
Protobuf.getInstance()
960+
.schemaFor(extension.getMessageDefaultInstance().getClass());
961+
if (extension.isRepeated()) {
962+
position = decodeGroupField(fieldSchema, data, position, limit, endTag, registers);
963+
extensions.addRepeatedField(extension.descriptor, registers.object1);
964+
} else {
965+
Object oldValue = extensions.getField(extension.descriptor);
966+
if (oldValue == null) {
967+
oldValue = fieldSchema.newInstance();
968+
extensions.setField(extension.descriptor, oldValue);
969+
}
970+
position =
971+
mergeGroupField(
972+
oldValue, fieldSchema, data, position, limit, endTag, registers);
973+
}
974+
return position;
975+
}
951976
case MESSAGE:
952-
position = decodeMessageField(
953-
Protobuf.getInstance().schemaFor(extension.getMessageDefaultInstance().getClass()),
954-
data, position, limit, registers);
955-
value = registers.object1;
956-
break;
957-
977+
{
978+
final Schema fieldSchema =
979+
Protobuf.getInstance()
980+
.schemaFor(extension.getMessageDefaultInstance().getClass());
981+
if (extension.isRepeated()) {
982+
position = decodeMessageField(fieldSchema, data, position, limit, registers);
983+
extensions.addRepeatedField(extension.descriptor, registers.object1);
984+
} else {
985+
Object oldValue = extensions.getField(extension.descriptor);
986+
if (oldValue == null) {
987+
oldValue = fieldSchema.newInstance();
988+
extensions.setField(extension.descriptor, oldValue);
989+
}
990+
position =
991+
mergeMessageField(oldValue, fieldSchema, data, position, limit, registers);
992+
}
993+
return position;
994+
}
958995
case ENUM:
959996
throw new IllegalStateException("Shouldn't reach here.");
960997
}
961998
}
962999
if (extension.isRepeated()) {
9631000
extensions.addRepeatedField(extension.descriptor, value);
9641001
} else {
965-
switch (extension.getLiteType()) {
966-
case MESSAGE:
967-
case GROUP:
968-
Object oldValue = extensions.getField(extension.descriptor);
969-
if (oldValue != null) {
970-
value = Internal.mergeMessage(oldValue, value);
971-
}
972-
break;
973-
default:
974-
break;
975-
}
9761002
extensions.setField(extension.descriptor, value);
9771003
}
9781004
}

java/core/src/main/java/com/google/protobuf/BinaryReader.java

+20-12
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,15 @@ public <T> T readMessageBySchemaWithCheck(
247247

248248
private <T> T readMessage(Schema<T> schema, ExtensionRegistryLite extensionRegistry)
249249
throws IOException {
250+
T newInstance = schema.newInstance();
251+
mergeMessageField(newInstance, schema, extensionRegistry);
252+
schema.makeImmutable(newInstance);
253+
return newInstance;
254+
}
255+
256+
@Override
257+
public <T> void mergeMessageField(
258+
T target, Schema<T> schema, ExtensionRegistryLite extensionRegistry) throws IOException {
250259
int size = readVarint32();
251260
requireBytes(size);
252261

@@ -256,15 +265,10 @@ private <T> T readMessage(Schema<T> schema, ExtensionRegistryLite extensionRegis
256265
limit = newLimit;
257266

258267
try {
259-
// Allocate and read the message.
260-
T message = schema.newInstance();
261-
schema.mergeFrom(message, this, extensionRegistry);
262-
schema.makeImmutable(message);
263-
268+
schema.mergeFrom(target, this, extensionRegistry);
264269
if (pos != newLimit) {
265270
throw InvalidProtocolBufferException.parseFailure();
266271
}
267-
return message;
268272
} finally {
269273
// Restore the limit.
270274
limit = prevLimit;
@@ -287,19 +291,23 @@ public <T> T readGroupBySchemaWithCheck(
287291

288292
private <T> T readGroup(Schema<T> schema, ExtensionRegistryLite extensionRegistry)
289293
throws IOException {
294+
T newInstance = schema.newInstance();
295+
mergeGroupField(newInstance, schema, extensionRegistry);
296+
schema.makeImmutable(newInstance);
297+
return newInstance;
298+
}
299+
300+
@Override
301+
public <T> void mergeGroupField(
302+
T target, Schema<T> schema, ExtensionRegistryLite extensionRegistry) throws IOException {
290303
int prevEndGroupTag = endGroupTag;
291304
endGroupTag = WireFormat.makeTag(WireFormat.getTagFieldNumber(tag), WIRETYPE_END_GROUP);
292305

293306
try {
294-
// Allocate and read the message.
295-
T message = schema.newInstance();
296-
schema.mergeFrom(message, this, extensionRegistry);
297-
schema.makeImmutable(message);
298-
307+
schema.mergeFrom(target, this, extensionRegistry);
299308
if (tag != endGroupTag) {
300309
throw InvalidProtocolBufferException.parseFailure();
301310
}
302-
return message;
303311
} finally {
304312
// Restore the old end group tag.
305313
endGroupTag = prevEndGroupTag;

0 commit comments

Comments
 (0)