
Source Code for Module google.protobuf.text_format

   1  # Protocol Buffers - Google's data interchange format 
   2  # Copyright 2008 Google Inc.  All rights reserved. 
   3  # https://developers.google.com/protocol-buffers/ 
   4  # 
   5  # Redistribution and use in source and binary forms, with or without 
   6  # modification, are permitted provided that the following conditions are 
   7  # met: 
   8  # 
   9  #     * Redistributions of source code must retain the above copyright 
  10  # notice, this list of conditions and the following disclaimer. 
  11  #     * Redistributions in binary form must reproduce the above 
  12  # copyright notice, this list of conditions and the following disclaimer 
  13  # in the documentation and/or other materials provided with the 
  14  # distribution. 
  15  #     * Neither the name of Google Inc. nor the names of its 
  16  # contributors may be used to endorse or promote products derived from 
  17  # this software without specific prior written permission. 
  18  # 
  19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
  20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
  21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
  22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
  23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
  24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
  25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
  26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
  27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
  28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
  29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  30   
  31  """Contains routines for printing protocol messages in text format. 
  32   
  33  Simple usage example: 
  34   
  35    # Create a proto object and serialize it to a text proto string. 
  36    message = my_proto_pb2.MyMessage(foo='bar') 
  37    text_proto = text_format.MessageToString(message) 
  38   
  39    # Parse a text proto string. 
  40    message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 
  41  """ 
  42   
  43  __author__ = 'kenton@google.com (Kenton Varda)' 
  44   
  45  # TODO(b/129989314) Import thread contention leads to test failures. 
  46  import encodings.raw_unicode_escape  # pylint: disable=unused-import 
  47  import encodings.unicode_escape  # pylint: disable=unused-import 
  48  import io 
  49  import re 
  50   
  51  import six 
  52   
  53  if six.PY3: 
  54    long = int  # pylint: disable=redefined-builtin,invalid-name 
  55   
  56  # pylint: disable=g-import-not-at-top 
  57  from google.protobuf.internal import decoder 
  58  from google.protobuf.internal import type_checkers 
  59  from google.protobuf import descriptor 
  60  from google.protobuf import text_encoding 
  61   
  62  __all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField', 
  63             'PrintFieldValue', 'Merge', 'MessageToBytes'] 
  64   
  65  _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), 
  66                       type_checkers.Int32ValueChecker(), 
  67                       type_checkers.Uint64ValueChecker(), 
  68                       type_checkers.Int64ValueChecker()) 
  69  _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE) 
  70  _FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE) 
  71  _QUOTES = frozenset(("'", '"')) 
  72  _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' 
  73   
  74   
  75  class Error(Exception): 
  76    """Top-level module error for text_format.""" 
  77   
  78   
  79  class ParseError(Error): 
  80    """Thrown in case of text parsing or tokenizing error.""" 
  81   
  82    def __init__(self, message=None, line=None, column=None): 
  83      if message is not None and line is not None: 
  84        loc = str(line) 
  85        if column is not None: 
  86          loc += ':{0}'.format(column) 
  87        message = '{0} : {1}'.format(loc, message) 
  88      if message is not None: 
  89        super(ParseError, self).__init__(message) 
  90      else: 
  91        super(ParseError, self).__init__() 
  92      self._line = line 
  93      self._column = column 
  94   
  95    def GetLine(self): 
  96      return self._line 
  97   
  98    def GetColumn(self): 
  99      return self._column 
 100   
 101   
 102  class TextWriter(object): 
 103   
 104    def __init__(self, as_utf8): 
 105      if six.PY2: 
 106        self._writer = io.BytesIO() 
 107      else: 
 108        self._writer = io.StringIO() 
 109   
 110    def write(self, val): 
 111      if six.PY2: 
 112        if isinstance(val, six.text_type): 
 113          val = val.encode('utf-8') 
 114      return self._writer.write(val) 
 115   
 116    def close(self): 
 117      return self._writer.close() 
 118   
 119    def getvalue(self): 
 120      return self._writer.getvalue() 
 121   
 122   
 123  def MessageToString(message, 
 124                      as_utf8=False, 
 125                      as_one_line=False, 
 126                      use_short_repeated_primitives=False, 
 127                      pointy_brackets=False, 
 128                      use_index_order=False, 
 129                      float_format=None, 
 130                      double_format=None, 
 131                      use_field_number=False, 
 132                      descriptor_pool=None, 
 133                      indent=0, 
 134                      message_formatter=None, 
 135                      print_unknown_fields=False): 
 136    # type: (...) -> str 
 137    """Convert protobuf message to text format. 
 138   
 139    Double values can be formatted compactly with 15 digits of 
 140    precision (which is the most that IEEE 754 "double" can guarantee) 
 141    using double_format='.15g'. To ensure that converting to text and back to a 
 142    proto will result in an identical value, double_format='.17g' should be used. 
 143   
 144    Args: 
 145      message: The protocol buffers message. 
 146      as_utf8: Return unescaped Unicode for non-ASCII characters. 
 147        In Python 3, actual Unicode characters may appear as is in strings. 
 148        In Python 2, the return value will be valid UTF-8 rather than only ASCII. 
 149      as_one_line: Don't introduce newlines between fields. 
 150      use_short_repeated_primitives: Use short repeated format for primitives. 
 151      pointy_brackets: If True, use angle brackets instead of curly braces for 
 152        nesting. 
 153      use_index_order: If True, fields of a proto message will be printed using 
 154        the order defined in source code instead of the field number order. 
 155        Extensions will be printed at the end of the message, and their relative 
 156        order is determined by the extension number. By default, the field 
 157        number order is used. 
 158      float_format: If set, use this to specify float field formatting 
 159        (per the "Format Specification Mini-Language"); otherwise, 8 significant 
 160        digits are used (default '.8g'). Also affects double fields if 
 161        double_format is not set but float_format is set. 
 162      double_format: If set, use this to specify double field formatting 
 163        (per the "Format Specification Mini-Language"); if it is not set but 
 164        float_format is set, use float_format. Otherwise, use str(). 
 165      use_field_number: If True, print field numbers instead of names. 
 166      descriptor_pool: A DescriptorPool used to resolve Any types. 
 167      indent: The initial indent level, in terms of spaces, for pretty print. 
 168      message_formatter: A function(message, indent, as_one_line): unicode|None 
 169        to custom format selected sub-messages (usually based on message type). 
 170        Use to pretty print parts of the protobuf for easier diffing. 
 171      print_unknown_fields: If True, unknown fields will be printed. 
 172   
 173    Returns: 
 174      A string of the text formatted protocol buffer message. 
 175    """ 
 176    out = TextWriter(as_utf8) 
 177    printer = _Printer(out, indent, as_utf8, as_one_line, 
 178                       use_short_repeated_primitives, pointy_brackets, 
 179                       use_index_order, float_format, double_format, 
 180                       use_field_number, 
 181                       descriptor_pool, message_formatter, 
 182                       print_unknown_fields=print_unknown_fields) 
 183    printer.PrintMessage(message) 
 184    result = out.getvalue() 
 185    out.close() 
 186    if as_one_line: 
 187      return result.rstrip() 
 188    return result 
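For reference, a short usage sketch of the options described in the docstring above, reusing the illustrative my_proto_pb2.MyMessage type from the module docstring (assumed to have a string field foo):

  message = my_proto_pb2.MyMessage(foo='bar')
  text_format.MessageToString(message)                        # 'foo: "bar"\n'
  text_format.MessageToString(message, as_one_line=True)      # 'foo: "bar"' (trailing whitespace stripped)
  text_format.MessageToString(message, double_format='.17g')  # double fields round-trip exactly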
189
 190   
 191  def MessageToBytes(message, **kwargs): 
 192    # type: (...) -> bytes 
 193    """Convert protobuf message to encoded text format. See MessageToString.""" 
 194    text = MessageToString(message, **kwargs) 
 195    if isinstance(text, bytes): 
 196      return text 
 197    codec = 'utf-8' if kwargs.get('as_utf8') else 'ascii' 
 198    return text.encode(codec) 
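A sketch of pairing MessageToBytes with Parse, which accepts bytes as well as text (same placeholder message type as above):

  data = text_format.MessageToBytes(message, as_utf8=True)   # UTF-8 encoded bytes
  copy = text_format.Parse(data, my_proto_pb2.MyMessage())   # bytes input is split on b'\n'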
199
 200   
 201  def _IsMapEntry(field): 
 202    return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and 
 203            field.message_type.has_options and 
 204            field.message_type.GetOptions().map_entry) 
205
206 207 -def PrintMessage(message, 208 out, 209 indent=0, 210 as_utf8=False, 211 as_one_line=False, 212 use_short_repeated_primitives=False, 213 pointy_brackets=False, 214 use_index_order=False, 215 float_format=None, 216 double_format=None, 217 use_field_number=False, 218 descriptor_pool=None, 219 message_formatter=None, 220 print_unknown_fields=False):
221 printer = _Printer( 222 out=out, indent=indent, as_utf8=as_utf8, 223 as_one_line=as_one_line, 224 use_short_repeated_primitives=use_short_repeated_primitives, 225 pointy_brackets=pointy_brackets, 226 use_index_order=use_index_order, 227 float_format=float_format, 228 double_format=double_format, 229 use_field_number=use_field_number, 230 descriptor_pool=descriptor_pool, 231 message_formatter=message_formatter, 232 print_unknown_fields=print_unknown_fields) 233 printer.PrintMessage(message)
234
235 236 -def PrintField(field, 237 value, 238 out, 239 indent=0, 240 as_utf8=False, 241 as_one_line=False, 242 use_short_repeated_primitives=False, 243 pointy_brackets=False, 244 use_index_order=False, 245 float_format=None, 246 double_format=None, 247 message_formatter=None, 248 print_unknown_fields=False):
249 """Print a single field name/value pair.""" 250 printer = _Printer(out, indent, as_utf8, as_one_line, 251 use_short_repeated_primitives, pointy_brackets, 252 use_index_order, float_format, double_format, 253 message_formatter=message_formatter, 254 print_unknown_fields=print_unknown_fields) 255 printer.PrintField(field, value)
256
257 258 -def PrintFieldValue(field, 259 value, 260 out, 261 indent=0, 262 as_utf8=False, 263 as_one_line=False, 264 use_short_repeated_primitives=False, 265 pointy_brackets=False, 266 use_index_order=False, 267 float_format=None, 268 double_format=None, 269 message_formatter=None, 270 print_unknown_fields=False):
271 """Print a single field value (not including name).""" 272 printer = _Printer(out, indent, as_utf8, as_one_line, 273 use_short_repeated_primitives, pointy_brackets, 274 use_index_order, float_format, double_format, 275 message_formatter=message_formatter, 276 print_unknown_fields=print_unknown_fields) 277 printer.PrintFieldValue(field, value)
278
 279   
 280  def _BuildMessageFromTypeName(type_name, descriptor_pool): 
 281    """Returns a protobuf message instance. 
 282   
 283    Args: 
 284      type_name: Fully-qualified protobuf message type name string. 
 285      descriptor_pool: DescriptorPool instance. 
 286   
 287    Returns: 
 288      A Message instance of type matching type_name, or None if a Descriptor 
 289      wasn't found matching type_name. 
 290    """ 
 291    # pylint: disable=g-import-not-at-top 
 292    if descriptor_pool is None: 
 293      from google.protobuf import descriptor_pool as pool_mod 
 294      descriptor_pool = pool_mod.Default() 
 295    from google.protobuf import symbol_database 
 296    database = symbol_database.Default() 
 297    try: 
 298      message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) 
 299    except KeyError: 
 300      return None 
 301    message_type = database.GetPrototype(message_descriptor) 
 302    return message_type() 
 303   
 304   
 305  # These values must match WireType enum in google/protobuf/wire_format.h. 
 306  WIRETYPE_LENGTH_DELIMITED = 2 
 307  WIRETYPE_START_GROUP = 3 
308 309 310 -class _Printer(object):
311 """Text format printer for protocol message.""" 312
313 - def __init__(self, 314 out, 315 indent=0, 316 as_utf8=False, 317 as_one_line=False, 318 use_short_repeated_primitives=False, 319 pointy_brackets=False, 320 use_index_order=False, 321 float_format=None, 322 double_format=None, 323 use_field_number=False, 324 descriptor_pool=None, 325 message_formatter=None, 326 print_unknown_fields=False):
327 """Initialize the Printer. 328 329 Double values can be formatted compactly with 15 digits of precision 330 (which is the most that IEEE 754 "double" can guarantee) using 331 double_format='.15g'. To ensure that converting to text and back to a proto 332 will result in an identical value, double_format='.17g' should be used. 333 334 Args: 335 out: To record the text format result. 336 indent: The initial indent level for pretty print. 337 as_utf8: Return unescaped Unicode for non-ASCII characters. 338 In Python 3 actual Unicode characters may appear as is in strings. 339 In Python 2 the return value will be valid UTF-8 rather than ASCII. 340 as_one_line: Don't introduce newlines between fields. 341 use_short_repeated_primitives: Use short repeated format for primitives. 342 pointy_brackets: If True, use angle brackets instead of curly braces for 343 nesting. 344 use_index_order: If True, print fields of a proto message using the order 345 defined in source code instead of the field number. By default, use the 346 field number order. 347 float_format: If set, use this to specify float field formatting 348 (per the "Format Specification Mini-Language"); otherwise, 8 valid 349 digits is used (default '.8g'). Also affect double field if 350 double_format is not set but float_format is set. 351 double_format: If set, use this to specify double field formatting 352 (per the "Format Specification Mini-Language"); if it is not set but 353 float_format is set, use float_format. Otherwise, str() is used. 354 use_field_number: If True, print field numbers instead of names. 355 descriptor_pool: A DescriptorPool used to resolve Any types. 356 message_formatter: A function(message, indent, as_one_line): unicode|None 357 to custom format selected sub-messages (usually based on message type). 358 Use to pretty print parts of the protobuf for easier diffing. 359 print_unknown_fields: If True, unknown fields will be printed. 360 """ 361 self.out = out 362 self.indent = indent 363 self.as_utf8 = as_utf8 364 self.as_one_line = as_one_line 365 self.use_short_repeated_primitives = use_short_repeated_primitives 366 self.pointy_brackets = pointy_brackets 367 self.use_index_order = use_index_order 368 self.float_format = float_format 369 if double_format is not None: 370 self.double_format = double_format 371 else: 372 self.double_format = float_format 373 self.use_field_number = use_field_number 374 self.descriptor_pool = descriptor_pool 375 self.message_formatter = message_formatter 376 self.print_unknown_fields = print_unknown_fields
377
378 - def _TryPrintAsAnyMessage(self, message):
379 """Serializes if message is a google.protobuf.Any field.""" 380 if '/' not in message.type_url: 381 return False 382 packed_message = _BuildMessageFromTypeName(message.TypeName(), 383 self.descriptor_pool) 384 if packed_message: 385 packed_message.MergeFromString(message.value) 386 self.out.write('%s[%s] ' % (self.indent * ' ', message.type_url)) 387 self._PrintMessageFieldValue(packed_message) 388 self.out.write(' ' if self.as_one_line else '\n') 389 return True 390 else: 391 return False
392
393 - def _TryCustomFormatMessage(self, message):
394 formatted = self.message_formatter(message, self.indent, self.as_one_line) 395 if formatted is None: 396 return False 397 398 out = self.out 399 out.write(' ' * self.indent) 400 out.write(formatted) 401 out.write(' ' if self.as_one_line else '\n') 402 return True
403
404 - def PrintMessage(self, message):
405 """Convert protobuf message to text format. 406 407 Args: 408 message: The protocol buffers message. 409 """ 410 if self.message_formatter and self._TryCustomFormatMessage(message): 411 return 412 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and 413 self._TryPrintAsAnyMessage(message)): 414 return 415 fields = message.ListFields() 416 if self.use_index_order: 417 fields.sort( 418 key=lambda x: x[0].number if x[0].is_extension else x[0].index) 419 for field, value in fields: 420 if _IsMapEntry(field): 421 for key in sorted(value): 422 # This is slow for maps with submessage entries because it copies the 423 # entire tree. Unfortunately this would take significant refactoring 424 # of this file to work around. 425 # 426 # TODO(haberman): refactor and optimize if this becomes an issue. 427 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) 428 self.PrintField(field, entry_submsg) 429 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 430 if (self.use_short_repeated_primitives 431 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE 432 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING): 433 self._PrintShortRepeatedPrimitivesValue(field, value) 434 else: 435 for element in value: 436 self.PrintField(field, element) 437 else: 438 self.PrintField(field, value) 439 440 if self.print_unknown_fields: 441 self._PrintUnknownFields(message.UnknownFields())
442
443 - def _PrintUnknownFields(self, unknown_fields):
444 """Print unknown fields.""" 445 out = self.out 446 for field in unknown_fields: 447 out.write(' ' * self.indent) 448 out.write(str(field.field_number)) 449 if field.wire_type == WIRETYPE_START_GROUP: 450 if self.as_one_line: 451 out.write(' { ') 452 else: 453 out.write(' {\n') 454 self.indent += 2 455 456 self._PrintUnknownFields(field.data) 457 458 if self.as_one_line: 459 out.write('} ') 460 else: 461 self.indent -= 2 462 out.write(' ' * self.indent + '}\n') 463 elif field.wire_type == WIRETYPE_LENGTH_DELIMITED: 464 try: 465 # If this field is parseable as a Message, it is probably 466 # an embedded message. 467 # pylint: disable=protected-access 468 (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet( 469 memoryview(field.data), 0, len(field.data)) 470 except Exception: # pylint: disable=broad-except 471 pos = 0 472 473 if pos == len(field.data): 474 if self.as_one_line: 475 out.write(' { ') 476 else: 477 out.write(' {\n') 478 self.indent += 2 479 480 self._PrintUnknownFields(embedded_unknown_message) 481 482 if self.as_one_line: 483 out.write('} ') 484 else: 485 self.indent -= 2 486 out.write(' ' * self.indent + '}\n') 487 else: 488 # A string or bytes field. self.as_utf8 may not work. 489 out.write(': \"') 490 out.write(text_encoding.CEscape(field.data, False)) 491 out.write('\" ' if self.as_one_line else '\"\n') 492 else: 493 # varint, fixed32, fixed64 494 out.write(': ') 495 out.write(str(field.data)) 496 out.write(' ' if self.as_one_line else '\n')
497
498 - def _PrintFieldName(self, field):
499 """Print field name.""" 500 out = self.out 501 out.write(' ' * self.indent) 502 if self.use_field_number: 503 out.write(str(field.number)) 504 else: 505 if field.is_extension: 506 out.write('[') 507 if (field.containing_type.GetOptions().message_set_wire_format and 508 field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and 509 field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL): 510 out.write(field.message_type.full_name) 511 else: 512 out.write(field.full_name) 513 out.write(']') 514 elif field.type == descriptor.FieldDescriptor.TYPE_GROUP: 515 # For groups, use the capitalized name. 516 out.write(field.message_type.name) 517 else: 518 out.write(field.name) 519 520 if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 521 # The colon is optional in this case, but our cross-language golden files 522 # don't include it. 523 out.write(':')
524
 525    def PrintField(self, field, value): 
 526      """Print a single field name/value pair.""" 
 527      self._PrintFieldName(field) 
 528      self.out.write(' ') 
 529      self.PrintFieldValue(field, value) 
 530      self.out.write(' ' if self.as_one_line else '\n') 
 531   
 532    def _PrintShortRepeatedPrimitivesValue(self, field, value): 
 533      # Note: this is called only when value has at least one element. 
 534      self._PrintFieldName(field) 
 535      self.out.write(' [') 
 536      for i in six.moves.range(len(value) - 1): 
 537        self.PrintFieldValue(field, value[i]) 
 538        self.out.write(', ') 
 539      self.PrintFieldValue(field, value[-1]) 
 540      self.out.write(']') 
 541      self.out.write(' ' if self.as_one_line else '\n') 
542
 543    def _PrintMessageFieldValue(self, value): 
 544      if self.pointy_brackets: 
 545        openb = '<' 
 546        closeb = '>' 
 547      else: 
 548        openb = '{' 
 549        closeb = '}' 
 550   
 551      if self.as_one_line: 
 552        self.out.write('%s ' % openb) 
 553        self.PrintMessage(value) 
 554        self.out.write(closeb) 
 555      else: 
 556        self.out.write('%s\n' % openb) 
 557        self.indent += 2 
 558        self.PrintMessage(value) 
 559        self.indent -= 2 
 560        self.out.write(' ' * self.indent + closeb) 
561
562 - def PrintFieldValue(self, field, value):
563 """Print a single field value (not including name). 564 565 For repeated fields, the value should be a single element. 566 567 Args: 568 field: The descriptor of the field to be printed. 569 value: The value of the field. 570 """ 571 out = self.out 572 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 573 self._PrintMessageFieldValue(value) 574 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: 575 enum_value = field.enum_type.values_by_number.get(value, None) 576 if enum_value is not None: 577 out.write(enum_value.name) 578 else: 579 out.write(str(value)) 580 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: 581 out.write('\"') 582 if isinstance(value, six.text_type) and (six.PY2 or not self.as_utf8): 583 out_value = value.encode('utf-8') 584 else: 585 out_value = value 586 if field.type == descriptor.FieldDescriptor.TYPE_BYTES: 587 # We always need to escape all binary data in TYPE_BYTES fields. 588 out_as_utf8 = False 589 else: 590 out_as_utf8 = self.as_utf8 591 out.write(text_encoding.CEscape(out_value, out_as_utf8)) 592 out.write('\"') 593 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL: 594 if value: 595 out.write('true') 596 else: 597 out.write('false') 598 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT: 599 if self.float_format is not None: 600 out.write('{1:{0}}'.format(self.float_format, value)) 601 else: 602 out.write(str(float(format(value, '.8g')))) 603 elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and 604 self.double_format is not None): 605 out.write('{1:{0}}'.format(self.double_format, value)) 606 else: 607 out.write(str(value))
608
609 610 -def Parse(text, 611 message, 612 allow_unknown_extension=False, 613 allow_field_number=False, 614 descriptor_pool=None, 615 allow_unknown_field=False):
616 """Parses a text representation of a protocol message into a message. 617 618 NOTE: for historical reasons this function does not clear the input 619 message. This is different from what the binary msg.ParseFrom(...) does. 620 621 Example 622 a = MyProto() 623 a.repeated_field.append('test') 624 b = MyProto() 625 626 text_format.Parse(repr(a), b) 627 text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"] 628 629 # Binary version: 630 b.ParseFromString(a.SerializeToString()) # repeated_field is now "test" 631 632 Caller is responsible for clearing the message as needed. 633 634 Args: 635 text: Message text representation. 636 message: A protocol buffer message to merge into. 637 allow_unknown_extension: if True, skip over missing extensions and keep 638 parsing 639 allow_field_number: if True, both field number and field name are allowed. 640 descriptor_pool: A DescriptorPool used to resolve Any types. 641 allow_unknown_field: if True, skip over unknown field and keep 642 parsing. Avoid to use this option if possible. It may hide some 643 errors (e.g. spelling error on field name) 644 645 Returns: 646 The same message passed as argument. 647 648 Raises: 649 ParseError: On text parsing problems. 650 """ 651 return ParseLines(text.split(b'\n' if isinstance(text, bytes) else u'\n'), 652 message, 653 allow_unknown_extension, 654 allow_field_number, 655 descriptor_pool=descriptor_pool, 656 allow_unknown_field=allow_unknown_field)
657
658 659 -def Merge(text, 660 message, 661 allow_unknown_extension=False, 662 allow_field_number=False, 663 descriptor_pool=None, 664 allow_unknown_field=False):
665 """Parses a text representation of a protocol message into a message. 666 667 Like Parse(), but allows repeated values for a non-repeated field, and uses 668 the last one. 669 670 Args: 671 text: Message text representation. 672 message: A protocol buffer message to merge into. 673 allow_unknown_extension: if True, skip over missing extensions and keep 674 parsing 675 allow_field_number: if True, both field number and field name are allowed. 676 descriptor_pool: A DescriptorPool used to resolve Any types. 677 allow_unknown_field: if True, skip over unknown field and keep 678 parsing. Avoid to use this option if possible. It may hide some 679 errors (e.g. spelling error on field name) 680 681 Returns: 682 The same message passed as argument. 683 684 Raises: 685 ParseError: On text parsing problems. 686 """ 687 return MergeLines( 688 text.split(b'\n' if isinstance(text, bytes) else u'\n'), 689 message, 690 allow_unknown_extension, 691 allow_field_number, 692 descriptor_pool=descriptor_pool, 693 allow_unknown_field=allow_unknown_field)
694
695 696 -def ParseLines(lines, 697 message, 698 allow_unknown_extension=False, 699 allow_field_number=False, 700 descriptor_pool=None, 701 allow_unknown_field=False):
702 """Parses a text representation of a protocol message into a message. 703 704 Args: 705 lines: An iterable of lines of a message's text representation. 706 message: A protocol buffer message to merge into. 707 allow_unknown_extension: if True, skip over missing extensions and keep 708 parsing 709 allow_field_number: if True, both field number and field name are allowed. 710 descriptor_pool: A DescriptorPool used to resolve Any types. 711 allow_unknown_field: if True, skip over unknown field and keep 712 parsing. Avoid to use this option if possible. It may hide some 713 errors (e.g. spelling error on field name) 714 715 Returns: 716 The same message passed as argument. 717 718 Raises: 719 ParseError: On text parsing problems. 720 """ 721 parser = _Parser(allow_unknown_extension, 722 allow_field_number, 723 descriptor_pool=descriptor_pool, 724 allow_unknown_field=allow_unknown_field) 725 return parser.ParseLines(lines, message)
726
727 728 -def MergeLines(lines, 729 message, 730 allow_unknown_extension=False, 731 allow_field_number=False, 732 descriptor_pool=None, 733 allow_unknown_field=False):
734 """Parses a text representation of a protocol message into a message. 735 736 Like ParseLines(), but allows repeated values for a non-repeated field, and 737 uses the last one. 738 739 Args: 740 lines: An iterable of lines of a message's text representation. 741 message: A protocol buffer message to merge into. 742 allow_unknown_extension: if True, skip over missing extensions and keep 743 parsing 744 allow_field_number: if True, both field number and field name are allowed. 745 descriptor_pool: A DescriptorPool used to resolve Any types. 746 allow_unknown_field: if True, skip over unknown field and keep 747 parsing. Avoid to use this option if possible. It may hide some 748 errors (e.g. spelling error on field name) 749 750 Returns: 751 The same message passed as argument. 752 753 Raises: 754 ParseError: On text parsing problems. 755 """ 756 parser = _Parser(allow_unknown_extension, 757 allow_field_number, 758 descriptor_pool=descriptor_pool, 759 allow_unknown_field=allow_unknown_field) 760 return parser.MergeLines(lines, message)
761
 762   
 763  class _Parser(object): 
 764    """Text format parser for protocol message.""" 
 765   
 766    def __init__(self, 
 767                 allow_unknown_extension=False, 
 768                 allow_field_number=False, 
 769                 descriptor_pool=None, 
 770                 allow_unknown_field=False): 
 771      self.allow_unknown_extension = allow_unknown_extension 
 772      self.allow_field_number = allow_field_number 
 773      self.descriptor_pool = descriptor_pool 
 774      self.allow_unknown_field = allow_unknown_field 
 775   
 776    def ParseLines(self, lines, message): 
 777      """Parses a text representation of a protocol message into a message.""" 
 778      self._allow_multiple_scalars = False 
 779      self._ParseOrMerge(lines, message) 
 780      return message 
 781   
 782    def MergeLines(self, lines, message): 
 783      """Merges a text representation of a protocol message into a message.""" 
 784      self._allow_multiple_scalars = True 
 785      self._ParseOrMerge(lines, message) 
 786      return message 
787
788 - def _ParseOrMerge(self, lines, message):
789 """Converts a text representation of a protocol message into a message. 790 791 Args: 792 lines: Lines of a message's text representation. 793 message: A protocol buffer message to merge into. 794 795 Raises: 796 ParseError: On text parsing problems. 797 """ 798 # Tokenize expects native str lines. 799 if six.PY2: 800 str_lines = (line if isinstance(line, str) else line.encode('utf-8') 801 for line in lines) 802 else: 803 str_lines = (line if isinstance(line, str) else line.decode('utf-8') 804 for line in lines) 805 tokenizer = Tokenizer(str_lines) 806 while not tokenizer.AtEnd(): 807 self._MergeField(tokenizer, message)
808
809 - def _MergeField(self, tokenizer, message):
810 """Merges a single protocol message field into a message. 811 812 Args: 813 tokenizer: A tokenizer to parse the field name and values. 814 message: A protocol message to record the data. 815 816 Raises: 817 ParseError: In case of text parsing problems. 818 """ 819 message_descriptor = message.DESCRIPTOR 820 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and 821 tokenizer.TryConsume('[')): 822 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) 823 tokenizer.Consume(']') 824 tokenizer.TryConsume(':') 825 if tokenizer.TryConsume('<'): 826 expanded_any_end_token = '>' 827 else: 828 tokenizer.Consume('{') 829 expanded_any_end_token = '}' 830 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, 831 self.descriptor_pool) 832 if not expanded_any_sub_message: 833 raise ParseError('Type %s not found in descriptor pool' % 834 packed_type_name) 835 while not tokenizer.TryConsume(expanded_any_end_token): 836 if tokenizer.AtEnd(): 837 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % 838 (expanded_any_end_token,)) 839 self._MergeField(tokenizer, expanded_any_sub_message) 840 message.Pack(expanded_any_sub_message, 841 type_url_prefix=type_url_prefix) 842 return 843 844 if tokenizer.TryConsume('['): 845 name = [tokenizer.ConsumeIdentifier()] 846 while tokenizer.TryConsume('.'): 847 name.append(tokenizer.ConsumeIdentifier()) 848 name = '.'.join(name) 849 850 if not message_descriptor.is_extendable: 851 raise tokenizer.ParseErrorPreviousToken( 852 'Message type "%s" does not have extensions.' % 853 message_descriptor.full_name) 854 # pylint: disable=protected-access 855 field = message.Extensions._FindExtensionByName(name) 856 # pylint: enable=protected-access 857 if not field: 858 if self.allow_unknown_extension: 859 field = None 860 else: 861 raise tokenizer.ParseErrorPreviousToken( 862 'Extension "%s" not registered. ' 863 'Did you import the _pb2 module which defines it? ' 864 'If you are trying to place the extension in the MessageSet ' 865 'field of another message that is in an Any or MessageSet field, ' 866 'that message\'s _pb2 module must be imported as well' % name) 867 elif message_descriptor != field.containing_type: 868 raise tokenizer.ParseErrorPreviousToken( 869 'Extension "%s" does not extend message type "%s".' % 870 (name, message_descriptor.full_name)) 871 872 tokenizer.Consume(']') 873 874 else: 875 name = tokenizer.ConsumeIdentifierOrNumber() 876 if self.allow_field_number and name.isdigit(): 877 number = ParseInteger(name, True, True) 878 field = message_descriptor.fields_by_number.get(number, None) 879 if not field and message_descriptor.is_extendable: 880 field = message.Extensions._FindExtensionByNumber(number) 881 else: 882 field = message_descriptor.fields_by_name.get(name, None) 883 884 # Group names are expected to be capitalized as they appear in the 885 # .proto file, which actually matches their type names, not their field 886 # names. 887 if not field: 888 field = message_descriptor.fields_by_name.get(name.lower(), None) 889 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: 890 field = None 891 892 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and 893 field.message_type.name != name): 894 field = None 895 896 if not field and not self.allow_unknown_field: 897 raise tokenizer.ParseErrorPreviousToken( 898 'Message type "%s" has no field named "%s".' 
% 899 (message_descriptor.full_name, name)) 900 901 if field: 902 if not self._allow_multiple_scalars and field.containing_oneof: 903 # Check if there's a different field set in this oneof. 904 # Note that we ignore the case if the same field was set before, and we 905 # apply _allow_multiple_scalars to non-scalar fields as well. 906 which_oneof = message.WhichOneof(field.containing_oneof.name) 907 if which_oneof is not None and which_oneof != field.name: 908 raise tokenizer.ParseErrorPreviousToken( 909 'Field "%s" is specified along with field "%s", another member ' 910 'of oneof "%s" for message type "%s".' % 911 (field.name, which_oneof, field.containing_oneof.name, 912 message_descriptor.full_name)) 913 914 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 915 tokenizer.TryConsume(':') 916 merger = self._MergeMessageField 917 else: 918 tokenizer.Consume(':') 919 merger = self._MergeScalarField 920 921 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and 922 tokenizer.TryConsume('[')): 923 # Short repeated format, e.g. "foo: [1, 2, 3]" 924 if not tokenizer.TryConsume(']'): 925 while True: 926 merger(tokenizer, message, field) 927 if tokenizer.TryConsume(']'): 928 break 929 tokenizer.Consume(',') 930 931 else: 932 merger(tokenizer, message, field) 933 934 else: # Proto field is unknown. 935 assert (self.allow_unknown_extension or self.allow_unknown_field) 936 _SkipFieldContents(tokenizer) 937 938 # For historical reasons, fields may optionally be separated by commas or 939 # semicolons. 940 if not tokenizer.TryConsume(','): 941 tokenizer.TryConsume(';')
942
943 - def _ConsumeAnyTypeUrl(self, tokenizer):
944 """Consumes a google.protobuf.Any type URL and returns the type name.""" 945 # Consume "type.googleapis.com/". 946 prefix = [tokenizer.ConsumeIdentifier()] 947 tokenizer.Consume('.') 948 prefix.append(tokenizer.ConsumeIdentifier()) 949 tokenizer.Consume('.') 950 prefix.append(tokenizer.ConsumeIdentifier()) 951 tokenizer.Consume('/') 952 # Consume the fully-qualified type name. 953 name = [tokenizer.ConsumeIdentifier()] 954 while tokenizer.TryConsume('.'): 955 name.append(tokenizer.ConsumeIdentifier()) 956 return '.'.join(prefix), '.'.join(name)
957
958 - def _MergeMessageField(self, tokenizer, message, field):
959 """Merges a single scalar field into a message. 960 961 Args: 962 tokenizer: A tokenizer to parse the field value. 963 message: The message of which field is a member. 964 field: The descriptor of the field to be merged. 965 966 Raises: 967 ParseError: In case of text parsing problems. 968 """ 969 is_map_entry = _IsMapEntry(field) 970 971 if tokenizer.TryConsume('<'): 972 end_token = '>' 973 else: 974 tokenizer.Consume('{') 975 end_token = '}' 976 977 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 978 if field.is_extension: 979 sub_message = message.Extensions[field].add() 980 elif is_map_entry: 981 sub_message = getattr(message, field.name).GetEntryClass()() 982 else: 983 sub_message = getattr(message, field.name).add() 984 else: 985 if field.is_extension: 986 if (not self._allow_multiple_scalars and 987 message.HasExtension(field)): 988 raise tokenizer.ParseErrorPreviousToken( 989 'Message type "%s" should not have multiple "%s" extensions.' % 990 (message.DESCRIPTOR.full_name, field.full_name)) 991 sub_message = message.Extensions[field] 992 else: 993 # Also apply _allow_multiple_scalars to message field. 994 # TODO(jieluo): Change to _allow_singular_overwrites. 995 if (not self._allow_multiple_scalars and 996 message.HasField(field.name)): 997 raise tokenizer.ParseErrorPreviousToken( 998 'Message type "%s" should not have multiple "%s" fields.' % 999 (message.DESCRIPTOR.full_name, field.name)) 1000 sub_message = getattr(message, field.name) 1001 sub_message.SetInParent() 1002 1003 while not tokenizer.TryConsume(end_token): 1004 if tokenizer.AtEnd(): 1005 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,)) 1006 self._MergeField(tokenizer, sub_message) 1007 1008 if is_map_entry: 1009 value_cpptype = field.message_type.fields_by_name['value'].cpp_type 1010 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 1011 value = getattr(message, field.name)[sub_message.key] 1012 value.MergeFrom(sub_message.value) 1013 else: 1014 getattr(message, field.name)[sub_message.key] = sub_message.value
1015   
1016    @staticmethod 
1017    def _IsProto3Syntax(message): 
1018      message_descriptor = message.DESCRIPTOR 
1019      return (hasattr(message_descriptor, 'syntax') and 
1020              message_descriptor.syntax == 'proto3') 
1021
1022 - def _MergeScalarField(self, tokenizer, message, field):
1023 """Merges a single scalar field into a message. 1024 1025 Args: 1026 tokenizer: A tokenizer to parse the field value. 1027 message: A protocol message to record the data. 1028 field: The descriptor of the field to be merged. 1029 1030 Raises: 1031 ParseError: In case of text parsing problems. 1032 RuntimeError: On runtime errors. 1033 """ 1034 _ = self.allow_unknown_extension 1035 value = None 1036 1037 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 1038 descriptor.FieldDescriptor.TYPE_SINT32, 1039 descriptor.FieldDescriptor.TYPE_SFIXED32): 1040 value = _ConsumeInt32(tokenizer) 1041 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 1042 descriptor.FieldDescriptor.TYPE_SINT64, 1043 descriptor.FieldDescriptor.TYPE_SFIXED64): 1044 value = _ConsumeInt64(tokenizer) 1045 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 1046 descriptor.FieldDescriptor.TYPE_FIXED32): 1047 value = _ConsumeUint32(tokenizer) 1048 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 1049 descriptor.FieldDescriptor.TYPE_FIXED64): 1050 value = _ConsumeUint64(tokenizer) 1051 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 1052 descriptor.FieldDescriptor.TYPE_DOUBLE): 1053 value = tokenizer.ConsumeFloat() 1054 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 1055 value = tokenizer.ConsumeBool() 1056 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 1057 value = tokenizer.ConsumeString() 1058 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 1059 value = tokenizer.ConsumeByteString() 1060 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 1061 value = tokenizer.ConsumeEnum(field) 1062 else: 1063 raise RuntimeError('Unknown field type %d' % field.type) 1064 1065 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 1066 if field.is_extension: 1067 message.Extensions[field].append(value) 1068 else: 1069 getattr(message, field.name).append(value) 1070 else: 1071 if field.is_extension: 1072 if (not self._allow_multiple_scalars and 1073 not self._IsProto3Syntax(message) and 1074 message.HasExtension(field)): 1075 raise tokenizer.ParseErrorPreviousToken( 1076 'Message type "%s" should not have multiple "%s" extensions.' % 1077 (message.DESCRIPTOR.full_name, field.full_name)) 1078 else: 1079 message.Extensions[field] = value 1080 else: 1081 duplicate_error = False 1082 if not self._allow_multiple_scalars: 1083 if self._IsProto3Syntax(message): 1084 # Proto3 doesn't represent presence so we try best effort to check 1085 # multiple scalars by compare to default values. 1086 duplicate_error = bool(getattr(message, field.name)) 1087 else: 1088 duplicate_error = message.HasField(field.name) 1089 1090 if duplicate_error: 1091 raise tokenizer.ParseErrorPreviousToken( 1092 'Message type "%s" should not have multiple "%s" fields.' % 1093 (message.DESCRIPTOR.full_name, field.name)) 1094 else: 1095 setattr(message, field.name, value)
1096
1097 1098 -def _SkipFieldContents(tokenizer):
1099 """Skips over contents (value or message) of a field. 1100 1101 Args: 1102 tokenizer: A tokenizer to parse the field name and values. 1103 """ 1104 # Try to guess the type of this field. 1105 # If this field is not a message, there should be a ":" between the 1106 # field name and the field value and also the field value should not 1107 # start with "{" or "<" which indicates the beginning of a message body. 1108 # If there is no ":" or there is a "{" or "<" after ":", this field has 1109 # to be a message or the input is ill-formed. 1110 if tokenizer.TryConsume(':') and not tokenizer.LookingAt( 1111 '{') and not tokenizer.LookingAt('<'): 1112 _SkipFieldValue(tokenizer) 1113 else: 1114 _SkipFieldMessage(tokenizer)
1115
1116 1117 -def _SkipField(tokenizer):
1118 """Skips over a complete field (name and value/message). 1119 1120 Args: 1121 tokenizer: A tokenizer to parse the field name and values. 1122 """ 1123 if tokenizer.TryConsume('['): 1124 # Consume extension name. 1125 tokenizer.ConsumeIdentifier() 1126 while tokenizer.TryConsume('.'): 1127 tokenizer.ConsumeIdentifier() 1128 tokenizer.Consume(']') 1129 else: 1130 tokenizer.ConsumeIdentifierOrNumber() 1131 1132 _SkipFieldContents(tokenizer) 1133 1134 # For historical reasons, fields may optionally be separated by commas or 1135 # semicolons. 1136 if not tokenizer.TryConsume(','): 1137 tokenizer.TryConsume(';')
1138
1139 1140 -def _SkipFieldMessage(tokenizer):
1141 """Skips over a field message. 1142 1143 Args: 1144 tokenizer: A tokenizer to parse the field name and values. 1145 """ 1146 1147 if tokenizer.TryConsume('<'): 1148 delimiter = '>' 1149 else: 1150 tokenizer.Consume('{') 1151 delimiter = '}' 1152 1153 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 1154 _SkipField(tokenizer) 1155 1156 tokenizer.Consume(delimiter)
1157
1158 1159 -def _SkipFieldValue(tokenizer):
1160 """Skips over a field value. 1161 1162 Args: 1163 tokenizer: A tokenizer to parse the field name and values. 1164 1165 Raises: 1166 ParseError: In case an invalid field value is found. 1167 """ 1168 # String/bytes tokens can come in multiple adjacent string literals. 1169 # If we can consume one, consume as many as we can. 1170 if tokenizer.TryConsumeByteString(): 1171 while tokenizer.TryConsumeByteString(): 1172 pass 1173 return 1174 1175 if (not tokenizer.TryConsumeIdentifier() and 1176 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and 1177 not tokenizer.TryConsumeFloat()): 1178 raise ParseError('Invalid field value: ' + tokenizer.token)
1179
1180 1181 -class Tokenizer(object):
1182 """Protocol buffer text representation tokenizer. 1183 1184 This class handles the lower level string parsing by splitting it into 1185 meaningful tokens. 1186 1187 It was directly ported from the Java protocol buffer API. 1188 """ 1189 1190 _WHITESPACE = re.compile(r'\s+') 1191 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE) 1192 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE) 1193 _TOKEN = re.compile('|'.join([ 1194 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier 1195 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number 1196 ] + [ # quoted str for each quote mark 1197 # Avoid backtracking! https://stackoverflow.com/a/844267 1198 r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark) 1199 for mark in _QUOTES 1200 ])) 1201 1202 _IDENTIFIER = re.compile(r'[^\d\W]\w*') 1203 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+') 1204
1205 - def __init__(self, lines, skip_comments=True):
1206 self._position = 0 1207 self._line = -1 1208 self._column = 0 1209 self._token_start = None 1210 self.token = '' 1211 self._lines = iter(lines) 1212 self._current_line = '' 1213 self._previous_line = 0 1214 self._previous_column = 0 1215 self._more_lines = True 1216 self._skip_comments = skip_comments 1217 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT 1218 or self._WHITESPACE) 1219 self._SkipWhitespace() 1220 self.NextToken()
1221
1222 - def LookingAt(self, token):
1223 return self.token == token
1224
1225 - def AtEnd(self):
1226 """Checks the end of the text was reached. 1227 1228 Returns: 1229 True iff the end was reached. 1230 """ 1231 return not self.token
1232
1233 - def _PopLine(self):
1234 while len(self._current_line) <= self._column: 1235 try: 1236 self._current_line = next(self._lines) 1237 except StopIteration: 1238 self._current_line = '' 1239 self._more_lines = False 1240 return 1241 else: 1242 self._line += 1 1243 self._column = 0
1244
1245 - def _SkipWhitespace(self):
1246 while True: 1247 self._PopLine() 1248 match = self._whitespace_pattern.match(self._current_line, self._column) 1249 if not match: 1250 break 1251 length = len(match.group(0)) 1252 self._column += length
1253
1254 - def TryConsume(self, token):
1255 """Tries to consume a given piece of text. 1256 1257 Args: 1258 token: Text to consume. 1259 1260 Returns: 1261 True iff the text was consumed. 1262 """ 1263 if self.token == token: 1264 self.NextToken() 1265 return True 1266 return False
1267
1268 - def Consume(self, token):
1269 """Consumes a piece of text. 1270 1271 Args: 1272 token: Text to consume. 1273 1274 Raises: 1275 ParseError: If the text couldn't be consumed. 1276 """ 1277 if not self.TryConsume(token): 1278 raise self.ParseError('Expected "%s".' % token)
1279
1280 - def ConsumeComment(self):
1281 result = self.token 1282 if not self._COMMENT.match(result): 1283 raise self.ParseError('Expected comment.') 1284 self.NextToken() 1285 return result
1286
1288 """Consumes a comment, returns a 2-tuple (trailing bool, comment str).""" 1289 1290 # Tokenizer initializes _previous_line and _previous_column to 0. As the 1291 # tokenizer starts, it looks like there is a previous token on the line. 1292 just_started = self._line == 0 and self._column == 0 1293 1294 before_parsing = self._previous_line 1295 comment = self.ConsumeComment() 1296 1297 # A trailing comment is a comment on the same line than the previous token. 1298 trailing = (self._previous_line == before_parsing 1299 and not just_started) 1300 1301 return trailing, comment
1302
1303 - def TryConsumeIdentifier(self):
1304 try: 1305 self.ConsumeIdentifier() 1306 return True 1307 except ParseError: 1308 return False
1309
1310 - def ConsumeIdentifier(self):
1311 """Consumes protocol message field identifier. 1312 1313 Returns: 1314 Identifier string. 1315 1316 Raises: 1317 ParseError: If an identifier couldn't be consumed. 1318 """ 1319 result = self.token 1320 if not self._IDENTIFIER.match(result): 1321 raise self.ParseError('Expected identifier.') 1322 self.NextToken() 1323 return result
1324
1325    def TryConsumeIdentifierOrNumber(self): 
1326      try: 
1327        self.ConsumeIdentifierOrNumber() 
1328        return True 
1329      except ParseError: 
1330        return False 
1331
1332 - def ConsumeIdentifierOrNumber(self):
1333 """Consumes protocol message field identifier. 1334 1335 Returns: 1336 Identifier string. 1337 1338 Raises: 1339 ParseError: If an identifier couldn't be consumed. 1340 """ 1341 result = self.token 1342 if not self._IDENTIFIER_OR_NUMBER.match(result): 1343 raise self.ParseError('Expected identifier or number, got %s.' % result) 1344 self.NextToken() 1345 return result
1346
1347 - def TryConsumeInteger(self):
1348 try: 1349 # Note: is_long only affects value type, not whether an error is raised. 1350 self.ConsumeInteger() 1351 return True 1352 except ParseError: 1353 return False
1354
1355 - def ConsumeInteger(self, is_long=False):
1356 """Consumes an integer number. 1357 1358 Args: 1359 is_long: True if the value should be returned as a long integer. 1360 Returns: 1361 The integer parsed. 1362 1363 Raises: 1364 ParseError: If an integer couldn't be consumed. 1365 """ 1366 try: 1367 result = _ParseAbstractInteger(self.token, is_long=is_long) 1368 except ValueError as e: 1369 raise self.ParseError(str(e)) 1370 self.NextToken() 1371 return result
1372
1373 - def TryConsumeFloat(self):
1374 try: 1375 self.ConsumeFloat() 1376 return True 1377 except ParseError: 1378 return False
1379
1380    def ConsumeFloat(self): 
1381      """Consumes a floating point number. 
1382   
1383      Returns: 
1384        The number parsed. 
1385   
1386      Raises: 
1387        ParseError: If a floating point number couldn't be consumed. 
1388      """ 
1389      try: 
1390        result = ParseFloat(self.token) 
1391      except ValueError as e: 
1392        raise self.ParseError(str(e)) 
1393      self.NextToken() 
1394      return result 
1395
1396 - def ConsumeBool(self):
1397 """Consumes a boolean value. 1398 1399 Returns: 1400 The bool parsed. 1401 1402 Raises: 1403 ParseError: If a boolean value couldn't be consumed. 1404 """ 1405 try: 1406 result = ParseBool(self.token) 1407 except ValueError as e: 1408 raise self.ParseError(str(e)) 1409 self.NextToken() 1410 return result
1411
1412 - def TryConsumeByteString(self):
1413 try: 1414 self.ConsumeByteString() 1415 return True 1416 except ParseError: 1417 return False
1418
1419 - def ConsumeString(self):
1420 """Consumes a string value. 1421 1422 Returns: 1423 The string parsed. 1424 1425 Raises: 1426 ParseError: If a string value couldn't be consumed. 1427 """ 1428 the_bytes = self.ConsumeByteString() 1429 try: 1430 return six.text_type(the_bytes, 'utf-8') 1431 except UnicodeDecodeError as e: 1432 raise self._StringParseError(e)
1433
1434 - def ConsumeByteString(self):
1435 """Consumes a byte array value. 1436 1437 Returns: 1438 The array parsed (as a string). 1439 1440 Raises: 1441 ParseError: If a byte array value couldn't be consumed. 1442 """ 1443 the_list = [self._ConsumeSingleByteString()] 1444 while self.token and self.token[0] in _QUOTES: 1445 the_list.append(self._ConsumeSingleByteString()) 1446 return b''.join(the_list)
1447
1448 - def _ConsumeSingleByteString(self):
1449 """Consume one token of a string literal. 1450 1451 String literals (whether bytes or text) can come in multiple adjacent 1452 tokens which are automatically concatenated, like in C or Python. This 1453 method only consumes one token. 1454 1455 Returns: 1456 The token parsed. 1457 Raises: 1458 ParseError: When the wrong format data is found. 1459 """ 1460 text = self.token 1461 if len(text) < 1 or text[0] not in _QUOTES: 1462 raise self.ParseError('Expected string but found: %r' % (text,)) 1463 1464 if len(text) < 2 or text[-1] != text[0]: 1465 raise self.ParseError('String missing ending quote: %r' % (text,)) 1466 1467 try: 1468 result = text_encoding.CUnescape(text[1:-1]) 1469 except ValueError as e: 1470 raise self.ParseError(str(e)) 1471 self.NextToken() 1472 return result
1473
1474 - def ConsumeEnum(self, field):
1475 try: 1476 result = ParseEnum(field, self.token) 1477 except ValueError as e: 1478 raise self.ParseError(str(e)) 1479 self.NextToken() 1480 return result
1481
1482 - def ParseErrorPreviousToken(self, message):
1483 """Creates and *returns* a ParseError for the previously read token. 1484 1485 Args: 1486 message: A message to set for the exception. 1487 1488 Returns: 1489 A ParseError instance. 1490 """ 1491 return ParseError(message, self._previous_line + 1, 1492 self._previous_column + 1)
1493
1494 - def ParseError(self, message):
1495 """Creates and *returns* a ParseError for the current token.""" 1496 return ParseError('\'' + self._current_line + '\': ' + message, 1497 self._line + 1, self._column + 1)
1498
1499    def _StringParseError(self, e): 
1500      return self.ParseError('Couldn\'t parse string: ' + str(e)) 
1501
1502 - def NextToken(self):
1503 """Reads the next meaningful token.""" 1504 self._previous_line = self._line 1505 self._previous_column = self._column 1506 1507 self._column += len(self.token) 1508 self._SkipWhitespace() 1509 1510 if not self._more_lines: 1511 self.token = '' 1512 return 1513 1514 match = self._TOKEN.match(self._current_line, self._column) 1515 if not match and not self._skip_comments: 1516 match = self._COMMENT.match(self._current_line, self._column) 1517 if match: 1518 token = match.group(0) 1519 self.token = token 1520 else: 1521 self.token = self._current_line[self._column]
1522 1523 # Aliased so it can still be accessed by current visibility violators. 1524 # TODO(dbarnett): Migrate violators to textformat_tokenizer. 1525 _Tokenizer = Tokenizer # pylint: disable=invalid-name
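For reference, a minimal sketch of driving the Tokenizer directly; it is primarily an implementation detail of the parser, so treat this as illustrative only:

  tokenizer = text_format.Tokenizer(['foo: 42  # trailing comment'])
  tokenizer.ConsumeIdentifier()    # 'foo'
  tokenizer.Consume(':')
  tokenizer.ConsumeInteger()       # 42
  tokenizer.AtEnd()                # True; comments are skipped by default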
1526 1527 1528 -def _ConsumeInt32(tokenizer):
1529 """Consumes a signed 32bit integer number from tokenizer. 1530 1531 Args: 1532 tokenizer: A tokenizer used to parse the number. 1533 1534 Returns: 1535 The integer parsed. 1536 1537 Raises: 1538 ParseError: If a signed 32bit integer couldn't be consumed. 1539 """ 1540 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
1541
1542 1543 -def _ConsumeUint32(tokenizer):
1544 """Consumes an unsigned 32bit integer number from tokenizer. 1545 1546 Args: 1547 tokenizer: A tokenizer used to parse the number. 1548 1549 Returns: 1550 The integer parsed. 1551 1552 Raises: 1553 ParseError: If an unsigned 32bit integer couldn't be consumed. 1554 """ 1555 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1556
1557 1558 -def _TryConsumeInt64(tokenizer):
1559 try: 1560 _ConsumeInt64(tokenizer) 1561 return True 1562 except ParseError: 1563 return False
1564
1565   
1566  def _ConsumeInt64(tokenizer): 
1567    """Consumes a signed 64bit integer number from tokenizer. 
1568   
1569    Args: 
1570      tokenizer: A tokenizer used to parse the number. 
1571   
1572    Returns: 
1573      The integer parsed. 
1574   
1575    Raises: 
1576      ParseError: If a signed 64bit integer couldn't be consumed. 
1577    """ 
1578    return _ConsumeInteger(tokenizer, is_signed=True, is_long=True) 
1579
1580 1581 -def _TryConsumeUint64(tokenizer):
1582 try: 1583 _ConsumeUint64(tokenizer) 1584 return True 1585 except ParseError: 1586 return False
1587
1588 1589 -def _ConsumeUint64(tokenizer):
1590 """Consumes an unsigned 64bit integer number from tokenizer. 1591 1592 Args: 1593 tokenizer: A tokenizer used to parse the number. 1594 1595 Returns: 1596 The integer parsed. 1597 1598 Raises: 1599 ParseError: If an unsigned 64bit integer couldn't be consumed. 1600 """ 1601 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
1602
1603 1604 -def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
1605 try: 1606 _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long) 1607 return True 1608 except ParseError: 1609 return False
1610
1611 1612 -def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
1613 """Consumes an integer number from tokenizer. 1614 1615 Args: 1616 tokenizer: A tokenizer used to parse the number. 1617 is_signed: True if a signed integer must be parsed. 1618 is_long: True if a long integer must be parsed. 1619 1620 Returns: 1621 The integer parsed. 1622 1623 Raises: 1624 ParseError: If an integer with given characteristics couldn't be consumed. 1625 """ 1626 try: 1627 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long) 1628 except ValueError as e: 1629 raise tokenizer.ParseError(str(e)) 1630 tokenizer.NextToken() 1631 return result
1632
1633   
1634  def ParseInteger(text, is_signed=False, is_long=False): 
1635    """Parses an integer. 
1636   
1637    Args: 
1638      text: The text to parse. 
1639      is_signed: True if a signed integer must be parsed. 
1640      is_long: True if a long integer must be parsed. 
1641   
1642    Returns: 
1643      The integer value. 
1644   
1645    Raises: 
1646      ValueError: If the text is not a valid integer. 
1647    """ 
1648    # Do the actual parsing. Exception handling is propagated to caller. 
1649    result = _ParseAbstractInteger(text, is_long=is_long) 
1650   
1651    # Check if the integer is sane. Exceptions handled by callers. 
1652    checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] 
1653    checker.CheckValue(result) 
1654    return result 
1655
1656 1657 -def _ParseAbstractInteger(text, is_long=False):
1658 """Parses an integer without checking size/signedness. 1659 1660 Args: 1661 text: The text to parse. 1662 is_long: True if the value should be returned as a long integer. 1663 1664 Returns: 1665 The integer value. 1666 1667 Raises: 1668 ValueError: Thrown Iff the text is not a valid integer. 1669 """ 1670 # Do the actual parsing. Exception handling is propagated to caller. 1671 orig_text = text 1672 c_octal_match = re.match(r'(-?)0(\d+)$', text) 1673 if c_octal_match: 1674 # Python 3 no longer supports 0755 octal syntax without the 'o', so 1675 # we always use the '0o' prefix for multi-digit numbers starting with 0. 1676 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 1677 try: 1678 # We force 32-bit values to int and 64-bit values to long to make 1679 # alternate implementations where the distinction is more significant 1680 # (e.g. the C++ implementation) simpler. 1681 if is_long: 1682 return long(text, 0) 1683 else: 1684 return int(text, 0) 1685 except ValueError: 1686 raise ValueError('Couldn\'t parse integer: %s' % orig_text)
1687
1688 1689 -def ParseFloat(text):
1690 """Parse a floating point number. 1691 1692 Args: 1693 text: Text to parse. 1694 1695 Returns: 1696 The number parsed. 1697 1698 Raises: 1699 ValueError: If a floating point number couldn't be parsed. 1700 """ 1701 try: 1702 # Assume Python compatible syntax. 1703 return float(text) 1704 except ValueError: 1705 # Check alternative spellings. 1706 if _FLOAT_INFINITY.match(text): 1707 if text[0] == '-': 1708 return float('-inf') 1709 else: 1710 return float('inf') 1711 elif _FLOAT_NAN.match(text): 1712 return float('nan') 1713 else: 1714 # assume '1.0f' format 1715 try: 1716 return float(text.rstrip('f')) 1717 except ValueError: 1718 raise ValueError('Couldn\'t parse float: %s' % text)
1719
1720   
1721  def ParseBool(text): 
1722    """Parse a boolean value. 
1723   
1724    Args: 
1725      text: Text to parse. 
1726   
1727    Returns: 
1728      The boolean value parsed. 
1729   
1730    Raises: 
1731      ValueError: If text is not a valid boolean. 
1732    """ 
1733    if text in ('true', 't', '1', 'True'): 
1734      return True 
1735    elif text in ('false', 'f', '0', 'False'): 
1736      return False 
1737    else: 
1738      raise ValueError('Expected "true" or "false".') 
1739
1740 1741 -def ParseEnum(field, value):
1742 """Parse an enum value. 1743 1744 The value can be specified by a number (the enum value), or by 1745 a string literal (the enum name). 1746 1747 Args: 1748 field: Enum field descriptor. 1749 value: String value. 1750 1751 Returns: 1752 Enum value number. 1753 1754 Raises: 1755 ValueError: If the enum value could not be parsed. 1756 """ 1757 enum_descriptor = field.enum_type 1758 try: 1759 number = int(value, 0) 1760 except ValueError: 1761 # Identifier. 1762 enum_value = enum_descriptor.values_by_name.get(value, None) 1763 if enum_value is None: 1764 raise ValueError('Enum type "%s" has no value named %s.' % 1765 (enum_descriptor.full_name, value)) 1766 else: 1767 # Numeric value. 1768 if hasattr(field.file, 'syntax'): 1769 # Attribute is checked for compatibility. 1770 if field.file.syntax == 'proto3': 1771 # Proto3 accept numeric unknown enums. 1772 return number 1773 enum_value = enum_descriptor.values_by_number.get(number, None) 1774 if enum_value is None: 1775 raise ValueError('Enum type "%s" has no value with number %d.' % 1776 (enum_descriptor.full_name, number)) 1777 return enum_value.number
1778