1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """Contains routines for printing protocol messages in text format.
32
33 Simple usage example:
34
35 # Create a proto object and serialize it to a text proto string.
36 message = my_proto_pb2.MyMessage(foo='bar')
37 text_proto = text_format.MessageToString(message)
38
39 # Parse a text proto string.
40 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
41 """
42
43 __author__ = 'kenton@google.com (Kenton Varda)'
44
45
46 import encodings.raw_unicode_escape
47 import encodings.unicode_escape
48 import io
49 import re
50
51 import six
52
53 if six.PY3:
54 long = int
55
56
57 from google.protobuf.internal import decoder
58 from google.protobuf.internal import type_checkers
59 from google.protobuf import descriptor
60 from google.protobuf import text_encoding
61
62 __all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
63 'PrintFieldValue', 'Merge', 'MessageToBytes']
64
65 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
66 type_checkers.Int32ValueChecker(),
67 type_checkers.Uint64ValueChecker(),
68 type_checkers.Int64ValueChecker())
69 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
70 _FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
71 _QUOTES = frozenset(("'", '"'))
72 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
73
74
75 -class Error(Exception):
76 """Top-level module error for text_format."""
77
80 """Thrown in case of text parsing or tokenizing error."""
81
82 - def __init__(self, message=None, line=None, column=None):
94
97
100
101
102 -class TextWriter(object):
103
104 - def __init__(self, as_utf8):
105 if six.PY2:
106 self._writer = io.BytesIO()
107 else:
108 self._writer = io.StringIO()
109
110 - def write(self, val):
111 if six.PY2:
112 if isinstance(val, six.text_type):
113 val = val.encode('utf-8')
114 return self._writer.write(val)
115
117 return self._writer.close()
118
119 - def getvalue(self):
120 return self._writer.getvalue()
121
122
def MessageToString(message,
                    as_utf8=False,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    use_field_number=False,
                    descriptor_pool=None,
                    indent=0,
                    message_formatter=None,
                    print_unknown_fields=False):

  """Converts a protobuf message to the text format and returns it.

  Double values can be rendered compactly with 15 digits of precision (the
  most IEEE 754 "double" guarantees) via double_format='.15g'. For a
  round-trippable text representation use double_format='.17g' instead.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
      In Python 3 actual Unicode characters may appear as is in strings.
      In Python 2 the return value will be valid UTF-8 rather than only ASCII.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number, extensions
      will be printed at the end of the message and their relative order is
      determined by the extension number. By default, use the field number
      order.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, 8 valid
      digits is used (default '.8g'). Also affects double fields when
      double_format is not set but float_format is set.
    double_format: If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, use str()
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    indent: The initial indent level, in terms of spaces, for pretty print.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  printer = _Printer(writer, indent, as_utf8, as_one_line,
                     use_short_repeated_primitives, pointy_brackets,
                     use_index_order, float_format, double_format,
                     use_field_number, descriptor_pool, message_formatter,
                     print_unknown_fields=print_unknown_fields)
  printer.PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # The one-line form leaves a trailing separator space; strip it off.
  return text.rstrip() if as_one_line else text
189
192
193 """Convert protobuf message to encoded text format. See MessageToString."""
194 text = MessageToString(message, **kwargs)
195 if isinstance(text, bytes):
196 return text
197 codec = 'utf-8' if kwargs.get('as_utf8') else 'ascii'
198 return text.encode(codec)
199
200
def _IsMapEntry(field):
  """Returns True iff the field is a synthesized map-entry message field."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  options = field.message_type.GetOptions() if field.message_type.has_options else None
  return bool(options and options.map_entry)
205
206
def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False):
  """Writes the text-format representation of message to the out stream.

  See MessageToString for the meaning of the formatting options.
  """
  # Arguments are passed positionally in _Printer.__init__ declaration order.
  printer = _Printer(out, indent, as_utf8, as_one_line,
                     use_short_repeated_primitives, pointy_brackets,
                     use_index_order, float_format, double_format,
                     use_field_number, descriptor_pool, message_formatter,
                     print_unknown_fields)
  printer.PrintMessage(message)
234
235
def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False):
  """Print a single field name/value pair."""
  # Note: use_field_number and descriptor_pool keep their _Printer defaults.
  printer = _Printer(out=out,
                     indent=indent,
                     as_utf8=as_utf8,
                     as_one_line=as_one_line,
                     use_short_repeated_primitives=use_short_repeated_primitives,
                     pointy_brackets=pointy_brackets,
                     use_index_order=use_index_order,
                     float_format=float_format,
                     double_format=double_format,
                     message_formatter=message_formatter,
                     print_unknown_fields=print_unknown_fields)
  printer.PrintField(field, value)
256
257
def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False):
  """Print a single field value (not including name)."""
  # Note: use_field_number and descriptor_pool keep their _Printer defaults.
  printer = _Printer(out=out,
                     indent=indent,
                     as_utf8=as_utf8,
                     as_one_line=as_one_line,
                     use_short_repeated_primitives=use_short_repeated_primitives,
                     pointy_brackets=pointy_brackets,
                     use_index_order=use_index_order,
                     float_format=float_format,
                     double_format=double_format,
                     message_formatter=message_formatter,
                     print_unknown_fields=print_unknown_fields)
  printer.PrintFieldValue(field, value)
278
303
304
305
# Wire-type tags from the protobuf binary wire format.  Unknown fields carry
# only a field number, a wire type, and raw data, so the printer switches on
# these values to decide how to render each unknown field.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3
311 """Text format printer for protocol message."""
312
313 - def __init__(self,
314 out,
315 indent=0,
316 as_utf8=False,
317 as_one_line=False,
318 use_short_repeated_primitives=False,
319 pointy_brackets=False,
320 use_index_order=False,
321 float_format=None,
322 double_format=None,
323 use_field_number=False,
324 descriptor_pool=None,
325 message_formatter=None,
326 print_unknown_fields=False):
327 """Initialize the Printer.
328
329 Double values can be formatted compactly with 15 digits of precision
330 (which is the most that IEEE 754 "double" can guarantee) using
331 double_format='.15g'. To ensure that converting to text and back to a proto
332 will result in an identical value, double_format='.17g' should be used.
333
334 Args:
335 out: To record the text format result.
336 indent: The initial indent level for pretty print.
337 as_utf8: Return unescaped Unicode for non-ASCII characters.
338 In Python 3 actual Unicode characters may appear as is in strings.
339 In Python 2 the return value will be valid UTF-8 rather than ASCII.
340 as_one_line: Don't introduce newlines between fields.
341 use_short_repeated_primitives: Use short repeated format for primitives.
342 pointy_brackets: If True, use angle brackets instead of curly braces for
343 nesting.
344 use_index_order: If True, print fields of a proto message using the order
345 defined in source code instead of the field number. By default, use the
346 field number order.
347 float_format: If set, use this to specify float field formatting
348 (per the "Format Specification Mini-Language"); otherwise, 8 valid
349 digits is used (default '.8g'). Also affect double field if
350 double_format is not set but float_format is set.
351 double_format: If set, use this to specify double field formatting
352 (per the "Format Specification Mini-Language"); if it is not set but
353 float_format is set, use float_format. Otherwise, str() is used.
354 use_field_number: If True, print field numbers instead of names.
355 descriptor_pool: A DescriptorPool used to resolve Any types.
356 message_formatter: A function(message, indent, as_one_line): unicode|None
357 to custom format selected sub-messages (usually based on message type).
358 Use to pretty print parts of the protobuf for easier diffing.
359 print_unknown_fields: If True, unknown fields will be printed.
360 """
361 self.out = out
362 self.indent = indent
363 self.as_utf8 = as_utf8
364 self.as_one_line = as_one_line
365 self.use_short_repeated_primitives = use_short_repeated_primitives
366 self.pointy_brackets = pointy_brackets
367 self.use_index_order = use_index_order
368 self.float_format = float_format
369 if double_format is not None:
370 self.double_format = double_format
371 else:
372 self.double_format = float_format
373 self.use_field_number = use_field_number
374 self.descriptor_pool = descriptor_pool
375 self.message_formatter = message_formatter
376 self.print_unknown_fields = print_unknown_fields
377
392
403
442
444 """Print unknown fields."""
445 out = self.out
446 for field in unknown_fields:
447 out.write(' ' * self.indent)
448 out.write(str(field.field_number))
449 if field.wire_type == WIRETYPE_START_GROUP:
450 if self.as_one_line:
451 out.write(' { ')
452 else:
453 out.write(' {\n')
454 self.indent += 2
455
456 self._PrintUnknownFields(field.data)
457
458 if self.as_one_line:
459 out.write('} ')
460 else:
461 self.indent -= 2
462 out.write(' ' * self.indent + '}\n')
463 elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
464 try:
465
466
467
468 (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
469 memoryview(field.data), 0, len(field.data))
470 except Exception:
471 pos = 0
472
473 if pos == len(field.data):
474 if self.as_one_line:
475 out.write(' { ')
476 else:
477 out.write(' {\n')
478 self.indent += 2
479
480 self._PrintUnknownFields(embedded_unknown_message)
481
482 if self.as_one_line:
483 out.write('} ')
484 else:
485 self.indent -= 2
486 out.write(' ' * self.indent + '}\n')
487 else:
488
489 out.write(': \"')
490 out.write(text_encoding.CEscape(field.data, False))
491 out.write('\" ' if self.as_one_line else '\"\n')
492 else:
493
494 out.write(': ')
495 out.write(str(field.data))
496 out.write(' ' if self.as_one_line else '\n')
497
524
531
542
544 if self.pointy_brackets:
545 openb = '<'
546 closeb = '>'
547 else:
548 openb = '{'
549 closeb = '}'
550
551 if self.as_one_line:
552 self.out.write('%s ' % openb)
553 self.PrintMessage(value)
554 self.out.write(closeb)
555 else:
556 self.out.write('%s\n' % openb)
557 self.indent += 2
558 self.PrintMessage(value)
559 self.indent -= 2
560 self.out.write(' ' * self.indent + closeb)
561
608
609
def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message; repeated fields accumulate across calls. This is different from
  what the binary msg.ParseFrom(...) does. The caller is responsible for
  clearing the message as needed.

  Example:
    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b)  # repeated_field contains ["test", "test"]

    # Binary version:
    b.ParseFromString(a.SerializeToString())  # repeated_field is now "test"

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the newline flavour matching the input's type (bytes vs text).
  separator = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(text.split(separator),
                    message,
                    allow_unknown_extension=allow_unknown_extension,
                    allow_field_number=allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)
657
658
def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but a repeated value for a non-repeated field is accepted
  and the last occurrence wins.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the newline flavour matching the input's type (bytes vs text).
  separator = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(text.split(separator),
                    message,
                    allow_unknown_extension=allow_unknown_extension,
                    allow_field_number=allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)
694
695
def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses an iterable of text-representation lines into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension=allow_unknown_extension,
                 allow_field_number=allow_field_number,
                 descriptor_pool=descriptor_pool,
                 allow_unknown_field=allow_unknown_field).ParseLines(
                     lines, message)
726
727
def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Merges an iterable of text-representation lines into a message.

  Like ParseLines(), but a repeated value for a non-repeated field is
  accepted and the last occurrence wins.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension=allow_unknown_extension,
                 allow_field_number=allow_field_number,
                 descriptor_pool=descriptor_pool,
                 allow_unknown_field=allow_unknown_field).MergeLines(
                     lines, message)
761
764 """Text format parser for protocol message."""
765
766 - def __init__(self,
767 allow_unknown_extension=False,
768 allow_field_number=False,
769 descriptor_pool=None,
770 allow_unknown_field=False):
771 self.allow_unknown_extension = allow_unknown_extension
772 self.allow_field_number = allow_field_number
773 self.descriptor_pool = descriptor_pool
774 self.allow_unknown_field = allow_unknown_field
775
777 """Parses a text representation of a protocol message into a message."""
778 self._allow_multiple_scalars = False
779 self._ParseOrMerge(lines, message)
780 return message
781
783 """Merges a text representation of a protocol message into a message."""
784 self._allow_multiple_scalars = True
785 self._ParseOrMerge(lines, message)
786 return message
787
789 """Converts a text representation of a protocol message into a message.
790
791 Args:
792 lines: Lines of a message's text representation.
793 message: A protocol buffer message to merge into.
794
795 Raises:
796 ParseError: On text parsing problems.
797 """
798
799 if six.PY2:
800 str_lines = (line if isinstance(line, str) else line.encode('utf-8')
801 for line in lines)
802 else:
803 str_lines = (line if isinstance(line, str) else line.decode('utf-8')
804 for line in lines)
805 tokenizer = Tokenizer(str_lines)
806 while not tokenizer.AtEnd():
807 self._MergeField(tokenizer, message)
808
810 """Merges a single protocol message field into a message.
811
812 Args:
813 tokenizer: A tokenizer to parse the field name and values.
814 message: A protocol message to record the data.
815
816 Raises:
817 ParseError: In case of text parsing problems.
818 """
819 message_descriptor = message.DESCRIPTOR
820 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
821 tokenizer.TryConsume('[')):
822 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
823 tokenizer.Consume(']')
824 tokenizer.TryConsume(':')
825 if tokenizer.TryConsume('<'):
826 expanded_any_end_token = '>'
827 else:
828 tokenizer.Consume('{')
829 expanded_any_end_token = '}'
830 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
831 self.descriptor_pool)
832 if not expanded_any_sub_message:
833 raise ParseError('Type %s not found in descriptor pool' %
834 packed_type_name)
835 while not tokenizer.TryConsume(expanded_any_end_token):
836 if tokenizer.AtEnd():
837 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
838 (expanded_any_end_token,))
839 self._MergeField(tokenizer, expanded_any_sub_message)
840 message.Pack(expanded_any_sub_message,
841 type_url_prefix=type_url_prefix)
842 return
843
844 if tokenizer.TryConsume('['):
845 name = [tokenizer.ConsumeIdentifier()]
846 while tokenizer.TryConsume('.'):
847 name.append(tokenizer.ConsumeIdentifier())
848 name = '.'.join(name)
849
850 if not message_descriptor.is_extendable:
851 raise tokenizer.ParseErrorPreviousToken(
852 'Message type "%s" does not have extensions.' %
853 message_descriptor.full_name)
854
855 field = message.Extensions._FindExtensionByName(name)
856
857 if not field:
858 if self.allow_unknown_extension:
859 field = None
860 else:
861 raise tokenizer.ParseErrorPreviousToken(
862 'Extension "%s" not registered. '
863 'Did you import the _pb2 module which defines it? '
864 'If you are trying to place the extension in the MessageSet '
865 'field of another message that is in an Any or MessageSet field, '
866 'that message\'s _pb2 module must be imported as well' % name)
867 elif message_descriptor != field.containing_type:
868 raise tokenizer.ParseErrorPreviousToken(
869 'Extension "%s" does not extend message type "%s".' %
870 (name, message_descriptor.full_name))
871
872 tokenizer.Consume(']')
873
874 else:
875 name = tokenizer.ConsumeIdentifierOrNumber()
876 if self.allow_field_number and name.isdigit():
877 number = ParseInteger(name, True, True)
878 field = message_descriptor.fields_by_number.get(number, None)
879 if not field and message_descriptor.is_extendable:
880 field = message.Extensions._FindExtensionByNumber(number)
881 else:
882 field = message_descriptor.fields_by_name.get(name, None)
883
884
885
886
887 if not field:
888 field = message_descriptor.fields_by_name.get(name.lower(), None)
889 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
890 field = None
891
892 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
893 field.message_type.name != name):
894 field = None
895
896 if not field and not self.allow_unknown_field:
897 raise tokenizer.ParseErrorPreviousToken(
898 'Message type "%s" has no field named "%s".' %
899 (message_descriptor.full_name, name))
900
901 if field:
902 if not self._allow_multiple_scalars and field.containing_oneof:
903
904
905
906 which_oneof = message.WhichOneof(field.containing_oneof.name)
907 if which_oneof is not None and which_oneof != field.name:
908 raise tokenizer.ParseErrorPreviousToken(
909 'Field "%s" is specified along with field "%s", another member '
910 'of oneof "%s" for message type "%s".' %
911 (field.name, which_oneof, field.containing_oneof.name,
912 message_descriptor.full_name))
913
914 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
915 tokenizer.TryConsume(':')
916 merger = self._MergeMessageField
917 else:
918 tokenizer.Consume(':')
919 merger = self._MergeScalarField
920
921 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
922 tokenizer.TryConsume('[')):
923
924 if not tokenizer.TryConsume(']'):
925 while True:
926 merger(tokenizer, message, field)
927 if tokenizer.TryConsume(']'):
928 break
929 tokenizer.Consume(',')
930
931 else:
932 merger(tokenizer, message, field)
933
934 else:
935 assert (self.allow_unknown_extension or self.allow_unknown_field)
936 _SkipFieldContents(tokenizer)
937
938
939
940 if not tokenizer.TryConsume(','):
941 tokenizer.TryConsume(';')
942
957
959 """Merges a single scalar field into a message.
960
961 Args:
962 tokenizer: A tokenizer to parse the field value.
963 message: The message of which field is a member.
964 field: The descriptor of the field to be merged.
965
966 Raises:
967 ParseError: In case of text parsing problems.
968 """
969 is_map_entry = _IsMapEntry(field)
970
971 if tokenizer.TryConsume('<'):
972 end_token = '>'
973 else:
974 tokenizer.Consume('{')
975 end_token = '}'
976
977 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
978 if field.is_extension:
979 sub_message = message.Extensions[field].add()
980 elif is_map_entry:
981 sub_message = getattr(message, field.name).GetEntryClass()()
982 else:
983 sub_message = getattr(message, field.name).add()
984 else:
985 if field.is_extension:
986 if (not self._allow_multiple_scalars and
987 message.HasExtension(field)):
988 raise tokenizer.ParseErrorPreviousToken(
989 'Message type "%s" should not have multiple "%s" extensions.' %
990 (message.DESCRIPTOR.full_name, field.full_name))
991 sub_message = message.Extensions[field]
992 else:
993
994
995 if (not self._allow_multiple_scalars and
996 message.HasField(field.name)):
997 raise tokenizer.ParseErrorPreviousToken(
998 'Message type "%s" should not have multiple "%s" fields.' %
999 (message.DESCRIPTOR.full_name, field.name))
1000 sub_message = getattr(message, field.name)
1001 sub_message.SetInParent()
1002
1003 while not tokenizer.TryConsume(end_token):
1004 if tokenizer.AtEnd():
1005 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
1006 self._MergeField(tokenizer, sub_message)
1007
1008 if is_map_entry:
1009 value_cpptype = field.message_type.fields_by_name['value'].cpp_type
1010 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
1011 value = getattr(message, field.name)[sub_message.key]
1012 value.MergeFrom(sub_message.value)
1013 else:
1014 getattr(message, field.name)[sub_message.key] = sub_message.value
1015
1016 @staticmethod
1018 message_descriptor = message.DESCRIPTOR
1019 return (hasattr(message_descriptor, 'syntax') and
1020 message_descriptor.syntax == 'proto3')
1021
1023 """Merges a single scalar field into a message.
1024
1025 Args:
1026 tokenizer: A tokenizer to parse the field value.
1027 message: A protocol message to record the data.
1028 field: The descriptor of the field to be merged.
1029
1030 Raises:
1031 ParseError: In case of text parsing problems.
1032 RuntimeError: On runtime errors.
1033 """
1034 _ = self.allow_unknown_extension
1035 value = None
1036
1037 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
1038 descriptor.FieldDescriptor.TYPE_SINT32,
1039 descriptor.FieldDescriptor.TYPE_SFIXED32):
1040 value = _ConsumeInt32(tokenizer)
1041 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
1042 descriptor.FieldDescriptor.TYPE_SINT64,
1043 descriptor.FieldDescriptor.TYPE_SFIXED64):
1044 value = _ConsumeInt64(tokenizer)
1045 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
1046 descriptor.FieldDescriptor.TYPE_FIXED32):
1047 value = _ConsumeUint32(tokenizer)
1048 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
1049 descriptor.FieldDescriptor.TYPE_FIXED64):
1050 value = _ConsumeUint64(tokenizer)
1051 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
1052 descriptor.FieldDescriptor.TYPE_DOUBLE):
1053 value = tokenizer.ConsumeFloat()
1054 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
1055 value = tokenizer.ConsumeBool()
1056 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
1057 value = tokenizer.ConsumeString()
1058 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
1059 value = tokenizer.ConsumeByteString()
1060 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
1061 value = tokenizer.ConsumeEnum(field)
1062 else:
1063 raise RuntimeError('Unknown field type %d' % field.type)
1064
1065 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
1066 if field.is_extension:
1067 message.Extensions[field].append(value)
1068 else:
1069 getattr(message, field.name).append(value)
1070 else:
1071 if field.is_extension:
1072 if (not self._allow_multiple_scalars and
1073 not self._IsProto3Syntax(message) and
1074 message.HasExtension(field)):
1075 raise tokenizer.ParseErrorPreviousToken(
1076 'Message type "%s" should not have multiple "%s" extensions.' %
1077 (message.DESCRIPTOR.full_name, field.full_name))
1078 else:
1079 message.Extensions[field] = value
1080 else:
1081 duplicate_error = False
1082 if not self._allow_multiple_scalars:
1083 if self._IsProto3Syntax(message):
1084
1085
1086 duplicate_error = bool(getattr(message, field.name))
1087 else:
1088 duplicate_error = message.HasField(field.name)
1089
1090 if duplicate_error:
1091 raise tokenizer.ParseErrorPreviousToken(
1092 'Message type "%s" should not have multiple "%s" fields.' %
1093 (message.DESCRIPTOR.full_name, field.name))
1094 else:
1095 setattr(message, field.name, value)
1096
1097
def _SkipFieldContents(tokenizer):
  """Skips over contents (value or message) of a field.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  # Try to guess the type of this field.
  # If this field is not a message, there should be a ":" between the
  # field name and the field value and also the field value should not
  # start with "{" or "<" which indicates the beginning of a message body.
  # If there is no ":" or there is a "{" or "<" after ":", this field has
  # to be a message or the input is ill-formed.
  if tokenizer.TryConsume(':') and not tokenizer.LookingAt(
      '{') and not tokenizer.LookingAt('<'):
    if tokenizer.LookingAt('['):
      # Fix: the short repeated form "foo: [v1, v2, ...]" previously fell
      # into _SkipFieldValue, which cannot consume a '[' token and raised.
      _SkipRepeatedFieldValue(tokenizer)
    else:
      _SkipFieldValue(tokenizer)
  else:
    _SkipFieldMessage(tokenizer)


def _SkipRepeatedFieldValue(tokenizer):
  """Skips over a repeated field value enclosed in '[' and ']'.

  Args:
    tokenizer: A tokenizer to parse the field value.
  """
  tokenizer.Consume('[')
  if not tokenizer.LookingAt(']'):
    _SkipFieldValue(tokenizer)
    while tokenizer.TryConsume(','):
      _SkipFieldValue(tokenizer)
  tokenizer.Consume(']')
1115
1138
1141 """Skips over a field message.
1142
1143 Args:
1144 tokenizer: A tokenizer to parse the field name and values.
1145 """
1146
1147 if tokenizer.TryConsume('<'):
1148 delimiter = '>'
1149 else:
1150 tokenizer.Consume('{')
1151 delimiter = '}'
1152
1153 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
1154 _SkipField(tokenizer)
1155
1156 tokenizer.Consume(delimiter)
1157
1179
1182 """Protocol buffer text representation tokenizer.
1183
1184 This class handles the lower level string parsing by splitting it into
1185 meaningful tokens.
1186
1187 It was directly ported from the Java protocol buffer API.
1188 """
1189
  # A run of whitespace between tokens.
  _WHITESPACE = re.compile(r'\s+')
  # A '#' comment running to the end of the line (with leading whitespace).
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  # Whitespace and/or comments; used when comments are being skipped.
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A single token: an identifier-shaped word, a number-shaped word, or a
  # quoted string literal (one alternative per quote mark in _QUOTES; the
  # string alternatives tolerate escaped characters and an unterminated
  # literal at end of line).
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # identifier-shaped token
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # number-shaped token
  ] + [
      # quoted string, with backslash escapes
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  # A strict identifier (no leading digit) and an identifier-or-number word.
  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
1204
1205 - def __init__(self, lines, skip_comments=True):
1206 self._position = 0
1207 self._line = -1
1208 self._column = 0
1209 self._token_start = None
1210 self.token = ''
1211 self._lines = iter(lines)
1212 self._current_line = ''
1213 self._previous_line = 0
1214 self._previous_column = 0
1215 self._more_lines = True
1216 self._skip_comments = skip_comments
1217 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
1218 or self._WHITESPACE)
1219 self._SkipWhitespace()
1220 self.NextToken()
1221
1223 return self.token == token
1224
1226 """Checks the end of the text was reached.
1227
1228 Returns:
1229 True iff the end was reached.
1230 """
1231 return not self.token
1232
1234 while len(self._current_line) <= self._column:
1235 try:
1236 self._current_line = next(self._lines)
1237 except StopIteration:
1238 self._current_line = ''
1239 self._more_lines = False
1240 return
1241 else:
1242 self._line += 1
1243 self._column = 0
1244
1246 while True:
1247 self._PopLine()
1248 match = self._whitespace_pattern.match(self._current_line, self._column)
1249 if not match:
1250 break
1251 length = len(match.group(0))
1252 self._column += length
1253
1255 """Tries to consume a given piece of text.
1256
1257 Args:
1258 token: Text to consume.
1259
1260 Returns:
1261 True iff the text was consumed.
1262 """
1263 if self.token == token:
1264 self.NextToken()
1265 return True
1266 return False
1267
1269 """Consumes a piece of text.
1270
1271 Args:
1272 token: Text to consume.
1273
1274 Raises:
1275 ParseError: If the text couldn't be consumed.
1276 """
1277 if not self.TryConsume(token):
1278 raise self.ParseError('Expected "%s".' % token)
1279
1286
1302
1309
1311 """Consumes protocol message field identifier.
1312
1313 Returns:
1314 Identifier string.
1315
1316 Raises:
1317 ParseError: If an identifier couldn't be consumed.
1318 """
1319 result = self.token
1320 if not self._IDENTIFIER.match(result):
1321 raise self.ParseError('Expected identifier.')
1322 self.NextToken()
1323 return result
1324
1331
1333 """Consumes protocol message field identifier.
1334
1335 Returns:
1336 Identifier string.
1337
1338 Raises:
1339 ParseError: If an identifier couldn't be consumed.
1340 """
1341 result = self.token
1342 if not self._IDENTIFIER_OR_NUMBER.match(result):
1343 raise self.ParseError('Expected identifier or number, got %s.' % result)
1344 self.NextToken()
1345 return result
1346
1354
1356 """Consumes an integer number.
1357
1358 Args:
1359 is_long: True if the value should be returned as a long integer.
1360 Returns:
1361 The integer parsed.
1362
1363 Raises:
1364 ParseError: If an integer couldn't be consumed.
1365 """
1366 try:
1367 result = _ParseAbstractInteger(self.token, is_long=is_long)
1368 except ValueError as e:
1369 raise self.ParseError(str(e))
1370 self.NextToken()
1371 return result
1372
1379
1381 """Consumes an floating point number.
1382
1383 Returns:
1384 The number parsed.
1385
1386 Raises:
1387 ParseError: If a floating point number couldn't be consumed.
1388 """
1389 try:
1390 result = ParseFloat(self.token)
1391 except ValueError as e:
1392 raise self.ParseError(str(e))
1393 self.NextToken()
1394 return result
1395
1397 """Consumes a boolean value.
1398
1399 Returns:
1400 The bool parsed.
1401
1402 Raises:
1403 ParseError: If a boolean value couldn't be consumed.
1404 """
1405 try:
1406 result = ParseBool(self.token)
1407 except ValueError as e:
1408 raise self.ParseError(str(e))
1409 self.NextToken()
1410 return result
1411
1418
1420 """Consumes a string value.
1421
1422 Returns:
1423 The string parsed.
1424
1425 Raises:
1426 ParseError: If a string value couldn't be consumed.
1427 """
1428 the_bytes = self.ConsumeByteString()
1429 try:
1430 return six.text_type(the_bytes, 'utf-8')
1431 except UnicodeDecodeError as e:
1432 raise self._StringParseError(e)
1433
1435 """Consumes a byte array value.
1436
1437 Returns:
1438 The array parsed (as a string).
1439
1440 Raises:
1441 ParseError: If a byte array value couldn't be consumed.
1442 """
1443 the_list = [self._ConsumeSingleByteString()]
1444 while self.token and self.token[0] in _QUOTES:
1445 the_list.append(self._ConsumeSingleByteString())
1446 return b''.join(the_list)
1447
1449 """Consume one token of a string literal.
1450
1451 String literals (whether bytes or text) can come in multiple adjacent
1452 tokens which are automatically concatenated, like in C or Python. This
1453 method only consumes one token.
1454
1455 Returns:
1456 The token parsed.
1457 Raises:
1458 ParseError: When the wrong format data is found.
1459 """
1460 text = self.token
1461 if len(text) < 1 or text[0] not in _QUOTES:
1462 raise self.ParseError('Expected string but found: %r' % (text,))
1463
1464 if len(text) < 2 or text[-1] != text[0]:
1465 raise self.ParseError('String missing ending quote: %r' % (text,))
1466
1467 try:
1468 result = text_encoding.CUnescape(text[1:-1])
1469 except ValueError as e:
1470 raise self.ParseError(str(e))
1471 self.NextToken()
1472 return result
1473
1475 try:
1476 result = ParseEnum(field, self.token)
1477 except ValueError as e:
1478 raise self.ParseError(str(e))
1479 self.NextToken()
1480 return result
1481
1483 """Creates and *returns* a ParseError for the previously read token.
1484
1485 Args:
1486 message: A message to set for the exception.
1487
1488 Returns:
1489 A ParseError instance.
1490 """
1491 return ParseError(message, self._previous_line + 1,
1492 self._previous_column + 1)
1493
1495 """Creates and *returns* a ParseError for the current token."""
1496 return ParseError('\'' + self._current_line + '\': ' + message,
1497 self._line + 1, self._column + 1)
1498
1500 return self.ParseError('Couldn\'t parse string: ' + str(e))
1501
1503 """Reads the next meaningful token."""
1504 self._previous_line = self._line
1505 self._previous_column = self._column
1506
1507 self._column += len(self.token)
1508 self._SkipWhitespace()
1509
1510 if not self._more_lines:
1511 self.token = ''
1512 return
1513
1514 match = self._TOKEN.match(self._current_line, self._column)
1515 if not match and not self._skip_comments:
1516 match = self._COMMENT.match(self._current_line, self._column)
1517 if match:
1518 token = match.group(0)
1519 self.token = token
1520 else:
1521 self.token = self._current_line[self._column]
1522
1523
1524
1525 _Tokenizer = Tokenizer
def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1564
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  # is_long=True selects the 64-bit range checker; the original docstring
  # said "32bit", which contradicted the is_long flag.
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1587
def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Best-effort integer consumption.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    True iff an integer with the given characteristics was consumed.
  """
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
    return True
  except ParseError:
    return False
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Attach token position information to the error.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return result
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Do the actual parsing. Exception handling is propagated to caller.
  result = _ParseAbstractInteger(text, is_long=is_long)

  # Check if the integer is sane. The checker index encodes
  # (is_long, is_signed) as 2*is_long + is_signed, matching the order of
  # _INTEGER_CHECKERS (Uint32, Int32, Uint64, Int64).
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
def _ParseAbstractInteger(text, is_long=False):
  """Parses an integer without checking size/signedness.

  Args:
    text: The text to parse.
    is_long: True if the value should be returned as a long integer.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Do the actual parsing. Exception handling is propagated to caller.
  orig_text = text
  c_octal_match = re.match(r'(-?)0(\d+)$', text)
  if c_octal_match:
    # Python 3 no longer accepts C-style octal like '0755'; rewrite it to
    # the '0o755' form that int(..., 0) understands on both 2 and 3.
    text = c_octal_match.group(1) + '0o' + c_octal_match.group(2)
  try:
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler. (On Python 3 long is aliased
    # to int at module level.)
    if is_long:
      return long(text, 0)
    else:
      return int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % orig_text)
def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax.
    return float(text)
  except ValueError:
    # Check alternative spellings.
    if _FLOAT_INFINITY.match(text):
      if text[0] == '-':
        return float('-inf')
      else:
        return float('inf')
    elif _FLOAT_NAN.match(text):
      return float('nan')
    else:
      # assume '1.0f' format, as emitted by some proto printers
      try:
        return float(text.rstrip('f'))
      except ValueError:
        raise ValueError('Couldn\'t parse float: %s' % text)
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1', 'True'):
    return True
  elif text in ('false', 'f', '0', 'False'):
    return False
  else:
    raise ValueError('Expected "true" or "false".')
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Identifier: look the name up in the enum descriptor.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
  else:
    # Numeric value.
    if hasattr(field.file, 'syntax'):
      # Attribute is checked for compatibility with older descriptors.
      if field.file.syntax == 'proto3':
        # Proto3 accepts unknown enum values.
        return number
    enum_value = enum_descriptor.values_by_number.get(number, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value with number %d.' %
                       (enum_descriptor.full_name, number))
  return enum_value.number