1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re

from pyparsing import Keyword, Literal, Word, OneOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange
# Names exported by ``from <this module> import *``.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_payload):
    """ Parse a string containing an ASN.1 module definition
    and return a syntax tree in the form of a list of
    AnnotatedToken objects.

    asn1_payload -- the text of the ASN.1 module definition.

    No exceptions are caught here, so any error raised while
    building the grammar or parsing propagates to the caller.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_payload)
    # asList() turns the pyparsing result object into plain nested lists.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node -- an AnnotatedToken, a list of nodes, or any other
            value, which is printed via str().
    indent -- number of spaces printed before each line at
              this level; grows by one per nesting level.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # tagged token: print its type, then its children one level deeper
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # token list: every element is dumped one level deeper
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # plain token
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    token values.

    Attributes:
    ty -- the token type passed to the constructor.
    elements -- the children passed to the constructor.
    """
    def __init__(self, token_type, elements):
        self.ty = token_type
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their items with repr(), so share the compact form
    # to keep nested tokens readable when a parent token is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build and return the pyparsing grammar for an ASN.1 module definition.

    The returned element matches a complete module: module identifier,
    DEFINITIONS, optional tagging/extensibility defaults, '::=', BEGIN,
    the assignment list and END. Comments are ignored, and parse actions
    are attached so that matched productions are wrapped in
    AnnotatedToken objects tagged with the production name.
    """
    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by an optional run of
        # letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and the elements grouped.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Returns a parse action that wraps the matched tokens in an
        # AnnotatedToken of type `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Literal('\'') + Regex('[01]+') + Literal('\'B')
    hstring = Literal('\'') + Regex('[0-9A-F]+') + Literal('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value
    defined_value = valuereference  # todo: more options from 13.1

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | defined_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))
    constraint = Suppress('(') + value_range_constraint + Suppress(')')

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration)
    bitstring_type = BIT_STRING + braced_list(named_number)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | value_list_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # BUG: identifier should not be Optional here,
    # but our ASN.1 interpreter supports unnamed members,
    # so they are accepted by this grammar as well.
    named_type << (Optional(identifier) + type_)

    # BUG: Trailing semi-colon is not allowed by standard grammar, but our ASN.1 interpreter accepts it
    # and we happen to use it.
    type_assignment = typereference + '::=' + type_ + Suppress(Optional(';'))
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = OneOrMore(assignment)

    module_body = (assignment_list | empty)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    valuereference.setParseAction(annotate('ValueReference'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))

    return module_definition
334 """ Use to create a distinct name of a production
335 with the same form as another, e.g.
336 identifier = build_identifier('[a-z]')
337 valuereference = build_identifier('[a-z]')
339 identifier = build_identifier('[a-z]')
340 valuereference = Unique(identifier)
341 to avoid duplicating the details of the grammar.
342 This allows unique parse actions for productions
343 with the same underlying rules.