1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re
from copy import copy

from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, srange, dblQuotedString, Or, CaselessLiteral
# Names exported by `from <module> import *`: the parser entry point and
# the node type its parse trees are built from.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """ Parse a string containing one or more ASN.1 module definitions.

    A fresh grammar is built with _build_asn1_grammar() on every call and
    applied to asn1_definition. Exceptions raised by the grammar while
    parsing are not caught here and propagate to the caller.

    Returns a list of module syntax trees represented as nested lists of
    AnnotatedToken objects.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested lists.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node may be an AnnotatedToken, a list of nodes, or any other value,
    which is printed via str(). indent is the number of spaces put in
    front of lines printed at this level; every recursive step adds one.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Annotated token: print its type tag, then descend into its children.
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # Plain list of tokens: print every member one level deeper.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Leaf token.
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.

    Attributes:
        ty: the token type name.
        elements: the token's children.
    """
    def __init__(self, token_type, elements):
        self.ty = token_type
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their members with repr(), so sharing the implementation
    # keeps nested tokens readable when a parent token is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar used by parse_asn1.

    Returns a rule matching one or more ASN.1 module definitions.
    Comments are ignored inside module definitions, and parse actions
    wrap the tokens of the tagged productions in AnnotatedToken objects.
    """
    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by an optional run of
        # letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and the elements grouped.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that wraps the matched tokens in an
        # AnnotatedToken tagged with name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: '--' up to the next '--' or the end of the line, plus C-style comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = Optional(EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS)

    # extensions
    extension_default = Optional(EXTENSIBILITY_IMPLIED)

    # types
    # todo: consider other defined types from 13.1
    external_type_reference = module_reference + Suppress('.') + typereference
    defined_type = external_type_reference | typereference

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type; the identifier after DEFINED BY is matched but dropped
    any_defined_by = Suppress(DEFINED_BY) + Suppress(identifier)
    any_type = ANY + Optional(any_defined_by)

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # so the grammar accepts them as well.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # The start symbol: one or more complete module definitions.
    start = OneOrMore(module_definition)
    return start
def Unique(token):
    """ Use to create a distinct name of a production
    with the same form as another, e.g. instead of
      identifier = build_identifier('[a-z]')
      valuereference = build_identifier('[a-z]')
    write
      identifier = build_identifier('[a-z]')
      valuereference = Unique(identifier)
    to avoid duplicating the details of the grammar.
    This allows unique parse actions for productions
    with the same underlying rules.

    Returns a shallow copy (copy.copy) of token.
    """
    return copy(token)
def StringOf(elements):
    """ Build a rule matching a run of one or more characters taken
    from elements, joined into a single token.
    Whitespace between the characters is skipped.
    Used for the ASN.1 bstring and hstring productions.
    """
    # adjacent=False makes Combine tolerate whitespace between the characters
    return Combine(OneOrMore(CharSet(elements)), adjacent=False)
def CharSet(elements):
    """ Build a single rule that matches any one character of elements.
    elements is a string holding every accepted character, e.g.
      CharSet('01234567890')        # all numbers
      CharSet('01234567890ABCDEF')  # all hex numbers
    """
    alternatives = []
    for char in elements:
        alternatives.append(Literal(char))
    return Or(alternatives)