1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, srange, dblQuotedString, Or, CaselessLiteral
# Names exported by a star-import of this module.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """ Parse a string containing one or more ASN.1 module definitions.
    Returns a list of module syntax trees represented as nested lists of
    AnnotatedToken objects.

    asn1_definition is the ASN.1 source text. Any exception raised by the
    underlying pyparsing grammar on malformed input propagates to the caller.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested lists so that
    # callers do not depend on pyparsing types.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node is an AnnotatedToken, a list of nodes, or any other value
    (printed via str()). indent is the current nesting depth, used as
    the number of leading spaces.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # tagged token: print its type, then recurse into its children
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # token list: dump every element one level deeper
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # plain token
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.
    """
    def __init__(self, token_type, elements):
        # ty: string naming the grammar production that matched
        self.ty = token_type
        # elements: the child tokens of that production
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their items with repr(), so share the implementation
    # to keep nested tokens readable when a parent token is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for ASN.1 module definitions.

    Returns the start rule, which matches one or more module definitions.
    Comments (both ``--`` style and C style) are ignored inside a module
    definition. Parse actions attached at the end wrap the tokens matched
    by selected productions in AnnotatedToken objects.
    """
    import re  # only needed for the flags of the comment regex below

    def build_identifier(prefix_pattern):
        # prefix_pattern is an srange() pattern for the allowed first char
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and elements grouped
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that tags the matched tokens with `name`
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = Optional(EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS)

    # extensions
    extension_default = Optional(EXTENSIBILITY_IMPLIED)

    # types
    # todo: consider other defined types from 13.1
    external_type_reference = module_reference + Suppress('.') + typereference
    defined_type = external_type_reference | typereference

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY is deprecated, but still seen in older specifications
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # so they are accepted here as well.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    start = OneOrMore(module_definition)
    return start
def Unique(token):
    """ Use to create a distinct name of a production
    with the same form as another, e.g. instead of
      identifier = build_identifier('[a-z]')
      valuereference = build_identifier('[a-z]')
    write
      identifier = build_identifier('[a-z]')
      valuereference = Unique(identifier)
    to avoid duplicating the details of the grammar.
    This allows unique parse actions for productions
    with the same underlying rules.
    """
    from copy import copy

    # A shallow copy is a separate object, so a parse action assigned to
    # it afterwards is not seen through the original rule.
    return copy(token)
def StringOf(elements):
    """ Create a rule to parse a string of any of the chars in elements.
    Skips any whitespace.
    This is useful for the ASN.1 hstring and bstring productions.

    elements is a string containing every character the rule accepts.
    """
    element = CharSet(elements)
    return Combine(OneOrMore(element), adjacent=False)  # Use adjacent=False to skip whitespace
def CharSet(elements):
    """ Create a set of valid characters as a single rule.
    elements is a string containing all the desired chars, e.g.
      CharSet('01234567890')        # all numbers
      CharSet('01234567890ABCDEF')  # all hex numbers
    """
    # One literal per character, combined into a single alternation
    unpacked_chars = [Literal(c) for c in elements]
    return Or(unpacked_chars)