1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re
from copy import copy

from pyparsing import (Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine,
                       Regex, Forward, Optional, Group, Suppress,
                       delimitedList, cStyleComment, nums, alphanums, empty,
                       srange, dblQuotedString, Or)
# Names exported by a star-import of this module.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """ Parse a string containing one or more ASN.1 module definitions.

    asn1_definition is the text to parse.

    Returns a list of module syntax trees represented as nested lists of
    AnnotatedToken objects.

    Errors raised by pyparsing while parsing propagate to the caller
    unchanged.
    """
    # The grammar is rebuilt on every call.
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested lists.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node is an AnnotatedToken, a list of nodes, or any other value, which
    is printed via str(). indent is the number of spaces put in front of
    lines printed at this level; every nesting level adds one more.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Annotated node: print its tag, then its children one level deeper.
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # Plain list: recurse into every child.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Leaf token: print it as-is.
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.
    """
    def __init__(self, token_type, elements):
        # ty: the string naming the kind of token.
        self.ty = token_type
        # elements: the children of this token.
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Use the same compact form when the token is shown inside a list.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for ASN.1 module definitions.

    The grammar is assembled bottom-up: reserved words, literals,
    references, values, tags, types, assignments and finally whole
    modules. Once every production exists, parse actions are attached
    that wrap the matched tokens in AnnotatedToken objects.

    Returns the top-level rule, which matches one or more module
    definitions.
    """
    def build_identifier(prefix_pattern):
        # One character from prefix_pattern, optionally followed by a run
        # of letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # A delimited list of element_rule inside braces. The braces are
        # suppressed and the elements are grouped into one sub-list.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that wraps the matched tokens in an
        # AnnotatedToken tagged with name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = (Literal('e') | Literal('E')) + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    # todo: consider other defined types from 13.1
    external_type_reference = module_reference + Suppress('.') + typereference
    defined_type = external_type_reference | typereference
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER + Optional(single_value_constraint)
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # so the identifier is optional in this grammar as well.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    start = OneOrMore(module_definition)
    return start
def Unique(token):
    """ Use to create a distinct name of a production
    with the same form as another, e.g.
      identifier = build_identifier('[a-z]')
      valuereference = build_identifier('[a-z]')
    as
      identifier = build_identifier('[a-z]')
      valuereference = Unique(identifier)
    to avoid duplicating the details of the grammar.
    This allows unique parse actions for productions
    with the same underlying rules.

    Returns a shallow copy of token.
    """
    # copy() yields a separate object, so a parse action set on the result
    # is not set on the production it was created from.
    return copy(token)
def StringOf(elements):
    """ Create a rule to parse a string of any of the chars in elements.
    Skips any whitespace.
    This is useful for the ASN.1 hstring and bstring productions.

    elements is a string containing every character the rule accepts.
    Returns a rule matching one or more of those characters, combined
    into a single token.
    """
    element = CharSet(elements)
    return Combine(OneOrMore(element), adjacent=False)  # Use adjacent=False to skip whitespace
def CharSet(elements):
    """ Create a set of valid characters as a single rule.
    elements is a string containing all the desired chars, e.g.
      CharSet('0123456789')        # all numbers
      CharSet('0123456789ABCDEF')  # all hex numbers

    Returns a rule that matches any single character from elements.
    """
    # One literal per character, joined as alternatives.
    unpacked_chars = [Literal(c) for c in elements]
    return Or(unpacked_chars)