# Copyright (c) 2013-2015, Schneider Electric Buildings AB
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Schneider Electric Buildings AB nor the
# names of contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import copy
import re

from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, srange, dblQuotedString, Or, CaselessLiteral
# Public API of this module.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """ Parse a string containing one or more ASN.1 module definitions.

    Returns a list of module syntax trees represented as nested lists of
    AnnotatedToken objects.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested Python lists.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node is an AnnotatedToken, a list of nodes, or any other value
    (printed via str()). indent is the current nesting depth; one
    space is printed per level.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Tagged token: print its tag, then its children one level deeper.
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # Token list: print each member one level deeper.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Leaf token.
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    values such as strings.
    """
    def __init__(self, token_type, elements):
        # ty: string tag naming the grammar production.
        self.ty = token_type
        # elements: the child tokens of this production.
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Use the same form when the token is shown inside a list of elements.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for ASN.1 module definitions.

    Returns a rule matching one or more module definitions. Comments
    (hyphen style and C style) are ignored inside module definitions.
    Parse actions attached at the end wrap the tokens of selected
    productions in AnnotatedToken objects tagged with the production name.
    """
    def build_identifier(prefix_pattern):
        # One leading character from prefix_pattern, then letters, digits or hyphens.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # Comma-separated elements inside braces, returned as one group.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that tags the matched tokens with name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    # NOTE(review): ANY, END, MIN, MAX, OF and SET are referenced by rules
    # below; their definitions here follow the pattern of the neighbouring
    # keywords -- confirm.
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    # NOTE(review): definition of number inferred from its uses below -- confirm.
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    # NOTE(review): null_value is used by builtin_value below; definition inferred -- confirm.
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # types
    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'), default=None) + typereference + Optional(size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    # NOTE(review): real_type and null_type are used by simple_type below;
    # definitions inferred from the matching keywords -- confirm.
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # so the identifier is accepted as optional here.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
                        Optional(extension_default, default=None) + Suppress('::=') + Suppress(BEGIN) + module_body + Suppress(END)

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    start = OneOrMore(module_definition)
    return start
def Unique(token):
    """ Use to create a distinct name of a production
    with the same form as another, e.g. instead of
      identifier = build_identifier('[a-z]')
      valuereference = build_identifier('[a-z]')
    write
      identifier = build_identifier('[a-z]')
      valuereference = Unique(identifier)
    to avoid duplicating the details of the grammar.
    This allows unique parse actions for productions
    with the same underlying rules.
    """
    # NOTE(review): only the docstring of this function was available; the
    # body is restored as a shallow copy of the rule -- confirm.
    return copy.copy(token)
def StringOf(elements):
    """ Create a rule to parse a string of any of the chars in elements.
    Skips any whitespace.
    This is useful for the ASN.1 hstring and bstring productions.
    """
    element = CharSet(elements)
    return Combine(OneOrMore(element), adjacent=False)  # Use adjacent=False to skip whitespace
def CharSet(elements):
    """ Create a set of valid characters as a single rule.
    elements is a string containing all the desired chars, e.g.
      CharSet('0123456789')        # all decimal digits
      CharSet('0123456789ABCDEF')  # all hex digits
    """
    # One Literal per character, combined into a single alternation.
    unpacked_chars = [Literal(c) for c in elements]
    return Or(unpacked_chars)