1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re

from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange, dblQuotedString, Or
# Names exported by `from <module> import *`: the parser entry point and the
# node type that its parse trees are built from.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """ Parse a string containing one or more ASN.1 module definitions.
    Returns a list of module syntax trees represented as nested lists of
    AnnotatedToken objects.

    asn1_definition is the ASN.1 source text to parse.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested lists so that
    # callers only ever deal with lists, strings and AnnotatedToken objects.
    return parse_result.asList()
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node is an AnnotatedToken, a list of nodes, or any other value
    (printed via str()). indent is the current nesting depth, rendered
    as that many leading spaces.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # tagged token: print its type, then its children one level deeper
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # token list: print every member one level deeper
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # plain token
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.
    """
    def __init__(self, token_type, elements):
        # ty: string naming the kind of token (e.g. the production name).
        self.ty = token_type
        # elements: the token's children.
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their members with repr(), so share the readable form
    # to keep nested parse trees legible when printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for ASN.1 module definitions.
    Returns a parser element matching one or more module definitions.
    Parse actions attached at the end wrap the matched tokens of the
    tagged productions in AnnotatedToken objects.
    """
    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by any run of
        # letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' element (',' element)* '}' with the braces dropped and the
        # elements grouped into one sub-list.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that wraps the matched tokens in an
        # AnnotatedToken labelled with name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    # NOTE(review): the multi-word keywords below are matched as literal
    # strings, so they presumably require exactly one space between the
    # words -- confirm against the inputs we need to support.
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: '--' up to the next '--' or end of line, plus C-style comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    # todo: consider other defined types from 13.1
    external_type_reference = module_reference + Suppress('.') + typereference
    defined_type = external_type_reference | typereference
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (signed_number | referenced_value | MIN)
    upper_bound = (signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + braced_list(named_number) + Optional(single_value_constraint | size_constraint)
    plain_integer_type = INTEGER + Optional(single_value_constraint)
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # and we use them.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    start = OneOrMore(module_definition)
    return start
def Unique(token):
    """ Use to create a distinct name of a production
    with the same form as another, e.g.
      identifier = build_identifier('[a-z]')
      valuereference = build_identifier('[a-z]')
    We prefer:
      identifier = build_identifier('[a-z]')
      valuereference = Unique(identifier)
    to avoid duplicating the details of the grammar.
    This allows unique parse actions for productions
    with the same underlying rules.

    token is the rule to duplicate; a separate copy of it is returned.
    """
    # A separate object is needed so that a parse action set on the
    # result does not also apply to the rule it was derived from.
    return token.copy()
def StringOf(elements):
    """ Create a rule to parse a string of any of the chars in elements.
    Skips any whitespace.
    This is useful for the ASN.1 hstring and bstring productions.

    elements is a string containing every character the rule accepts.
    """
    element = CharSet(elements)
    return Combine(OneOrMore(element), adjacent=False)  # Use adjacent=False to skip whitespace
def CharSet(elements):
    """ Create a set of valid characters as a single rule.
    elements is a string containing all the desired chars, e.g.
      CharSet('01234567890')        # all numbers
      CharSet('01234567890ABCDEF')  # all hex numbers

    Returns a rule that is an alternation of one Literal per character.
    """
    unpacked_chars = [Literal(c) for c in elements]
    return Or(unpacked_chars)