1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange, dblQuotedString, Or
31 __all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """Parse a string containing one or more ASN.1 module definitions.

    Args:
        asn1_definition: Text of the ASN.1 module definition(s) to parse.

    Returns:
        A list of module syntax trees represented as nested lists of
        AnnotatedToken objects.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested lists so that
    # callers only ever deal with lists, strings and AnnotatedToken objects.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """Debugging aid: dump a parse tree to stdout in indented tree form.

    Args:
        node: A parse tree as returned from parse_asn1, or any sub-node of
            one: an AnnotatedToken, a list of nodes, or a plain token.
        indent: Number of spaces to print in front of this node. Each level
            of nesting (token children or list members) adds one space.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Tagged token: print the tag, then its children one level deeper.
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # Token list: a list has no label of its own, only its members
        # are printed, one level deeper.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Plain token (leaf).
        indented_print(str(node))
class AnnotatedToken(object):
    """A simple data structure to keep track of a token's
    type, identified by a string, and its children.

    Children may be other annotated tokens, lists or simple
    strings.
    """

    def __init__(self, token_type, elements):
        # ty: string naming the kind of token (e.g. the production name).
        self.ty = token_type
        # elements: the token's children, stored as given.
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their members with repr(), so alias it to keep nested
    # tokens readable when an enclosing token or list is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """Construct the pyparsing grammar for ASN.1 module definitions.

    The grammar is assembled bottom-up: reserved words, literals and
    references first, then values, tags, types and finally whole modules.
    Parse actions are attached at the end so that matched productions are
    wrapped in AnnotatedToken objects tagged with the production's name.

    Returns:
        A pyparsing element matching one or more module definitions.
    """
    def build_identifier(prefix_pattern):
        # One character taken from prefix_pattern, optionally followed by a
        # run of letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and the elements
        # grouped into one nested list.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Build a parse action that wraps the matched tokens in an
        # AnnotatedToken tagged with `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    defined_value = Unique(valuereference)  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    # todo: consider other defined types from 13.1
    external_type_reference = module_reference + Suppress('.') + typereference
    defined_type = external_type_reference | typereference
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_constraint = (signed_number | referenced_value | MIN) + Suppress('..') + (signed_number | referenced_value | MAX)
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))
    constraint = Suppress('(') + value_range_constraint + Suppress(')')

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + braced_list(named_number)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    # Close the forward declaration now that every alternative exists.
    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # A definition string may hold several modules back to back.
    start = OneOrMore(module_definition)
    return start
367 """ Use to create a distinct name of a production
368 with the same form as another, e.g.
369 identifier = build_identifier('[a-z]')
370 valuereference = build_identifier('[a-z]')
372 identifier = build_identifier('[a-z]')
373 valuereference = Unique(identifier)
374 to avoid duplicating the details of the grammar.
375 This allows unique parse actions for productions
376 with the same underlying rules.
def StringOf(elements):
    """Build a rule matching a run of one or more characters from elements.

    Whitespace between the characters is skipped, and the matched
    characters are combined into a single token.
    This is useful for the ASN.1 hstring and bstring productions.
    """
    # adjacent=False lets Combine accept characters separated by whitespace.
    return Combine(OneOrMore(CharSet(elements)), adjacent=False)
def CharSet(elements):
    """Build a single rule that matches any one character of elements.

    elements is a string containing all the desired chars, e.g.
      CharSet('01234567890')        # all numbers
      CharSet('01234567890ABCDEF')  # all hex numbers
    """
    # One Literal per character, offered to Or as alternatives.
    return Or([Literal(char) for char in elements])