1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange, dblQuotedString
# Names exported by a star-import of this module. print_parse_tree is a
# debugging aid and is not listed here.
31 __all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """Parse a string containing one or more ASN.1 module definitions.

    Args:
        asn1_definition: Text of the ASN.1 module definition(s) to parse.

    Returns:
        A list of module syntax trees represented as nested lists of
        AnnotatedToken objects.
    """
    # The grammar is built afresh on every call. The pyparsing result is
    # converted to plain nested lists before it is handed back.
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    return parse_result.asList()
def print_parse_tree(node, indent=1):
    """Debugging aid: dump a parse tree to stdout in indented tree form.

    Args:
        node: A parse tree as returned from parse_asn1. It may be an
            AnnotatedToken, a list of nodes, or any other value, which is
            printed via str().
        indent: Number of leading spaces for this level; each nested level
            adds one more.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Tagged token: print its type, then its children one level deeper.
        indented_print('%s:' % node.ty)
        print_parse_tree(node.elements, indent + 1)
    elif isinstance(node, list):
        # Token list: every member is printed one level deeper.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Plain token.
        indented_print(str(node))
class AnnotatedToken(object):
    """A simple data structure to keep track of a token's type and children.

    The type is identified by a string. Children may be other annotated
    tokens, lists or simple tokens.
    """

    def __init__(self, token_type, elements):
        # ty: string naming the kind of token.
        self.ty = token_type
        # elements: the children of this token.
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # A list formats its members with repr(), so sharing the implementation
    # keeps nested tokens readable when a whole tree is printed.
    __repr__ = __str__
82 def _build_asn1_grammar():
# Builds the pyparsing grammar that parse_asn1 applies to ASN.1 module text.
# Rules are composed bottom-up: reserved words, literals, values, tags, types,
# assignments and finally module definitions. Parse actions are attached at
# the end so matched tokens are wrapped in AnnotatedToken objects.
# NOTE(review): several names used below (END, SET, OF, MIN, MAX, number,
# null_value, type_, real_type, null_type, annotate, re) have no definition in
# the lines visible here -- presumably they are defined on lines not shown;
# confirm against the full file.
#
# build_identifier: exactly one character from prefix_pattern followed by an
# optional run of letters, digits and hyphens, combined into a single token.
83 def build_identifier(prefix_pattern):
84 identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
85 identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix) # todo: more rigorous? trailing hyphens and -- forbidden
# braced_list: '{' element, element, ... '}' -- the braces are suppressed and
# the delimited elements are collected into one group.
88 def braced_list(element_rule):
89 return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')
# Wraps the matched tokens (as a plain list) in an AnnotatedToken tagged with
# `name`. NOTE(review): the enclosing def lines are not visible here;
# presumably this is the body of the parse action produced by annotate(name).
93 return AnnotatedToken(name, t.asList())
# Reserved words.
# NOTE(review): the multi-word keywords below ('EXPLICIT TAGS', 'BIT STRING',
# ...) are matched as the literal text given, i.e. with exactly one space
# between the words -- confirm this is acceptable for the inputs expected.
98 DEFINITIONS = Keyword('DEFINITIONS')
99 BEGIN = Keyword('BEGIN')
101 OPTIONAL = Keyword('OPTIONAL')
102 DEFAULT = Keyword('DEFAULT')
103 TRUE = Keyword('TRUE')
104 FALSE = Keyword('FALSE')
105 UNIVERSAL = Keyword('UNIVERSAL')
106 APPLICATION = Keyword('APPLICATION')
107 PRIVATE = Keyword('PRIVATE')
110 IMPLICIT = Keyword('IMPLICIT')
111 EXPLICIT = Keyword('EXPLICIT')
112 EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
113 IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
114 AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
115 EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
116 COMPONENTS_OF = Keyword('COMPONENTS OF')
117 ELLIPSIS = Keyword('...')
118 SIZE = Keyword('SIZE')
120 IMPORTS = Keyword('IMPORTS')
121 EXPORTS = Keyword('EXPORTS')
122 FROM = Keyword('FROM')
# Keywords naming the built-in types.
125 SEQUENCE = Keyword('SEQUENCE')
127 CHOICE = Keyword('CHOICE')
128 ENUMERATED = Keyword('ENUMERATED')
129 BIT_STRING = Keyword('BIT STRING')
130 BOOLEAN = Keyword('BOOLEAN')
131 REAL = Keyword('REAL')
132 OCTET_STRING = Keyword('OCTET STRING')
133 CHARACTER_STRING = Keyword('CHARACTER STRING')
134 NULL = Keyword('NULL')
135 INTEGER = Keyword('INTEGER')
136 OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')
138 # Restricted string types
139 BMPString = Keyword('BMPString')
140 GeneralString = Keyword('GeneralString')
141 GraphicString = Keyword('GraphicString')
142 IA5String = Keyword('IA5String')
143 ISO646String = Keyword('ISO646String')
144 NumericString = Keyword('NumericString')
145 PrintableString = Keyword('PrintableString')
146 TeletexString = Keyword('TeletexString')
147 T61String = Keyword('T61String')
148 UniversalString = Keyword('UniversalString')
149 UTF8String = Keyword('UTF8String')
150 VideotexString = Keyword('VideotexString')
151 VisibleString = Keyword('VisibleString')
# Keywords grouped below as useful_type.
154 GeneralizedTime = Keyword('GeneralizedTime')
155 UTCTime = Keyword('UTCTime')
156 ObjectDescriptor = Keyword('ObjectDescriptor')
# Literals: an optionally negated number, a quoted run of binary digits
# followed by B, and a quoted run of uppercase hexadecimal digits followed by H.
160 signed_number = Combine(Optional('-') + number) # todo: consider defined values from 18.1
161 bstring = Literal('\'') + Regex('[01]+') + Literal('\'B')
162 hstring = Literal('\'') + Regex('[0-9A-F]+') + Literal('\'H')
# A '--' comment extends (non-greedily) to the next '--' or to the end of the
# line, whichever comes first; C-style comments are accepted as well.
165 hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
166 comment = hyphen_comment | cStyleComment
# Identifiers start with a lowercase letter.
169 identifier = build_identifier('[a-z]')
172 # these are duplicated to force unique token annotations
173 valuereference = build_identifier('[a-z]')
174 typereference = build_identifier('[A-Z]')
175 module_reference = build_identifier('[A-Z]')
176 reference = valuereference | typereference # TODO: consider object references from 12.1
179 # BUG: These are badly specified and cause the grammar to break if used generally.
180 # todo: consider more literals from 16.9
181 real_value = Regex(r'-?\d+(\.\d*)?') # todo: this doesn't really follow the spec
182 boolean_value = TRUE | FALSE
183 bitstring_value = bstring | hstring # todo: consider more forms from 21.9
184 integer_value = signed_number
186 cstring_value = dblQuotedString
# '|' tries alternatives left to right and takes the first that matches.
# NOTE(review): real_value's fractional part is optional, so it also accepts
# plain integers and presumably shadows integer_value here -- confirm.
188 builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
189 defined_value = valuereference # todo: more options from 13.1
191 # object identifier value
# Unique(...) is defined elsewhere in this file; per its docstring it gives a
# production a distinct name so that it can carry its own parse action.
192 name_form = Unique(identifier)
193 number_form = Unique(number)
194 name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
195 objid_components = name_and_number_form | name_form | number_form | defined_value
196 objid_components_list = OneOrMore(objid_components)
# NOTE(review): the final operand of this continued expression is on a line
# not visible here (presumably the closing brace); confirm.
197 object_identifier_value = Suppress('{') + \
198 (objid_components_list | (defined_value + objid_components_list)) + \
201 value = builtin_value | defined_value | object_identifier_value
203 # definitive identifier value
204 definitive_number_form = Unique(number)
205 definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
206 definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
207 definitive_objid_component_list = OneOrMore(definitive_objid_component)
208 definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))
# Tags: '[' optional class, class number ']'. The `empty` alternative makes
# the module-level tag default optional.
211 class_ = UNIVERSAL | APPLICATION | PRIVATE
212 class_number = Unique(number) # todo: consider defined values from 30.1
213 tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
214 tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty
# Extension default; `empty` makes it optional as well.
217 extension_default = EXTENSIBILITY_IMPLIED | empty
# Type references.
220 defined_type = Unique(typereference) # todo: consider other defined types from 13.1
221 referenced_type = Unique(defined_type) # todo: consider other ref:d types from 16.3
223 # Forward-declare these, they can only be fully defined once
224 # we have all types defined. There are some circular dependencies.
225 named_type = Forward()
229 # todo: consider the full subtype and general constraint syntax described in 45.*
230 # but for now, just implement a simple integer value range.
231 value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
# NOTE(review): the outer '(' and ')' are each optional independently, so an
# unbalanced pair is accepted as well.
232 size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))
233 constraint = Suppress('(') + value_range_constraint + Suppress(')')
235 # TODO: consider exception syntax from 24.1
236 extension_marker = Unique(ELLIPSIS)
# Component types. The more specific forms are listed before the bare
# named_type so that they are tried first.
238 component_type_optional = named_type + Suppress(OPTIONAL)
239 component_type_default = named_type + Suppress(DEFAULT) + value
240 component_type_components_of = Suppress(COMPONENTS_OF) + type_
241 component_type = component_type_components_of | component_type_optional | component_type_default | named_type
243 tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_
# Named numbers: identifier '(' signed number ')'; an enumeration item may
# also be a bare identifier.
245 named_number_value = Suppress('(') + signed_number + Suppress(')')
246 named_number = identifier + named_number_value
247 enumeration = named_number | identifier
# Built-in type productions.
249 set_type = SET + braced_list(component_type | extension_marker)
250 sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
251 sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
252 setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
253 choice_type = CHOICE + braced_list(named_type | extension_marker)
254 enumerated_type = ENUMERATED + braced_list(enumeration)
255 bitstring_type = BIT_STRING + braced_list(named_number)
256 plain_integer_type = INTEGER
257 restricted_integer_type = INTEGER + braced_list(named_number)
258 boolean_type = BOOLEAN
261 object_identifier_type = OBJECT_IDENTIFIER
262 octetstring_type = OCTET_STRING
263 unrestricted_characterstring_type = CHARACTER_STRING
264 restricted_characterstring_type = BMPString | GeneralString | \
265 GraphicString | IA5String | \
266 ISO646String | NumericString | \
267 PrintableString | TeletexString | \
268 T61String | UniversalString | \
269 UTF8String | VideotexString | VisibleString
270 characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
271 useful_type = GeneralizedTime | UTCTime | ObjectDescriptor
273 # todo: consider other builtins from 16.2
274 simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)
275 constructed_type = choice_type | sequence_type | set_type
276 value_list_type = restricted_integer_type | enumerated_type
277 builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type
# Supply the definition of the forward-declared type_ now that every
# alternative exists.
279 type_ << (builtin_type | referenced_type)
281 # EXT: identifier should not be Optional here, but
282 # our other ASN.1 code generator supports unnamed members,
284 named_type << (Optional(identifier) + type_)
# Assignments: "Name ::= Type" and "name Type ::= value".
286 type_assignment = typereference + '::=' + type_
287 value_assignment = valuereference + type_ + '::=' + value
289 assignment = type_assignment | value_assignment
290 assignment_list = ZeroOrMore(assignment)
292 assigned_identifier = Optional(object_identifier_value | defined_value)
293 global_module_reference = module_reference + assigned_identifier
# IMPORTS / EXPORTS clauses; both are optional and terminated by ';'.
295 symbol = Unique(reference) # TODO: parameterized reference?
296 symbol_list = Group(delimitedList(symbol))
297 symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
298 symbols_from_module_list = OneOrMore(symbols_from_module)
299 symbols_imported = Optional(symbols_from_module_list)
300 exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
301 imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))
# Module structure.
303 module_body = (exports + imports + assignment_list)
304 module_defaults = Suppress(tag_default + extension_default) # we don't want these in the AST
305 module_identifier = module_reference + definitive_identifier
306 module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END
# Comments are skipped wherever they occur inside a module definition.
308 module_definition.ignore(comment)
310 # Mark up the parse results with token tags
311 identifier.setParseAction(annotate('Identifier'))
312 named_number_value.setParseAction(annotate('Value'))
313 tag.setParseAction(annotate('Tag'))
314 class_.setParseAction(annotate('TagClass'))
315 class_number.setParseAction(annotate('TagClassNumber'))
316 type_.setParseAction(annotate('Type'))
317 simple_type.setParseAction(annotate('SimpleType'))
318 choice_type.setParseAction(annotate('ChoiceType'))
319 sequence_type.setParseAction(annotate('SequenceType'))
320 set_type.setParseAction(annotate('SetType'))
321 value_list_type.setParseAction(annotate('ValueListType'))
322 bitstring_type.setParseAction(annotate('BitStringType'))
323 referenced_type.setParseAction(annotate('ReferencedType'))
324 sequenceof_type.setParseAction(annotate('SequenceOfType'))
325 setof_type.setParseAction(annotate('SetOfType'))
326 named_number.setParseAction(annotate('NamedValue'))
327 constraint.setParseAction(annotate('Constraint'))
328 size_constraint.setParseAction(annotate('SizeConstraint'))
329 component_type.setParseAction(annotate('ComponentType'))
330 component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
331 component_type_default.setParseAction(annotate('ComponentTypeDefault'))
332 component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
333 tagged_type.setParseAction(annotate('TaggedType'))
334 named_type.setParseAction(annotate('NamedType'))
335 type_assignment.setParseAction(annotate('TypeAssignment'))
336 value_assignment.setParseAction(annotate('ValueAssignment'))
337 valuereference.setParseAction(annotate('ValueReference'))
338 module_reference.setParseAction(annotate('ModuleReference'))
339 module_body.setParseAction(annotate('ModuleBody'))
340 module_definition.setParseAction(annotate('ModuleDefinition'))
341 extension_marker.setParseAction(annotate('ExtensionMarker'))
342 name_form.setParseAction(annotate('NameForm'))
343 number_form.setParseAction(annotate('NumberForm'))
344 name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
345 object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
346 definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
347 definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
348 definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
349 imports.setParseAction(annotate('Imports'))
350 exports.setParseAction(annotate('Exports'))
351 assignment_list.setParseAction(annotate('AssignmentList'))
# Top-level rule: zero or more module definitions.
# NOTE(review): the statement handing this rule back to the caller is not
# visible here; parse_asn1 calls parseString on this function's result.
353 start = ZeroOrMore(module_definition)
358 """ Use to create a distinct name of a production
359 with the same form as another, e.g.
360 identifier = build_identifier('[a-z]')
361 valuereference = build_identifier('[a-z]')
363 identifier = build_identifier('[a-z]')
364 valuereference = Unique(identifier)
365 to avoid duplicating the details of the grammar.
366 This allows unique parse actions for productions
367 with the same underlying rules.