1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re
from copy import copy

from pyparsing import (
    Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward,
    Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums,
    empty, srange, dblQuotedString, White,
)
31 __all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_definition):
    """ Parse a string containing one or more ASN.1 module definitions.

    Returns a list of module syntax trees represented as nested lists of
    AnnotatedToken objects.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_definition)
    # Convert the pyparsing result object into plain nested Python lists
    # and hand that back to the caller.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node is an AnnotatedToken, a list of nodes, or any other value (printed
    via str()). indent is the number of leading spaces for this level; each
    nested level is printed one space deeper.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Tagged token: print its type, then its children one level deeper.
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # Token list: the list itself prints nothing, only its members.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Plain token.
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.
    """
    def __init__(self, token_type, elements):
        # ty: string naming the kind of production this token represents.
        self.ty = token_type
        # elements: the child nodes captured for this token.
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their members with repr(), so share the implementation
    # to keep nested tokens readable when a parent token is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for ASN.1 module definitions.

    Returns a parser element matching one or more module definitions.
    Hyphen-style (``--``) and C-style comments are ignored, and parse
    actions wrap the matched productions in AnnotatedToken objects tagged
    with the production name.
    """
    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by an optional run of
        # letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and the elements grouped.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that wraps the matched tokens in an
        # AnnotatedToken carrying the given production name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    binary_digit = Literal('0') | Literal('1')
    binary_string = Combine(OneOrMore(binary_digit), adjacent=False)  # Use adjacent=False to skip whitespace
    bstring = Suppress('\'') + binary_string + Suppress('\'B')
    hstring = Literal('\'') + Regex('[0-9A-F]+') + Literal('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    defined_value = valuereference  # todo: more options from 13.1

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | defined_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))
    constraint = Suppress('(') + value_range_constraint + Suppress(')')

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + braced_list(named_number)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # so the identifier is accepted as optional here as well.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    valuereference.setParseAction(annotate('ValueReference'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))

    start = OneOrMore(module_definition)
    return start
def Unique(token):
    """ Use to create a distinct name of a production
    with the same form as another, e.g.
      identifier = build_identifier('[a-z]')
      valuereference = build_identifier('[a-z]')
    can instead be written as
      identifier = build_identifier('[a-z]')
      valuereference = Unique(identifier)
    to avoid duplicating the details of the grammar.
    This allows unique parse actions for productions
    with the same underlying rules.

    Returns a shallow copy of token; the argument itself is not modified.
    """
    return copy(token)