1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 from pyparsing import Keyword, Literal, Word, OneOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange
# Names exported by a star-import of this module.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_payload):
    """ Parse a string containing an ASN.1 module definition
    and return a syntax tree in the form of a list of
    AnnotatedToken objects.

    asn1_payload -- text of the ASN.1 module definition to parse.

    Nothing is caught here, so any error raised by the grammar's
    parseString call propagates to the caller.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_payload)
    # asList() turns the pyparsing result object into plain nested lists.
    parse_tree = parse_result.asList()
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node   -- an AnnotatedToken, a list of nodes, or any other
              value (printed through str()).
    indent -- number of leading spaces used for this node; every
              recursive call passes indent + 1.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # tagged token: print the tag, then recurse into its children
        indented_print('%s:' % node.ty)
        print_parse_tree(node.elements, indent + 1)
    elif isinstance(node, list):
        # token list: print each member one level deeper
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # plain token
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.
    """
    def __init__(self, token_type, elements):
        # ty: string tag naming the production this token came from
        self.ty = token_type
        # elements: the token's children
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their members with repr(), so share the same rendering
    # to keep nested tokens readable when a parent token is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build and return the pyparsing grammar for an ASN.1 module
    definition.

    The returned element matches a module reference, the DEFINITIONS
    header, and a BEGIN ... END body of type and value assignments.
    Double-hyphen and C-style comments are ignored, and parse actions
    wrap the matched productions in AnnotatedToken objects.
    """
    # `re` supplies the flag for the comment regex below; importing it here
    # keeps this function independent of the module-level imports.
    import re

    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by an optional run of
        # letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # Comma-separated elements between braces; the braces are dropped
        # and the elements are grouped into one sub-list.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that wraps the matched tokens in an
        # AnnotatedToken tagged with `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Literal('\'') + Regex('[01]+') + Literal('\'B')
    hstring = Literal('\'') + Regex('[0-9A-F]+') + Literal('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value
    defined_value = valuereference  # todo: more options from 13.1

    # object identifier value
    name_form = Unique(identifier)
    number_form = number | defined_value
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | defined_value | object_identifier_value

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))
    constraint = Suppress('(') + value_range_constraint + Suppress(')')

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration)
    bitstring_type = BIT_STRING + braced_list(named_number)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | value_list_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # BUG: identifier should not be Optional here,
    # but our ASN.1 interpreter supports unnamed members,
    # and we use them.
    named_type << (Optional(identifier) + type_)

    # BUG: Trailing semi-colon is not allowed by standard grammar, but our ASN.1 interpreter accepts it
    # and we happen to use it.
    type_assignment = typereference + '::=' + type_ + Suppress(Optional(';'))
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = OneOrMore(assignment)

    module_body = (assignment_list | empty)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_definition = module_reference + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    valuereference.setParseAction(annotate('ValueReference'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))

    return module_definition
322 """ Use to create a distinct name of a production
323 with the same form as another, e.g.
324 identifier = build_identifier('[a-z]')
325 valuereference = build_identifier('[a-z]')
327 identifier = build_identifier('[a-z]')
328 valuereference = Unique(identifier)
329 to avoid duplicating the details of the grammar.
330 This allows unique parse actions for productions
331 with the same underlying rules.