1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 from pyparsing import Keyword, Literal, Word, OneOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange
# Names exported by `from <module> import *`.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_payload):
    """ Parse a string containing an ASN.1 module definition
    and return a syntax tree in the form of a list of
    AnnotatedToken objects.

    asn1_payload -- text of the ASN.1 module definition to parse.

    Any error raised by the grammar's parseString call propagates
    to the caller unchanged.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_payload)
    # Convert the pyparsing result object into plain nested lists
    # and hand it back to the caller.
    return parse_result.asList()
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node -- an AnnotatedToken, a list of nodes, or any other
            value (printed via str()).
    indent -- number of spaces printed before this node's line;
              children are printed one level deeper.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # Tagged token: print its type, then recurse into its children.
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # Token list: each member is dumped one level deeper.
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # Plain token: print its string form.
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    strings.
    """
    def __init__(self, token_type, elements):
        # `ty` is the attribute name read by print_parse_tree and __str__.
        self.ty = token_type
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Formatting a list uses repr() on its members; sharing the
    # implementation keeps nested tokens readable when printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for an ASN.1 module definition
    and return its top-level rule (module_definition).

    Comments are ignored by the returned rule, and selected
    productions carry parse actions that wrap their matched tokens
    in AnnotatedToken objects tagged with the production's name.
    """
    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by an optional run
        # of letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and the
        # elements grouped into one sub-list.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that tags the matched tokens with `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    # NOTE(review): END, MIN and MAX are used further down; their
    # definitions are inferred from the neighbouring keywords -- confirm.
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SEQUENCE_OF = Keyword('SEQUENCE OF')
    SET_OF = Keyword('SET OF')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Literals
    # NOTE(review): `number` is inferred from its uses (unsigned digits
    # inside signed_number and tags) -- confirm.
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Literal('\'') + Regex('[01]+') + Literal('\'B')
    hstring = Literal('\'') + Regex('[0-9A-F]+') + Literal('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    # NOTE(review): null_value is inferred from its use in builtin_value -- confirm.
    null_value = NULL

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value
    defined_value = valuereference  # todo: more options from 13.1
    value = builtin_value | defined_value

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    # Use a distinct copy of `number`: a plain alias would attach the
    # 'TagClassNumber' parse action below to `number` itself, and so to
    # every signed_number as well.
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    # NOTE(review): type_ is assigned with `<<` below, so it is restored
    # here as a forward declaration too -- confirm.
    type_ = Forward()

    # constraint
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_min = (signed_number | valuereference | MIN)
    value_range_max = (signed_number | valuereference | MAX)
    value_range_constraint = value_range_min + Suppress('..') + value_range_max
    constraint = Suppress('(') + value_range_constraint + Suppress(')')  # todo: consider exception spec from 45.6

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    # todo: consider extension and exception syntax from 24.1
    sequence_type = SEQUENCE + braced_list(component_type | ELLIPSIS)
    sequenceof_type = SEQUENCE_OF + (type_ | named_type)
    setof_type = SET_OF + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | ELLIPSIS)
    enumerated_type = ENUMERATED + braced_list(enumeration)
    bitstring_type = BIT_STRING + braced_list(named_number)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    # NOTE(review): real_type and null_type are inferred from their use
    # in simple_type -- confirm.
    real_type = REAL
    null_type = NULL
    octetstring_type = OCTET_STRING
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | value_list_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # BUG: identifier should not be Optional here,
    # but our ASN.1 interpreter supports unnamed members,
    # so the grammar accepts them as well.
    named_type << (Optional(identifier) + type_)

    # BUG: Trailing semi-colon is not allowed by standard grammar, but our ASN.1 interpreter accepts it
    # and we happen to use it.
    type_assignment = typereference + '::=' + type_ + Suppress(Optional(';'))
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = OneOrMore(assignment)

    module_body = (assignment_list | empty)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_definition = module_reference + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    valuereference.setParseAction(annotate('ValueReference'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))

    return module_definition
300 """ Use to create a distinct name of a production
301 with the same form as another, e.g.
302 identifier = build_identifier('[a-z]')
303 valuereference = build_identifier('[a-z]')
305 identifier = build_identifier('[a-z]')
306 valuereference = Unique(identifier)
307 to avoid duplicating the details of the grammar.
308 This allows unique parse actions for productions
309 with the same underlying rules.