1 # Copyright (c) 2013, Schneider Electric Buildings AB
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution.
11 # * Neither the name of Schneider Electric Buildings AB nor the
12 # names of contributors may be used to endorse or promote products
13 # derived from this software without specific prior written permission.
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 from pyparsing import Keyword, Literal, Word, OneOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange
# Names exported by a star-import of this module: the parser entry point
# and the node type used in the tree it produces.
__all__ = ['parse_asn1', 'AnnotatedToken']
def parse_asn1(asn1_payload):
    """ Parse a string containing an ASN.1 module definition
    and return a syntax tree in the form of a list of
    AnnotatedToken objects.

    asn1_payload is the text of the module definition. Any parse
    error raised by the underlying pyparsing grammar is propagated
    to the caller unchanged.
    """
    grammar = _build_asn1_grammar()
    parse_result = grammar.parseString(asn1_payload)
    parse_tree = parse_result.asList()

    # Hand the tree back to the caller, as promised by the docstring.
    return parse_tree
def print_parse_tree(node, indent=1):
    """ Debugging aid. Dumps a parse tree as returned
    from parse_asn1 to stdout in indented tree form.

    node is an AnnotatedToken, a list of nodes, or any other
    value (printed via str()). indent is the current nesting
    depth; each level is rendered as one leading space.
    """
    def indented_print(msg):
        print(' ' * indent + msg)

    if isinstance(node, AnnotatedToken):
        # annotated token: print its type, then recurse into its children
        tag, values = node.ty, node.elements
        indented_print('%s:' % tag)
        print_parse_tree(values, indent + 1)
    elif isinstance(node, list):
        # token stream: dump every element one level deeper
        for token in node:
            print_parse_tree(token, indent + 1)
    else:
        # primitive value
        indented_print(str(node))
class AnnotatedToken(object):
    """ A simple data structure to keep track of a token's
    type, identified by a string, and its children.
    Children may be other annotated tokens, lists or simple
    values.
    """
    def __init__(self, token_type, elements):
        # ty: string naming the kind of token (read by print_parse_tree)
        self.ty = token_type
        # elements: the token's children
        self.elements = elements

    def __str__(self):
        return 'T(%s)%s' % (self.ty, self.elements)

    # Lists format their items with repr(), so share the implementation
    # to keep nested tokens readable when a parent token is printed.
    __repr__ = __str__
def _build_asn1_grammar():
    """ Build the pyparsing grammar for an ASN.1 module definition.

    Returns the top-level module_definition expression. Comments
    (hyphen style and C style) are ignored by it, and parse actions
    are attached so that matched productions are wrapped in
    AnnotatedToken objects tagged with the production name.
    """
    # Local import so the comment regex below does not depend on a
    # module-level import of re.
    import re

    def build_identifier(prefix_pattern):
        # One character from prefix_pattern followed by an optional run of
        # letters, digits and hyphens, combined into a single token.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{' elem, elem, ... '}' with the braces dropped and the elements grouped
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Create a parse action that wraps the matched tokens in an
        # AnnotatedToken of type `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')

    # Built-in type names
    SEQUENCE = Keyword('SEQUENCE')
    SEQUENCE_OF = Keyword('SEQUENCE OF')
    SET_OF = Keyword('SET OF')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Literal('\'') + Regex('[01]+') + Literal('\'B')
    hstring = Literal('\'') + Regex('[0-9A-F]+') + Literal('\'H')

    # Comments
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value
    defined_value = valuereference  # todo: more options from 13.1
    value = builtin_value | defined_value

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = number  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3
    named_type = Forward()  # this can only be full defined once we have all types defined.
    type_ = Forward()  # this can only be full defined once we have all types defined.

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_min = (signed_number | MIN)
    value_range_max = (signed_number | MAX)
    value_range_constraint = value_range_min + Suppress('..') + value_range_max
    constraint = Suppress('(') + value_range_constraint + Suppress(')')  # todo: consider exception spec from 45.6

    # BUG: identifier should not be Optional here,
    # but our ASN.1 interpreter supports unnamed members,
    # todo: consider COMPONENTS OF from 24.1
    component_optional = OPTIONAL
    component_default = DEFAULT + value
    component_type = Optional(identifier) + type_ + Optional(component_optional | component_default)
    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    # todo: consider extension and exception syntax from 24.1
    sequence_type = SEQUENCE + braced_list(component_type)
    sequenceof_type = SEQUENCE_OF + (type_ | named_type)
    setof_type = SET_OF + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type)
    enumerated_type = ENUMERATED + braced_list(enumeration)
    bitstring_type = BIT_STRING + braced_list(named_number)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    octetstring_type = OCTET_STRING
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = (BMPString | GeneralString |
                                       GraphicString | IA5String |
                                       ISO646String | NumericString |
                                       PrintableString | TeletexString |
                                       T61String | UniversalString |
                                       UTF8String | VideotexString | VisibleString)
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type

    # todo: consider other builtins from 16.2
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | value_list_type | bitstring_type

    # Close the recursion now that every concrete type rule exists.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    # BUG: Trailing semi-colon is not allowed by standard grammar, but our ASN.1 interpreter accepts it
    # and we happen to use it.
    type_assignment = typereference + '::=' + type_ + Suppress(Optional(';'))
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = OneOrMore(assignment)

    module_body = (assignment_list | empty)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_definition = module_reference + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    component_type.setParseAction(annotate('ComponentType'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    component_optional.setParseAction(annotate('ComponentOptional'))
    component_default.setParseAction(annotate('ComponentDefault'))

    return module_definition
291 """ Use to create a distinct name of a production
292 with the same form as another, e.g.
293 identifier = build_identifier('[a-z]')
294 valuereference = build_identifier('[a-z]')
296 identifier = build_identifier('[a-z]')
297 valuereference = Unique(identifier)
298 to avoid duplicating the details of the grammar.
299 This allows unique parse actions for productions
300 with the same underlying rules.