import re
from copy import copy
-from pyparsing import Keyword, Literal, Word, OneOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange
+from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange, dblQuotedString
__all__ = ['parse_asn1', 'AnnotatedToken']
-def parse_asn1(asn1_payload):
- """ Parse a string containing an ASN.1 module definition
- and return a syntax tree in the form of a list of
+def parse_asn1(asn1_definition):
+ """ Parse a string containing one or more ASN.1 module definitions.
+ Returns a list of module syntax trees represented as nested lists of
AnnotatedToken objects.
"""
grammar = _build_asn1_grammar()
- parse_result = grammar.parseString(asn1_payload)
+ parse_result = grammar.parseString(asn1_definition)  # NOTE(review): parseAll is not passed, so input the grammar cannot match is left unparsed instead of raising - confirm this is intended
parse_tree = parse_result.asList()
return parse_tree
AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
COMPONENTS_OF = Keyword('COMPONENTS OF')
+ ELLIPSIS = Keyword('...')  # NOTE(review): Keyword adds an identifier-boundary check; Literal is the usual choice for punctuation - confirm intent
+ SIZE = Keyword('SIZE')  # used by size_constraint below
+ OF = Keyword('OF')  # separate token so a size constraint can sit between SEQUENCE/SET and OF
+ IMPORTS = Keyword('IMPORTS')
+ EXPORTS = Keyword('EXPORTS')
+ FROM = Keyword('FROM')
# Built-in types
SEQUENCE = Keyword('SEQUENCE')
- SEQUENCE_OF = Keyword('SEQUENCE OF')
- SET_OF = Keyword('SET OF')
+ SET = Keyword('SET')  # shared by the "SET { ... }" and "SET ... OF" rules
CHOICE = Keyword('CHOICE')
ENUMERATED = Keyword('ENUMERATED')
BIT_STRING = Keyword('BIT STRING')
CHARACTER_STRING = Keyword('CHARACTER STRING')
NULL = Keyword('NULL')
INTEGER = Keyword('INTEGER')
+ OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')  # matches this exact text, i.e. a single space between the two words
# Restricted string types
BMPString = Keyword('BMPString')
VideotexString = Keyword('VideotexString')
VisibleString = Keyword('VisibleString')
+ # Useful types
+ GeneralizedTime = Keyword('GeneralizedTime')
+ UTCTime = Keyword('UTCTime')
+ ObjectDescriptor = Keyword('ObjectDescriptor')
+
# Literals
number = Word(nums)
signed_number = Combine(Optional('-') + number) # todo: consider defined values from 18.1
valuereference = build_identifier('[a-z]')
typereference = build_identifier('[A-Z]')
module_reference = build_identifier('[A-Z]')
+ reference = valuereference | typereference # TODO: consider object references from 12.1
# values
# BUG: These are badly specified and cause the grammar to break if used generally.
bitstring_value = bstring | hstring # todo: consider more forms from 21.9
integer_value = signed_number
null_value = NULL
+ cstring_value = dblQuotedString  # pyparsing's dblQuotedString keeps the surrounding quotes unless a removeQuotes action is attached
- builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value
+ builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value  # '|' is first-match, so cstring_value is tried last
defined_value = valuereference # todo: more options from 13.1
- value = builtin_value | defined_value
+
+ # object identifier value
+ name_form = Unique(identifier)  # NOTE(review): Unique is defined later in this file (body not visible here); presumably yields a separate element so the parse action set on name_form does not attach to identifier - confirm
+ number_form = Unique(number)
+ name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
+ objid_components = name_and_number_form | name_form | number_form | defined_value  # first-match: name_and_number_form must come before name_form, which is its prefix
+ objid_components_list = OneOrMore(objid_components)
+ object_identifier_value = Suppress('{') + \
+ (objid_components_list | (defined_value + objid_components_list)) + \
+ Suppress('}')  # NOTE(review): the second alternative above looks unreachable - objid_components already accepts defined_value, so the first alternative matches whenever the second could
+
+ value = builtin_value | defined_value | object_identifier_value
+
+ # definitive identifier value
+ definitive_number_form = Unique(number)
+ definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
+ definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
+ definitive_objid_component_list = OneOrMore(definitive_objid_component)
+ definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))  # Optional: also matches when no braced identifier is present
# tags
class_ = UNIVERSAL | APPLICATION | PRIVATE
- class_number = number # todo: consider defined values from 30.1
+ class_number = Unique(number) # todo: consider defined values from 30.1
tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty
# constraints
# todo: consider the full subtype and general constraint syntax described in 45.*
# but for now, just implement a simple integer value range.
- value_range_min = (signed_number | valuereference | MIN)
- value_range_max = (signed_number | valuereference | MAX)
- value_range_constraint = value_range_min + Suppress('..') + value_range_max
- constraint = Suppress('(') + value_range_constraint + Suppress(')') # todo: consider exception spec from 45.6
+ value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)  # lower/upper bound alternatives are now inlined
+ size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))  # NOTE(review): the outer parentheses are independently optional, so unbalanced input such as "(SIZE(1..2)" is accepted
+ constraint = Suppress('(') + value_range_constraint + Suppress(')')  # plain value range only; the SIZE form is handled by size_constraint
+
+ # TODO: consider exception syntax from 24.1
+ extension_marker = Unique(ELLIPSIS)  # gets its own 'ExtensionMarker' parse action further down
component_type_optional = named_type + Suppress(OPTIONAL)
component_type_default = named_type + Suppress(DEFAULT) + value
named_number = identifier + named_number_value
enumeration = named_number | identifier
- # todo: consider extension and exception syntax from 24.1
- sequence_type = SEQUENCE + braced_list(component_type)
- sequenceof_type = SEQUENCE_OF + (type_ | named_type)
- setof_type = SET_OF + (type_ | named_type)
- choice_type = CHOICE + braced_list(named_type)
- enumerated_type = ENUMERATED + braced_list(enumeration)
+ set_type = SET + braced_list(component_type | extension_marker)  # list elements may be components or the '...' extension marker
+ sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
+ sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)  # SEQUENCE and OF are suppressed; only the optional size constraint and the element type are kept
+ setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)  # same shape as sequenceof_type, with SET
+ choice_type = CHOICE + braced_list(named_type | extension_marker)
+ enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
bitstring_type = BIT_STRING + braced_list(named_number)
plain_integer_type = INTEGER
restricted_integer_type = INTEGER + braced_list(named_number)
boolean_type = BOOLEAN
real_type = REAL
null_type = NULL
+ object_identifier_type = OBJECT_IDENTIFIER
octetstring_type = OCTET_STRING
unrestricted_characterstring_type = CHARACTER_STRING
restricted_characterstring_type = BMPString | GeneralString | \
T61String | UniversalString | \
UTF8String | VideotexString | VisibleString
characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
+ useful_type = GeneralizedTime | UTCTime | ObjectDescriptor
# todo: consider other builtins from 16.2
- simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type) + Optional(constraint)
- constructed_type = choice_type | sequence_type
+ simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)  # the optional constraint here is the value-range form only
+ constructed_type = choice_type | sequence_type | set_type
value_list_type = restricted_integer_type | enumerated_type
- builtin_type = tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | value_list_type | bitstring_type
+ builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type  # value_list_type goes first: '|' is first-match, so simple_type would otherwise take "INTEGER { ... }" as a plain INTEGER
type_ << (builtin_type | referenced_type)
- # BUG: identifier should not be Optional here,
- # but our ASN.1 interpreter supports unnamed members,
+ # EXT: identifier should not be Optional here, but
+ # our other ASN.1 code generator supports unnamed members,
# and we use them.
named_type << (Optional(identifier) + type_)
- # BUG: Trailing semi-colon is not allowed by standard grammar, but our ASN.1 interpreter accepts it
- # and we happen to use it.
- type_assignment = typereference + '::=' + type_ + Suppress(Optional(';'))
+ type_assignment = typereference + '::=' + type_  # a trailing ';' after a type assignment is no longer accepted
value_assignment = valuereference + type_ + '::=' + value
assignment = type_assignment | value_assignment
- assignment_list = OneOrMore(assignment)
+ assignment_list = ZeroOrMore(assignment)  # zero assignments allowed, replacing the old "| empty" alternative in module_body
- module_body = (assignment_list | empty)
+ assigned_identifier = Optional(object_identifier_value | defined_value)  # Optional: may match without consuming anything
+ global_module_reference = module_reference + assigned_identifier
+
+ symbol = Unique(reference) # TODO: parameterized reference?
+ symbol_list = Group(delimitedList(symbol))  # Group keeps each comma-separated symbol list as one nested list
+ symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
+ symbols_from_module_list = OneOrMore(symbols_from_module)
+ symbols_imported = Optional(symbols_from_module_list)
+ exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))  # whole EXPORTS section is optional; a symbol list is required when present
+ imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))  # whole IMPORTS section is optional; "IMPORTS ;" with no symbols also matches
+
+ module_body = (exports + imports + assignment_list)  # fixed order: exports, then imports, then assignments
module_defaults = Suppress(tag_default + extension_default) # we don't want these in the AST
- module_definition = module_reference + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END
+ module_identifier = module_reference + definitive_identifier
+ module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END
module_definition.ignore(comment)
simple_type.setParseAction(annotate('SimpleType'))
choice_type.setParseAction(annotate('ChoiceType'))
sequence_type.setParseAction(annotate('SequenceType'))
+ set_type.setParseAction(annotate('SetType'))
value_list_type.setParseAction(annotate('ValueListType'))
bitstring_type.setParseAction(annotate('BitStringType'))
referenced_type.setParseAction(annotate('ReferencedType'))
setof_type.setParseAction(annotate('SetOfType'))
named_number.setParseAction(annotate('NamedValue'))
constraint.setParseAction(annotate('Constraint'))
+ size_constraint.setParseAction(annotate('SizeConstraint'))
component_type.setParseAction(annotate('ComponentType'))
component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
component_type_default.setParseAction(annotate('ComponentTypeDefault'))
module_reference.setParseAction(annotate('ModuleReference'))
module_body.setParseAction(annotate('ModuleBody'))
module_definition.setParseAction(annotate('ModuleDefinition'))
-
- return module_definition
+ extension_marker.setParseAction(annotate('ExtensionMarker'))
+ name_form.setParseAction(annotate('NameForm'))
+ number_form.setParseAction(annotate('NumberForm'))
+ name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
+ object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
+ definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))  # NOTE(review): definitive_identifier is an Optional, so this action presumably also runs with an empty token list - confirm annotate handles that
+ definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
+ definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
+ imports.setParseAction(annotate('Imports'))  # NOTE(review): imports/exports are Optional as well - same empty-token question as above
+ exports.setParseAction(annotate('Exports'))
+ assignment_list.setParseAction(annotate('AssignmentList'))
+
+ start = ZeroOrMore(module_definition)  # NOTE(review): also matches zero modules; since parse_asn1 calls parseString without parseAll, a malformed module ends the match quietly (possibly giving []) instead of raising - confirm
+ return start
def Unique(token):