Support extension markers in ENUMERATED.
[asn2quickder] / asn1ate / parser.py
index 6df2c3f..cef74d0 100644 (file)
 
 import re
 from copy import copy
-from pyparsing import Keyword, Literal, Word, OneOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange
+from pyparsing import Keyword, Literal, Word, OneOrMore, ZeroOrMore, Combine, Regex, Forward, Optional, Group, Suppress, delimitedList, cStyleComment, nums, alphanums, empty, srange, dblQuotedString
 
 
 __all__ = ['parse_asn1', 'AnnotatedToken']
 
 
-def parse_asn1(asn1_payload):
-    """ Parse a string containing an ASN.1 module definition
-    and return a syntax tree in the form of a list of
+def parse_asn1(asn1_definition):
+    """ Parse a string containing zero or more ASN.1 module definitions.
+    Returns a list of module syntax trees represented as nested lists of
     AnnotatedToken objects.
     """
     grammar = _build_asn1_grammar()
-    parse_result = grammar.parseString(asn1_payload)
+    parse_result = grammar.parseString(asn1_definition)
     parse_tree = parse_result.asList()
     return parse_tree
 
@@ -115,12 +115,15 @@ def _build_asn1_grammar():
     EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
     COMPONENTS_OF = Keyword('COMPONENTS OF')
     ELLIPSIS = Keyword('...')
+    SIZE = Keyword('SIZE')
+    OF = Keyword('OF')
+    IMPORTS = Keyword('IMPORTS')
+    EXPORTS = Keyword('EXPORTS')
+    FROM = Keyword('FROM')
 
     # Built-in types
     SEQUENCE = Keyword('SEQUENCE')
     SET = Keyword('SET')
-    SEQUENCE_OF = Keyword('SEQUENCE OF')
-    SET_OF = Keyword('SET OF')
     CHOICE = Keyword('CHOICE')
     ENUMERATED = Keyword('ENUMERATED')
     BIT_STRING = Keyword('BIT STRING')
@@ -147,6 +150,11 @@ def _build_asn1_grammar():
     VideotexString = Keyword('VideotexString')
     VisibleString = Keyword('VisibleString')
 
+    # Useful types
+    GeneralizedTime = Keyword('GeneralizedTime')
+    UTCTime = Keyword('UTCTime')
+    ObjectDescriptor = Keyword('ObjectDescriptor')
+
     # Literals
     number = Word(nums)
     signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
@@ -165,6 +173,7 @@ def _build_asn1_grammar():
     valuereference = build_identifier('[a-z]')
     typereference = build_identifier('[A-Z]')
     module_reference = build_identifier('[A-Z]')
+    reference = valuereference | typereference  # TODO: consider object references from 12.1
 
     # values
     # BUG: These are badly specified and cause the grammar to break if used generally.
@@ -174,14 +183,33 @@ def _build_asn1_grammar():
     bitstring_value = bstring | hstring     # todo: consider more forms from 21.9
     integer_value = signed_number
     null_value = NULL
+    cstring_value = dblQuotedString
 
-    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value
+    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
     defined_value = valuereference # todo: more options from 13.1
-    value = builtin_value | defined_value
+
+    # object identifier value
+    name_form = Unique(identifier)
+    number_form = Unique(number)
+    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
+    objid_components = name_and_number_form | name_form | number_form | defined_value
+    objid_components_list = OneOrMore(objid_components)
+    object_identifier_value = Suppress('{') + \
+                              (objid_components_list | (defined_value + objid_components_list)) + \
+                              Suppress('}')
+
+    value = builtin_value | defined_value | object_identifier_value
+
+    # definitive identifier value
+    definitive_number_form = Unique(number)
+    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
+    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
+    definitive_objid_component_list = OneOrMore(definitive_objid_component)
+    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))
 
     # tags
     class_ = UNIVERSAL | APPLICATION | PRIVATE
-    class_number = number # todo: consider defined values from 30.1
+    class_number = Unique(number) # todo: consider defined values from 30.1
     tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
     tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty
 
@@ -200,10 +228,9 @@ def _build_asn1_grammar():
     # constraints
     # todo: consider the full subtype and general constraint syntax described in 45.*
     # but for now, just implement a simple integer value range.
-    value_range_min = (signed_number | valuereference | MIN)
-    value_range_max = (signed_number | valuereference | MAX)
-    value_range_constraint = value_range_min + Suppress('..') + value_range_max
-    constraint = Suppress('(') + value_range_constraint + Suppress(')')  # todo: consider exception spec from 45.6
+    value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
+    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + value_range_constraint + Suppress(')') + Optional(Suppress(')'))
+    constraint = Suppress('(') + value_range_constraint + Suppress(')')
 
     # TODO: consider exception syntax from 24.1
     extension_marker = Unique(ELLIPSIS)
@@ -221,10 +248,10 @@ def _build_asn1_grammar():
 
     set_type = SET + braced_list(component_type | extension_marker)
     sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
-    sequenceof_type = SEQUENCE_OF + (type_ | named_type)
-    setof_type = SET_OF + (type_ | named_type)
+    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
+    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
     choice_type = CHOICE + braced_list(named_type | extension_marker)
-    enumerated_type = ENUMERATED + braced_list(enumeration)
+    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
     bitstring_type = BIT_STRING + braced_list(named_number)
     plain_integer_type = INTEGER
     restricted_integer_type = INTEGER + braced_list(named_number)
@@ -241,31 +268,42 @@ def _build_asn1_grammar():
                                       T61String | UniversalString | \
                                       UTF8String | VideotexString | VisibleString
     characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
+    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor
 
     # todo: consider other builtins from 16.2
-    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type) + Optional(constraint)
+    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)
     constructed_type = choice_type | sequence_type | set_type
     value_list_type = restricted_integer_type | enumerated_type
-    builtin_type = tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | value_list_type | bitstring_type
+    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type
 
     type_ << (builtin_type | referenced_type)
 
-    # BUG: identifier should not be Optional here,
-    # but our ASN.1 interpreter supports unnamed members,
+    # EXT: identifier should not be Optional here, but
+    # our other ASN.1 code generator supports unnamed members,
     # and we use them.
     named_type << (Optional(identifier) + type_)
 
-    # BUG: Trailing semi-colon is not allowed by standard grammar, but our ASN.1 interpreter accepts it
-    # and we happen to use it.
-    type_assignment = typereference + '::=' + type_ + Suppress(Optional(';'))
+    type_assignment = typereference + '::=' + type_
     value_assignment = valuereference + type_ + '::=' + value
 
     assignment = type_assignment | value_assignment
-    assignment_list = OneOrMore(assignment)
+    assignment_list = ZeroOrMore(assignment)
+
+    assigned_identifier = Optional(object_identifier_value | defined_value)
+    global_module_reference = module_reference + assigned_identifier
 
-    module_body = (assignment_list | empty)
+    symbol = Unique(reference)  # TODO: parameterized reference?
+    symbol_list = Group(delimitedList(symbol))
+    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
+    symbols_from_module_list = OneOrMore(symbols_from_module)
+    symbols_imported = Optional(symbols_from_module_list)
+    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
+    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))
+
+    module_body = (exports + imports + assignment_list)
     module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
-    module_definition = module_reference + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END
+    module_identifier = module_reference + definitive_identifier
+    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END
 
     module_definition.ignore(comment)
 
@@ -287,6 +325,7 @@ def _build_asn1_grammar():
     setof_type.setParseAction(annotate('SetOfType'))
     named_number.setParseAction(annotate('NamedValue'))
     constraint.setParseAction(annotate('Constraint'))
+    size_constraint.setParseAction(annotate('SizeConstraint'))
     component_type.setParseAction(annotate('ComponentType'))
     component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
     component_type_default.setParseAction(annotate('ComponentTypeDefault'))
@@ -300,8 +339,19 @@ def _build_asn1_grammar():
     module_body.setParseAction(annotate('ModuleBody'))
     module_definition.setParseAction(annotate('ModuleDefinition'))
     extension_marker.setParseAction(annotate('ExtensionMarker'))
-
-    return module_definition
+    name_form.setParseAction(annotate('NameForm'))
+    number_form.setParseAction(annotate('NumberForm'))
+    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
+    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
+    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
+    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
+    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
+    imports.setParseAction(annotate('Imports'))
+    exports.setParseAction(annotate('Exports'))
+    assignment_list.setParseAction(annotate('AssignmentList'))
+
+    start = ZeroOrMore(module_definition)
+    return start
 
 
 def Unique(token):