asn.1 parser in C/Python
I wrote such a parser a few years ago. It generates Python classes for the pyasn1 library. I used it on Ericsson documentation to build a parser for their CDRs.
I'll try posting the code here now.
# ASN.1 -> pyasn1 class generator.
#
# Usage: python <this script> <asn1-definitions-file> > defs_body.py
#
# Parses the ASN.1-like definitions in the file named by sys.argv[1] with
# pyparsing and writes one Python class per definition to stdout, ordered so
# that a class is only emitted after every type it depends on.
import sys
from pyparsing import *

# ---------------------------------------------------------------------------
# Grammar
# ---------------------------------------------------------------------------

# Either bracket style is accepted as a delimiter: "(" / "{" and ")" / "}".
OpenBracket = Regex("[({]").suppress()
CloseBracket = Regex("[)}]").suppress()


def Enclose(val):
    """Return a parser matching `val` wrapped in an open/close bracket pair."""
    return OpenBracket + val + CloseBracket


def SetDefType(typekw):
    """Build a parse action that stores `typekw` under the "defType" key."""
    def f(a, b, c):
        c["defType"] = typekw
    return f


def NoDashes(a, b, c):
    """Parse action: replace "-" with "_" in the first matched token."""
    return c[0].replace("-", "_")


def DefineTypeDef(typekw, typename, typedef):
    """Build the parser for a type keyword plus an optional bracketed body.

    `typename` is the parser for the keyword itself; it is tagged with
    defType `typekw` and named "definitionType".  `typedef` is the parser for
    the body, named "definition" when present.
    """
    return (
        typename.addParseAction(SetDefType(typekw)).setResultsName("definitionType")
        - Optional(Enclose(typedef).setResultsName("definition"))
    )


SizeConstraintBodyOpt = (
    Word(nums).setResultsName("minSize")
    - Optional(Suppress(Literal("..")) - Word(nums + "n").setResultsName("maxSize"))
)
SizeConstraint = Group(
    Keyword("SIZE").suppress() - Enclose(SizeConstraintBodyOpt)
).setResultsName("sizeConstraint")
Constraints = Group(delimitedList(SizeConstraint)).setResultsName("constraints")

# Forward declaration: type bodies nest (SET/SEQUENCE/CHOICE members, "OF").
DefinitionBody = Forward()

TagPrefix = (
    Enclose(Word(nums).setResultsName("tagID"))
    - Keyword("IMPLICIT").setResultsName("tagFormat")
)
OptionalSuffix = Optional(Keyword("OPTIONAL").setResultsName("isOptional"))
JunkPrefix = Optional("--F--").suppress()
AName = Word(alphanums + "-").setParseAction(NoDashes).setResultsName("name")
SingleElement = Group(
    JunkPrefix
    - AName
    - Optional(TagPrefix)
    - DefinitionBody.setResultsName("typedef")
    - OptionalSuffix
)
NamedTypes = Dict(delimitedList(SingleElement)).setResultsName("namedTypes")

SetBody = DefineTypeDef("Set", Keyword("SET"), NamedTypes)
SequenceBody = DefineTypeDef("Sequence", Keyword("SEQUENCE"), NamedTypes)
ChoiceBody = DefineTypeDef("Choice", Keyword("CHOICE"), NamedTypes)
SetOfBody = (
    (Keyword("SET") + Optional(SizeConstraint) + Keyword("OF"))
    .setParseAction(SetDefType("SetOf"))
    + Group(DefinitionBody).setResultsName("typedef")
)
SequenceOfBody = (
    (Keyword("SEQUENCE") + Optional(SizeConstraint) + Keyword("OF"))
    .setParseAction(SetDefType("SequenceOf"))
    + Group(DefinitionBody).setResultsName("typedef")
)
# Any other identifier is treated as a reference to a user-defined type.
CustomBody = DefineTypeDef(
    "constructed", Word(alphanums + "-").setParseAction(NoDashes), Constraints
)
NullBody = DefineTypeDef("Null", Keyword("NULL"), Constraints)
OctetStringBody = DefineTypeDef("OctetString", Regex("OCTET STRING"), Constraints)
IA5StringBody = DefineTypeDef("IA5String", Keyword("IA5STRING"), Constraints)
EnumElement = Group(
    Word(printables).setResultsName("name")
    - Enclose(Word(nums).setResultsName("value"))
)
NamedValues = Dict(delimitedList(EnumElement)).setResultsName("namedValues")
EnumBody = DefineTypeDef("Enum", Keyword("ENUMERATED"), NamedValues)
BitStringBody = DefineTypeDef("BitString", Keyword("BIT") + Keyword("STRING"), NamedValues)

# Order matters: the "OF" forms are tried before the plain SET/SEQUENCE forms,
# and CustomBody (which matches any identifier) is tried last.
DefinitionBody << (
    OctetStringBody
    | SetOfBody
    | SetBody
    | ChoiceBody
    | SequenceOfBody
    | SequenceBody
    | EnumBody
    | BitStringBody
    | IA5StringBody
    | NullBody
    | CustomBody
)
Definition = AName - Literal("::=").suppress() - Optional(TagPrefix) - DefinitionBody
Definitions = Dict(ZeroOrMore(Group(Definition)))

pf = Definitions.parseFile(sys.argv[1])

# ---------------------------------------------------------------------------
# Code generation
# ---------------------------------------------------------------------------

# TypeDeps: type name -> list of type names it still waits for.
# TypeDefs: type name -> generated class source text, not yet emitted.
TypeDeps = {}
TypeDefs = {}

# defType tag -> pyasn1 base class expression used in the generated code.
StaticMap = {
    "Null": "univ.Null",
    "Enum": "univ.Enumerated",
    "OctetString": "univ.OctetString",
    "IA5String": "char.IA5String",
    "Set": "univ.Set",
    "Sequence": "univ.Sequence",
    "Choice": "univ.Choice",
    "SetOf": "univ.SetOf",
    "BitString": "univ.BitString",
    "SequenceOf": "univ.SequenceOf",
}

# isOptional flag -> prefix of the namedtype.*NamedType class name.
OptMap = {
    False: "",
    True: "Optional",
}


def SizeConstraintHelper(size):
    """Render a parsed SIZE constraint as a ValueSizeConstraint expression.

    The upper bound defaults to the lower bound when no "maxSize" was parsed.
    Returns None when a bound is not an integer literal (the grammar also
    admits the letter "n" in maxSize), so the constraint is silently skipped.
    """
    s2 = s1 = size.get("minSize")
    s2 = size.get("maxSize", s2)
    try:
        return "constraint.ValueSizeConstraint(%s, %s)" % (int(s1), int(s2))
    except ValueError:
        return None


ConstraintMap = {
    'sizeConstraint': SizeConstraintHelper,
}


def ConstraintHelper(c):
    """Return the rendered constraint expressions for every entry of `c`.

    Entries whose helper returns a false value are dropped.  Raises KeyError
    for a constraint key that has no helper in ConstraintMap.
    """
    result = []
    for key, value in c.items():
        r = ConstraintMap[key](value)
        if r:
            result.append(r)
    return result


def GenerateConstraints(c, ancestor, element, level=1):
    """Return a one-line "subtypeSpec = ..." list, or [] without constraints.

    The ancestor's own subtypeSpec is always the first term of the sum.
    The returned line carries no indentation; callers add it.
    """
    result = ConstraintHelper(c)
    if result:
        return ["subtypeSpec = %s" % " + ".join(["%s.subtypeSpec" % ancestor] + result)]
    return []


def GenerateNamedValues(definitions, ancestor, element, level=1):
    """Return the lines of a "namedValues = namedval.NamedValues(...)" block."""
    result = ["namedValues = namedval.NamedValues("]
    for kw in definitions:
        result.append(" ('%s', %s)," % (kw["name"], kw["value"]))
    result.append(")")
    return result


def GenerateNamedTypesList(definitions, element, level=1):
    """Return one or more source lines per member of a SET/SEQUENCE/CHOICE.

    Type names referenced by members are appended to element["_d"] so the
    enclosing class is emitted after them.
    """
    result = []
    for val in definitions:
        name = val["name"]
        isOptional = bool(val.get("isOptional"))

        # Collect the keyword arguments of the trailing .subtype(...) call.
        subtype = []
        constraints = val.get("constraints")
        if constraints:
            cg = ConstraintHelper(constraints)
            # Every constraint may have been skipped (non-numeric bound);
            # emitting a bare "subtypeSpec=" would be a syntax error.
            if cg:
                subtype.append("subtypeSpec=%s" % " + ".join(cg))
        tagId = val.get("tagID")
        if tagId:
            subtype.append(
                "implicitTag=tag.Tag(tag.tagClassContext, tag.tagFormatConstructed, %s)" % tagId
            )
        if subtype:
            subtype = ".subtype(%s)" % ", ".join(subtype)
        else:
            subtype = ""

        cbody = []
        defType = val["defType"]
        if defType == "constructed":
            typename = val["typedef"]
            element["_d"].append(typename)
        elif defType == "SequenceOf":
            typename = "univ.SequenceOf"
            cbody = [" componentType=%s()" % val["typedef"]["definitionType"]]
        elif defType == "Choice":
            typename = "univ.Choice"
            indef = val.get("definition")
            if indef:
                cbody = [
                    " %s" % x
                    for x in GenerateClassDefinition(indef, name, typename, element)
                ]
        else:
            # Remaining built-in types (Null, OctetString, Enum, ...) map
            # straight to their pyasn1 class instead of rendering "None(".
            typename = StaticMap[defType]

        construct = [
            "namedtype.%sNamedType('%s', %s(" % (OptMap[isOptional], name, typename),
            ")%s)," % subtype,
        ]
        if not cbody:
            result.append("%s%s%s" % (" " * level, construct[0], construct[1]))
        else:
            result.append(" %s" % construct[0])
            result.extend(cbody)
            result.append(" %s" % construct[1])
    return result


def GenerateNamedTypes(definitions, ancestor, element, level=1):
    """Return the lines of a "componentType = namedtype.NamedTypes(...)" block."""
    result = ["componentType = namedtype.NamedTypes("]
    result.extend(GenerateNamedTypesList(definitions, element))
    result.append(")")
    return result


# Key found inside a parsed "definition" -> generator for that part.
defmap = {
    'constraints': GenerateConstraints,
    'namedValues': GenerateNamedValues,
    'namedTypes': GenerateNamedTypes,
}


def GenerateClassDefinition(definition, name, ancestor, element, level=1):
    """Return the indented class-body lines for a parsed definition.

    Keys without a generator in `defmap`, and empty values, are ignored.
    """
    result = []
    for defkey, defval in definition.items():
        if defval:
            fn = defmap.get(defkey)
            if fn:
                result.extend(fn(defval, ancestor, element, level))
    return [" %s" % x for x in result]


def GenerateClass(element, ancestor):
    """Return the source text of the class for one top-level definition.

    Also records the element's collected dependencies in TypeDeps.
    """
    name = element["name"]
    top = "class %s(%s):" % (name, ancestor)
    definition = element.get("definition")
    body = []
    if definition:
        body = GenerateClassDefinition(definition, name, ancestor, element)
    else:
        typedef = element.get("typedef")
        if typedef:
            element["_d"].append(typedef["definitionType"])
            body.append(" componentType = %s()" % typedef["definitionType"])
        szc = element.get('sizeConstraint')
        if szc:
            # GenerateConstraints returns unindented lines; indent them so
            # the assignment stays inside the class body.
            body.extend(
                " %s" % line
                for line in GenerateConstraints({'sizeConstraint': szc}, ancestor, element)
            )
    if not body:
        body.append(" pass")
    TypeDeps[name] = list(frozenset(element["_d"]))
    return "\n".join([top] + body)


def StaticConstructor(x):
    """Pick the base class for definition `x` and generate its class text.

    A "constructed" definition derives from the referenced user type (which
    becomes a dependency); every other defType is looked up in StaticMap.
    """
    x["_d"] = []
    if x["defType"] == "constructed":
        dt = x["definitionType"]
        x["_d"].append(dt)
    else:
        dt = StaticMap[x["defType"]]
    return GenerateClass(x, dt)


for element in pf:
    TypeDefs[element["name"]] = StaticConstructor(element)

# ---------------------------------------------------------------------------
# Emit classes in dependency order
# ---------------------------------------------------------------------------
while TypeDefs:
    ready = [k for k, v in TypeDeps.items() if len(v) == 0]
    if not ready:
        # Nothing can be emitted: report the referenced names that were never
        # defined.  The dump goes to stderr so it does not mix with the
        # generated code on stdout.
        x = list()
        for a in TypeDeps.values():
            x.extend(a)
        x = frozenset(x) - frozenset(TypeDeps.keys())
        sys.stderr.write("%s\n" % (TypeDefs,))
        raise ValueError(sorted(x))
    for t in ready:
        # `t` is about to be emitted, so nobody has to wait for it any more.
        for v in TypeDeps.values():
            try:
                v.remove(t)
            except ValueError:
                pass
        del TypeDeps[t]
        # Class text followed by two blank lines.
        sys.stdout.write(TypeDefs[t] + "\n\n\n")
        del TypeDefs[t]
It takes a file with syntax similar to this one:
CarrierInfo ::= OCTET STRING (SIZE(2..3))ChargeAreaCode ::= OCTET STRING (SIZE(3))ChargeInformation ::= OCTET STRING (SIZE(2..33))ChargedParty ::= ENUMERATED (chargingOfCallingSubscriber (0), chargingOfCalledSubscriber (1), noCharging (2))ChargingOrigin ::= OCTET STRING (SIZE(1))Counter ::= OCTET STRING (SIZE(1..4))Date ::= OCTET STRING (SIZE(3..4))
You will need to add this line at the top of the generated file:
from pyasn1.type import univ, namedtype, namedval, constraint, tag, char
And name the result defs.py. Then, I attached a bunch of pretty-printers to the defs (if you don't have any, just skip this step):
import defs
import parsers


def rplPrettyOut(self, value):
    """Return the repr() of `value` after passing it through self.decval."""
    decoded = self.decval(value)
    return repr(decoded)


# For every public name in `parsers` that also exists in `defs`, install the
# pretty-printer on the `defs` object and attach the matching `parsers`
# attribute as its `decval`.
for name in dir(parsers):
    if name.startswith("_") or not hasattr(defs, name):
        continue
    target = getattr(defs, name)
    target.prettyOut = rplPrettyOut
    target.decval = getattr(parsers, name)
Then, it's down to:
def ParseBlock(self, block):
    """Yield records decoded one after another from `block`.

    Each pass decodes one value with
    ``asn1Spec=parserimp.defs.CallDataRecord()`` and continues with whatever
    the decoder returned as the remainder.  Iteration ends when `block` is
    empty or its first element equals '\\x00'.
    """
    while True:
        # NOTE(review): the NUL check presumably skips trailing padding - confirm.
        if not block or block[0] == '\x00':
            return
        result, block = pyasn1.codec.ber.decoder.decode(
            block, asn1Spec=parserimp.defs.CallDataRecord()
        )
        yield result
If you're still interested I'll put the code somewhere. In fact, I'll put it somewhere in any case - but if you're interested just let me know and I'll point you there.
There is an ANTLR ASN.1 grammar; using ANTLR, you should be able to make an ASN.1 parser out of it. Generating code for pyasn1 is left as an exercise to the poster :-)