ついにねんがんのコンパイル時パーサをてにいれたぞ!
やった。ついにやった。
去年の3月にあったD言語BOFで「コンパイル時コンパイラコンパイル」なるものを発表した。
http://www.nicovideo.jp/watch/sm2485376
要約すると、D言語のCTFEでコンパイル時にテキストファイルをimportして解析・コード生成すれば、コンパイル時に実行できる簡易コンパイラになるよね、という話だ。テキストファイルはD言語だけじゃなくて、CSVやXMLやYAMLでもbison文法ファイルでもlex文法ファイルでも可能。ただしパーサがあれば。
はっきりいってD言語のCTFEはC言語未満の機能しかない。それでパーサを書くのは非常な労力を要する。
ではどうするか……。
コンパイル時にパーサを生成して、そのパーサ自体もCTFEで動かせれば随分楽になるじゃないか。
という妄想のもと、開発を進めていた。不可解なバグ・喰われまくるメモリ(そして落ちる)・遅すぎる実行速度……。幾多の試練を潜り抜けて、ついにコンパイル時コンパイラコンパイルが実現した!
サンプルコード。
module sample;

/// PEG parser library. Its internals are still a secret.
import outland.ctfe.peg.all;

/// PEG grammar (the grammar of the PEG notation itself).
/// The PEG parser is generated from this text at compile time.
///
/// Rule modifiers used below are interpreted by compilePegParser:
/// NO_SKIP, SKIPPER, NEW_LINE, MAKE_NODE, TEXT_NODE, ALWAYS.
///
/// NOTE: octDigit is ['0' .. '7']. It was previously ['0' .. '8'], which made
/// escOct accept "\8" as an octal escape instead of reporting UNKNOWN_ESC.
/// An AST dump produced before this fix shows rangeChar:'8' for octDigit.
const string PEG_SOURCE = `
NO_SKIP utf8Bom = "\xEF\xBB\xBF";
NO_SKIP NEW_LINE newLine = ("\r" "\n"?) / "\n";
NO_SKIP lineComment = "//" (!newLine .)* newLine;
NO_SKIP blockCommentChar = newLine / .;
NO_SKIP blockComment = "/*" (!"*/" blockCommentChar)* "*/";
NO_SKIP nestCommentContent = nestComment / (!"+/" blockCommentChar);
NO_SKIP nestComment = "/+" nestCommentContent* "+/";
NO_SKIP blankChars = " "+ / "\t"+ / "\v"+ / newLine+;
NO_SKIP SKIPPER spaces = (blankChars / lineComment / nestComment / blockComment)*;
NO_SKIP identifierHead = ['a' .. 'z'] / ['A' .. 'Z'] / "_";
NO_SKIP identifierTail = identifierHead / ['0' .. '9'];
NO_SKIP identifierStr = identifierHead identifierTail*;
TEXT_NODE identifier = identifierStr;
NO_SKIP octDigit = ['0' .. '7'];
NO_SKIP hexDigit = ['0' .. '9'] / ['a' .. 'f'] / ['A' .. 'F'];
NO_SKIP escHex = ("x"/"X") hexDigit hexDigit?;
NO_SKIP escOct = octDigit octDigit? octDigit?;
NO_SKIP escGroup = "\'" / "\"" / "?" / "\\" / "a" / "b" / "f" / "n" / "r" / "t" / "v" / escHex / escOct;
NO_SKIP escChar = "\\" (escGroup / E{UNKNOWN_ESC});
NO_SKIP normalChar = !"\"" .;
NO_SKIP literalStr = "\"" (escChar / normalChar)* "\"";
TEXT_NODE literal = literalStr;
NO_SKIP normalRangeChar = !"\'" .;
NO_SKIP rangeCharValue = "\'" (escChar / normalRangeChar) "\'";
TEXT_NODE rangeChar = rangeCharValue;
MAKE_NODE rangeExp = "[" rangeChar ".." rangeChar "]";
MAKE_NODE any = ".";
MAKE_NODE errorExp = "E{" identifier "}";
primaryExp = errorExp / identifier / literal / any / rangeExp / ("(" pegExp ")");
MAKE_NODE ALWAYS zeroOrMoreExp = "*";
MAKE_NODE ALWAYS oneOrMoreExp = "+";
MAKE_NODE ALWAYS optionExp = "?";
repeatExp = primaryExp (zeroOrMoreExp / oneOrMoreExp / optionExp)?;
MAKE_NODE ALWAYS andExp = "&";
MAKE_NODE ALWAYS notExp = "!";
testExp = (andExp / notExp)? repeatExp;
MAKE_NODE sequenceExp = testExp+;
MAKE_NODE choiceExp = sequenceExp ("/" sequenceExp)*;
pegExp = choiceExp;
MAKE_NODE ALWAYS declareSkipper = "SKIPPER";
MAKE_NODE ALWAYS declareNoSkip = "NO_SKIP";
MAKE_NODE ALWAYS declareNewLine = "NEW_LINE";
MAKE_NODE ALWAYS declareMakeNode = "MAKE_NODE";
MAKE_NODE ALWAYS declareTextNode = "TEXT_NODE";
MAKE_NODE ALWAYS declareAlways = "ALWAYS";
declareProperty = declareSkipper / declareNoSkip / declareNewLine / declareMakeNode / declareTextNode / declareAlways;
MAKE_NODE ALWAYS declareParser = declareProperty* identifier;
MAKE_NODE ALWAYS defineParser = declareParser "=" pegExp ";";
MAKE_NODE ALWAYS pegRoot = utf8Bom? defineParser+;
`;

// Generate the parser from the PEG grammar text and mix it into this module.
mixin(compilePegParser(PEG_SOURCE));

unittest
{
    // Parse the grammar with the parser generated from that same grammar.
    // The AST is printed *at compile time* via pragma(msg).
    pragma(msg, dumpNode(pegRoot(PEG_SOURCE, 0).nodes[0]));
}
出力結果
pegRoot: defineParser: declareParser: declareNoSkip: identifier:utf8Bom literal:"\xEF\xBB\xBF" defineParser: declareParser: declareNoSkip: declareNewLine: identifier:newLine choiceExp: sequenceExp: literal:"\r" literal:"\n" optionExp: literal:"\n" defineParser: declareParser: declareNoSkip: identifier:lineComment sequenceExp: literal:"//" sequenceExp: notExp: identifier:newLine any: zeroOrMoreExp: identifier:newLine defineParser: declareParser: declareNoSkip: identifier:blockCommentChar choiceExp: identifier:newLine any: defineParser: declareParser: declareNoSkip: identifier:blockComment sequenceExp: literal:"/*" sequenceExp: notExp: literal:"*/" identifier:blockCommentChar zeroOrMoreExp: literal:"*/" defineParser: declareParser: declareNoSkip: identifier:nestCommentContent choiceExp: identifier:nestComment sequenceExp: notExp: literal:"+/" identifier:blockCommentChar defineParser: declareParser: declareNoSkip: identifier:nestComment sequenceExp: literal:"/+" identifier:nestCommentContent zeroOrMoreExp: literal:"+/" defineParser: declareParser: declareNoSkip: identifier:blankChars choiceExp: sequenceExp: literal:" " oneOrMoreExp: sequenceExp: literal:"\t" oneOrMoreExp: sequenceExp: literal:"\v" oneOrMoreExp: sequenceExp: identifier:newLine oneOrMoreExp: defineParser: declareParser: declareNoSkip: declareSkipper: identifier:spaces sequenceExp: choiceExp: identifier:blankChars identifier:lineComment identifier:nestComment identifier:blockComment zeroOrMoreExp: defineParser: declareParser: declareNoSkip: identifier:identifierHead choiceExp: rangeExp: rangeChar:'a' rangeChar:'z' rangeExp: rangeChar:'A' rangeChar:'Z' literal:"_" defineParser: declareParser: declareNoSkip: identifier:identifierTail choiceExp: identifier:identifierHead rangeExp: rangeChar:'0' rangeChar:'9' defineParser: declareParser: declareNoSkip: identifier:identifierStr sequenceExp: identifier:identifierHead identifier:identifierTail zeroOrMoreExp: defineParser: declareParser: declareTextNode: 
identifier:identifier identifier:identifierStr defineParser: declareParser: declareNoSkip: identifier:octDigit rangeExp: rangeChar:'0' rangeChar:'8' defineParser: declareParser: declareNoSkip: identifier:hexDigit choiceExp: rangeExp: rangeChar:'0' rangeChar:'9' rangeExp: rangeChar:'a' rangeChar:'f' rangeExp: rangeChar:'A' rangeChar:'F' defineParser: declareParser: declareNoSkip: identifier:escHex sequenceExp: choiceExp: literal:"x" literal:"X" identifier:hexDigit identifier:hexDigit optionExp: defineParser: declareParser: declareNoSkip: identifier:escOct sequenceExp: identifier:octDigit identifier:octDigit optionExp: identifier:octDigit optionExp: defineParser: declareParser: declareNoSkip: identifier:escGroup choiceExp: literal:"\'" literal:"\"" literal:"?" literal:"\\" literal:"a" literal:"b" literal:"f" literal:"n" literal:"r" literal:"t" literal:"v" identifier:escHex identifier:escOct defineParser: declareParser: declareNoSkip: identifier:escChar sequenceExp: literal:"\\" choiceExp: identifier:escGroup identifier:UNKNOWN_ESC defineParser: declareParser: declareNoSkip: identifier:normalChar sequenceExp: notExp: literal:"\"" any: defineParser: declareParser: declareNoSkip: identifier:literalStr sequenceExp: literal:"\"" choiceExp: identifier:escChar identifier:normalChar zeroOrMoreExp: literal:"\"" defineParser: declareParser: declareTextNode: identifier:literal identifier:literalStr defineParser: declareParser: declareNoSkip: identifier:normalRangeChar sequenceExp: notExp: literal:"\'" any: defineParser: declareParser: declareNoSkip: identifier:rangeCharValue sequenceExp: literal:"\'" choiceExp: identifier:escChar identifier:normalRangeChar literal:"\'" defineParser: declareParser: declareTextNode: identifier:rangeChar identifier:rangeCharValue defineParser: declareParser: declareMakeNode: identifier:rangeExp sequenceExp: literal:"[" identifier:rangeChar literal:".." 
identifier:rangeChar literal:"]" defineParser: declareParser: declareMakeNode: identifier:any literal:"." defineParser: declareParser: declareMakeNode: identifier:errorExp sequenceExp: literal:"E{" identifier:identifier literal:"}" defineParser: declareParser: identifier:primaryExp choiceExp: identifier:errorExp identifier:identifier identifier:literal identifier:any identifier:rangeExp sequenceExp: literal:"(" identifier:pegExp literal:")" defineParser: declareParser: declareMakeNode: declareAlways: identifier:zeroOrMoreExp literal:"*" defineParser: declareParser: declareMakeNode: declareAlways: identifier:oneOrMoreExp literal:"+" defineParser: declareParser: declareMakeNode: declareAlways: identifier:optionExp literal:"?" defineParser: declareParser: identifier:repeatExp sequenceExp: identifier:primaryExp choiceExp: identifier:zeroOrMoreExp identifier:oneOrMoreExp identifier:optionExp optionExp: defineParser: declareParser: declareMakeNode: declareAlways: identifier:andExp literal:"&" defineParser: declareParser: declareMakeNode: declareAlways: identifier:notExp literal:"!" 
defineParser: declareParser: identifier:testExp sequenceExp: choiceExp: identifier:andExp identifier:notExp optionExp: identifier:repeatExp defineParser: declareParser: declareMakeNode: identifier:sequenceExp sequenceExp: identifier:testExp oneOrMoreExp: defineParser: declareParser: declareMakeNode: identifier:choiceExp sequenceExp: identifier:sequenceExp sequenceExp: literal:"/" identifier:sequenceExp zeroOrMoreExp: defineParser: declareParser: identifier:pegExp identifier:choiceExp defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareSkipper literal:"SKIPPER" defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareNoSkip literal:"NO_SKIP" defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareNewLine literal:"NEW_LINE" defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareMakeNode literal:"MAKE_NODE" defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareTextNode literal:"TEXT_NODE" defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareAlways literal:"ALWAYS" defineParser: declareParser: identifier:declareProperty choiceExp: identifier:declareSkipper identifier:declareNoSkip identifier:declareNewLine identifier:declareMakeNode identifier:declareTextNode identifier:declareAlways defineParser: declareParser: declareMakeNode: declareAlways: identifier:declareParser sequenceExp: identifier:declareProperty zeroOrMoreExp: identifier:identifier defineParser: declareParser: declareMakeNode: declareAlways: identifier:defineParser sequenceExp: identifier:declareParser literal:"=" identifier:pegExp literal:";" defineParser: declareParser: declareMakeNode: declareAlways: identifier:pegRoot sequenceExp: identifier:utf8Bom optionExp: identifier:defineParser oneOrMoreExp:
解析した後はASTを参照してコード生成するなりなんなりできる。
文法部分はふつう別ファイルにしてimportするべき。
車輪の再発明だったら嫌だなあ。