ついにねんがんのコンパイル時パーサをてにいれたぞ!

 やった。ついにやった。
 去年の3月にあったD言語BOFで「コンパイルコンパイラコンパイル」なるものを発表した。

http://www.nicovideo.jp/watch/sm2485376

 要約すると、D言語のCTFEでコンパイル時にテキストファイルをimportして解析・コード生成すれば、コンパイル時に実行できる簡易コンパイラになるよね、という話だ。テキストファイルはD言語だけじゃなくて、CSVでもXMLでもYAMLでも、bison文法ファイルでもlex文法ファイルでも可能。ただしパーサがあれば。
 はっきりいってD言語のCTFEはC言語未満の機能しかない。それでパーサを書くのは非常な労力を要する。


 ではどうするか……。
 コンパイル時にパーサを生成して、そのパーサ自体もCTFEで動かせれば随分楽になるじゃないか。
 という妄想のもと、開発を進めていた。不可解なバグ・喰われまくるメモリ(そして落ちる)・遅すぎる実行速度……。幾多の試練を潜り抜けて、ついにコンパイルコンパイラコンパイルが実現した!


 サンプルコード。

module sample;

/// PEG解析器。中身はまだ秘密。
import outland.ctfe.peg.all;

/// PEG grammar, written in the same PEG dialect that it describes. The PEG
/// parser is generated from this text at compile time.
///
/// Fix: octDigit now ends at '7'. The previous range ['0' .. '8'] accepted
/// '8', which is not an octal digit, so escOct matched invalid escapes such
/// as "\8". The AST dump reproduced further down in this article was produced
/// with the old range, so its octDigit entry still shows '8'.
const string PEG_SOURCE = `
NO_SKIP utf8Bom = "\xEF\xBB\xBF";

NO_SKIP NEW_LINE newLine = ("\r" "\n"?) / "\n";

NO_SKIP lineComment = "//" (!newLine .)* newLine;

NO_SKIP blockCommentChar = newLine / .;
NO_SKIP blockComment = "/*" (!"*/" blockCommentChar)* "*/";
NO_SKIP nestCommentContent = nestComment / (!"+/" blockCommentChar);
NO_SKIP nestComment = "/+" nestCommentContent* "+/";

NO_SKIP blankChars = " "+ / "\t"+ / "\v"+ / newLine+;
NO_SKIP SKIPPER spaces = (blankChars / lineComment / nestComment / blockComment)*;

NO_SKIP identifierHead = ['a' .. 'z'] / ['A' .. 'Z'] / "_";
NO_SKIP identifierTail = identifierHead / ['0' .. '9'];
NO_SKIP identifierStr = identifierHead identifierTail*;
TEXT_NODE identifier = identifierStr;

NO_SKIP octDigit = ['0' .. '7'];

NO_SKIP hexDigit = ['0' .. '9'] / ['a' .. 'f'] / ['A' .. 'F'];
    
NO_SKIP escHex = ("x"/"X") hexDigit hexDigit?;
NO_SKIP escOct = octDigit octDigit? octDigit?;
NO_SKIP escGroup = "\'" / "\"" / "?" / "\\"
                   / "a" / "b" / "f" / "n" / "r" / "t" / "v" / escHex / escOct;
NO_SKIP escChar = "\\" (escGroup / E{UNKNOWN_ESC});

NO_SKIP normalChar = !"\"" .;
NO_SKIP literalStr = "\"" (escChar / normalChar)* "\"";
TEXT_NODE literal = literalStr;

NO_SKIP normalRangeChar = !"\'" .;
NO_SKIP rangeCharValue = "\'" (escChar / normalRangeChar) "\'";
TEXT_NODE rangeChar = rangeCharValue;
MAKE_NODE rangeExp = "[" rangeChar ".." rangeChar "]";

MAKE_NODE any = ".";

MAKE_NODE errorExp = "E{" identifier "}";

primaryExp = errorExp / identifier / literal / any / rangeExp / ("(" pegExp ")");

MAKE_NODE ALWAYS zeroOrMoreExp = "*";
MAKE_NODE ALWAYS oneOrMoreExp = "+";
MAKE_NODE ALWAYS optionExp = "?";

repeatExp = primaryExp (zeroOrMoreExp / oneOrMoreExp / optionExp)?;

MAKE_NODE ALWAYS andExp = "&";
MAKE_NODE ALWAYS notExp = "!";

testExp = (andExp / notExp)? repeatExp;

MAKE_NODE sequenceExp = testExp+;

MAKE_NODE choiceExp = sequenceExp ("/" sequenceExp)*;

pegExp = choiceExp;

MAKE_NODE ALWAYS declareSkipper = "SKIPPER";
MAKE_NODE ALWAYS declareNoSkip = "NO_SKIP";
MAKE_NODE ALWAYS declareNewLine = "NEW_LINE";
MAKE_NODE ALWAYS declareMakeNode = "MAKE_NODE";
MAKE_NODE ALWAYS declareTextNode = "TEXT_NODE";
MAKE_NODE ALWAYS declareAlways = "ALWAYS";

declareProperty = declareSkipper
                  / declareNoSkip
                  / declareNewLine
                  / declareMakeNode
                  / declareTextNode
                  / declareAlways;

MAKE_NODE ALWAYS declareParser = declareProperty* identifier;

MAKE_NODE ALWAYS defineParser = declareParser "=" pegExp ";";

MAKE_NODE ALWAYS pegRoot = utf8Bom? defineParser+;
`;

// Generate the parser from the PEG grammar text. The string returned by
// compilePegParser is compiled into this module as D code by the string mixin.
mixin(compilePegParser(PEG_SOURCE));

unittest {
    // Parse the grammar with the parser generated from it (the grammar
    // describes its own syntax). pragma(msg) prints at compile time, so the
    // AST dump appears during compilation rather than when the test runs.
    // NOTE(review): presumably the second argument of pegRoot is the start
    // offset into the source text — confirm against outland.ctfe.peg.
    pragma(msg, dumpNode(pegRoot(PEG_SOURCE, 0).nodes[0]));
}

出力結果

pegRoot:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:utf8Bom
    literal:"\xEF\xBB\xBF"
  defineParser:
    declareParser:
      declareNoSkip:
      declareNewLine:
      identifier:newLine
    choiceExp:
      sequenceExp:
        literal:"\r"
        literal:"\n"
        optionExp:
      literal:"\n"
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:lineComment
    sequenceExp:
      literal:"//"
      sequenceExp:
        notExp:
        identifier:newLine
        any:
      zeroOrMoreExp:
      identifier:newLine
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:blockCommentChar
    choiceExp:
      identifier:newLine
      any:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:blockComment
    sequenceExp:
      literal:"/*"
      sequenceExp:
        notExp:
        literal:"*/"
        identifier:blockCommentChar
      zeroOrMoreExp:
      literal:"*/"
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:nestCommentContent
    choiceExp:
      identifier:nestComment
      sequenceExp:
        notExp:
        literal:"+/"
        identifier:blockCommentChar
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:nestComment
    sequenceExp:
      literal:"/+"
      identifier:nestCommentContent
      zeroOrMoreExp:
      literal:"+/"
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:blankChars
    choiceExp:
      sequenceExp:
        literal:" "
        oneOrMoreExp:
      sequenceExp:
        literal:"\t"
        oneOrMoreExp:
      sequenceExp:
        literal:"\v"
        oneOrMoreExp:
      sequenceExp:
        identifier:newLine
        oneOrMoreExp:
  defineParser:
    declareParser:
      declareNoSkip:
      declareSkipper:
      identifier:spaces
    sequenceExp:
      choiceExp:
        identifier:blankChars
        identifier:lineComment
        identifier:nestComment
        identifier:blockComment
      zeroOrMoreExp:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:identifierHead
    choiceExp:
      rangeExp:
        rangeChar:'a'
        rangeChar:'z'
      rangeExp:
        rangeChar:'A'
        rangeChar:'Z'
      literal:"_"
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:identifierTail
    choiceExp:
      identifier:identifierHead
      rangeExp:
        rangeChar:'0'
        rangeChar:'9'
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:identifierStr
    sequenceExp:
      identifier:identifierHead
      identifier:identifierTail
      zeroOrMoreExp:
  defineParser:
    declareParser:
      declareTextNode:
      identifier:identifier
    identifier:identifierStr
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:octDigit
    rangeExp:
      rangeChar:'0'
      rangeChar:'8'
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:hexDigit
    choiceExp:
      rangeExp:
        rangeChar:'0'
        rangeChar:'9'
      rangeExp:
        rangeChar:'a'
        rangeChar:'f'
      rangeExp:
        rangeChar:'A'
        rangeChar:'F'
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:escHex
    sequenceExp:
      choiceExp:
        literal:"x"
        literal:"X"
      identifier:hexDigit
      identifier:hexDigit
      optionExp:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:escOct
    sequenceExp:
      identifier:octDigit
      identifier:octDigit
      optionExp:
      identifier:octDigit
      optionExp:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:escGroup
    choiceExp:
      literal:"\'"
      literal:"\""
      literal:"?"
      literal:"\\"
      literal:"a"
      literal:"b"
      literal:"f"
      literal:"n"
      literal:"r"
      literal:"t"
      literal:"v"
      identifier:escHex
      identifier:escOct
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:escChar
    sequenceExp:
      literal:"\\"
      choiceExp:
        identifier:escGroup
        identifier:UNKNOWN_ESC
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:normalChar
    sequenceExp:
      notExp:
      literal:"\""
      any:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:literalStr
    sequenceExp:
      literal:"\""
      choiceExp:
        identifier:escChar
        identifier:normalChar
      zeroOrMoreExp:
      literal:"\""
  defineParser:
    declareParser:
      declareTextNode:
      identifier:literal
    identifier:literalStr
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:normalRangeChar
    sequenceExp:
      notExp:
      literal:"\'"
      any:
  defineParser:
    declareParser:
      declareNoSkip:
      identifier:rangeCharValue
    sequenceExp:
      literal:"\'"
      choiceExp:
        identifier:escChar
        identifier:normalRangeChar
      literal:"\'"
  defineParser:
    declareParser:
      declareTextNode:
      identifier:rangeChar
    identifier:rangeCharValue
  defineParser:
    declareParser:
      declareMakeNode:
      identifier:rangeExp
    sequenceExp:
      literal:"["
      identifier:rangeChar
      literal:".."
      identifier:rangeChar
      literal:"]"
  defineParser:
    declareParser:
      declareMakeNode:
      identifier:any
    literal:"."
  defineParser:
    declareParser:
      declareMakeNode:
      identifier:errorExp
    sequenceExp:
      literal:"E{"
      identifier:identifier
      literal:"}"
  defineParser:
    declareParser:
      identifier:primaryExp
    choiceExp:
      identifier:errorExp
      identifier:identifier
      identifier:literal
      identifier:any
      identifier:rangeExp
      sequenceExp:
        literal:"("
        identifier:pegExp
        literal:")"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:zeroOrMoreExp
    literal:"*"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:oneOrMoreExp
    literal:"+"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:optionExp
    literal:"?"
  defineParser:
    declareParser:
      identifier:repeatExp
    sequenceExp:
      identifier:primaryExp
      choiceExp:
        identifier:zeroOrMoreExp
        identifier:oneOrMoreExp
        identifier:optionExp
      optionExp:
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:andExp
    literal:"&"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:notExp
    literal:"!"
  defineParser:
    declareParser:
      identifier:testExp
    sequenceExp:
      choiceExp:
        identifier:andExp
        identifier:notExp
      optionExp:
      identifier:repeatExp
  defineParser:
    declareParser:
      declareMakeNode:
      identifier:sequenceExp
    sequenceExp:
      identifier:testExp
      oneOrMoreExp:
  defineParser:
    declareParser:
      declareMakeNode:
      identifier:choiceExp
    sequenceExp:
      identifier:sequenceExp
      sequenceExp:
        literal:"/"
        identifier:sequenceExp
      zeroOrMoreExp:
  defineParser:
    declareParser:
      identifier:pegExp
    identifier:choiceExp
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareSkipper
    literal:"SKIPPER"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareNoSkip
    literal:"NO_SKIP"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareNewLine
    literal:"NEW_LINE"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareMakeNode
    literal:"MAKE_NODE"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareTextNode
    literal:"TEXT_NODE"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareAlways
    literal:"ALWAYS"
  defineParser:
    declareParser:
      identifier:declareProperty
    choiceExp:
      identifier:declareSkipper
      identifier:declareNoSkip
      identifier:declareNewLine
      identifier:declareMakeNode
      identifier:declareTextNode
      identifier:declareAlways
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:declareParser
    sequenceExp:
      identifier:declareProperty
      zeroOrMoreExp:
      identifier:identifier
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:defineParser
    sequenceExp:
      identifier:declareParser
      literal:"="
      identifier:pegExp
      literal:";"
  defineParser:
    declareParser:
      declareMakeNode:
      declareAlways:
      identifier:pegRoot
    sequenceExp:
      identifier:utf8Bom
      optionExp:
      identifier:defineParser
      oneOrMoreExp:

 解析した後はASTを参照してコード生成するなりなんなりできる。
 文法部分はふつう別ファイルにして、import式(文字列インポート)で読み込むべき。
 車輪の再発明だったら嫌だなあ。