|
| 1 | +%{ |
| 2 | +/* YAML Parser - LALR-Safe with Cleaned Flow Entries */ |
| 3 | +#include <stdio.h> |
| 4 | +#include <stdlib.h> |
| 5 | +#include <string.h> |
| 6 | + |
| 7 | +extern int yylineno; |
| 8 | +extern char *yytext; |
| 9 | +const char* tok_name(int tok); |
| 10 | +void yyerror(const char *s); |
| 11 | +int yylex(void); |
| 12 | + |
/*
 * AST node built by the parser.
 *
 * `children` heads a singly linked list of child nodes chained through
 * `next`.  `tag`, `anchor` and `value` are optional strings and stay NULL
 * until a constructor or grammar action sets them (nnew() zero-fills).
 */
typedef struct Node {
    enum {
        N_SCALAR,
        N_SEQ,
        N_MAP,
        N_ALIAS,
        N_STREAM,
        N_NULL
    } type;
    char *tag;
    char *anchor;
    char *value;
    struct Node *children;
    struct Node *next;
} Node;

/* Root of the parsed tree; set by the empty alternative of `stream`. */
Node *root = NULL;
| 20 | + |
| 21 | +Node* nnew(int type) { |
| 22 | + Node *n = calloc(1, sizeof(Node)); n->type = type; return n; |
| 23 | +} |
| 24 | +Node* nscalar(char *v) { Node *n = nnew(N_SCALAR); n->value = v; return n; } |
| 25 | +Node* nseq(Node *c) { Node *n = nnew(N_SEQ); n->children = c; return n; } |
| 26 | +Node* nmap(Node *c) { Node *n = nnew(N_MAP); n->children = c; return n; } |
| 27 | +Node* nalias(char *v) { Node *n = nnew(N_ALIAS); n->value = v; return n; } |
| 28 | +Node* nnull() { Node *n = nnew(N_NULL); n->value = strdup("null"); return n; } |
| 29 | + |
| 30 | +Node* napply(Node *n, Node *p) { |
| 31 | + if (!n) n = nnull(); |
| 32 | + if (p) { n->anchor = p->anchor; n->tag = p->tag; free(p); } |
| 33 | + return n; |
| 34 | +} |
| 35 | +Node* nappend(Node *l, Node *i) { |
| 36 | + if (!l) return i; |
| 37 | + Node *c = l; while (c->next) c = c->next; |
| 38 | + c->next = i; return l; |
| 39 | +} |
| 40 | +void nprint(Node *n, int d, int pa, int pt) { |
| 41 | + if (!n) return; |
| 42 | + if (pa && n->anchor) { for(int i=0;i<d*2;i++)putchar(' '); printf("ANCHOR: &%s\n", n->anchor); nprint(n, d+1, 0, 1); return; } |
| 43 | + if (pt && n->tag) { for(int i=0;i<d*2;i++)putchar(' '); printf("TAG: %s\n", n->tag); nprint(n, d+1, 0, 0); return; } |
| 44 | + if (n->type == N_STREAM) { printf("STREAM:\n"); for(Node *c=n->children;c;c=c->next) nprint(c, d, 1, 1); return; } |
| 45 | + for(int i=0;i<d*2;i++)putchar(' '); |
| 46 | + switch(n->type) { |
| 47 | + case N_SCALAR: printf("SCALAR: %s\n", n->value); break; |
| 48 | + case N_SEQ: printf("SEQUENCE:\n"); for(Node *c=n->children;c;c=c->next) nprint(c, d+1, 1, 1); break; |
| 49 | + case N_MAP: printf("MAPPING:\n"); for(Node *c=n->children;c;c=c->next) nprint(c, d+1, 1, 1); break; |
| 50 | + case N_ALIAS: printf("ALIAS: *%s\n", n->value); break; |
| 51 | + case N_NULL: printf("SCALAR: null\n"); break; |
| 52 | + } |
| 53 | +} |
/*
 * Join two heap-allocated strings with a single space between them.
 *
 * s1, s2: malloc-family strings; ownership passes to this function and both
 *         are freed once the joined copy exists.
 * Returns a newly allocated "s1 s2" that the caller must free.
 *
 * If either argument is NULL the other one is returned unchanged (still
 * owned by the caller) instead of crashing in strlen().  Allocation failure
 * is reported and terminates the process rather than writing through NULL.
 */
char* jscalar(char *s1, char *s2) {
    if (!s1) {
        return s2;
    }
    if (!s2) {
        return s1;
    }

    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);
    char *joined = malloc(len1 + len2 + 2); /* separator + terminating NUL */
    if (!joined) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    memcpy(joined, s1, len1);
    joined[len1] = ' ';
    memcpy(joined + len1 + 1, s2, len2 + 1); /* copies the NUL as well */

    free(s1);
    free(s2);
    return joined;
}
| 57 | +%} |
| 58 | + |
/* Semantic values: raw text from the lexer, or an AST node. */
%union {
    char *str;
    struct Node *node;
}

/* Structural tokens (no semantic value). */
%token DOC_START LBRACKET RBRACKET LBRACE RBRACE COMMA
       SEQ_ENTRY MAP_KEY COLON NEWLINE
       INDENT DEDENT NEWLINE_DEDENT NEWLINE_INDENT DOC_END

/* Tokens that carry their text in `str`. */
%token <str> ANCHOR ALIAS TAG PLAIN_SCALAR
             DQUOTE_STRING SQUOTE_STRING LITERAL_CONTENT

/* Block-scalar indicators. */
%token LITERAL FOLDED

/* Precedence levels, lowest first.  LOW_PREC is not a real token; it is
 * only referenced through %prec in the rules. */
%nonassoc LOW_PREC
%nonassoc TAG ANCHOR
%nonassoc DEDENT NEWLINE_DEDENT NEWLINE_INDENT
%nonassoc NEWLINE
%right COLON

%type <node> stream document node pair atom
             map_list seq_list seq_entry
             properties property indented_node
             flow_seq_items flow_map_entries flow_entry flow_seq_item flow_node
%type <str> plain

%start stream
| 74 | + |
| 75 | +%% |
| 76 | + |
/*
 * stream: zero or more documents.  The empty alternative creates the
 * N_STREAM node and publishes it through `root`; each document is appended
 * to its child list.  NEWLINE, DEDENT and NEWLINE_DEDENT tokens between
 * documents are skipped.
 * NOTE(review): DOC_END is declared as a token but no rule visible here
 * accepts it; confirm whether the lexer can emit it.
 */
stream
    : /* empty */
        { root = nnew(N_STREAM); $$ = root; }
    | stream document
        { if ($2) $1->children = nappend($1->children, $2); $$ = $1; }
    | stream NEWLINE
        { $$ = $1; }
    | stream DEDENT
        { $$ = $1; }
    | stream NEWLINE_DEDENT
        { $$ = $1; }
    ;
| 80 | + |
/* document: a node, optionally introduced by DOC_START; a bare DOC_START
 * yields a null node. */
document
    : node
    | DOC_START node
        { $$ = $2; }
    | DOC_START
        { $$ = nnull(); }
    ;
| 82 | + |
/*
 * node: any value.  A pair list is wrapped into an N_MAP node and an entry
 * list into an N_SEQ node; LITERAL and FOLDED block scalars both become a
 * plain scalar holding LITERAL_CONTENT.  The bare-atom alternative carries
 * the lowest precedence (LOW_PREC).
 */
node
    : atom %prec LOW_PREC
    | map_list
        { $$ = nmap($1); }
    | seq_list
        { $$ = nseq($1); }
    | indented_node
        { $$ = $1; }
    | LITERAL LITERAL_CONTENT
        { $$ = nscalar($2); }
    | FOLDED LITERAL_CONTENT
        { $$ = nscalar($2); }
    ;
| 90 | + |
/* map_list: one or more pairs separated by NEWLINE, kept as one flat
 * `next`-linked list (each pair contributes its key followed by its value). */
map_list
    : pair
        { $$ = $1; }
    | map_list NEWLINE pair
        { $$ = nappend($1, $3); }
    ;
| 93 | + |
/*
 * pair: one mapping entry, returned as the key node with the value node
 * linked after it.  A missing value is filled in with a null node.
 *
 * %prec is placed before the action: that is the position POSIX yacc
 * defines for it and the one the other rules in this grammar already use.
 * Rule precedence is unchanged.
 */
pair
    : atom COLON node
        { $$ = nappend($1, $3); }
    | atom COLON %prec LOW_PREC
        { $$ = nappend($1, nnull()); }
    | MAP_KEY node COLON node
        { $$ = nappend($2, $4); }
    | MAP_KEY node %prec LOW_PREC
        { $$ = nappend($2, nnull()); }
    ;
| 98 | + |
/* atom: a flow node, optionally preceded by properties (anchor and/or tag).
 * Properties with nothing after them decorate a fresh null node, because
 * napply() substitutes one when given NULL. */
atom
    : flow_node
    | properties flow_node
        { $$ = napply($2, $1); }
    | properties %prec LOW_PREC
        { $$ = napply(NULL, $1); }
    ;
| 101 | + |
/* seq_list: one or more block sequence entries separated by NEWLINE,
 * linked through `next`. */
seq_list
    : seq_entry
        { $$ = $1; }
    | seq_list NEWLINE seq_entry
        { $$ = nappend($1, $3); }
    ;
| 104 | + |
/* seq_entry: SEQ_ENTRY followed by the entry's node; a bare SEQ_ENTRY
 * stands for a null entry. */
seq_entry
    : SEQ_ENTRY node
        { $$ = $2; }
    | SEQ_ENTRY
        { $$ = nnull(); }
    ;
| 106 | + |
/* indented_node: a node bracketed by an indent token (INDENT or
 * NEWLINE_INDENT) and a dedent token (DEDENT or NEWLINE_DEDENT).  All four
 * combinations simply yield the inner node. */
indented_node
    : INDENT node DEDENT
        { $$ = $2; }
    | INDENT node NEWLINE_DEDENT
        { $$ = $2; }
    | NEWLINE_INDENT node DEDENT
        { $$ = $2; }
    | NEWLINE_INDENT node NEWLINE_DEDENT
        { $$ = $2; }
    ;
| 109 | + |
/* flow_node: a scalar (plain, double- or single-quoted), an alias, or a
 * flow collection in braces / brackets.  Empty collections get a NULL
 * child list. */
flow_node
    : plain
        { $$ = nscalar($1); }
    | DQUOTE_STRING
        { $$ = nscalar($1); }
    | SQUOTE_STRING
        { $$ = nscalar($1); }
    | ALIAS
        { $$ = nalias($1); }
    | LBRACE flow_map_entries RBRACE
        { $$ = nmap($2); }
    | LBRACE RBRACE
        { $$ = nmap(NULL); }
    | LBRACKET flow_seq_items RBRACKET
        { $$ = nseq($2); }
    | LBRACKET RBRACKET
        { $$ = nseq(NULL); }
    ;
| 115 | + |
/* plain: one or more consecutive PLAIN_SCALAR tokens, joined with single
 * spaces by jscalar() (which frees both of its inputs). */
plain
    : PLAIN_SCALAR
    | plain PLAIN_SCALAR
        { $$ = jscalar($1, $2); }
    ;
| 117 | + |
/*
 * flow_seq_items: comma-separated items of a flow sequence, linked
 * through `next`.
 *
 * A comma with no item after it is only a terminator: "[a, b, ]" has two
 * entries in YAML, so nothing is appended for it.  This matches how
 * flow_map_entries treats its trailing comma.  Consequently an empty slot
 * such as "[a,,b]" contributes no entry either.
 */
flow_seq_items
    : flow_seq_item
        { $$ = $1; }
    | flow_seq_items COMMA flow_seq_item
        { $$ = nappend($1, $3); }
    | flow_seq_items COMMA
        { $$ = $1; }
    ;
/* flow_seq_item: any node, reduced at the lowest precedence (LOW_PREC). */
flow_seq_item
    : node %prec LOW_PREC
        { $$ = $1; }
    ;
| 120 | + |
/* flow_map_entries: comma-separated entries of a flow mapping, kept as one
 * flat `next`-linked list.  A comma with no entry after it adds nothing. */
flow_map_entries
    : flow_entry
        { $$ = $1; }
    | flow_map_entries COMMA flow_entry
        { $$ = nappend($1, $3); }
    | flow_map_entries COMMA
        { $$ = $1; }
    ;
/*
 * flow_entry: one entry of a flow mapping, either a full pair or a lone
 * atom, which becomes a key with a null value.
 *
 * %prec is placed before the action: that is the position POSIX yacc
 * defines for it and the one the other rules in this grammar already use.
 * Rule precedence is unchanged.
 */
flow_entry
    : pair
    | atom %prec LOW_PREC
        { $$ = nappend($1, nnull()); }
    ;
| 123 | + |
/*
 * properties: one or more anchor/tag properties folded into the first
 * carrier node.  A later anchor or tag replaces an earlier one, and the
 * now-empty carrier of the later property is freed.
 * NOTE(review): the replaced anchor/tag string is not freed here; if the
 * lexer heap-allocates these strings this leaks. Confirm ownership.
 */
properties
    : property
    | properties property
        {
            if ($2->anchor)
                $1->anchor = $2->anchor;
            if ($2->tag)
                $1->tag = $2->tag;
            free($2);
            $$ = $1;
        }
    ;
/* property: a single anchor or tag, held in a temporary carrier node.  The
 * carrier is created with type 0, and only its anchor/tag fields are read
 * by the `properties` rule and by napply(). */
property
    : ANCHOR
        { $$ = nnew(0); $$->anchor = $1; }
    | TAG
        { $$ = nnew(0); $$->tag = $1; }
    ;
| 126 | + |
| 127 | +%% |
| 128 | +void yyerror(const char *s) { fprintf(stderr, "Error line %d: %s (tok: %s, text: '%s')\n", yylineno, s, tok_name(yychar), yytext); } |
| 129 | +int main() { if (!yyparse() && root) { nprint(root, 0, 1, 1); return 0; } return 1; } |
0 commit comments