From 00f2ed90a3364283dd12fce123b0858092432686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Luttringer?= Date: Mon, 25 Sep 2006 03:15:35 +0000 Subject: [PATCH] correction du lexer pour gestion des "ionumber" correction de la grammaire pour la regle simple_command avancement du parser et du printer --- GRAMMAR | 21 ++-- TODO | 2 +- src/ast/ast_cmd.c | 32 ++++-- src/last-ast.sh | 6 +- src/parser/lexer.c | 10 +- src/parser/parser.c | 230 ++++++++++++++++++++++++++++------------- src/parser/parser.h | 5 +- src/shell/getoptions.c | 6 +- 8 files changed, 217 insertions(+), 95 deletions(-) diff --git a/GRAMMAR b/GRAMMAR index bf2d879..fb123d2 100644 --- a/GRAMMAR +++ b/GRAMMAR @@ -13,7 +13,8 @@ command: simple_command | shell_command (redirection)* | funcdec -simple_command: (prefix)* (element)+ +simple_command: (prefix)+ + | (prefix)* (element)+ shell_command: '{' compound_list '}' | '(' compound_list ')' @@ -25,15 +26,15 @@ shell_command: '{' compound_list '}' funcdec: ['function'] WORD '(' ')' ('\n')* shell_command (redirection)* -redirection: [NUMBER] '>' WORD - | [NUMBER] '<' WORD - | [NUMBER] '>>' WORD - | [NUMBER] '<<' HEREDOC - | [NUMBER] '<<-' HEREDOC - | [NUMBER] '>&' WORD - | [NUMBER] '<&' WORD - | [NUMBER] '>|' WORD - | [NUMBER] '<>' WORD +redirection: [IONUMBER] '>' WORD + | [IONUMBER] '<' WORD + | [IONUMBER] '>>' WORD + | [IONUMBER] '<<' HEREDOC + | [IONUMBER] '<<-' HEREDOC + | [IONUMBER] '>&' WORD + | [IONUMBER] '<&' WORD + | [IONUMBER] '>|' WORD + | [IONUMBER] '<>' WORD prefix: ASSIGMENT_WORD | redirection diff --git a/TODO b/TODO index e8d5c67..8b13789 100644 --- a/TODO +++ b/TODO @@ -1 +1 @@ -lexer: verifier si un EOF apparait si le dernier token est bien pris en compte \ No newline at end of file + diff --git a/src/ast/ast_cmd.c b/src/ast/ast_cmd.c index a12bfc0..670e615 100644 --- a/src/ast/ast_cmd.c +++ b/src/ast/ast_cmd.c @@ -5,7 +5,7 @@ ** Login ** ** Started on Fri Aug 18 22:13:51 2006 Seblu -** Last update Fri Sep 1 00:44:22 2006 Seblu 
+** Last update Mon Sep 25 04:19:18 2006 Seblu */ #include "ast.h" @@ -83,7 +83,13 @@ void ast_cmd_print(s_ast_node *node, FILE *fs, unsigned int *node_id) ++*node_id; fprintf(fs, "%u [label = \"", *node_id); for (int i = 0; prefix && prefix[i]; ++i) { - fprintf(fs, "prefix[%d]=%s\\n", i, prefix[i]); + fprintf(fs, "prefix[%d]:", i); + size_t last = 0, p = 0; /* emit the string in segments, inserting a backslash before every double quote */ + for (; prefix[i][p]; ++p) + if (prefix[i][p] == '"') + fprintf(fs, "%.*s\\", (int) (p - last), prefix[i] + last), last = p; /* '*' precision must be an int, not a size_t */ + fprintf(fs, "%.*s", (int) (p - last), prefix[i] + last), last = p; + fprintf(fs, "\\n"); } fprintf(fs, "\"];\n"); fprintf(fs, "%u -> %u\n", cur_id, *node_id); @@ -93,8 +99,15 @@ void ast_cmd_print(s_ast_node *node, FILE *fs, unsigned int *node_id) if (argv && argv[0]) { ++*node_id; fprintf(fs, "%u [label = \"", *node_id); - for (int i = 0; argv && argv[i]; ++i) - fprintf(fs, "argv[%d]=%s\\n", i, argv[i]); + for (int i = 0; argv && argv[i]; ++i) { + fprintf(fs, "argv[%d]:", i); + size_t last = 0, p = 0; /* same quote-escaping scheme as for the prefix strings */ + for (; argv[i][p]; ++p) + if (argv[i][p] == '"') + fprintf(fs, "%.*s\\", (int) (p - last), argv[i] + last), last = p; + fprintf(fs, "%.*s", (int) (p - last), argv[i] + last), last = p; + fprintf(fs, "\\n"); + } fprintf(fs, "\"];\n"); fprintf(fs, "%u -> %u\n", cur_id, *node_id); } @@ -103,8 +116,15 @@ void ast_cmd_print(s_ast_node *node, FILE *fs, unsigned int *node_id) int i = 0; ++*node_id; fprintf(fs, "%u [label = \"", *node_id); - for (s_redir *this = node->body.child_cmd.redirs; this; this = this->next, ++i) - fprintf(fs, "redirs[%d]: fd=%d, type=%d, word=%s\\n", i, this->fd, this->type, this->word); + for (s_redir *this = node->body.child_cmd.redirs; this; this = this->next, ++i) { + fprintf(fs, "redirs[%d]: fd=%d, type=%d, word=", i, this->fd, this->type); + size_t last = 0, p = 0; /* same quote-escaping scheme, applied to the redirection word */ + for (; this->word[p]; ++p) + if (this->word[p] == '"') + fprintf(fs, "%.*s\\", (int) (p - last), this->word + last), last = p; + fprintf(fs, "%.*s", (int) (p - last), this->word + last), last = p; + fprintf(fs, "\\n"); + } fprintf(fs, "\"];\n"); 
fprintf(fs, "%u -> %u\n", cur_id, *node_id); } diff --git a/src/last-ast.sh b/src/last-ast.sh index 96bd109..d66f7b6 100755 --- a/src/last-ast.sh +++ b/src/last-ast.sh @@ -1,9 +1,11 @@ #!/bin/sh +last="" # 'shopt -s nullglob' is bash-only; under POSIX sh an unmatched glob stays literal and is rejected by 'test -e' below + for i in /tmp/42sh-ast-*.dot; do last="$i" done - -echo "$last to echo x${last%*.dot}" +test -e "$last" || exit 1 +echo "[$last] to [${last%*.dot}.png]" dot -Tpng $last -o ${last%*.dot}.png ln -sf ${last%*.dot}.png ast.png diff --git a/src/parser/lexer.c b/src/parser/lexer.c index b18608f..dae9bbb 100644 --- a/src/parser/lexer.c +++ b/src/parser/lexer.c @@ -5,7 +5,7 @@ ** Login ** ** Started on Sun Jul 30 04:36:53 2006 Seblu -** Last update Tue Aug 29 21:54:23 2006 Seblu +** Last update Fri Sep 1 02:04:21 2006 Seblu */ #include @@ -282,10 +282,11 @@ static int lexer_eatline(s_lexer *lexer) static int lexer_cut(s_lexer *lexer) { const char *buf = lexer->buf; + char *tokstr; size_t *buf_pos = &lexer->buf_pos, token_start, token_pos; int end_found = 0; char backed = 0, quoted = 0; - const s_quote*quote; + const s_quote *quote; // Rationale: Search begin of token //eat separators (" ",\t, \v) @@ -321,8 +322,9 @@ static int lexer_cut(s_lexer *lexer) if (end_found) break; } lexer->buf_pos = token_pos; //update real lexer position buffer - token_set(&lexer->token, TOK_WORD, - strndup(buf + token_start, token_pos - token_start)); + tokstr = strndup(buf + token_start, token_pos - token_start); + token_set(&lexer->token, ((buf[token_pos] == '>' || buf[token_pos] == '<') + && isdigitstr(tokstr)) ? 
TOK_IONUMBER : TOK_WORD, tokstr); return 1; } diff --git a/src/parser/parser.c b/src/parser/parser.c index dcca960..65decb0 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -5,13 +5,15 @@ ** Login ** ** Started on Wed Aug 2 00:56:07 2006 Seblu -** Last update Fri Sep 1 01:00:42 2006 Seblu +** Last update Mon Sep 25 04:44:08 2006 Seblu */ #include +#include #include #include #include +#include #include "parser.h" #include "../common/macro.h" #include "../shell/shell.h" @@ -23,6 +25,9 @@ ** ============ */ +enum { + FD_MAX = 32765 +}; static s_token keywords[] = { @@ -45,7 +50,6 @@ static s_token keywords[] = {TOK_NONE, NULL, 0} }; - static s_ast_node *regnode(s_parser *parser, s_ast_node *node); /*! @@ -68,8 +72,16 @@ static s_ast_node *parse_andor(s_parser *parser); static s_ast_node *parse_pipeline(s_parser *parser); static s_ast_node *parse_command(s_parser *parser); - +static int parse_prefix(s_parser *parser, s_ast_node *cmd); +static int parse_element(s_parser *parser, s_ast_node *cmd); static s_ast_node *parse_simplecommand(s_parser *parser); +static void parse_redirection(s_parser *parser, s_ast_node *cmd); +static s_ast_node *parse_shellcommand(s_parser *parser); +static s_ast_node *parse_rulefor(s_parser *parser); +static s_ast_node *parse_rulewhile(s_parser *parser); +static s_ast_node *parse_ruleuntil(s_parser *parser); +static s_ast_node *parse_ruleif(s_parser *parser); +static s_ast_node *parse_rulecase(s_parser *parser); /*! ** Notify a parse error @@ -140,17 +152,17 @@ static void parse_error(s_parser *parser, s_token t) /* return 0; */ /* } */ -static int is_assignment(s_token t) +static int is_assignment(const s_token t) { return strchr(t.str, '=') == NULL ? 
0 : 1; } -static void recon(s_token t) +static void recon(s_token *t) { //check for keywords for (int i = 0; keywords[i].id != TOK_NONE; ++i) - if (!strncmp(t.str, keywords[i].str, keywords[i].len)) { - t.id = keywords[i].id; + if (!strcmp(t->str, keywords[i].str)) { /* exact match only: a prefix match would turn "fortune" into TOK_FOR */ + t->id = keywords[i].id; } //check } @@ -250,6 +262,7 @@ static s_ast_node *parse_pipeline(s_parser *parser) debugmsg("parse_pipeline"); token = lexer_lookahead(parser->lexer); + recon(&token); if (token.id == TOK_BANG) { lexer_gettoken(parser->lexer); banged = 1; @@ -264,65 +277,144 @@ static s_ast_node *parse_command(s_parser *parser) debugmsg("parse_command"); token = lexer_lookahead(parser->lexer); - recon(token); - if (token.id == TOK_WORD) { + recon(&token); + if (token.id == TOK_FOR || token.id == TOK_WHILE || token.id == TOK_UNTIL || + token.id == TOK_CASE || token.id == TOK_IF || + !strcmp(token.str, "{") || !strcmp(token.str, "(")) + return parse_shellcommand(parser); + // ambiguity with the 'function' keyword: function declarations are not handled yet 
+ else if (token.id == TOK_WORD) { return parse_simplecommand(parser); } + else + parse_error(parser, token); return NULL; } -static s_ast_node *parse_simplecommand(s_parser *parser) +static int parse_element(s_parser *parser, s_ast_node *cmd) +{ + s_token token; + int found = 0; + + debugmsg("parse_element"); + for (;;) { + token = lexer_lookahead(parser->lexer); + if (token.id >= TOK_DLESSDASH && token.id <= TOK_IONUMBER) { + parse_redirection(parser, cmd); + ++found; + } + else if (token.id == TOK_WORD) { + ast_cmd_add_argv(cmd, lexer_gettoken(parser->lexer).str); + ++found; + } + else + break; + } + return found; +} + +static int parse_prefix(s_parser *parser, s_ast_node *cmd) { s_token token; + int found = 0; + + debugmsg("parse_prefix"); + for (;;) { + token = lexer_lookahead(parser->lexer); + if (token.id >= TOK_DLESSDASH && token.id <= TOK_IONUMBER) { + parse_redirection(parser, cmd); + ++found; + } + else if (is_assignment(token)) { + ast_cmd_add_prefix(cmd, lexer_gettoken(parser->lexer).str); + ++found; + } + else + break; + } + return found; +} + +static s_ast_node *parse_simplecommand(s_parser *parser) +{ s_ast_node *cmd; + int found = 0; debugmsg("parse_simplecommand"); cmd = regnode(parser, ast_cmd_create()); - //get prefix - while (is_assignment(lexer_lookahead(parser->lexer))) - ast_cmd_add_prefix(cmd, lexer_gettoken(parser->lexer).str); - //get element - if ((token = lexer_gettoken(parser->lexer)).id == TOK_WORD) - ast_cmd_add_argv(cmd, token.str); - else - parse_error(parser, token); - while (recon(token = lexer_lookahead(parser->lexer)), - token.id == TOK_WORD) { - ast_cmd_add_argv(cmd, token.str); - lexer_gettoken(parser->lexer); - } + found += parse_prefix(parser, cmd); + found += parse_element(parser, cmd); + if (!found) + parse_error(parser, lexer_lookahead(parser->lexer)); return cmd; } -/* static s_ast_node *parse_shellcommand(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static s_ast_node 
*parse_shellcommand(s_parser *parser) +{ + s_token token; -/* static s_ast_node *parse_funcdec(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ + debugmsg("parse_shellcommand"); + token = lexer_lookahead(parser->lexer); recon(&token); /* lookahead hands back a by-value copy, so keyword ids must be recomputed here */ + switch (token.id) { + case TOK_IF: return parse_ruleif(parser); + case TOK_FOR: return parse_rulefor(parser); + case TOK_WHILE: return parse_rulewhile(parser); + case TOK_UNTIL: return parse_ruleuntil(parser); + case TOK_CASE: return parse_rulecase(parser); + case TOK_WORD: + //TODO: handle the '{' and '(' cases + assert(0); + break; + default: + parse_error(parser, token); + } -/* static s_ast_node *parse_cmdprefix(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ + return NULL; +} -/* static s_ast_node *parse_redirection(s_parser *parser) */ +/* static s_ast_node *parse_funcdec(s_parser *parser) */ /* { */ /* parser=parser; */ /* return NULL; */ /* } */ -/* static s_ast_node *parse_element(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static void parse_redirection(s_parser *parser, s_ast_node *cmd) +{ + s_token token; + long int fd = -1; /* -1: no IONUMBER given; replaced in the switch below by the POSIX default (0 for input, 1 for output operators) */ + e_redir_type redtype; + + debugmsg("parse_redirection"); + //retrieve redirection fd if exist + if ((token = lexer_lookahead(parser->lexer)).id == TOK_IONUMBER) { + lexer_gettoken(parser->lexer); + errno = 0; + fd = strtol(token.str, NULL, 10); + if (errno || fd < 0 || fd > FD_MAX) + parse_error(parser, token); + } + //retrieve redirection type + token = lexer_gettoken(parser->lexer); + switch (token.id) { + case TOK_GREAT: redtype = R_GREAT; if (fd < 0) fd = 1; break; + case TOK_DGREAT: redtype = R_DGREAT; if (fd < 0) fd = 1; break; + case TOK_DLESSDASH: redtype = R_DLESSDASH; if (fd < 0) fd = 0; break; + case TOK_DLESS: redtype = R_DLESS; if (fd < 0) fd = 0; break; + case TOK_LESSGREAT: redtype = R_LESSGREAT; if (fd < 0) fd = 0; break; + case TOK_LESSAND: redtype = R_LESSAND; if (fd < 0) fd = 0; break; + case TOK_LESS: redtype = R_LESS; if (fd < 0) fd = 0; break; + case TOK_CLOBBER: redtype = R_CLOBBER; if (fd < 0) fd = 1; break; + case TOK_GREATAND: redtype = R_GREATAND; if (fd < 0) fd = 1; break; + 
default: + parse_error(parser, token); + } + //retrieve redirection word + token = lexer_gettoken(parser->lexer); + if (token.id == TOK_WORD) + ast_cmd_add_redir(cmd, redtype, fd, token.str); + else + parse_error(parser, token); +} /* static s_ast_node *parse_compound_list(s_parser *parser) */ /* { */ @@ -330,35 +422,35 @@ static s_ast_node *parse_simplecommand(s_parser *parser) /* return NULL; */ /* } */ -/* static s_ast_node *parse_rulefor(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static s_ast_node *parse_rulefor(s_parser *parser) +{ + parser=parser; + return NULL; +} -/* static s_ast_node *parse_rulewhile(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static s_ast_node *parse_rulewhile(s_parser *parser) +{ + parser=parser; + return NULL; +} -/* static s_ast_node *parse_ruleuntil(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static s_ast_node *parse_ruleuntil(s_parser *parser) +{ + parser=parser; + return NULL; +} -/* static s_ast_node *parse_rulecase(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static s_ast_node *parse_rulecase(s_parser *parser) +{ + parser=parser; + return NULL; +} -/* static s_ast_node *parse_ruleif(s_parser *parser) */ -/* { */ -/* parser=parser; */ -/* return NULL; */ -/* } */ +static s_ast_node *parse_ruleif(s_parser *parser) +{ + parser=parser; + return NULL; +} /* static s_ast_node *parse_elseclause(s_parser *parser) */ /* { */ diff --git a/src/parser/parser.h b/src/parser/parser.h index e20f5e5..ac44e22 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -5,7 +5,7 @@ ** Login ** ** Started on Wed Aug 2 00:49:50 2006 Seblu -** Last update Fri Sep 1 00:21:07 2006 Seblu +** Last update Mon Sep 25 03:41:31 2006 Seblu */ #include @@ -16,7 +16,7 @@ # include "getline.h" -// Define is parser or lexer is run for DEBBUGING +// Define is parser or lexer is running for DEBBUGING #define 
DEBUG_PARSER 1 #define DEBUG_LEXER 0 @@ -41,6 +41,7 @@ typedef enum tokenid TOK_GREATAND, // >& TOK_CLOBBER, // >| TOK_GREAT, // > + TOK_IONUMBER, // digit-only word immediately followed by '>' or '<' TOK_WORD, // all others //token context-sensitive recognition (parser time) TOK_IF, diff --git a/src/shell/getoptions.c b/src/shell/getoptions.c index 0c52db2..58ca9ca 100644 --- a/src/shell/getoptions.c +++ b/src/shell/getoptions.c @@ -5,7 +5,7 @@ ** Login ** ** Started on Sun Jul 30 03:28:26 2006 Seblu -** Last update Wed Aug 30 00:27:53 2006 Seblu +** Last update Mon Sep 25 02:52:45 2006 Seblu */ #include @@ -42,5 +42,9 @@ void getoptions(s_options *opt, int argc, char **argv) } else if (!strcmp(copt, "--ast-print")) option_set(shell->options, "ast_print"); + else { + fprintf(stderr, "Error: Invalid option %s\n", copt); /* diagnostics belong on stderr, not stdout */ + exit(1); + } } } -- GitLab