lex.c

/* [<][>][^][v][top][bottom][index][help] */
FUNCTIONS

This source file includes the following functions.
init_lex
get_character
unget_character
get_linenum
lex
lexbang
lexsl
lexdec
lexcolon
lexlt
lexeq
lexgt
lexname
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "confdef.h"
#include "lang_dep.h"

#include "token.h"

#include "lex.h"
  12 
  13 /* 文字列と字句値の対応のための構造体 */
  14 typedef struct words_tbl {
  15   char const *str;
  16   lexval const lexval;
  17 } words_tbl;
  18 
  19 static int get_character(void);
  20 static void unget_character(int c);
  21 
  22 static token const lexbang(int c);
  23 static token const lexsl(int c);
  24 static token const lexdec(int c);
  25 static token const lexcolon(int c);
  26 static token const lexlt(int c);
  27 static token const lexeq(int c);
  28 static token const lexgt(int c);
  29 static token const lexname(int c);
  30 
  31 static int line_count;
  32 static FILE *in_file;
  33 
  34 /* 予約語の文字列と字句値の対応、二分検索のため辞書順を保つこと */
  35 static words_tbl const keywords_tbl[] = {
  36   {"call",     LEXVAL_CALL},
  37   {"defproc",  LEXVAL_DEFPROC},
  38   {"getchar",  LEXVAL_GETCHAR},
  39   {"if",       LEXVAL_IF},
  40   {"proc",     LEXVAL_PROC},
  41   {"putchar",  LEXVAL_PUTCHAR},
  42   {"set",      LEXVAL_SET},
  43   {"while",    LEXVAL_WHILE},
  44   {"word",     LEXVAL_WORD}
  45 };
  46 
  47 void
  48 init_lex(FILE *fp)
     /* [<][>][^][v][top][bottom][index][help] */
  49 {
  50   line_count = 1;
  51   in_file = fp;
  52 }
  53 
  54 static int
  55 get_character(void)
     /* [<][>][^][v][top][bottom][index][help] */
  56 {
  57   int c;
  58 
  59   c = getc(in_file);
  60 
  61   if (c == '\n')
  62     line_count++;
  63 
  64   c = ((c == EOF) ? (-1) : (c));
  65 
  66   return c;
  67 }
  68 
  69 static void
  70 unget_character(int c)
     /* [<][>][^][v][top][bottom][index][help] */
  71 {
  72   if (c == EOF)
  73     return;
  74 
  75   if (c == '\n')
  76     line_count--;
  77 
  78   ungetc(c, in_file);
  79 }
  80 
  81 int
  82 get_linenum(void)
     /* [<][>][^][v][top][bottom][index][help] */
  83 {
  84   return line_count;
  85 }
  86 
  87 token const
  88 lex(void)
     /* [<][>][^][v][top][bottom][index][help] */
  89 {
  90   int c;
  91 
  92   for (;;)  /* トークンの 1 文字目 ( の候補 ) を見る */
  93     {
  94       c = get_character();
  95       switch (c)
  96         {
  97         /* End Of File */
  98         case -1:
  99           return get_token(LEXVAL_eof);
 100 
 101         /* 空白は読み飛ばす */
 102 
 103         case '\t':  /* FALLTHRU */  /* 「落ち抜け」を積極的に使う case */
 104         case '\n':  /* FALLTHRU */
 105         case '\f':  /* FALLTHRU */
 106         case '\r':  /* FALLTHRU */
 107         case '\x1a':  /* FALLTHRU */
 108         case ' ':  /* FALLTHRU */
 109           break;
 110 
 111         /* 以下、キャラクタを分類 */
 112         /* 使われないキャラクタについては case を書かずに default に落とす */
 113         /* '_' をアルファベットの一種とみなすことと */
 114         /* '/' の特殊な扱い ("//" によるコメントを読み飛ばす ) に注意  */
 115 
 116         /* 1 文字で決定する記号類 (ASCII 順 ) */
 117         case '%':  /* FALLTHRU */
 118         case '&':  /* FALLTHRU */
 119         case '(':  /* FALLTHRU */
 120         case ')':  /* FALLTHRU */
 121         case '*':  /* FALLTHRU */
 122         case '+':  /* FALLTHRU */
 123         case ',':  /* FALLTHRU */
 124         case '-':  /* FALLTHRU */
 125         case ';':  /* FALLTHRU */
 126         case '{':  /* FALLTHRU */
 127         case '|':  /* FALLTHRU */
 128         case '}':  /* FALLTHRU */
 129         case '~':  /* FALLTHRU */
 130           return get_token(c);
 131 
 132         /* 複数文字の記号類 (ASCII 順 ) */
 133         /* (c を渡しているが、ほぼ無意味である ) */
 134         case '!':
 135           return lexbang(c);
 136 
 137         case '/':
 138           return lexsl(c);
 139 
 140         case ':':
 141           return lexcolon(c);
 142 
 143         case '<':
 144           return lexlt(c);
 145 
 146         case '=':
 147           return lexeq(c);
 148 
 149         case '>':
 150           return lexgt(c);
 151 
 152         /* 残りはアルファベットと数字。さらにそれ以外ならエラー */
 153         default:
 154           if (isalpha(c) || (c == '_'))
 155             {
 156               return lexname(c);
 157             }
 158           else if (c == '0')
 159             {
 160               int next;
 161               next = get_character();
 162               if (isdigit(next))
 163                 {
 164                   EMSTOP("decimal literal cannot begin with \'0\' except for 0 itself", line_count);
 165                   exit(1);
 166                 }
 167               else
 168                 {
 169                   unget_character(next);
 170                   return lexdec(c);
 171                 }
 172             }
 173           else if (isdigit(c))
 174             {
 175               return lexdec(c);
 176             }
 177           else
 178             {
 179               EMSTOP("unknown character", line_count);
 180               exit(1);
 181             }
 182         }
 183     }
 184 }
 185 
 186 static token const
 187 lexbang(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 188 {
 189   int next;
 190 
 191   next = get_character();
 192   if (next == '=')
 193     {
 194       return get_token(LEXVAL_bangeq);
 195     }
 196   else
 197     {
 198       EMSTOP("unknown operator \'!\'", line_count);
 199       exit(1);
 200     }
 201 }
 202 
 203 static token const
 204 lexsl(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 205 {
 206   int next;
 207 
 208   next = get_character();
 209   if (next == '/')
 210     {
 211       do
 212         next = get_character();
 213       while (next != '\n');
 214       return lex();
 215     }
 216   else
 217     {
 218       unget_character(next);
 219       return get_token(c);
 220     }
 221 }
 222 
 223 static token const
 224 lexdec(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 225 {
 226   char buf[2];
 227   zinc_u_word d;
 228 
 229   d = 0;
 230 
 231   for (;;)
 232     {
 233       buf[0] = c;
 234       buf[1] = '\0';
 235       d += strtoul(buf, 0, 10);
 236 
 237       c = get_character();
 238 
 239       if (!isdigit(c))
 240         {
 241           unget_character(c);
 242           break;
 243         }
 244       if (d > (ZINC_WORD_MAX / 5))  /* XXX この判定は厳密ではない */
 245         {
 246           fprintf(stderr, "lex.c : lexdec() : "
 247                           "overflow (literal of decimal) in line %d\n", line_count);
 248           exit(1);
 249         }
 250       d *= 10;
 251     }
 252 
 253   /* printf("%d\n", d); */
 254 
 255   return get_token_literal(d);
 256 }
 257 
 258 static token const
 259 lexcolon(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 260 {
 261   int next;
 262 
 263   next = get_character();
 264   if (next == '=')
 265     {
 266       return get_token(LEXVAL_coloneq);
 267     }
 268   else
 269     {
 270       unget_character(next);
 271       return get_token(c);
 272     }
 273 }
 274 
 275 static token const
 276 lexlt(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 277 {
 278   int next;
 279 
 280   next = get_character();
 281   if (next == '=')
 282     {
 283       return get_token(LEXVAL_lt_or_eq);
 284     }
 285   else if (next == '<')
 286     {
 287       return get_token(LEXVAL_lsl);
 288     }
 289   else
 290     {
 291       unget_character(next);
 292       return get_token(c);
 293     }
 294 }
 295 
 296 static token const
 297 lexeq(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 298 {
 299   int next;
 300 
 301   next = get_character();
 302   if (next == '=')
 303     {
 304       return get_token(LEXVAL_eqeq);
 305     }
 306   else
 307     {
 308       EMSTOP("unknown operator \'=\'", line_count);
 309       exit(1);
 310     }
 311 }
 312 
 313 static token const
 314 lexgt(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 315 {
 316   int next;
 317 
 318   next = get_character();
 319   if (next == '=')
 320     {
 321       return get_token(LEXVAL_gt_or_eq);
 322     }
 323   else if (next == '>')
 324     {
 325       next = get_character();
 326       if (next == '>')
 327         {
 328           return get_token(LEXVAL_lsr);
 329         }
 330       else
 331         {
 332           unget_character(next);
 333           return get_token(LEXVAL_asr);
 334         }
 335     }
 336   else
 337     {
 338       unget_character(next);
 339       return get_token(c);
 340     }
 341 }
 342 
 343 static token const
 344 lexname(int c)
     /* [<][>][^][v][top][bottom][index][help] */
 345 {
 346   char buf[ZINC_C_NAME_MAX_LEN + 1];
 347 
 348   /* 名前を全部読み込む */
 349   {
 350     int i;
 351 
 352     i = 0;
 353     do
 354       {
 355         buf[i++] = c;
 356         if (i >= ZINC_C_NAME_MAX_LEN)
 357           {
 358             EMSTOP("too long name", line_count);
 359             exit(1);
 360           }
 361         c = get_character();
 362       }
 363     while (isalnum(c) || (c == '_'));
 364 
 365     buf[i] = '\0';
 366     unget_character(c);
 367   }
 368 
 369   /* 予約語かどうかチェック */
 370   {
 371     int dn = (sizeof(keywords_tbl)) / (sizeof(keywords_tbl[0])) - 1;
 372     int up = 0;  /* 2 分検索の上界と下界 */
 373     int mid;
 374     int r;
 375 
 376     for(;;)
 377       {
 378         mid = (dn + up) / 2;
 379         r = strcmp(buf, keywords_tbl[mid].str);
 380 
 381         if (r == 0)  /* 予約語が見つかった */
 382           return get_token(keywords_tbl[mid].lexval);
 383         else if (dn <= up)  /* 見つからない */
 384           {
 385             char *name;
 386 
 387             name = malloc(1 + strlen(buf));
 388             strcpy(name, buf);
 389             return get_token_p(LEXVAL_name, name);
 390           }
 391         else if (r < 0)
 392           dn = mid - 1;
 393         else if (r > 0)
 394           up = mid + 1;
 395       }
 396 
 397     /* NOTREACHED */
 398   }
 399 }
/* [<][>][^][v][top][bottom][index][help] */