//! @file scan.c
//! @author J. Marcel van der Veer
//
//! @section Copyright
//
// This file is part of VIF - vintage FORTRAN compiler.
// Copyright 2020-2025 J. Marcel van der Veer <algol68g@xs4all.nl>.
//
//! @section License
//
// This program is free software; you can redistribute it and/or modify it 
// under the terms of the GNU General Public License as published by the 
// Free Software Foundation; either version 3 of the License, or 
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but 
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
// more details. You should have received a copy of the GNU General Public 
// License along with this program. If not, see <http://www.gnu.org/licenses/>.

//! @section Synopsis
//!
//! Fortran scanner.

#include <vif.h>

#define ADD_CHAR(c) {curlex[k++] = (c);}
#define ADD_LEX ADD_CHAR (CUR_COL)
#define ADD_LEX_NEXT {ADD_LEX; next_col (TRUE);}
#define ADD_RAW_NEXT {curlex[k++] = CUR_LIN.text[curcol]; next_col (TRUE);}
#define ADD_CHR(c) {curlex[k++] = tolower (c);}

#define SKIP_SPACE\
  while (CUR_COL == ' ') {\
    next_col (TRUE);\
  }

void strip_leading_zeroes (char *s)
{
// Strip leading zeroes.
  if (s != NO_TEXT) {
    NEW_RECORD (t);
    RECCPY (t, s);
    RECCLR (s);
    int_4 len = strlen (t), k = 0;
    while (k < len && len > 1 && t[k] == '0') {
      k++;
      len--;
    }
    strcpy (s, &t[k]);
  } else {
    SYNTAX (2801, s);
  }
}

void next_col (int_4 cont_allowed)
{
// Early FORTRAN allowed 20 cards for one source line (1 + 19 continuation cards).
// Below code allows for an arbitratry number of continuation cards.
  curcol++;
  if (CUR_COL == '\0') {
    if (! cont_allowed) {
      return;
    }
    SAVE_POS (1);
    curlin++;
    if (curlin >= nftnlines) {
      RESTORE_POS (1);
      return;
    }
    if (strlen (CUR_LIN.text) < 6) {
      RESTORE_POS (1);
      return;
    }
    if (IS_COMMENT (POS (0)) || POS (5) == ' ') {
      RESTORE_POS (1);
      return;
    }
    for (int_4 i = 0; i < 5; i++) {
      if (POS (i) != ' ') {
        SYNTAX (2802, "continuation card columns 1-5");
      }
    }
    curcol = 6;
  }
}

void skip_card (int_4 check)
{
  if (check) {
// When a correct statement has left the scanner at the final token.
    if (!EQUAL (prelex, curlex)) {
      SYNTAX (2803, curlex);
    } else if (prelin == curlin) {
      (void) scan (EXPECT_NONE);
      if (prelin == curlin) {
        if (!EQUAL (prelex, curlex)) {
          SYNTAX (2804, curlex);
        }
      }
    }
  }
// VIF is sloppy about trailing text.
// This is intentional, old code can have text from column 73 onward.
  if (prelin == curlin) {
    int_4 rc;
    do {
      rc = scan (EXPECT_NONE);
    }
    while (rc != END_OF_LINE && rc != END_OF_MODULE);
  } else if (CUR_LIN.text == NO_TEXT) {
    return;
  } else if (strlen (CUR_LIN.text) >= 6 && POS(5) != ' ') {
    int_4 rc;
    do {
      rc = scan (EXPECT_NONE);
    }
    while (rc != END_OF_LINE && rc != END_OF_MODULE);
  }
}

int_4 scan_hollerith (void)
{
  int_4 k = 0, N = 0;
  if (hollerith) {
    SYNTAX (2805, "disabled Hollerith string");
  }
  if (!is_int4 (curlex, &N)) {
    SCANER (2806, "invalid hollerith length", NO_TEXT);
    return curret = END_OF_MODULE;
  } else {
    k = 0;
    RECCLR (curlex);
    ADD_CHR ('"');
    next_col (TRUE);
    for (int_4 chr = 0, go_on = TRUE; chr < N && go_on; chr++) {
      if (CUR_COL == '\0') {
        go_on = FALSE;
      } else if (CUR_COL == '"') {
        ADD_CHR ('\"');
        next_col (TRUE);
      } else {
        ADD_RAW_NEXT;
      }
    }
    ADD_CHR ('"');
    return curret = TEXT;
  }
}

int_4 scan_string (void)
{
  int_4 k = 0;
  ADD_CHR ('"');
  next_col (TRUE);
  int_4 go_on = TRUE;
  while (go_on) {
    if (CUR_COL == '\0') {
      SCANER (2807, "unterminated string", NO_TEXT);
      ADD_CHR ('"');
      return curret = END_OF_MODULE;
    } else if (CUR_COL == '\'') {
      SAVE_POS (1);
      next_col (TRUE);
      if (CUR_COL == '\'') {
        ADD_CHR ('\'');
        next_col (TRUE);
      } else {
        RESTORE_POS (1);
        go_on = FALSE;
      }
    } else if (CUR_COL == '"') {
      ADD_CHR ('\\');
      ADD_CHR ('"');
      next_col (TRUE);
    } else {
      ADD_RAW_NEXT;
    }
  }
  if (CUR_COL != '\'') {
    SCANER (2808, "unterminated string", NO_TEXT);
    return curret = END_OF_MODULE;
  } else {
    ADD_CHR ('"');
    next_col (TRUE);
    return curret = TEXT;
  }
}

int_4 scan_string_alt (void)
{
  int_4 k = 0;
  ADD_CHR ('"');
  next_col (TRUE);
  int_4 go_on = TRUE;
  while (go_on) {
    if (CUR_COL == '\0') {
      SCANER (2809, "unterminated string", NO_TEXT);
      ADD_CHR ('"');
      return curret = END_OF_MODULE;
    } else if (CUR_COL == '"') {
      SAVE_POS (1);
      next_col (TRUE);
      if (CUR_COL == '"') {
        ADD_CHR ('"');
        next_col (TRUE);
      } else {
        RESTORE_POS (1);
        go_on = FALSE;
      }
    } else if (CUR_COL == '"') {
      ADD_CHR ('\\');
      ADD_CHR ('"');
      next_col (TRUE);
    } else {
      ADD_RAW_NEXT;
    }
  }
  if (CUR_COL != '"') {
    SCANER (2810, "unterminated string", NO_TEXT);
    return curret = END_OF_MODULE;
  } else {
    ADD_CHR ('"');
    next_col (TRUE);
    return curret = TEXT;
  }
}

int_4 scan_exponent (void)
{
  int_4 k = strlen (curlex);
  if (EXPONENT (curcol)) {
    ADD_LEX_NEXT;
    SKIP_SPACE;
    if (CUR_COL == '+' || CUR_COL == '-' || isdigit (CUR_COL)) {
      ADD_LEX_NEXT;
      SKIP_SPACE;
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
    }
  }
  return curret = FLT_NUMBER;
}

int_4 scan_declarer (int_4 k)
{
  if (TOKEN ("integer")) {
    SKIP_SPACE;
    if (CUR_COL == '*') {
      ADD_LEX_NEXT;
      SKIP_SPACE;
      CHECKDIGIT (2811, CUR_COL);
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
    }
    return DECLAR;
  }
  if (TOKEN ("logical")) {
    SKIP_SPACE;
    if (CUR_COL == '*') {
      ADD_LEX_NEXT;
      SKIP_SPACE;
      CHECKDIGIT (2812, CUR_COL);
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
    }
    return DECLAR;
  }
  if (TOKEN ("real")) {
    SKIP_SPACE;
    if (CUR_COL == '*') {
      ADD_LEX_NEXT;
      SKIP_SPACE;
      CHECKDIGIT (2813, CUR_COL);
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
    }
    return DECLAR;
  }
  if (TOKEN ("complex")) {
    SKIP_SPACE;
    if (CUR_COL == '*') {
      ADD_LEX_NEXT;
      SKIP_SPACE;
      CHECKDIGIT (2814, CUR_COL);
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
    }
    return DECLAR;
  }
  if (TOKEN ("character")) {
    SKIP_SPACE;
    if (CUR_COL == '*') {
      ADD_LEX_NEXT;
      SKIP_SPACE;
      if (CUR_COL == '(') {
        ADD_LEX_NEXT;
        SKIP_SPACE;
        if (CUR_COL == '*') {
          ADD_LEX_NEXT;
        } else {
          while (islower (CUR_COL) || isdigit (CUR_COL) || CUR_COL == '_') {
            ADD_LEX_NEXT;
          }
        }
        SKIP_SPACE;
        if (CUR_COL == ')') {
          ADD_LEX_NEXT;
        }
      } else {
        CHECKDIGIT (2815, CUR_COL);
        while (isdigit (CUR_COL)) {
          ADD_LEX_NEXT;
          SKIP_SPACE;
        }
      }
    }
    return DECLAR;
  }
  return LEXEME;
}

int_4 scan_bin (int_4 k)
{
// Radix 2 number.
  ADD_CHR ('0');
  ADD_CHR ('b');
  next_col (TRUE);
  while (CUR_COL != '\'' && CUR_COL != '\0') {
    if (CUR_COL != '0' && CUR_COL != '1') {
      SCANER (2816, "invalid binary digit", NO_TEXT);
      return END_OF_MODULE;
    }
    ADD_LEX_NEXT;
  }
  if (CUR_COL != '\'') {
    SCANER (2817, "invalid denotation", NO_TEXT);
    return END_OF_MODULE;
  }
  next_col (TRUE);
  strip_leading_zeroes (curlex);
  return INT_NUMBER;
}

int_4 scan_hex (int_4 k)
{
// Radix 16 number.
  ADD_CHR ('0');
  ADD_CHR ('x');
  next_col (TRUE);
  while (CUR_COL != '\'' && CUR_COL != '\0') {
    if (!isxdigit (CUR_COL)) {
      SCANER (2818, "invalid hex digit", NO_TEXT);
      return END_OF_MODULE;
    }
    ADD_LEX_NEXT;
  }
  if (CUR_COL != '\'') {
    SCANER (2819, "invalid denotation", NO_TEXT);
    return END_OF_MODULE;
  }
  next_col (TRUE);
  strip_leading_zeroes (curlex);
  return INT_NUMBER;
}

int_4 scan_part (char *expect)
{
  int_4 k = 0;
  RECCLR (curlex);
  CUR_LIN.proc = nprocs;
// Skip empty lines.
  if (curcol == START_OF_LINE && curlin < nftnlines) {
    if (POS (0) == '\0') {
      curlin++;
      return curret = scan_part (expect);
    }
  }
  if (curcol > START_OF_LINE && CUR_COL == '\0') {
// Next scan starts at new line.
    curlin++;
    curcol = START_OF_LINE;
  }
  if (curlin >= nftnlines) {
    return curret = END_OF_MODULE;
  }
  if (curcol == START_OF_LINE) {
    while (POS (0) == '\0' || IS_COMMENT (POS (0))) {
      if (POS (0) == '/') {
        vif_jcl ();
      }
      curlin++;
      if (curlin >= nftnlines) {
        return curret = END_OF_MODULE;
      }
    }
    if (CUR_LIN.isn > 0) {
      if (POS (5) == ' ') {
        curcol = 0;
        return curret = END_OF_LINE;
      } else {
// All but first line can be continuations.
        curcol = 6;
      }
    } else {
      curcol = 0;
    }
  }
// Skip trailing blanks.
  SKIP_SPACE;
  if (CUR_COL == '\0') {
// No symbol left at card, scan again on next card.
    curlin++;
    curcol = START_OF_LINE;
    return curret = scan_part (expect);
  }
// Mark start of lexeme for messages.
  prelin = curlin;
  precol = curcol;
  if (islower (CUR_COL)) {
// A letter, possibly a radix.
    if (CUR_COL == 'b') {
      SAVE_POS (1);
      next_col (TRUE);
      if (CUR_COL != '\'') {
        RESTORE_POS (1);
      } else {
        return curret = scan_bin (k);
      }
    } else if (CUR_COL == 'x') {
      SAVE_POS (1);
      next_col (TRUE);
      if (CUR_COL != '\'') {
        RESTORE_POS (1);
      } else {
        return curret = scan_hex (k);
      }
    }
// Fetch identifier or keyword.
// Identifiers may contain spaces if the part upto the first space is not a keyword.
// Here VIF differs from vintage FORTRAN.
    int_4 space_chk = TRUE;
    while (_IDFCHAR (CUR_COL)) {
      if (CUR_COL == ' ') {
        if (space_chk && reserved (curlex)) {
          break;
        } else {
          space_chk = FALSE;
        }
      } else if (CUR_COL == '$') {
        ADD_CHAR ('_');
      } else {
        ADD_LEX;
      }
      next_col (TRUE);
    }
// END, END DO, END IF, END WHILE
    if (TOKEN ("end")) {
      SKIP_SPACE;
      while (islower (CUR_COL)) {
        ADD_LEX_NEXT;
      }
    }
// ELSE IF
    if (TOKEN ("else")) {
      SKIP_SPACE;
      while (islower (CUR_COL)) {
        ADD_LEX_NEXT;
      }
    }
// Catch declarers here.
    if (scan_declarer (k) == DECLAR) {
      return curret = DECLAR;
    } else {
      return curret = WORD;
    }
  } else if (isdigit (CUR_COL)) {
    if (curcol < 5) {
// Label.
      while (curcol < 5 && isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
      return curret = LABEL;
    }
// Number.
    while (isdigit (CUR_COL)) {
      ADD_LEX_NEXT;
      SKIP_SPACE;
    }
    if (EQUAL (expect, EXPECT_LABEL)) {
      return curret = LABEL;
    }
    if (CUR_COL == 'h') {
// Hollerith operand
      return scan_hollerith ();
    }
    if (CUR_COL != '.' && !EXPONENT (curcol)) {
      strip_leading_zeroes (curlex);
      return curret = INT_NUMBER;
    } else {
      if (CUR_COL == '.') {
// Special symbols .XYZZY. like (n/2.eq.1).
        SAVE_POS (1);
        next_col (TRUE);
        while (islower (CUR_COL)) {
          next_col (TRUE);
        }
        if (CUR_COL == '.') {
          RESTORE_POS (1);
          strip_leading_zeroes (curlex);
          return curret = INT_NUMBER;
        } else {
          RESTORE_POS (1);
        }
// Fraction.
        ADD_LEX_NEXT;
        SKIP_SPACE;
        while (isdigit (CUR_COL)) {
          ADD_LEX_NEXT;
          SKIP_SPACE;
        }
      }
// Exponent part.
      return scan_exponent ();
    }
  } else if (CUR_COL == '.') {
    ADD_LEX_NEXT;
    SKIP_SPACE;
// Fraction.
    if (isdigit (CUR_COL)) {
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
        SKIP_SPACE;
      }
// Exponent part.
      return scan_exponent ();
    }
// Special symbols .XYZZY. .
    if (CUR_COL == '.') {
      next_col (TRUE);
    }
    while (islower (CUR_COL)) {
      ADD_LEX_NEXT;
    }
    if (CUR_COL == '.') {
      ADD_LEX_NEXT;
    } else {
      SCANER (2820, "stray dot", NO_TEXT);
    }
  } else if (CUR_COL == '*') {
    ADD_LEX_NEXT;
    if (CUR_COL == '*') {
      ADD_LEX_NEXT;
    } else if (CUR_COL == '%') {
      ADD_LEX_NEXT;
    }
  } else if (CUR_COL == '/') {
// Concatenation.
    ADD_LEX_NEXT;
    if (CUR_COL == '/') {
      ADD_LEX_NEXT;
    } else if (CUR_COL == '=') {
      ADD_LEX_NEXT;
    }
  } else if (CUR_COL == '\'') {
// Character string.
    return scan_string ();
  } else if (CUR_COL == '"') {
// Character string.
    return scan_string_alt ();
  } else if (CUR_COL == '=') {
    ADD_LEX_NEXT;
    if (CUR_COL == '=') {
      ADD_LEX_NEXT;
    }
  } else if (CUR_COL == '!') {
    ADD_LEX_NEXT;
    if (CUR_COL == '=') {
      ADD_LEX_NEXT;
    }
  } else if (CUR_COL == '<') {
    ADD_LEX_NEXT;
    if (CUR_COL == '=') {
      ADD_LEX_NEXT;
    }
  } else if (CUR_COL == '>') {
    ADD_LEX_NEXT;
    if (CUR_COL == '=') {
      ADD_LEX_NEXT;
    }
  } else if (CUR_COL != '\0') {
// Something else.
    ADD_LEX_NEXT;
  } else {
// No symbol left at card, scan again on next card.
    curlin++;
    curcol = START_OF_LINE;
    return curret = scan_part (expect);
  }
  return curret = LEXEME;
}

int_4 scan (char *expect)
{
  int_4 rc;
  RECCPY (prelex, curlex);
  preret = curret;
  RECCLR (curlex);
  rc = scan_part (expect);
  if (rc == END_OF_LINE || rc == END_OF_MODULE) {
    return curret = rc;
  }
  if (rc == LABEL) {
    return curret = rc;
  }
  if (TOKEN ("double")) {
    scan_part (EXPECT_NONE);
    if (TOKEN ("precision")) {
      // RECCPY (curlex, "real*8");
      RECCPY (curlex, "doubleprecision");
    } else if (TOKEN ("complex")) {
      // RECCPY (curlex, "complex*16");
      RECCPY (curlex, "doublecomplex");
    } else {
      RECCPY (curlex, "real*8");
      EXPECT (2821, "precision");
    }
    return curret = DECLAR;
  } else if (TOKEN ("go")) {
    scan_part (EXPECT_NONE);
    if (!TOKEN ("to")) {
      SCANER (2822, "invalid goto", NO_TEXT);
    }
    RECCPY (curlex, "goto");
    return curret = WORD;
  }
  if (EQUAL (expect, EXPECT_LABEL)) {
    ;
  } else if (expect != NO_TEXT && !EQUAL (curlex, expect)) {
    NEW_RECORD (str);
    _srecordf (str, "%s but found %s", expect, curlex);
    EXPECT (2823, str);
    return curret = ERR;
  }
  return curret = rc;
}

int_4 scan_fmt (void)
{
  int_4 k = 0;
  RECCPY (prelex, curlex);
  preret = curret;
  RECCLR (curlex);
  CUR_LIN.proc = nprocs;
// Skip empty lines.
  if (curcol == START_OF_LINE && curlin < nftnlines) {
    if (POS (0) == '\0') {
      curlin++;
      return curret = scan_fmt ();
    }
  }
  if (curcol > START_OF_LINE && CUR_COL == '\0') {
// Next scan starts at new line.
    curlin++;
    curcol = START_OF_LINE;
  }
  if (curlin >= nftnlines) {
    return curret = END_OF_MODULE;
  }
  if (curcol == START_OF_LINE) {
    while (IS_COMMENT (POS (0))) {
      curlin++;
      if (curlin >= nftnlines) {
        return curret = END_OF_MODULE;
      }
    }
    if (CUR_LIN.isn > 1) {
      if (POS (5) == ' ') {
        curcol = 0;
        return curret = END_OF_LINE;
      } else {
// All but first line can be continuations.
        curcol = 6;
      }
    } else {
      curcol = 0;
    }
  }
// Skip trailing blanks.
  SKIP_SPACE;
  if (CUR_COL == '\0') {
// No symbol left at card, scan again on next card.
    curlin++;
    curcol = START_OF_LINE;
    return curret = scan_fmt ();
  }
// Mark start of lexeme for messages.
  prelin = curlin;
  precol = curcol;
  if (islower (CUR_COL)) {
// Format specifier.
    while (islower (CUR_COL) || isdigit (CUR_COL)) {
      ADD_RAW_NEXT;
    }
    if (CUR_COL == '.') {
      ADD_LEX_NEXT;
    }
    while (isdigit (CUR_COL)) {
      ADD_LEX_NEXT;
    }
    if (islower (CUR_COL)) {
      ADD_RAW_NEXT;
      while (isdigit (CUR_COL)) {
        ADD_LEX_NEXT;
      }
    }
    return curret = LEXEME;
  } else if (isdigit (CUR_COL)) {
// Number.
    while (isdigit (CUR_COL)) {
      ADD_LEX_NEXT;
    }
    if (CUR_COL == 'h') {
// Hollerith format item
      return scan_hollerith ();
    } else {
      strip_leading_zeroes (curlex);
      return curret = INT_NUMBER;
    }
  } else if (CUR_COL == '\'') {
// Character string.
    return scan_string ();
  } else if (CUR_COL == '"') {
// Character string.
    return scan_string_alt ();
  } else if (CUR_COL != '\0') {
// Something else.
    ADD_LEX_NEXT;
  } else {
// No symbol left at card, scan again on next card.
    curlin++;
    curcol = START_OF_LINE;
    return curret = scan_fmt ();
  }
  return curret = LEXEME;
}

logical_4 lookahead(char *expect)
{
  (void) scan (EXPECT_NONE);
  logical_4 check = TOKEN (expect);
  UNSCAN;
  return check;
}
