|
- MODULE FoxCSharpScanner;
- IMPORT Streams, Strings, Diagnostics, Commands, StringPool,
- D := Debugging, Basic := FoxBasic, FoxScanner;
- CONST
- Trace = FALSE;
- (* overall scanner limitation *)
- MaxIdentifierLength* = FoxScanner.MaxIdentifierLength;
- (* parametrization of numeric scanner: *)
- MaxHexDigits* = FoxScanner.MaxHexDigits;
- MaxHugeHexDigits* = FoxScanner.MaxHugeHexDigits;
- MaxRealExponent* = FoxScanner.MaxRealExponent;
- MaxLongrealExponent* = FoxScanner.MaxLongrealExponent;
- (* scanner constants *)
- EOT* = 0X;
- LF* = 0AX;
- CR* = 0DX;
- TAB* = 09X;
- TYPE
- StringType* = FoxScanner.StringType;
- IdentifierType* = FoxScanner.IdentifierType;
- IdentifierString* = FoxScanner.IdentifierString;
- CONST
- (* tokens *)
- None*= 0;
- (* operators and punctuators *)
- Exclamation* = 1; (* ! *)
- ExclamationEqual* = 2; (* != *)
- Percent* = 3; (* % *)
- PercentEqual* = 4; (* %= *)
- And* = 5; (* & *)
- AndEqual* = 6; (* &= *)
- AndAnd* = 7; (* && *)
- LeftParenthesis* = 8; (* ( *)
- RightParenthesis* = 9; (* ) *)
- Times* = 10; (* * *)
- TimesEqual* = 11; (* *= *)
- Plus* = 12; (* + *)
- PlusEqual* = 13; (* += *)
- PlusPlus* = 14; (* ++ *)
- Comma* = 15; (* , *)
- Minus* = 16; (* - *)
- MinusEqual* = 17; (* -= *)
- MinusMinus* = 18; (* -- *)
- Period* = 19; (* . *)
- Slash* = 20; (* / *)
- SlashEqual* = 21; (* /= *)
- Colon* = 22; (* : *)
- Semicolon* = 23; (* ; *)
- Less* = 24; (* < *)
- LessEqual* = 25; (* <= *)
- LeftShift* = 26; (* << *)
- LeftShiftEqual* = 27; (* <<= *)
- Equal* = 28; (* = *)
- EqualEqual* = 29; (* == *)
- Greater* = 30; (* > *)
- GreaterEqual* = 31; (* >= *)
- RightShift* = 32; (* >> *)
- RightShiftEqual* = 33; (* >>= *)
- LeftBracket* = 34; (* [ *)
- RightBracket* = 35; (* ] *)
- Arrow* = 36; (* ^ *)
- ArrowEqual* = 37; (* ^= *)
- LeftBrace* = 38; (* { *)
- Bar* = 39; (* | *)
- BarEqual* = 40; (* |= *)
- BarBar* = 41; (* || *)
- RightBrace* = 42; (* } *)
- Tilde* = 43; (* ~ *)
- (* keywords *)
- As* = 44;
- Base* = 45;
- Bool* = 46;
- Break* = 47;
- Case* = 48;
- Char* = 49;
- Class* = 50;
- Const* = 51;
- Default* = 52;
- Delegate* = 53;
- Do* = 54;
- Double* = 55;
- Else* = 56;
- False* = 57;
- Float* = 58;
- For* = 59;
- If* = 60;
- Import* = 61;
- Int* = 62;
- Internal* = 63;
- Is* = 64;
- Long* = 65;
- Module* = 66;
- New* = 67;
- Null* = 68;
- Object* = 69;
- Public* = 70;
- Readonly* = 71;
- Ref* = 72;
- Return* = 73;
- Sbyte* = 74;
- Short* = 75;
- String* = 76;
- Struct* = 77;
- Switch* = 78;
- This* = 79;
- True* = 80;
- Void* = 81;
- While* = 82;
- Identifier* = 83;
- IntegerLiteral* = 84;
- RealLiteral* = 85;
- CharacterLiteral* = 86;
- StringLiteral* = 87;
- Comment*= 88;
- (* Active Cells *)
- Cell* = 89;
- Cellnet* = 90;
- In* = 91;
- Out* = 92;
- Select* = 93;
- Question* = 94; (* ? *)
- QuestionQuestion* = 95; (* ?? *)
- EndOfText*= 96;
- (* number types *)
- IntNumber*= 1;
- LongNumber*= 2;
- FloatNumber*= 3;
- DoubleNumber*= 4;
- SingleQuote = 27X;
- DoubleQuote* = 22X;
- Backslash = 5CX;
- TYPE
- (* keywords book keeping *)
- Keyword* = FoxScanner.Keyword;
- KeywordTable* = FoxScanner.KeywordTable;
- TYPE
- Token* = LONGINT;
- Position* = Basic.Position;
- (**
- symbol: data structure for the data transfer of
- the last read input from the scanner to the parser
- **)
- Symbol* = RECORD
- (* source position of the symbol; GetNextSymbol copies the scanner position before scanning and sets position.end afterwards *)
- position*: Position;
- (* token class, one of the token constants of this module; set by GetNextSymbol *)
- token*: Token;
- (* string pool index of the identifier text; set by GetIdentifier *)
- identifier*: IdentifierType;
- (* 0X-terminated text of an identifier (GetIdentifier) or of a numeric literal as written (GetNumber) *)
- identifierString*: IdentifierString;
- (* contents of a string literal (GetString) or text of a comment (ReadSingleLineComment, ReadDelimitedComment) *)
- string*: StringType;
- (* length reported by the string maker together with string *)
- stringLength*: LONGINT;
- (* IntNumber, LongNumber, FloatNumber or DoubleNumber; set by GetNumber *)
- numberType*: FoxScanner.SubType;
- (* value of an integer literal (GetNumber) or the ordinal of a character literal (GetCharacter) *)
- integer*: LONGINT;
- (* value of an integer literal as HUGEINT; set by GetNumber *)
- hugeint*: HUGEINT;
- (* value of a character literal; set by GetCharacter *)
- character*: CHAR;
- (* value of a real literal; set by GetNumber *)
- real*: LONGREAL;
- END;
- StringMaker* = FoxScanner.StringMaker;
- Scanner* = OBJECT
- VAR
- source-: StringType;
- reader: Streams.Reader;
- diagnostics: Diagnostics.Diagnostics;
- ch: CHAR;
- position-: Position;
- error-: BOOLEAN;
- stringWriter: Streams.Writer;
- stringMaker: StringMaker;
- (*
- source: name of the source code for reference in error outputs
- reader: input stream
- position: reference position (offset) of the input stream, for error output
- diagnostics: error output object
- *)
- PROCEDURE & InitializeScanner*(
- CONST source: ARRAY OF CHAR;
- reader: Streams.Reader;
- position: LONGINT;
- diagnostics: Diagnostics.Diagnostics);
- BEGIN
- (* string maker and its writer collect the contents of string literals and comments *)
- NEW(stringMaker,1024);
- stringWriter := stringMaker.GetWriter();
- error := FALSE;
- (* keep a private, 0X-terminated copy of the source name for error reports *)
- NEW(SELF.source, Strings.Length(source)+1);
- COPY(source, SELF.source^);
- SELF.reader := reader;
- SELF.diagnostics := diagnostics;
- ch := " ";
- (* without a reader the scanner immediately reports end of text; otherwise the first character is read ahead *)
- IF reader = NIL THEN
- ch := EOT
- ELSE
- GetNextCharacter
- END;
- IF Trace THEN
- D.Str("New scanner ");
- D.Ln;
- END;
- (* the position is set only after the first character has been read: the increment of position.start
- and a possible line increment performed by GetNextCharacter above are overwritten here *)
- SELF.position.start := position;
- SELF.position.line := 0;
- END InitializeScanner;
- (** report an error that occurred during scanning, given as message text **)
- PROCEDURE ErrorS(CONST msg: ARRAY OF CHAR);
- BEGIN
-   (* remember that scanning failed, then hand the message to the diagnostics
-      together with the source name and the current position *)
-   error := TRUE;
-   Basic.Error(diagnostics, source^, position, msg)
- END ErrorS;
- (** report an error that occurred during scanning, given as error code **)
- PROCEDURE Error(code: INTEGER);
- BEGIN
-   (* remember that scanning failed, then report the coded error (with an empty
-      additional message) at the current position *)
-   error := TRUE;
-   Basic.ErrorC(diagnostics, source^, position, code, "")
- END Error;
- (** get next character, end of text results in ch = EOT **)
- PROCEDURE GetNextCharacter;
- BEGIN
-   (* read one character into ch and advance the stream offset *)
-   reader.Char(ch);
-   INC(position.start);
-   IF ch # LF THEN
-     RETURN
-   END;
-   (* a line feed was read: count the line and remember the offset where the new line starts *)
-   INC(position.line);
-   position.linepos := position.start
- END GetNextCharacter;
- (** TRUE iff ch is a line feed (0AX) or a carriage return (0DX) **)
- PROCEDURE IsNewlineCharacter(ch: CHAR): BOOLEAN;
- BEGIN
-   RETURN (ch = LF) OR (ch = CR)
- END IsNewlineCharacter;
- (**
- CharacterLiteral =
- "'" Character "'".
- Character =
- SingleCharacter |
- SimpleEscapeSequence |
- HexadecimalEscapeSequence.
- SingleCharacter =
- any character except ' (27X), \ (5CX), and NewLineCharacter.
- SimpleEscapeSequence =
- "\'" | '\"' | '\\' | '\0' |
- '\a' | '\b' | '\f' | '\n' | '\r' | '\t' | '\v'.
- HexadecimalEscapeSequence =
- '\x' HexDigit {HexDigit}.
- **)
- (* Scan the part of an escape sequence that follows the backslash (ch is the
-    character after the backslash) and return the denoted character in esc.
-    A hexadecimal escape consumes at most two hex digits. An unknown escape
-    character is reported, is not consumed, and yields 0X. *)
- PROCEDURE GetEscapeSequence(VAR esc: CHAR);
- VAR count, digit, value: LONGINT;
-   simple: BOOLEAN;
-   (* value of a hexadecimal digit; dig is left untouched and FALSE is returned for any other character *)
-   PROCEDURE HexDigit(ch: CHAR; VAR dig: LONGINT): BOOLEAN;
-   BEGIN
-     CASE ch OF
-       '0' .. '9':
-         dig := ORD(ch) - ORD('0')
-     | 'A' .. 'F':
-         dig := ORD(ch) - ORD('A') + 10
-     | 'a' .. 'f':
-         dig := ORD(ch) - ORD('a') + 10
-     ELSE
-       RETURN FALSE
-     END;
-     RETURN TRUE
-   END HexDigit;
- BEGIN
-   IF ch = 'x' THEN
-     (* hexadecimal escape: accumulate up to two digits *)
-     GetNextCharacter;
-     value := 0;
-     count := 0;
-     WHILE (count < 2) & HexDigit(ch, digit) DO
-       value := 16 * value + digit;
-       INC(count);
-       GetNextCharacter
-     END;
-     IF count = 0 THEN
-       ErrorS("Illegal hexadecimal escape sequence")
-     END;
-     esc := CHR(value)
-   ELSE
-     (* simple escapes: translate the character, then consume it once *)
-     simple := TRUE;
-     CASE ch OF
-       SingleQuote, DoubleQuote, Backslash:
-         esc := ch
-     | '0':
-         esc := 0X
-     | 'a':
-         esc := 07X
-     | 'b':
-         esc := 08X
-     | 'f':
-         esc := 0CX
-     | 'n':
-         esc := 0AX
-     | 'r':
-         esc := 0DX
-     | 't':
-         esc := 09X
-     | 'v':
-         esc := 0BX
-     ELSE
-       simple := FALSE
-     END;
-     IF simple THEN
-       GetNextCharacter
-     ELSE
-       ErrorS("Illegal escape sequence");
-       esc := 0X (* arbitrary *)
-     END
-   END
- END GetEscapeSequence;
- (* Scan a character literal; ch is the opening single quote on entry.
-    The value is stored in symbol.character and its ordinal in symbol.integer.
-    At end of text only symbol.character is set (to 0X). *)
- PROCEDURE GetCharacter(VAR symbol: Symbol);
- VAR value: CHAR;
- BEGIN
-   GetNextCharacter; (* skip the opening quote *)
-   IF ch = EOT THEN
-     ErrorS("Unexpected end of character literal");
-     symbol.character := 0X
-   ELSE
-     IF ch = Backslash THEN
-       GetNextCharacter;
-       GetEscapeSequence(value)
-     ELSIF (ch # SingleQuote) & ~IsNewlineCharacter(ch) THEN
-       (* ordinary character *)
-       value := ch;
-       GetNextCharacter
-     ELSE
-       (* empty literal or line break inside the literal; the offending character is not consumed here *)
-       ErrorS("Illegal character literal");
-       value := 0X
-     END;
-     (* the literal must be closed by a single quote *)
-     IF ch # SingleQuote THEN
-       ErrorS("Illegal character literal")
-     ELSE
-       GetNextCharacter
-     END;
-     symbol.character := value;
-     symbol.integer := ORD(value)
-   END
- END GetCharacter;
- (**
- StringLiteral =
- '"' {StringCharacter} '"'.
- StringCharacter =
- SingleStringCharacter |
- SimpleEscapeSequence |
- HexadecimalEscapeSequence.
- SingleStringCharacter =
- any character except " (22X), \ (5CX), and NewLineCharacter.
- **)
- (* Scan a string literal; ch is the opening double quote on entry.
-    The decoded contents are collected in the string maker and a copy is stored
-    in symbol.string / symbol.stringLength. End of text or a line break inside
-    the literal is reported as an error and ends the literal. *)
- PROCEDURE GetString(VAR symbol: Symbol);
- VAR c: CHAR;
- BEGIN
-   stringMaker.Clear;
-   GetNextCharacter; (* skip the opening quote *)
-   WHILE (ch # EOT) & ~IsNewlineCharacter(ch) & (ch # DoubleQuote) DO
-     IF ch = Backslash THEN
-       GetNextCharacter;
-       GetEscapeSequence(c)
-     ELSE
-       c := ch;
-       GetNextCharacter
-     END;
-     stringWriter.Char(c)
-   END;
-   (* decide why the loop stopped *)
-   IF ch = DoubleQuote THEN
-     GetNextCharacter
-   ELSIF ch = EOT THEN
-     ErrorS("Unexpected end of string literal")
-   ELSE
-     ErrorS("Illegal string literal")
-   END;
-   stringWriter.Char(0X);
-   stringWriter.Update;
-   (*
-     CAUTION: Due to the implementation of GetStringCopy
-     the length of the copied string may be inconsistent
-     with its contents if the string contains 0X.
-     This must be fixed in the future.
-   *)
-   symbol.string := stringMaker.GetStringCopy(symbol.stringLength)
- END GetString;
- (**
- Identifier =
- (Letter | '_') {Letter | Digit | '_'}.
- Letter =
- 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
- **)
- (* Scan an identifier starting with the current character into
-    symbol.identifierString and enter it into the string pool.
-    The current character is always taken, whatever it is. When
-    MaxIdentifierLength characters have been collected an error is reported,
-    the last character is dropped, and the remaining input is left unread. *)
- PROCEDURE GetIdentifier(VAR symbol: Symbol);
- VAR len: LONGINT;
- BEGIN
-   len := 0;
-   LOOP
-     symbol.identifierString[len] := ch;
-     INC(len);
-     GetNextCharacter;
-     IF reservedCharacter[ORD(ch)] OR (len = MaxIdentifierLength) THEN
-       EXIT
-     END
-   END;
-   IF len = MaxIdentifierLength THEN
-     Error(Basic.IdentifierTooLong);
-     DEC(len) (* make room for the terminating 0X *)
-   END;
-   symbol.identifierString[len] := 0X;
-   StringPool.GetIndex(symbol.identifierString, symbol.identifier)
- END GetIdentifier;
- (**
- Number =
- IntegerLiteral |
- RealLiteral.
- IntegerLiteral =
- DecimalIntegerLiteral |
- HexadecimalIntegerLiteral.
- DecimalIntegerLiteral =
- Digit {Digit} [IntegerTypeSuffix].
- HexadecimalIntegerLiteral =
- ('0X' | '0x') HexDigit {HexDigit} [IntegerTypeSuffix].
- IntegerTypeSuffix =
- 'L' | 'l'.
- RealLiteral =
- Digit {Digit} '.' Digit {Digit} [ExponentPart] [RealTypeSuffix] |
- '.' Digit {Digit} [ExponentPart] [RealTypeSuffix] |
- Digit {Digit} [ExponentPart] [RealTypeSuffix] |
- Digit {Digit} [RealTypeSuffix].
- ExponentPart =
- ('E' | 'e') ['+' | '-'] Digit {Digit}.
- RealTypeSuffix =
- 'F' | 'f' | 'D' | 'd'.
- Digit =
- '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
- HexDigit =
- '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |
- 'A' | 'B' | 'C' | 'D' | 'E' | 'F' |
- 'a' | 'b' | 'c' | 'd' | 'e' | 'f'.
- **)
- (* Scan an integer or real literal and return IntegerLiteral or RealLiteral.
-    frac = FALSE: ch is the first digit of the literal.
-    frac = TRUE: the literal started with '.', which has already been consumed; ch is the first digit after it.
-    Results: symbol.identifierString receives the literal text as written,
-    symbol.numberType the number type, and symbol.integer / symbol.hugeint or symbol.real the value.
-    Local state of the mantissa scan:
-      i  number of mantissa digits read (leading zeros included),
-      m  number of significant digits (leading zeros ignored),
-      n  number of significant digits stored in dig (at most LEN(dig)),
-      d  number of digits in front of the decimal point; 0 means "no decimal point / integer",
-      si write index into symbol.identifierString. *)
- PROCEDURE GetNumber(VAR symbol: Symbol; frac: BOOLEAN): Token;
- VAR i, nextInt, m, n, d, e, si: LONGINT;
-   dig: ARRAY 24 OF CHAR;
-   f: LONGREAL;
-   hex, neg, long: BOOLEAN;
-   hugeint, tenh: HUGEINT;
-   result: LONGINT;
-   (* append ch to the literal text; characters that do not fit are dropped so that
-      an over-long literal cannot index past the end of identifierString *)
-   PROCEDURE Append(ch: CHAR);
-   BEGIN
-     IF si < LEN(symbol.identifierString) - 1 THEN
-       symbol.identifierString[si] := ch;
-       INC(si)
-     END
-   END Append;
-   PROCEDURE Digit(ch: CHAR): LONGINT;
-   BEGIN
-     RETURN ORD(ch) - ORD('0')
-   END Digit;
-   PROCEDURE HexDigit(ch: CHAR): LONGINT;
-   BEGIN
-     IF (ch >= '0') & (ch <= '9') THEN
-       RETURN ORD(ch) - ORD('0')
-     END;
-     IF (ch >= 'A') & (ch <= 'F') THEN
-       RETURN ORD(ch) - ORD('A') + 10
-     END;
-     IF (ch >= 'a') & (ch <= 'f') THEN
-       RETURN ORD(ch) - ORD('a') + 10
-     END;
-     (* cannot happen *)
-     Error(Basic.NumberIllegalCharacter);
-     RETURN 0
-   END HexDigit;
-   (* 10^e for e >= 0, computed by repeated squaring *)
-   PROCEDURE Ten(e: LONGINT): LONGREAL;
-   VAR x, p: LONGREAL;
-   BEGIN
-     x := 1;
-     p := 10;
-     WHILE e > 0 DO
-       IF ODD(e) THEN
-         x := x * p
-       END;
-       e := e DIV 2;
-       IF e > 0 THEN
-         p := p * p
-       END
-     END;
-     RETURN x
-   END Ten;
- BEGIN
-   (* ch is '.' or Digit *)
-   si := 0;
-   hex := FALSE;
-   i := 0;
-   m := 0;
-   n := 0;
-   d := 0;
-   long := FALSE;
-   IF ~frac & (ch = '0') THEN
-     Append(ch);
-     GetNextCharacter;
-     IF (ch = 'X') OR (ch = 'x') THEN
-       Append(ch);
-       GetNextCharacter;
-       hex := TRUE
-     ELSE
-       (* The leading zero is a mantissa digit and must be counted. Otherwise a
-          decimal point or real suffix directly behind it would be recorded at
-          position d = 0, which means "integer", and e.g. 0.5 would be scanned
-          as the integer 5. *)
-       i := 1
-     END;
-   END;
-   IF frac THEN
-     Append('.');
-     (* fictitious zero mantissa digit in front of the point: *)
-     i := 1;
-     d := 1;
-   END;
-   LOOP (* read mantissa *)
-     IF ((ch >= '0') & (ch <= '9')) OR
-       (hex &
-       (((ch >= 'A') & (ch <= 'F')) OR
-       ((ch >= 'a') & (ch <= 'f')))) THEN
-       IF (m > 0) OR (ch # '0') THEN (* ignore leading zeros *)
-         IF n < LEN(dig) THEN
-           dig[n] := ch;
-           INC(n);
-         END;
-         INC(m);
-       END;
-       Append(ch);
-       GetNextCharacter;
-       INC(i)
-     ELSIF ~hex & (ch = '.') THEN
-       IF d # 0 THEN
-         EXIT
-       END;
-       Append(ch);
-       GetNextCharacter;
-       d := i
-     ELSE
-       EXIT
-     END
-   END;
-   IF hex & (i = 0) THEN
-     (* '0x' must be followed by at least one hexadecimal digit *)
-     Error(Basic.NumberIllegalCharacter)
-   END;
-   IF d = 0 THEN
-     (* no decimal point: a real suffix or an exponent still makes this a real literal *)
-     IF ((ch = 'F') OR (ch = 'f') OR
-       (ch = 'D') OR (ch = 'd') OR
-       (ch = 'E') OR (ch = 'e')) THEN
-       d := i
-     END;
-   ELSIF d = i THEN
-     (* '.' must be followed by a digit *)
-     Error(Basic.NumberIllegalCharacter)
-   END;
-   (* 0 <= n <= m <= i, 0 <= d <= i *)
-   IF d = 0 THEN (* integer *)
-     IF (ch = 'L') OR (ch = 'l') THEN
-       Append(ch);
-       GetNextCharacter;
-       long := TRUE
-     END;
-     IF n = m THEN
-       (* all significant digits fit into dig *)
-       symbol.integer := 0;
-       symbol.hugeint := 0;
-       i := 0;
-       IF hex THEN
-         IF ~long &
-           ((n < MaxHexDigits) OR
-           ((n = MaxHexDigits) & (dig[0] <= '7'))) THEN
-           WHILE i < n DO
-             symbol.integer := symbol.integer * 10H + HexDigit(dig[i]);
-             INC(i)
-           END;
-           symbol.numberType := IntNumber;
-           symbol.hugeint := symbol.integer;
-         ELSIF n <= MaxHugeHexDigits THEN
-           hugeint := 0;
-           IF (n = MaxHugeHexDigits) & (dig[0] > '7') THEN
-             (* prevent overflow *)
-             hugeint := -1
-           END;
-           WHILE i < n DO
-             hugeint := hugeint * 10H + HexDigit(dig[i]);
-             INC(i)
-           END;
-           symbol.numberType := LongNumber;
-           symbol.hugeint := hugeint;
-           symbol.integer := SHORT(symbol.hugeint);
-         ELSE
-           symbol.numberType := LongNumber;
-           Error(Basic.NumberTooLarge)
-         END
-       ELSE (* decimal *)
-         WHILE (i < n) & ~long DO
-           d := Digit(dig[i]);
-           INC(i);
-           nextInt := symbol.integer * 10 + d;
-           IF nextInt >= 0 THEN
-             symbol.integer := nextInt
-           ELSE (* overflow *)
-             long := TRUE
-           END;
-         END;
-         IF long THEN
-           (* restart computation *)
-           (*
-             CAUTION: Reportedly, Fox has or had certain limitations
-             working with HUGEINT that affected the original code.
-             Furthermore, at present Ronin supports HUGEINT as
-             a mere alias to LONGINT, therefore the following code
-             is just reserved for the future and long integer
-             constants are not yet supported.
-           *)
-           i := 0;
-           hugeint := 0;
-           tenh := 10;
-           WHILE i < n DO
-             d := Digit(dig[i]);
-             INC(i);
-             hugeint := hugeint * tenh + d;
-             IF hugeint < 0 THEN
-               Error(Basic.NumberTooLarge)
-             END
-           END;
-           symbol.numberType := LongNumber;
-           symbol.hugeint := hugeint;
-           symbol.integer := SHORT(symbol.hugeint);
-         ELSE
-           symbol.numberType := IntNumber;
-           symbol.hugeint := symbol.integer;
-         END
-       END
-     ELSE
-       (* more significant digits than dig can hold *)
-       symbol.numberType := LongNumber;
-       Error(Basic.NumberTooLarge)
-     END;
-     result := IntegerLiteral;
-   ELSE (* fraction *)
-     (* a suffix selects float or double; without suffix the literal is a double *)
-     IF (ch = 'F') OR (ch = 'f') THEN
-       Append(ch);
-       GetNextCharacter;
-       long := FALSE
-     ELSIF (ch = 'D') OR (ch = 'd') THEN
-       Append(ch);
-       GetNextCharacter;
-       long := TRUE
-     ELSE
-       long := TRUE
-     END;
-     f := 0;
-     e := 0;
-     (* build the mantissa as 0.d1d2..dn from the stored significant digits *)
-     WHILE n > 0 DO
-       (* 0 <= f < 1 *)
-       DEC(n);
-       f := (Digit(dig[n]) + f) / 10
-     END;
-     IF (ch = 'E') OR (ch = 'e') THEN
-       Append(ch);
-       GetNextCharacter;
-       neg := FALSE;
-       IF ch = '-' THEN
-         neg := TRUE;
-         Append(ch);
-         GetNextCharacter
-       ELSIF ch = '+' THEN
-         Append(ch);
-         GetNextCharacter
-       END;
-       IF (ch >= '0') & (ch <= '9') THEN
-         REPEAT
-           n := Digit(ch);
-           Append(ch);
-           GetNextCharacter;
-           IF e <= (MAX(INTEGER) - n) DIV 10 THEN
-             e := e * 10 + n
-           ELSE
-             Error(Basic.NumberTooLarge)
-           END
-         UNTIL (ch < '0') OR (ch > '9');
-         IF neg THEN
-           e := -e
-         END
-       ELSE
-         Error(Basic.NumberIllegalCharacter)
-       END
-     END;
-     (* decimal point shift *)
-     DEC(e, i-d-m);
-     IF long THEN
-       symbol.numberType := DoubleNumber;
-       IF (1 - MaxLongrealExponent < e) & (e <= MaxLongrealExponent) THEN
-         IF e < 0 THEN
-           symbol.real := f / Ten(-e)
-         ELSE
-           symbol.real := f * Ten(e)
-         END
-       ELSE
-         Error(Basic.NumberTooLarge)
-       END
-     ELSE
-       symbol.numberType := FloatNumber;
-       IF (1- MaxRealExponent < e) & (e <= MaxRealExponent) THEN
-         IF e < 0 THEN
-           symbol.real := f / Ten(-e)
-         ELSE
-           symbol.real := f * Ten(e)
-         END
-       ELSE
-         Error(Basic.NumberTooLarge)
-       END
-     END;
-     result := RealLiteral;
-   END;
-   symbol.identifierString[si] := 0X;
-   RETURN result;
- END GetNumber;
- (** read / skip a comment **)
- (**
- Comment =
- SingleLineComment |
- DelimitedComment.
- SingleLineComment =
- '//' {InputCharacter}.
- InputCharacter =
- any character except a NewLineCharacter.
- NewlineCharacter =
- Carriage return character (0DX) |
- Line feed character (0AX) |
- EOT (0X).
- DelimitedComment =
- '/*' [DelimitedCommentText] Asterisks '/'.
- DelimitedCommentText =
- DelimitedCommentSection {DelimitedCommentSection}.
- DelimitedCommentSection =
- NotAsterisk |
- Asterisks NotSlash.
- Asterisks =
- '*' {'*'}.
- NotAsterisk =
- any character except *.
- NotSlash =
- any character except /.
- **)
- (* Collect the text of a '//' comment (both slashes are already consumed) up to,
-    but not including, the next newline character or end of text.
-    NOTE(review): the text is fetched with stringMaker.GetString whereas GetString
-    of this scanner uses GetStringCopy; whether the result aliases the string maker
-    buffer could not be determined from this module. *)
- PROCEDURE ReadSingleLineComment(VAR symbol: Symbol);
- BEGIN
-   stringMaker.Clear;
-   WHILE ~IsNewlineCharacter(ch) & (ch # EOT) DO
-     stringWriter.Char(ch);
-     GetNextCharacter
-   END;
-   stringWriter.Char(0X);
-   stringWriter.Update;
-   symbol.string := stringMaker.GetString(symbol.stringLength);
-   symbol.token := Comment
- END ReadSingleLineComment;
- (* Collect the text of a delimited comment (the opening slash-asterisk is already
-    consumed) up to the closing asterisk-slash, which is consumed but not stored.
-    Reaching end of text first is reported as Basic.CommentNotClosed. *)
- PROCEDURE ReadDelimitedComment(VAR symbol: Symbol);
- VAR closed: BOOLEAN;
- BEGIN
-   stringMaker.Clear;
-   closed := FALSE;
-   WHILE ~closed & (ch # EOT) DO
-     IF ch # '*' THEN
-       stringWriter.Char(ch);
-       GetNextCharacter
-     ELSE
-       GetNextCharacter;
-       IF ch = '/' THEN
-         GetNextCharacter;
-         closed := TRUE
-       ELSE
-         (* a lone asterisk belongs to the text; the character after it is examined in the next round *)
-         stringWriter.Char('*')
-       END
-     END
-   END;
-   IF ~closed THEN
-     Error(Basic.CommentNotClosed)
-   END;
-   stringWriter.Char(0X);
-   stringWriter.Update;
-   symbol.string := stringMaker.GetString(symbol.stringLength);
-   symbol.token := Comment
- END ReadDelimitedComment;
- (* Skip C# whitespace: space (20X), '\t' (09X), '\v' (0BX), '\f' (0CX) and the
-    newline characters '\r' (0DX) and '\n' (0AX). The loop stops at any other
-    character, which includes EOT, so no separate end-of-text test is required. *)
- PROCEDURE SkipBlanks;
- BEGIN
-   WHILE (ch = 20X) OR (ch = TAB) OR (ch = 0BX) OR (ch = 0CX) OR
-     (ch = CR) OR (ch = LF) DO
-     GetNextCharacter
-   END
- END SkipBlanks;
- (** get next symbol **)
- (* Scan the next symbol from the input into symbol.
-    Whitespace is skipped first; comments are NOT skipped but returned as Comment symbols.
-    symbol.position is the scanner position at the start of the symbol and symbol.position.end
-    the stream offset after it. Returns TRUE iff the scanner error flag is not set; the flag
-    stays set after an error until ResetError or ResetErrorDiagnostics is called. *)
- PROCEDURE GetNextSymbol*(VAR symbol: Symbol): BOOLEAN;
- VAR s, token: LONGINT;
- BEGIN
- SkipBlanks;
- symbol.position := position;
- stringMaker.Clear;
- (* @@@ *)
- (*
- KernelLog.String("GetNextSymbol ");
- KernelLog.Char(ch);
- KernelLog.Ln();
- *)
- (* dispatch on the first character; operators are recognised by longest match *)
- CASE ch OF
- EOT:
- (* end of text is not consumed: every further call yields EndOfText again *)
- s := EndOfText
- | DoubleQuote:
- s := StringLiteral;
- GetString(symbol)
- | SingleQuote:
- s := CharacterLiteral;
- GetCharacter(symbol)
- | '!':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := ExclamationEqual
- ELSE
- s := Exclamation
- END
- | '%':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := PercentEqual
- ELSE
- s := Percent
- END
- | '&':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := AndEqual
- ELSIF ch = '&' THEN
- GetNextCharacter;
- s := AndAnd
- ELSE
- s := And
- END
- | '(':
- s := LeftParenthesis;
- GetNextCharacter
- | ')':
- s := RightParenthesis;
- GetNextCharacter
- | '*':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := TimesEqual
- ELSE
- s := Times
- END
- | '+':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := PlusEqual
- ELSIF ch = '+' THEN
- GetNextCharacter;
- s := PlusPlus
- ELSE
- s := Plus
- END
- | ',':
- s := Comma;
- GetNextCharacter
- | '-':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := MinusEqual
- ELSIF ch = '-' THEN
- GetNextCharacter;
- s := MinusMinus
- ELSE
- s := Minus
- END
- | '.':
- (* a period directly followed by a digit starts a real literal *)
- GetNextCharacter;
- IF (ch >= '0') & (ch <= '9') THEN
- s := GetNumber(symbol, TRUE)
- ELSE
- s := Period
- END
- | '/':
- (* division operator, division assignment, or one of the two comment forms *)
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := SlashEqual
- ELSIF ch = '/' THEN
- GetNextCharacter;
- ReadSingleLineComment(symbol);
- s := Comment
- ELSIF ch = '*' THEN
- GetNextCharacter;
- ReadDelimitedComment(symbol);
- s := Comment
- ELSE
- s := Slash
- END
- | '0' .. '9':
- s := GetNumber(symbol, FALSE)
- | ':':
- s := Colon;
- GetNextCharacter
- | ';':
- s := Semicolon;
- GetNextCharacter
- | '<':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := LessEqual
- ELSIF ch = '<' THEN
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := LeftShiftEqual
- ELSE
- s := LeftShift
- END
- ELSE
- s := Less
- END
- | '=':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := EqualEqual
- ELSE
- s := Equal
- END
- | '>':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := GreaterEqual
- ELSIF ch = '>' THEN
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := RightShiftEqual
- ELSE
- s := RightShift
- END
- ELSE
- s := Greater
- END
- | '?':
- GetNextCharacter;
- IF ch = '?' THEN
- GetNextCharacter;
- s := QuestionQuestion
- ELSE
- s := Question;
- END
- | '[':
- s := LeftBracket;
- GetNextCharacter
- | ']':
- s := RightBracket;
- GetNextCharacter
- | '^':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := ArrowEqual
- ELSE
- s := Arrow
- END
- | '{':
- s := LeftBrace;
- GetNextCharacter
- | '|':
- GetNextCharacter;
- IF ch = '=' THEN
- GetNextCharacter;
- s := BarEqual
- ELSIF ch = '|' THEN
- GetNextCharacter;
- s := BarBar
- ELSE
- s := Bar
- END
- | '}':
- s := RightBrace;
- GetNextCharacter
- | '~':
- s := Tilde;
- GetNextCharacter
- | 'A' .. 'Z':
- (* no keyword lookup here: all keywords entered by InitKeywords start with a lower-case letter *)
- s := Identifier;
- GetIdentifier(symbol)
- | 'a' .. 'z':
- (* identifier or keyword: a non-negative index from the keyword table replaces the Identifier token *)
- s := Identifier;
- GetIdentifier(symbol);
- token := keywords.IndexByIdentifier(symbol.identifier);
- IF (token >= 0) THEN
- s := token
- END;
- ELSE
- (* every remaining character lands here: '_' starts an ordinary identifier.
- NOTE(review): characters that cannot start any symbol are also passed to GetIdentifier, which always
- consumes the current character; they are therefore returned as Identifier without an error - confirm that this is intended *)
- s := Identifier;
- GetIdentifier(symbol)
- END;
- symbol.token := s;
- symbol.position.end := position.start;
- IF Trace THEN
- OutSymbol(D.Log, symbol);
- D.Ln;
- END;
- RETURN ~error
- END GetNextSymbol;
- (* clear the error flag so that GetNextSymbol returns TRUE again until the next scanning error *)
- PROCEDURE ResetError*();
- BEGIN
- error := FALSE
- END ResetError;
- (** set the diagnostics mode of the scanner
- (diagnostics = NIL ==> no report) and reset the error state
- intended for silent symbol peeking after the end of a module *)
- (* Exchange the diagnostics object of the scanner with the one passed in and
-    clear the error flag. On return the parameter holds the diagnostics object
-    that was installed before the call, so the caller can restore it later. *)
- PROCEDURE ResetErrorDiagnostics*(VAR diagnostics: Diagnostics.Diagnostics);
- VAR
-   previous: Diagnostics.Diagnostics;
- BEGIN
-   previous := SELF.diagnostics;
-   SELF.diagnostics := diagnostics;
-   diagnostics := previous;
-   error := FALSE
- END ResetErrorDiagnostics;
- END Scanner;
- (** return a new scanner on a stream, error output via diagnostics **)
- (* Factory: create a scanner for reader. source is the name used in error
-    reports, position the initial stream offset, and diagnostics the error sink.
-    All arguments are passed unchanged to the scanner initializer. *)
- PROCEDURE NewScanner*(
-   CONST source: ARRAY OF CHAR;
-   reader: Streams.Reader;
-   position: LONGINT;
-   diagnostics: Diagnostics.Diagnostics): Scanner;
- VAR scanner: Scanner;
- BEGIN
-   NEW(scanner, source, reader, position, diagnostics);
-   RETURN scanner
- END NewScanner;
- VAR
- reservedCharacter: ARRAY 256 OF BOOLEAN;
- tokens-: ARRAY EndOfText+1 OF Keyword;
- keywords: KeywordTable;
- (* Store a textual form of symbol in str:
-    identifiers and numeric literals yield their scanned text, string literals and
-    comments yield their contents (str must be at least as long as the symbol's
-    string array), and every other token yields the text registered in the keyword
-    table, or the empty string if the table returns a negative index. *)
- PROCEDURE SymbolToString*(CONST symbol: Symbol; VAR str: ARRAY OF CHAR);
- VAR id: StringPool.Index;
-   token: Token;
- BEGIN
-   token := symbol.token;
-   IF (token = Identifier) OR (token = IntegerLiteral) OR (token = RealLiteral) THEN
-     COPY(symbol.identifierString, str)
-   ELSIF (token = StringLiteral) OR (token = Comment) THEN
-     ASSERT(LEN(str) >= LEN(symbol.string^));
-     COPY(symbol.string^, str)
-   ELSE
-     GetKeyword(token, id);
-     IF id >= 0 THEN
-       StringPool.GetString(id, str)
-     ELSE
-       str[0] := 0X
-     END
-   END
- END SymbolToString;
- (** debugging output **)
- (* Write a one-line description of symbol to w: start and end offset, the token
-    name from the tokens table, and token-specific detail (number type, character
-    value, string contents, comment text, or the text given by SymbolToString). *)
- PROCEDURE OutSymbol*(w: Streams.Writer; CONST symbol: Symbol);
- VAR text: ARRAY 256 OF CHAR;
-   k: LONGINT;
-   (* write ch as is when it lies in one of the ranges tested below, otherwise as a backslash-x hexadecimal escape *)
-   PROCEDURE OutChar(ch: CHAR);
-   VAR plain: BOOLEAN;
-   BEGIN
-     plain := ((ch >= 20X) & (ch < 7FX)) OR ((ch > 0A0X) & (ch # 0ADX));
-     IF plain THEN
-       w.Char(ch)
-     ELSE
-       w.Char(Backslash);
-       w.Char('x');
-       w.Hex(ORD(ch), 1)
-     END
-   END OutChar;
- BEGIN
-   w.Int(symbol.position.start, 1);
-   w.String("-");
-   w.Int(symbol.position.end, 1);
-   w.String(":");
-   w.String(tokens[symbol.token]);
-   CASE symbol.token OF
-     IntegerLiteral, RealLiteral:
-       (* no ELSE branch: the number type is expected to be one of the four number type constants *)
-       CASE symbol.numberType OF
-         IntNumber:
-           w.String("(int)")
-       | LongNumber:
-           w.String("(long)")
-       | FloatNumber:
-           w.String("(float)")
-       | DoubleNumber:
-           w.String("(double)")
-       END
-   | CharacterLiteral:
-       w.String(":");
-       w.Char("'");
-       OutChar(symbol.character);
-       w.Char("'")
-   | StringLiteral:
-       w.String(":");
-       w.Char('"');
-       k := 0;
-       WHILE symbol.string^[k] # 0X DO
-         OutChar(symbol.string^[k]);
-         INC(k)
-       END;
-       w.Char('"')
-   | Comment:
-       w.String("/*");
-       w.String(symbol.string^);
-       w.String("*/")
-   ELSE
-     SymbolToString(symbol, text);
-     w.String(": ");
-     w.String(text)
-   END
- END OutSymbol;
- (** reserved characters are the characters that may not occur within an identifier **)
- (* Fill the reservedCharacter table: letters 'a'..'z' and 'A'..'Z', digits and
-    the underscore may occur within an identifier; every other character code is reserved. *)
- PROCEDURE InitReservedCharacters;
- VAR i: LONGINT;
-   c: CHAR;
- BEGIN
-   FOR i := 0 TO LEN(reservedCharacter) - 1 DO
-     c := CHR(i);
-     reservedCharacter[i] :=
-       ~(((c >= 'a') & (c <= 'z')) OR
-         ((c >= 'A') & (c <= 'Z')) OR
-         ((c >= '0') & (c <= '9')) OR
-         (c = '_'))
-   END
- END InitReservedCharacters;
- (* get keyword by token *)
- (* Look up the identifier registered for token in the keyword table and return it in identifier.
- SymbolToString treats a negative result as "no text registered for this token". *)
- PROCEDURE GetKeyword*(token: LONGINT; VAR identifier: IdentifierType);
- BEGIN
- keywords.IdentifierByIndex(token, identifier);
- END GetKeyword;
- (* Fill the tokens table with the name of every token constant (used by OutSymbol for
- debugging output) and check that no entry in 0 .. EndOfText has been left empty. *)
- PROCEDURE InitTokens;
- VAR i: LONGINT;
- BEGIN
- tokens[None] := "None";
- (* operators and punctuators *)
- tokens[Exclamation] := "Exclamation";
- tokens[ExclamationEqual] := "ExclamationEqual";
- tokens[Percent] := "Percent";
- tokens[PercentEqual] := "PercentEqual";
- tokens[And] := "And";
- tokens[AndEqual] := "AndEqual";
- tokens[AndAnd] := "AndAnd";
- tokens[LeftParenthesis] := "LeftParenthesis";
- tokens[RightParenthesis] := "RightParenthesis";
- tokens[Times] := "Times";
- tokens[TimesEqual] := "TimesEqual";
- tokens[Plus] := "Plus";
- tokens[PlusEqual] := "PlusEqual";
- tokens[PlusPlus] := "PlusPlus";
- tokens[Comma] := "Comma";
- tokens[Minus] := "Minus";
- tokens[MinusEqual] := "MinusEqual";
- tokens[MinusMinus] := "MinusMinus";
- tokens[Period] := "Period";
- tokens[Slash] := "Slash";
- tokens[SlashEqual] := "SlashEqual";
- tokens[Colon] := "Colon";
- tokens[Semicolon] := "Semicolon";
- tokens[Less] := "Less";
- tokens[LessEqual] := "LessEqual";
- tokens[LeftShift] := "LeftShift";
- tokens[LeftShiftEqual] := "LeftShiftEqual";
- tokens[Equal] := "Equal";
- tokens[EqualEqual] := "EqualEqual";
- tokens[Greater] := "Greater";
- tokens[GreaterEqual] := "GreaterEqual";
- tokens[RightShift] := "RightShift";
- tokens[RightShiftEqual] := "RightShiftEqual";
- tokens[LeftBracket] := "LeftBracket";
- tokens[RightBracket] := "RightBracket";
- tokens[Arrow] := "Arrow";
- tokens[ArrowEqual] := "ArrowEqual";
- tokens[LeftBrace] := "LeftBrace";
- tokens[Bar] := "Bar";
- tokens[BarEqual] := "BarEqual";
- tokens[BarBar] := "BarBar";
- tokens[RightBrace] := "RightBrace";
- tokens[Tilde] := "Tilde";
- (* keywords *)
- tokens[As] := "As";
- tokens[Base] := "Base";
- tokens[Bool] := "Bool";
- tokens[Break] := "Break";
- tokens[Case] := "Case";
- tokens[Char] := "Char";
- tokens[Class] := "Class";
- tokens[Const] := "Const";
- tokens[Default] := "Default";
- tokens[Delegate] := "Delegate";
- tokens[Do] := "Do";
- tokens[Double] := "Double";
- tokens[Else] := "Else";
- tokens[False] := "False";
- tokens[Float] := "Float";
- tokens[For] := "For";
- tokens[If] := "If";
- tokens[Import] := "Import";
- tokens[Int] := "Int";
- tokens[Internal] := "Internal";
- tokens[Is] := "Is";
- tokens[Long] := "Long";
- tokens[Module] := "Module";
- tokens[New] := "New";
- tokens[Null] := "Null";
- tokens[Object] := "Object";
- tokens[Public] := "Public";
- tokens[Readonly] := "Readonly";
- tokens[Ref] := "Ref";
- tokens[Return] := "Return";
- tokens[Sbyte] := "Sbyte";
- tokens[Short] := "Short";
- tokens[String] := "String";
- tokens[Struct] := "Struct";
- tokens[Switch] := "Switch";
- tokens[This] := "This";
- tokens[True] := "True";
- tokens[Void] := "Void";
- tokens[While] := "While";
- (* identifiers, literals, comments and end of text *)
- tokens[Identifier] := "Identifier";
- tokens[IntegerLiteral] := "IntegerLiteral";
- tokens[RealLiteral] := "RealLiteral";
- tokens[CharacterLiteral] := "CharacterLiteral";
- tokens[StringLiteral] := "StringLiteral";
- tokens[Comment] := "Comment";
- tokens[EndOfText] := "EndOfText";
- (* Active Cells *)
- tokens[Cell] := "Cell";
- tokens[Cellnet] := "Cellnet";
- tokens[In] := "In";
- tokens[Out] := "Out";
- tokens[Select] := "Select";
- tokens[Question] := "Question";
- tokens[QuestionQuestion] := "QuestionQuestion";
- (* consistency check: a token constant added without a name entry trips this assertion at module load *)
- FOR i := 0 TO EndOfText DO
- ASSERT(tokens[i] # "")
- END;
- END InitTokens;
- (** enter keywords in the list of keywords **)
- (* Create the keyword table and register the source spelling of every keyword,
-    operator and punctuator under its token number. The same spelling is passed
-    to Basic.SetErrorExpected for the token (presumably the text shown when the
-    parser expects that token - confirm in FoxBasic). Finally the messages for
-    missing identifiers and literals are set. *)
- PROCEDURE InitKeywords;
-   (* register name for token in the keyword table and as the expected-text of token *)
-   PROCEDURE Enter(CONST name: ARRAY OF CHAR; token: LONGINT);
-   BEGIN
-     keywords.PutString(name, token);
-     Basic.SetErrorExpected(token, name);
-   END Enter;
- BEGIN
-   NEW(keywords,EndOfText+1);
-   (* keywords *)
-   Enter("as", As);
-   Enter("base", Base);
-   Enter("bool", Bool);
-   Enter("break", Break);
-   Enter("case", Case);
-   Enter("char", Char);
-   Enter("class", Class);
-   Enter("const", Const);
-   Enter("default", Default);
-   Enter("delegate", Delegate);
-   Enter("do", Do);
-   Enter("double", Double);
-   Enter("else", Else);
-   Enter("false", False);
-   Enter("float", Float);
-   Enter("for", For);
-   Enter("if", If);
-   Enter("import", Import);
-   Enter("int", Int);
-   Enter("internal", Internal);
-   Enter("is", Is);
-   Enter("long", Long);
-   Enter("module", Module);
-   Enter("new", New);
-   Enter("null", Null);
-   Enter("object", Object);
-   Enter("public", Public);
-   Enter("readonly", Readonly);
-   Enter("ref", Ref);
-   Enter("return", Return);
-   Enter("sbyte", Sbyte);
-   Enter("short", Short);
-   Enter("string", String);
-   Enter("struct", Struct);
-   Enter("switch", Switch);
-   Enter("this", This);
-   Enter("true", True);
-   Enter("void", Void);
-   Enter("while", While);
-   (* operators and punctuators *)
-   Enter("!", Exclamation);
-   Enter("!=", ExclamationEqual);
-   Enter("%", Percent);
-   Enter("%=", PercentEqual);
-   Enter("&", And);
-   Enter("&=", AndEqual);
-   Enter("&&", AndAnd);
-   Enter("(", LeftParenthesis);
-   Enter(")", RightParenthesis);
-   Enter("*", Times);
-   Enter("*=", TimesEqual);
-   Enter("+", Plus);
-   Enter("+=", PlusEqual);
-   Enter("++", PlusPlus);
-   Enter(",", Comma);
-   Enter("-", Minus);
-   Enter("-=", MinusEqual);
-   Enter("--", MinusMinus);
-   Enter(".", Period);
-   Enter("/", Slash);
-   Enter("/=", SlashEqual);
-   Enter(":", Colon);
-   Enter(";", Semicolon);
-   Enter("<", Less);
-   Enter("<=", LessEqual);
-   Enter("<<", LeftShift);
-   Enter("<<=", LeftShiftEqual);
-   Enter("=", Equal);
-   Enter("==", EqualEqual);
-   Enter(">", Greater);
-   Enter(">=", GreaterEqual);
-   Enter(">>", RightShift);
-   Enter(">>=", RightShiftEqual);
-   Enter("[", LeftBracket);
-   Enter("]", RightBracket);
-   Enter("^", Arrow);
-   Enter("^=", ArrowEqual);
-   Enter("{", LeftBrace);
-   Enter("|", Bar);
-   (* BarEqual is the bar-assignment operator; it used to be registered under the
-      closing brace, which duplicated the RightBrace spelling below *)
-   Enter("|=", BarEqual);
-   Enter("||", BarBar);
-   Enter("}", RightBrace);
-   Enter("~", Tilde);
-   (* Active Cells *)
-   Enter("cell", Cell);
-   Enter("cellnet", Cellnet);
-   Enter("in", In);
-   Enter("out", Out);
-   Enter("select", Select);
-   Enter("?", Question);
-   Enter("??", QuestionQuestion);
-   (* tokens without a fixed spelling get an explicit message instead *)
-   Basic.SetErrorMessage(Identifier, "missing identifier");
-   Basic.SetErrorMessage(IntegerLiteral, "missing integer literal");
-   Basic.SetErrorMessage(RealLiteral, "missing real literal");
-   Basic.SetErrorMessage(CharacterLiteral, "missing character literal");
-   Basic.SetErrorMessage(StringLiteral, "missing string literal");
- END InitKeywords;
- (** debugging / reporting **)
- (* Command: for every token number 0 .. EndOfText write one line of the form
-    number, colon, and the quoted string stored for it in the keyword table
-    to the output stream of the command context. *)
- PROCEDURE ReportKeywords*(context: Commands.Context);
- VAR token: LONGINT;
-   text: Keyword;
- BEGIN
-   token := 0;
-   WHILE token <= EndOfText DO
-     context.out.Int(token, 1);
-     context.out.String(": ");
-     context.out.Char('"');
-     keywords.StringByIndex(token, text);
-     context.out.String(text);
-     context.out.Char('"');
-     context.out.Ln;
-     INC(token)
-   END
- END ReportKeywords;
- (*
- PROCEDURE TestScanner*(context: Commands.Context);
- VAR filename: ARRAY 256 OF CHAR;
- reader: Streams.Reader;
- scanner: Scanner;
- sym: Symbol;
- BEGIN
- context.arg.SkipWhitespace;
- context.arg.String(filename);
- reader := TextUtilities.GetTextReader(filename);
- scanner := NewScanner(filename, reader, 0, NIL);
- REPEAT
- IF scanner.GetNextSymbol(sym) THEN
- OutSymbol(context.out, sym);
- context.out.Ln;
- END;
- UNTIL scanner.error OR (sym.token = EndOfText)
- END TestScanner;
- *)
- BEGIN
- InitReservedCharacters;
- InitTokens;
- InitKeywords
- END FoxCSharpScanner.
|