MODULE BimboScanner; IMPORT Trace, Texts, Streams, UTF8Strings, Strings; CONST Eot* = 0X; ObjectMarker = 020X; LF = 0AX; (* numtyp values *) char* = 1; integer* = 2; longinteger* = 3; real* = 4; longreal* = 5; MaxHDig* = 8; (* maximal hexadecimal longint length *) MaxHHDig* = 16; (* maximal hexadecimal hugeint length *) MaxRExp* = 38; (* maximal real exponent *) MaxLExp* = 308; (* maximal longreal exponent *) null* = 0; times* = 1; slash* = 2; div* = 3; mod* = 4; and* = 5; plus* = 6; minus* = 7; or* = 8; eql* = 9; neq* = 10; lss* = 11; leq* = 12; gtr* = 13; geq* = 14; in* = 15; is* = 16; arrow* = 17; period* = 18; comma* = 19; colon* = 20; upto* = 21; rparen* = 22; rbrak* = 23; rbrace* = 24; of* = 25; then* = 26; do* = 27; to* = 28; by* = 29; lparen* = 30; lbrak* = 31; lbrace* = 32; not* = 33; becomes* = 34; number* = 35; nil* = 36; true* = 37; false* = 38; string* = 39; ident* = 40; semicolon* = 41; bar* = 42; end* = 43; else* = 44; elsif* = 45; until* = 46; if* = 47; case* = 48; while* = 49; repeat* = 50; for* = 51; loop* = 52; with* = 53; exit* = 54; passivate* = 55; return* = 56; refines* = 57; implements* = 58; array* = 59; definition* = 60; object* = 61; record* = 62; pointer* = 63; begin* = 64; code* = 65; const* = 66; type* = 67; var* = 68; procedure* = 69; import* = 70; module* = 71; eof* = 72; comment* = 73; newLine* = 74; question* = 75; finally* = 76; VAR reservedChar-, ignoredChar, newChar-: ARRAY 256 OF BOOLEAN; TYPE StringMaker = OBJECT VAR length : LONGINT; data : Strings.String; PROCEDURE &Init(initialSize : LONGINT); BEGIN IF initialSize < 256 THEN initialSize := 256 END; NEW(data, initialSize); length := 0; END Init; PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: WORD); VAR i : LONGINT; n : Strings.String; BEGIN IF length + len + 1 >= LEN(data) THEN NEW(n, LEN(data) * 2 + len + 1); FOR i := 0 TO length - 1 DO n[i] := data[i] END; data := n END; WHILE len > 0 DO data[length] := buf[ofs]; INC(ofs); INC(length); DEC(len) END; data[length] := 0X; END Add; (* remove last n characters *) PROCEDURE Shorten(n : LONGINT); BEGIN DEC(length, n); IF length < 0 THEN length := 0 END; IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END END Shorten; PROCEDURE Clear*; BEGIN data[0] := 0X; length := 0 END Clear; PROCEDURE GetWriter*() : Streams.Writer; VAR w : Streams.Writer; BEGIN NEW(w, SELF.Add, 256); RETURN w END GetWriter; PROCEDURE GetLength*() : LONGINT; BEGIN RETURN length END GetLength; PROCEDURE GetString*() : Strings.String; BEGIN RETURN data END GetString; END StringMaker; Scanner* = OBJECT VAR buffer: Strings.String; pos-: LONGINT; (*pos in buffer*) ch-: CHAR; (**look-ahead *) str-: ARRAY 1024 OF CHAR; sym- : LONGINT; numtyp-: INTEGER; (* 1 = char, 2 = integer, 3 = real, 4 = longreal *) intval-: LONGINT; (* integer value or string length *) longintval-: HUGEINT; realval-: REAL; lrlval-: LONGREAL; numStartPos, numEndPos: LONGINT; lastpos-, curpos-, errpos-: LONGINT; (*pos in text*) isNummer: BOOLEAN; commentStr- : StringMaker; cw : Streams.Writer; PROCEDURE &Init; BEGIN NEW(commentStr, 1024); cw := commentStr.GetWriter() END Init; PROCEDURE err(n: INTEGER); BEGIN END err; PROCEDURE NextChar*; BEGIN IF pos < LEN(buffer) THEN ch := buffer[pos]; INC(pos) ELSE ch := Eot END; IF newChar[ORD(ch)] THEN INC(curpos) END; (* curpos := pos; *) END NextChar; PROCEDURE Str(VAR sym: LONGINT); VAR i: LONGINT; och: CHAR; BEGIN i := 0; och := ch; LOOP NextChar; IF ch = och THEN EXIT END ; IF ch < " " THEN err(3); EXIT END ; IF i = LEN(str)-1 THEN err(241); EXIT END ; str[i] := ch; INC(i) END ; NextChar; str[i] := 0X; IF i = 1 THEN sym := number ELSE sym := string END END Str; PROCEDURE Identifier(VAR sym: LONGINT); VAR i: LONGINT; BEGIN i := 0; REPEAT str[i] := ch; INC(i); NextChar UNTIL reservedChar[ORD(ch)] OR (i = LEN(str)); IF i = LEN(str) THEN err(240); DEC(i) END ; str[i] := 0X; sym := ident; (* temporary code! delete when moving to ANY and adapt PCT *) IF str = "ANY" THEN COPY("PTR", str) END; END Identifier; PROCEDURE Number; VAR i, m, n, d, e: INTEGER; dig: ARRAY 24 OF CHAR; f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN; PROCEDURE Ten(e: INTEGER): LONGREAL; VAR x, p: LONGREAL; BEGIN x := 1; p := 10; WHILE e > 0 DO IF ODD(e) THEN x := x*p END; e := e DIV 2; IF e > 0 THEN p := p*p END (* prevent overflow *) END; RETURN x END Ten; PROCEDURE Ord(ch: CHAR; hex: BOOLEAN): INTEGER; BEGIN (* ("0" <= ch) & (ch <= "9") OR ("A" <= ch) & (ch <= "F") *) IF ch <= "9" THEN RETURN ORD(ch) - ORD("0") ELSIF hex THEN RETURN ORD(ch) - ORD("A") + 10 ELSE err(2); RETURN 0 END END Ord; BEGIN (* ("0" <= ch) & (ch <= "9") *) i := 0; m := 0; n := 0; d := 0; long := FALSE; LOOP (* read mantissa *) IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN IF (m > 0) OR (ch # "0") THEN (* ignore leading zeros *) IF n < LEN(dig) THEN dig[n] := ch; INC(n) END; INC(m) END; NextChar; INC(i) ELSIF ch = "." THEN NextChar; IF ch = "." THEN (* ellipsis *) ch := 7FX; EXIT ELSIF d = 0 THEN (* i > 0 *) d := i ELSE err(2) END ELSE EXIT END END; (* 0 <= n <= m <= i, 0 <= d <= i *) IF d = 0 THEN (* integer *) IF n = m THEN intval := 0; i := 0; (* > bootstrap 1 *) longintval := 0; (* < bootstrap 1 *) IF ch = "X" THEN (* character *) NextChar; numtyp := char; (* IF PCM.LocalUnicodeSupport & (n <= 8) THEN IF (n = 8) & (dig[0] > "7") THEN (* prevent overflow *) intval := -1 END; WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END ELSIF ~PCM.LocalUnicodeSupport & (n <= 2) THEN WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END ELSE err(203) END *) ELSIF ch = "H" THEN (* hexadecimal *) NextChar; IF n <= MaxHDig THEN numtyp := integer; IF (n = MaxHDig) & (dig[0] > "7") THEN (* prevent overflow *) intval := -1 END; WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END (* > bootstrap 1 *) ELSIF n <= MaxHHDig THEN numtyp := longinteger; IF (n = MaxHHDig) & (dig[0] > "7") THEN (* prevent overflow *) longintval := -1 END; WHILE i < n DO longintval := Ord(dig[i], TRUE) + longintval*10H; INC(i) END (* < bootstrap 1 *) ELSE err(203) END ELSE (* decimal *) numtyp := integer; WHILE i < n DO d := Ord(dig[i], FALSE); INC(i); IF intval <= (MAX(LONGINT) - d) DIV 10 THEN intval := intval*10 + d (* > bootstrap 2 ELSE err(203) < bootstrap 2 *) (* > bootstrap 1 *) ELSE long := TRUE (* < bootstrap 1 *) END END; (* > bootstrap 1 *) IF long THEN numtyp := longinteger; longintval := LONG(intval)*10+d; WHILE i < n DO d := Ord(dig[i], FALSE); INC(i); IF longintval*10+d >= 0 THEN longintval := longintval*10 + d ELSE err(203) END END END (* < bootstrap 1 *) END ELSE err(203) END ELSE (* fraction *) f := 0; e := 0; expCh := "E"; WHILE n > 0 DO (* 0 <= f < 1 *) DEC(n); f := (Ord(dig[n], FALSE) + f)/10 END; IF (ch = "E") OR (ch = "D") THEN expCh := ch; NextChar; neg := FALSE; IF ch = "-" THEN neg := TRUE; NextChar ELSIF ch = "+" THEN NextChar END; IF ("0" <= ch) & (ch <= "9") THEN REPEAT n := Ord(ch, FALSE); NextChar; IF e <= (MAX(INTEGER) - n) DIV 10 THEN e := e*10 + n ELSE err(203) END UNTIL (ch < "0") OR ("9" < ch); IF neg THEN e := -e END ELSE err(2) END END; DEC(e, i-d-m); (* decimal point shift *) IF expCh = "E" THEN numtyp := real; IF (1-MaxRExp < e) & (e <= MaxRExp) THEN IF e < 0 THEN realval := SHORT(f / Ten(-e)) ELSE realval := SHORT(f * Ten(e)) END ELSE err(203) END ELSE numtyp := longreal; IF (1-MaxLExp < e) & (e <= MaxLExp) THEN IF e < 0 THEN lrlval := f / Ten(-e) ELSE lrlval := f * Ten(e) END ELSE err(203) END END END END Number; PROCEDURE GetNumAsString*(VAR val: ARRAY OF CHAR); VAR i, l: LONGINT; BEGIN (*Strings.Copy(buffer^, numStartPos, numEndPos-numStartPos, val);*) IF isNummer THEN i := 0; l := LEN(val)-1; WHILE (i < numEndPos-numStartPos) & (i < l) DO val[i] := buffer[numStartPos + i]; INC(i); END; END; val[i] := 0X END GetNumAsString; PROCEDURE Get(VAR s: LONGINT); PROCEDURE Comment; (* do not read after end of file *) BEGIN NextChar; cw.Char(ch); LOOP LOOP WHILE ch = "(" DO NextChar; cw.Char(ch); IF ch = "*" THEN Comment END END; IF ch = "*" THEN NextChar; cw.Char(ch); EXIT END ; IF ch = Eot THEN EXIT END ; NextChar; cw.Char(ch); END ; IF ch = ")" THEN NextChar; cw.Char(ch); EXIT END ; IF ch = Eot THEN err(5); EXIT END END; END Comment; BEGIN REPEAT WHILE (ignoredChar[ORD(ch)]) DO (*ignore control characters*) IF ch = Eot THEN s := eof; RETURN ELSE NextChar END END ; lastpos := curpos - 1; errpos := curpos - 1; isNummer := FALSE; CASE ch OF (* ch > " " *) | LF: s := newLine; NextChar | 22X, 27X : Str(s) | "#" : s := neq; NextChar | "&" : s := and; NextChar | "(" : NextChar; IF ch = "*" THEN commentStr.Clear; Comment; cw.Update; commentStr.Shorten(2); s := comment; (*allow recursion without reentrancy*) ELSE s := lparen END | ")" : s := rparen; NextChar | "*" : s:=times; NextChar | "+" : s := plus; NextChar | "," : s := comma; NextChar | "-" : s := minus; NextChar | "." : NextChar; IF ch = "." THEN NextChar; s := upto ELSE s := period END | "/" : s := slash; NextChar | "0".."9": isNummer := TRUE; numStartPos := pos-1; (* WHILE (ch >="0") & (ch <= "9") OR (ch >= "A") & (ch <="F") OR (ch="H") OR (ch="X") OR (ch=".") DO NextChar END; *) Number; numEndPos := pos-1; s := number | ":" : NextChar; IF ch = "=" THEN NextChar; s := becomes ELSE s := colon END | ";" : s := semicolon; NextChar | "<" : NextChar; IF ch = "=" THEN NextChar; s := leq; ELSE s := lss; END | "=" : s := eql; NextChar | ">" : NextChar; IF ch = "=" THEN NextChar; s := geq; ELSE s := gtr; END | "A": Identifier(s); IF str = "ARRAY" THEN s := array ELSIF str = "AWAIT" THEN s := passivate END | "B": Identifier(s); IF str = "BEGIN" THEN s := begin ELSIF str = "BY" THEN s := by END | "C": Identifier(s); IF str = "CONST" THEN s := const ELSIF str = "CASE" THEN s := case ELSIF str = "CODE" THEN s := code END | "D": Identifier(s); IF str = "DO" THEN s := do ELSIF str = "DIV" THEN s := div ELSIF str = "DEFINITION" THEN s := definition END | "E": Identifier(s); IF str = "END" THEN s := end ELSIF str = "ELSE" THEN s := else ELSIF str = "ELSIF" THEN s := elsif ELSIF str = "EXIT" THEN s := exit END | "F": Identifier(s); IF str = "FALSE" THEN s := false ELSIF str = "FOR" THEN s := for ELSIF str = "FINALLY" THEN s := finally END | "I": Identifier(s); IF str = "IF" THEN s := if ELSIF str = "IN" THEN s := in ELSIF str = "IS" THEN s := is ELSIF str = "IMPORT" THEN s := import ELSIF str = "IMPLEMENTS" THEN s := implements END | "L": Identifier(s); IF str = "LOOP" THEN s := loop END | "M": Identifier(s); IF str = "MOD" THEN s := mod ELSIF str = "MODULE" THEN s := module END | "N": Identifier(s); IF str = "NIL" THEN s := nil END | "O": Identifier(s); IF str = "OR" THEN s := or ELSIF str = "OF" THEN s := of ELSIF str = "OBJECT" THEN s := object END | "P": Identifier(s); IF str = "PROCEDURE" THEN s := procedure ELSIF str = "POINTER" THEN s := pointer END | "R": Identifier(s); IF str = "RECORD" THEN s := record ELSIF str = "REPEAT" THEN s := repeat ELSIF str = "RETURN" THEN s := return ELSIF str = "REFINES" THEN s := refines END | "T": Identifier(s); IF str = "THEN" THEN s := then ELSIF str = "TRUE" THEN s := true ELSIF str = "TO" THEN s := to ELSIF str = "TYPE" THEN s := type END | "U": Identifier(s); IF str = "UNTIL" THEN s := until END | "V": Identifier(s); IF str = "VAR" THEN s := var END | "W": Identifier(s); IF str = "WHILE" THEN s := while ELSIF str = "WITH" THEN s := with END | "G".."H", "J", "K", "Q", "S", "X".."Z": Identifier(s) | "[" : s := lbrak; NextChar | "]" : s := rbrak; NextChar | "^" : s := arrow; NextChar | "a".."z": Identifier(s) | "{" : s := lbrace; NextChar | "|" : s := bar; NextChar | "}" : s := rbrace; NextChar | "~" : s := not; NextChar | "?" : s := question; NextChar | 7FX : s := upto; NextChar ELSE Identifier(s); (* s := null; NextChar; *) END ; UNTIL s >= 0; END Get; PROCEDURE Next*; BEGIN Get(sym) END Next; END Scanner; PROCEDURE InitWithText*(t: Texts.Text; pos: LONGINT): Scanner; VAR buffer: Strings.String; len, i, j, ch: LONGINT; r: Texts.TextReader; bytesPerChar: LONGINT; s : Scanner; BEGIN t.AcquireRead; len := t.GetLength(); bytesPerChar := 2; NEW(buffer, len * bytesPerChar); (* UTF8 encoded characters use up to 5 bytes *) NEW(r, t); r.SetPosition(pos); j := 0; FOR i := 0 TO len-1 DO r.ReadCh(ch); WHILE ~UTF8Strings.EncodeChar(ch, buffer^, j) DO (* buffer too small *) INC(bytesPerChar); ExpandBuf(buffer, bytesPerChar * len); END; END; t.ReleaseRead; NEW(s); s.buffer := buffer; s.pos := 0; s.ch := " "; RETURN s; END InitWithText; PROCEDURE ExpandBuf(VAR oldBuf: Strings.String; newSize: LONGINT); VAR newBuf: Strings.String; i: LONGINT; BEGIN IF LEN(oldBuf^) >= newSize THEN RETURN END; NEW(newBuf, newSize); FOR i := 0 TO LEN(oldBuf^)-1 DO newBuf[i] := oldBuf[i]; END; oldBuf := newBuf; END ExpandBuf; PROCEDURE InitReservedChars; VAR i: LONGINT; BEGIN FOR i := 0 TO LEN(reservedChar)-1 DO IF CHR(i) <= 20X THEN (* TAB, CR, ESC ... *) reservedChar[i] := TRUE; ELSE CASE CHR(i) OF | "#", "&", "(", ")", "*", "+", ",", "-", ".", "/", "?": reservedChar[i] := TRUE; | ":", ";", "<", "=", ">": reservedChar[i] := TRUE; | "[", "]", "^", "{", "|", "}", "~": reservedChar[i] := TRUE; | "$": reservedChar[i] := TRUE; | 22X, 27X, 7FX: reservedChar[i] := TRUE; (* 22X = ", 27X = ', 7FX = del *) ELSE reservedChar[i] := FALSE; END; END; END; END InitReservedChars; PROCEDURE InitNewChar; VAR i: LONGINT; BEGIN FOR i := 0 TO LEN(newChar)-1 DO (* UTF-8 encoded characters with bits 10XXXXXX do not start a new unicode character *) IF (i < 80H) OR (i > 0BFH) THEN newChar[i] := TRUE; ELSE newChar[i] := FALSE; END END END InitNewChar; PROCEDURE InitIgnoredChar; VAR i: LONGINT; BEGIN FOR i := 0 TO LEN(ignoredChar)-1 DO ignoredChar[i] := (i <= ORD(" ")) & (i # ORD(LF)) END END InitIgnoredChar; BEGIN InitReservedChars; InitNewChar; InitIgnoredChar END BimboScanner.