123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545 |
- MODULE BimboScanner;
- IMPORT
- Trace, Texts, Streams, UTF8Strings, Strings;
- CONST
- Eot* = 0X;
- ObjectMarker = 020X;
- LF = 0AX;
- (* numtyp values *)
- char* = 1; integer* = 2; longinteger* = 3; real* = 4; longreal* = 5;
- MaxHDig* = 8; (* maximal hexadecimal longint length *)
- MaxHHDig* = 16; (* maximal hexadecimal hugeint length *)
- MaxRExp* = 38; (* maximal real exponent *)
- MaxLExp* = 308; (* maximal longreal exponent *)
- null* = 0; times* = 1; slash* = 2; div* = 3; mod* = 4; and* = 5;
- plus* = 6; minus* = 7; or* = 8; eql* = 9; neq* = 10; lss* = 11;
- leq* = 12; gtr* = 13; geq* = 14; in* = 15; is* = 16; arrow* = 17;
- period* = 18; comma* = 19; colon* = 20; upto* = 21; rparen* = 22;
- rbrak* = 23; rbrace* = 24; of* = 25; then* = 26; do* = 27; to* = 28;
- by* = 29; lparen* = 30; lbrak* = 31; lbrace* = 32; not* = 33;
- becomes* = 34; number* = 35; nil* = 36; true* = 37; false* = 38;
- string* = 39; ident* = 40; semicolon* = 41; bar* = 42; end* = 43;
- else* = 44; elsif* = 45; until* = 46; if* = 47; case* = 48; while* = 49;
- repeat* = 50; for* = 51; loop* = 52; with* = 53; exit* = 54;
- passivate* = 55; return* = 56; refines* = 57; implements* = 58;
- array* = 59; definition* = 60; object* = 61; record* = 62; pointer* = 63;
- begin* = 64; code* = 65; const* = 66; type* = 67; var* = 68;
- procedure* = 69; import* = 70; module* = 71; eof* = 72;
- comment* = 73; newLine* = 74; question* = 75; finally* = 76;
- VAR
- reservedChar-, ignoredChar, newChar-: ARRAY 256 OF BOOLEAN;
- TYPE
- StringMaker = OBJECT
- VAR length : LONGINT;
- data : Strings.String;
- PROCEDURE &Init(initialSize : LONGINT);
- BEGIN
- IF initialSize < 256 THEN initialSize := 256 END;
- NEW(data, initialSize); length := 0;
- END Init;
- PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: WORD);
- VAR i : LONGINT; n : Strings.String;
- BEGIN
- IF length + len + 1 >= LEN(data) THEN
- NEW(n, LEN(data) * 2 + len + 1); FOR i := 0 TO length - 1 DO n[i] := data[i] END;
- data := n
- END;
- WHILE len > 0 DO
- data[length] := buf[ofs];
- INC(ofs); INC(length); DEC(len)
- END;
- data[length] := 0X;
- END Add;
- (* remove last n characters *)
- PROCEDURE Shorten(n : LONGINT);
- BEGIN
- DEC(length, n);
- IF length < 0 THEN length := 0 END;
- IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END
- END Shorten;
- PROCEDURE Clear*;
- BEGIN
- data[0] := 0X;
- length := 0
- END Clear;
- PROCEDURE GetWriter*() : Streams.Writer;
- VAR w : Streams.Writer;
- BEGIN
- NEW(w, SELF.Add, 256);
- RETURN w
- END GetWriter;
- PROCEDURE GetLength*() : LONGINT;
- BEGIN
- RETURN length
- END GetLength;
- PROCEDURE GetString*() : Strings.String;
- BEGIN
- RETURN data
- END GetString;
- END StringMaker;
- Scanner* = OBJECT
- VAR
- buffer: Strings.String;
- pos-: LONGINT; (*pos in buffer*)
- ch-: CHAR; (**look-ahead *)
- str-: ARRAY 1024 OF CHAR;
- sym- : LONGINT;
- numtyp-: INTEGER; (* 1 = char, 2 = integer, 3 = real, 4 = longreal *)
- intval-: LONGINT; (* integer value or string length *)
- longintval-: HUGEINT;
- realval-: REAL;
- lrlval-: LONGREAL;
- numStartPos, numEndPos: LONGINT;
- lastpos-, curpos-, errpos-: LONGINT; (*pos in text*)
- isNummer: BOOLEAN;
- commentStr- : StringMaker;
- cw : Streams.Writer;
- PROCEDURE &Init;
- BEGIN
- NEW(commentStr, 1024);
- cw := commentStr.GetWriter()
- END Init;
- PROCEDURE err(n: INTEGER);
- BEGIN
- END err;
- PROCEDURE NextChar*;
- BEGIN
- IF pos < LEN(buffer) THEN
- ch := buffer[pos]; INC(pos)
- ELSE
- ch := Eot
- END;
- IF newChar[ORD(ch)] THEN INC(curpos) END; (* curpos := pos; *)
- END NextChar;
- PROCEDURE Str(VAR sym: LONGINT);
- VAR i: LONGINT; och: CHAR;
- BEGIN i := 0; och := ch;
- LOOP NextChar;
- IF ch = och THEN EXIT END ;
- IF ch < " " THEN err(3); EXIT END ;
- IF i = LEN(str)-1 THEN err(241); EXIT END ;
- str[i] := ch; INC(i)
- END ;
- NextChar; str[i] := 0X;
- IF i = 1 THEN
- sym := number
- ELSE sym := string
- END
- END Str;
- PROCEDURE Identifier(VAR sym: LONGINT);
- VAR i: LONGINT;
- BEGIN i := 0;
- REPEAT
- str[i] := ch; INC(i); NextChar
- UNTIL reservedChar[ORD(ch)] OR (i = LEN(str));
- IF i = LEN(str) THEN err(240); DEC(i) END ;
- str[i] := 0X; sym := ident;
- (* temporary code! delete when moving to ANY and adapt PCT *)
- IF str = "ANY" THEN COPY("PTR", str) END;
- END Identifier;
- PROCEDURE Number;
- VAR i, m, n, d, e: INTEGER; dig: ARRAY 24 OF CHAR; f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN;
- PROCEDURE Ten(e: INTEGER): LONGREAL;
- VAR x, p: LONGREAL;
- BEGIN x := 1; p := 10;
- WHILE e > 0 DO
- IF ODD(e) THEN x := x*p END;
- e := e DIV 2;
- IF e > 0 THEN p := p*p END (* prevent overflow *)
- END;
- RETURN x
- END Ten;
- PROCEDURE Ord(ch: CHAR; hex: BOOLEAN): INTEGER;
- BEGIN (* ("0" <= ch) & (ch <= "9") OR ("A" <= ch) & (ch <= "F") *)
- IF ch <= "9" THEN RETURN ORD(ch) - ORD("0")
- ELSIF hex THEN RETURN ORD(ch) - ORD("A") + 10
- ELSE err(2); RETURN 0
- END
- END Ord;
- BEGIN (* ("0" <= ch) & (ch <= "9") *)
- i := 0; m := 0; n := 0; d := 0; long := FALSE;
- LOOP (* read mantissa *)
- IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN
- IF (m > 0) OR (ch # "0") THEN (* ignore leading zeros *)
- IF n < LEN(dig) THEN dig[n] := ch; INC(n) END;
- INC(m)
- END;
- NextChar; INC(i)
- ELSIF ch = "." THEN NextChar;
- IF ch = "." THEN (* ellipsis *) ch := 7FX; EXIT
- ELSIF d = 0 THEN (* i > 0 *) d := i
- ELSE err(2)
- END
- ELSE EXIT
- END
- END; (* 0 <= n <= m <= i, 0 <= d <= i *)
- IF d = 0 THEN (* integer *)
- IF n = m THEN intval := 0; i := 0;
- (* > bootstrap 1 *)
- longintval := 0;
- (* < bootstrap 1 *)
- IF ch = "X" THEN (* character *) NextChar; numtyp := char;
- (* IF PCM.LocalUnicodeSupport & (n <= 8) THEN
- IF (n = 8) & (dig[0] > "7") THEN (* prevent overflow *) intval := -1 END;
- WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END
- ELSIF ~PCM.LocalUnicodeSupport & (n <= 2) THEN
- WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END
- ELSE err(203)
- END *)
- ELSIF ch = "H" THEN (* hexadecimal *) NextChar;
- IF n <= MaxHDig THEN
- numtyp := integer;
- IF (n = MaxHDig) & (dig[0] > "7") THEN (* prevent overflow *) intval := -1 END;
- WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END
- (* > bootstrap 1 *)
- ELSIF n <= MaxHHDig THEN
- numtyp := longinteger;
- IF (n = MaxHHDig) & (dig[0] > "7") THEN (* prevent overflow *) longintval := -1 END;
- WHILE i < n DO longintval := Ord(dig[i], TRUE) + longintval*10H; INC(i) END
- (* < bootstrap 1 *)
- ELSE err(203)
- END
- ELSE (* decimal *) numtyp := integer;
- WHILE i < n DO d := Ord(dig[i], FALSE); INC(i);
- IF intval <= (MAX(LONGINT) - d) DIV 10 THEN intval := intval*10 + d
- (* > bootstrap 2
- ELSE err(203)
- < bootstrap 2 *)
- (* > bootstrap 1 *)
- ELSE long := TRUE
- (* < bootstrap 1 *)
- END
- END;
- (* > bootstrap 1 *)
- IF long THEN
- numtyp := longinteger; longintval := LONG(intval)*10+d;
- WHILE i < n DO d := Ord(dig[i], FALSE); INC(i);
- IF longintval*10+d >= 0 THEN longintval := longintval*10 + d
- ELSE err(203)
- END
- END
- END
- (* < bootstrap 1 *)
- END
- ELSE err(203)
- END
- ELSE (* fraction *)
- f := 0; e := 0; expCh := "E";
- WHILE n > 0 DO (* 0 <= f < 1 *) DEC(n); f := (Ord(dig[n], FALSE) + f)/10 END;
- IF (ch = "E") OR (ch = "D") THEN expCh := ch; NextChar; neg := FALSE;
- IF ch = "-" THEN neg := TRUE; NextChar
- ELSIF ch = "+" THEN NextChar
- END;
- IF ("0" <= ch) & (ch <= "9") THEN
- REPEAT n := Ord(ch, FALSE); NextChar;
- IF e <= (MAX(INTEGER) - n) DIV 10 THEN e := e*10 + n
- ELSE err(203)
- END
- UNTIL (ch < "0") OR ("9" < ch);
- IF neg THEN e := -e END
- ELSE err(2)
- END
- END;
- DEC(e, i-d-m); (* decimal point shift *)
- IF expCh = "E" THEN numtyp := real;
- IF (1-MaxRExp < e) & (e <= MaxRExp) THEN
- IF e < 0 THEN realval := SHORT(f / Ten(-e))
- ELSE realval := SHORT(f * Ten(e))
- END
- ELSE err(203)
- END
- ELSE numtyp := longreal;
- IF (1-MaxLExp < e) & (e <= MaxLExp) THEN
- IF e < 0 THEN lrlval := f / Ten(-e)
- ELSE lrlval := f * Ten(e)
- END
- ELSE err(203)
- END
- END
- END
- END Number;
- PROCEDURE GetNumAsString*(VAR val: ARRAY OF CHAR);
- VAR i, l: LONGINT;
- BEGIN
- (*Strings.Copy(buffer^, numStartPos, numEndPos-numStartPos, val);*)
- IF isNummer THEN
- i := 0; l := LEN(val)-1;
- WHILE (i < numEndPos-numStartPos) & (i < l) DO
- val[i] := buffer[numStartPos + i];
- INC(i);
- END;
- END;
- val[i] := 0X
- END GetNumAsString;
- PROCEDURE Get(VAR s: LONGINT);
- PROCEDURE Comment; (* do not read after end of file *)
- BEGIN NextChar; cw.Char(ch);
- LOOP
- LOOP
- WHILE ch = "(" DO NextChar; cw.Char(ch);
- IF ch = "*" THEN Comment END
- END;
- IF ch = "*" THEN NextChar; cw.Char(ch); EXIT END ;
- IF ch = Eot THEN EXIT END ;
- NextChar; cw.Char(ch);
- END ;
- IF ch = ")" THEN NextChar; cw.Char(ch); EXIT END ;
- IF ch = Eot THEN err(5); EXIT END
- END;
- END Comment;
- BEGIN
- REPEAT
- WHILE (ignoredChar[ORD(ch)]) DO (*ignore control characters*)
- IF ch = Eot THEN
- s := eof; RETURN
- ELSE NextChar
- END
- END ;
- lastpos := curpos - 1;
- errpos := curpos - 1;
- isNummer := FALSE;
- CASE ch OF (* ch > " " *)
- | LF: s := newLine; NextChar
- | 22X, 27X : Str(s)
- | "#" : s := neq; NextChar
- | "&" : s := and; NextChar
- | "(" : NextChar;
- IF ch = "*" THEN commentStr.Clear; Comment; cw.Update; commentStr.Shorten(2); s := comment; (*allow recursion without reentrancy*)
- ELSE s := lparen
- END
- | ")" : s := rparen; NextChar
- | "*" : s:=times; NextChar
- | "+" : s := plus; NextChar
- | "," : s := comma; NextChar
- | "-" : s := minus; NextChar
- | "." : NextChar;
- IF ch = "." THEN NextChar; s := upto ELSE s := period END
- | "/" : s := slash; NextChar
- | "0".."9": isNummer := TRUE; numStartPos := pos-1;
- (* WHILE (ch >="0") & (ch <= "9") OR (ch >= "A") & (ch <="F") OR (ch="H") OR (ch="X") OR (ch=".") DO NextChar END; *)
- Number;
- numEndPos := pos-1; s := number
- | ":" : NextChar;
- IF ch = "=" THEN NextChar; s := becomes ELSE s := colon END
- | ";" : s := semicolon; NextChar
- | "<" : NextChar;
- IF ch = "=" THEN NextChar; s := leq; ELSE s := lss; END
- | "=" : s := eql; NextChar
- | ">" : NextChar;
- IF ch = "=" THEN NextChar; s := geq; ELSE s := gtr; END
- | "A": Identifier(s);
- IF str = "ARRAY" THEN s := array
- ELSIF str = "AWAIT" THEN s := passivate
- END
- | "B": Identifier(s);
- IF str = "BEGIN" THEN s := begin
- ELSIF str = "BY" THEN s := by
- END
- | "C": Identifier(s);
- IF str = "CONST" THEN s := const
- ELSIF str = "CASE" THEN s := case
- ELSIF str = "CODE" THEN s := code
- END
- | "D": Identifier(s);
- IF str = "DO" THEN s := do
- ELSIF str = "DIV" THEN s := div
- ELSIF str = "DEFINITION" THEN s := definition
- END
- | "E": Identifier(s);
- IF str = "END" THEN s := end
- ELSIF str = "ELSE" THEN s := else
- ELSIF str = "ELSIF" THEN s := elsif
- ELSIF str = "EXIT" THEN s := exit
- END
- | "F": Identifier(s);
- IF str = "FALSE" THEN s := false
- ELSIF str = "FOR" THEN s := for
- ELSIF str = "FINALLY" THEN s := finally
- END
- | "I": Identifier(s);
- IF str = "IF" THEN s := if
- ELSIF str = "IN" THEN s := in
- ELSIF str = "IS" THEN s := is
- ELSIF str = "IMPORT" THEN s := import
- ELSIF str = "IMPLEMENTS" THEN s := implements
- END
- | "L": Identifier(s);
- IF str = "LOOP" THEN s := loop END
- | "M": Identifier(s);
- IF str = "MOD" THEN s := mod
- ELSIF str = "MODULE" THEN s := module
- END
- | "N": Identifier(s);
- IF str = "NIL" THEN s := nil END
- | "O": Identifier(s);
- IF str = "OR" THEN s := or
- ELSIF str = "OF" THEN s := of
- ELSIF str = "OBJECT" THEN s := object
- END
- | "P": Identifier(s);
- IF str = "PROCEDURE" THEN s := procedure
- ELSIF str = "POINTER" THEN s := pointer
- END
- | "R": Identifier(s);
- IF str = "RECORD" THEN s := record
- ELSIF str = "REPEAT" THEN s := repeat
- ELSIF str = "RETURN" THEN s := return
- ELSIF str = "REFINES" THEN s := refines
- END
- | "T": Identifier(s);
- IF str = "THEN" THEN s := then
- ELSIF str = "TRUE" THEN s := true
- ELSIF str = "TO" THEN s := to
- ELSIF str = "TYPE" THEN s := type
- END
- | "U": Identifier(s);
- IF str = "UNTIL" THEN s := until END
- | "V": Identifier(s);
- IF str = "VAR" THEN s := var END
- | "W": Identifier(s);
- IF str = "WHILE" THEN s := while
- ELSIF str = "WITH" THEN s := with
- END
- | "G".."H", "J", "K", "Q", "S", "X".."Z": Identifier(s)
- | "[" : s := lbrak; NextChar
- | "]" : s := rbrak; NextChar
- | "^" : s := arrow; NextChar
- | "a".."z": Identifier(s)
- | "{" : s := lbrace; NextChar
- | "|" : s := bar; NextChar
- | "}" : s := rbrace; NextChar
- | "~" : s := not; NextChar
- | "?" : s := question; NextChar
- | 7FX : s := upto; NextChar
- ELSE Identifier(s); (* s := null; NextChar; *)
- END ;
- UNTIL s >= 0;
- END Get;
- PROCEDURE Next*;
- BEGIN
- Get(sym)
- END Next;
- END Scanner;
- PROCEDURE InitWithText*(t: Texts.Text; pos: LONGINT): Scanner;
- VAR buffer: Strings.String; len, i, j, ch: LONGINT; r: Texts.TextReader;
- bytesPerChar: LONGINT;
- s : Scanner;
- BEGIN
- t.AcquireRead;
- len := t.GetLength();
- bytesPerChar := 2;
- NEW(buffer, len * bytesPerChar); (* UTF8 encoded characters use up to 5 bytes *)
- NEW(r, t);
- r.SetPosition(pos);
- j := 0;
- FOR i := 0 TO len-1 DO
- r.ReadCh(ch);
- WHILE ~UTF8Strings.EncodeChar(ch, buffer^, j) DO
- (* buffer too small *)
- INC(bytesPerChar);
- ExpandBuf(buffer, bytesPerChar * len);
- END;
- END;
- t.ReleaseRead;
- NEW(s); s.buffer := buffer;
- s.pos := 0;
- s.ch := " ";
- RETURN s;
- END InitWithText;
- PROCEDURE ExpandBuf(VAR oldBuf: Strings.String; newSize: LONGINT);
- VAR newBuf: Strings.String; i: LONGINT;
- BEGIN
- IF LEN(oldBuf^) >= newSize THEN RETURN END;
- NEW(newBuf, newSize);
- FOR i := 0 TO LEN(oldBuf^)-1 DO
- newBuf[i] := oldBuf[i];
- END;
- oldBuf := newBuf;
- END ExpandBuf;
- PROCEDURE InitReservedChars;
- VAR
- i: LONGINT;
- BEGIN
- FOR i := 0 TO LEN(reservedChar)-1 DO
- IF CHR(i) <= 20X THEN (* TAB, CR, ESC ... *)
- reservedChar[i] := TRUE;
- ELSE
- CASE CHR(i) OF
- | "#", "&", "(", ")", "*", "+", ",", "-", ".", "/", "?": reservedChar[i] := TRUE;
- | ":", ";", "<", "=", ">": reservedChar[i] := TRUE;
- | "[", "]", "^", "{", "|", "}", "~": reservedChar[i] := TRUE;
- | "$": reservedChar[i] := TRUE;
- | 22X, 27X, 7FX: reservedChar[i] := TRUE; (* 22X = ", 27X = ', 7FX = del *)
- ELSE
- reservedChar[i] := FALSE;
- END;
- END;
- END;
- END InitReservedChars;
- PROCEDURE InitNewChar;
- VAR
- i: LONGINT;
- BEGIN
- FOR i := 0 TO LEN(newChar)-1 DO
- (* UTF-8 encoded characters with bits 10XXXXXX do not start a new unicode character *)
- IF (i < 80H) OR (i > 0BFH) THEN
- newChar[i] := TRUE;
- ELSE
- newChar[i] := FALSE;
- END
- END
- END InitNewChar;
- PROCEDURE InitIgnoredChar;
- VAR
- i: LONGINT;
- BEGIN
- FOR i := 0 TO LEN(ignoredChar)-1 DO
- ignoredChar[i] := (i <= ORD(" ")) & (i # ORD(LF))
- END
- END InitIgnoredChar;
- BEGIN
- InitReservedChars;
- InitNewChar;
- InitIgnoredChar
- END BimboScanner.
|