123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828 |
- (* ==================================================================== *)
- (* *)
- (* Scanner Module for the Gardens Point Component Pascal Compiler. *)
- (* Copyright (c) John Gough 1999, 2000. *)
- (* This module was extensively modified from the scanner *)
- (* automatically produced by the M2 version of COCO/R, using *)
- (* the CPascal.atg grammar used for the JVM version of GPCP. *)
- (* *)
- (* ==================================================================== *)
- MODULE CPascalS;
- (* This is a modified version for Mburg --- it computes column positions *)
- (* Scanner generated by Coco/R *)
- IMPORT
- GPCPcopyright,
- RTS,
- ASCII,
- Console,
- Tok := CPascalG,
- GPBinFiles,
- GPTextFiles;
- CONST
- noSym = Tok.NOSYM; (*error token code*)
- (* not only for errors but also for not finished states of scanner analysis *)
- eof = 0X;
- eofByt = 0;
- EOL = 0AX;
- BlkSize = 32768;
- BlkNmbr = 32;
- asciiHT = 9X;
- asciiLF = EOL;
- CONST
- listAlways* = 2; (* listing control constants *)
- listErrOnly* = 1;
- listNever* = 0;
- TYPE
- BufBlk = ARRAY BlkSize OF UBYTE;
- Buffer = ARRAY BlkNmbr OF POINTER TO BufBlk;
- StartTable = ARRAY 256 OF INTEGER;
- (* ======================== EXPORTS ========================= *)
- TYPE
- ErrorHandler* = POINTER TO ABSTRACT RECORD END;
-
- Token* = POINTER TO RECORD
- sym* : INTEGER;
- lin* : INTEGER;
- col* : INTEGER;
- pos* : INTEGER;
- len* : INTEGER;
- dlr* : BOOLEAN;
- END;
-
- Span* = POINTER TO RECORD
- sLin*, sCol*, eLin*, eCol* : INTEGER
- END;
- (* ====================== END EXPORTS ======================= *)
- VAR
- ch: CHAR; (*current input character*)
- curLine: INTEGER; (*current input line (may be higher than line)*)
- lineStart: INTEGER; (*start position of current line*)
- apx: INTEGER; (*length of appendix (CONTEXT phrase)*)
- oldEols: INTEGER; (*number of EOLs in a comment*)
- bp: INTEGER; (*current position in buf*)
- bp0: INTEGER; (*position of current token)*)
- LBlkSize: INTEGER; (*BlkSize*)
- inputLen: INTEGER; (*source file size*)
- buf: Buffer; (*source buffer for low-level access*)
- savedBuf: Buffer;
- bufSaved: BOOLEAN;
- start: StartTable; (*start state for every character*)
- nextLine: INTEGER; (*line of lookahead symbol*)
- nextCol: INTEGER; (*column of lookahead symbol*)
- nextLen: INTEGER; (*length of lookahead symbol*)
- nextPos: INTEGER; (*file position of lookahead symbol*)
- spaces: INTEGER; (* ############# NEW ############## *)
- (* ======================== EXPORTS ========================= *)
- VAR
- src*: GPBinFiles.FILE; (*source file. To be opened by main *)
- lst*: GPTextFiles.FILE; (*list file. To be opened by main *)
- line*, col*: INTEGER; (*line and column of current symbol*)
- len*: INTEGER; (*length of current symbol*)
- pos*: INTEGER; (*file position of current symbol*)
- errors*: INTEGER; (*number of detected errors*)
- warnings*: INTEGER; (*number of detected warnings*)
- prevTok*: Token;
- ParseErr*: ErrorHandler;
- SemError*: ErrorHandler;
- (* ====================== END EXPORTS ======================= *)
- (* ======================== EXPORTS ========================= *)
- PROCEDURE (s : ErrorHandler)Report*(num : INTEGER;
- lin : INTEGER;
- col : INTEGER) ,NEW,ABSTRACT;
- PROCEDURE (s : ErrorHandler)RepSt1*(num : INTEGER;
- IN str : ARRAY OF CHAR;
- lin : INTEGER;
- col : INTEGER) ,NEW,ABSTRACT;
- PROCEDURE (s : ErrorHandler)RepSt2*(num : INTEGER;
- IN st1 : ARRAY OF CHAR;
- IN st2 : ARRAY OF CHAR;
- lin : INTEGER;
- col : INTEGER) ,NEW,ABSTRACT;
- PROCEDURE (s : Span)SpanSS*(e : Span) : Span,NEW;
- VAR res : Span;
- BEGIN
- IF e = NIL THEN RETURN s;
- ELSE
- NEW(res);
- res.sLin := s.sLin; res.eLin := e.eLin;
- res.sCol := s.sCol; res.eCol := e.eCol;
- END;
- RETURN res;
- END SpanSS;
- PROCEDURE mkSpanTT*(s, e : Token) : Span;
- VAR res : Span;
- BEGIN
- NEW(res);
- res.sLin := s.lin; res.eLin := e.lin;
- res.sCol := s.col; res.eCol := e.col + e.len;
- RETURN res;
- END mkSpanTT;
- PROCEDURE mkSpanT*(t : Token) : Span;
- VAR res : Span;
- BEGIN
- NEW(res);
- res.sLin := t.lin; res.eLin := t.lin;
- res.sCol := t.col; res.eCol := t.col + t.len;
- RETURN res;
- END mkSpanT;
-
- PROCEDURE Merge*(s, e : Span) : Span;
- BEGIN
- IF s # NIL THEN RETURN s.SpanSS(e) ELSE RETURN NIL END;
- END Merge;
- (* ====================== END EXPORTS ======================= *)
- PROCEDURE^ get*() : Token;
- (* Gets next symbol from source file *)
- PROCEDURE^ GetString*(pos: INTEGER; len: INTEGER; OUT name: ARRAY OF CHAR);
- (* Retrieves exact string of max length len from position pos in source file *)
- PROCEDURE^ charAt*(pos: INTEGER): CHAR;
- (* Returns exact character at position pos in source file *)
- PROCEDURE^ Reset*;
- (* Reads and stores source file internally *)
- PROCEDURE^ SkipAndGetLine*(i : INTEGER; (* indent to skip *)
- e : INTEGER; (* end file-pos *)
- VAR p : INTEGER; (* crnt file-pos *)
- OUT l : INTEGER; (* fetched length *)
- VAR s : ARRAY OF CHAR); (* output string *)
- (* ==================================================================== *)
- PROCEDURE (t : Token)DiagToken*(),NEW;
- VAR i : INTEGER;
- BEGIN
- Console.Write("l"); Console.WriteInt(t.lin,1); Console.Write(":");
- Console.Write("c"); Console.WriteInt(t.col,1); Console.WriteString(" '");
- FOR i := 0 TO t.len - 1 DO Console.Write(charAt(t.pos+i)) END;
- Console.Write("'"); Console.WriteLn;
- END DiagToken;
- PROCEDURE digitAt(pos : INTEGER) : INTEGER;
- VAR ch : CHAR;
- BEGIN
- ch := charAt(pos);
- IF (ch >= '0') & (ch <= '9') THEN RETURN ORD(ch) - ORD('0');
- ELSE RETURN ORD(ch) + (10 - ORD('A'));
- END;
- END digitAt;
- PROCEDURE getHex*(pos, len : INTEGER) : INTEGER;
- VAR ch : CHAR;
- ix : INTEGER;
- rslt : INTEGER;
- BEGIN
- rslt := 0;
- FOR ix := pos TO pos + len - 1 DO
- ch := charAt(ix);
- IF (ch >= '0') & (ch <= '9') THEN rslt := rslt * 16 + ORD(ch) - ORD('0');
- ELSIF (ch >= 'a') & (ch <= 'f') THEN rslt := rslt * 16 + ORD(ch) + (10 - ORD('a'));
- ELSIF (ch >= 'A') & (ch <= 'F') THEN rslt := rslt * 16 + ORD(ch) + (10 - ORD('A'));
- ELSE RETURN -237;
- END;
- END;
- RETURN rslt;
- END getHex;
- PROCEDURE tokToLong*(t : Token) : LONGINT;
- VAR long : LONGINT;
- last : LONGINT;
- indx : INTEGER;
- limt : INTEGER;
- hexD : INTEGER;
- ch : CHAR;
- BEGIN [UNCHECKED_ARITHMETIC]
- (*
- * This code requires special care.
- * For the CLR it would be simplest to catch overflows
- * in the per-character loop, and put in a rescue clause
- * that reported the Error-233. Unfortunately this does
- * not work on the JVM, so we have to catch the overflow
- * manually by detecting the sum wrapping to negative.
- *)
- limt := t.pos + t.len - 1;
- long := 0;
- ch := charAt(limt);
- IF (ch = "H") OR (ch = "L") THEN
- DEC(limt);
- FOR indx := t.pos TO limt DO
- hexD := digitAt(indx);
- long := long * 16 + hexD;
- IF long < 0 THEN SemError.Report(233, t.lin, t.col); RETURN 0 END;
- END;
- IF ch = "H" THEN
- IF RTS.hiInt(long) # 0 THEN
- SemError.Report(232, t.lin, t.col); RETURN 0;
- ELSE
- long := LONG(RTS.loInt(long));
- END;
- END;
- ELSE
- FOR indx := t.pos TO limt DO
- ch := charAt(indx);
- long := long * 10 + (ORD(ch) - ORD('0'));
- IF long < 0 THEN SemError.Report(233, t.lin, t.col); RETURN 0 END;
- END;
- END;
- RETURN long;
- END tokToLong;
- PROCEDURE tokToReal*(t : Token) : REAL;
- VAR str : ARRAY 256 OF CHAR;
- pOk : BOOLEAN;
- num : REAL;
- BEGIN
- GetString(t.pos, t.len, str);
- RTS.StrToRealInvar(str$, num, pOk);
- IF ~pOk THEN
- SemError.Report(45, t.lin, t.col); RETURN 0.0;
- ELSE
- RETURN num;
- END;
- END tokToReal;
- PROCEDURE tokToChar*(t : Token) : CHAR;
- VAR cOrd : LONGINT;
- indx : INTEGER;
- limt : INTEGER;
- hexD : INTEGER;
- ch : CHAR;
- BEGIN
- limt := t.pos + t.len - 2;
- cOrd := 0;
- FOR indx := t.pos TO limt DO
- hexD := digitAt(indx);
- cOrd := cOrd * 16 + hexD;
- END;
- (* RANGE CHECK HERE *)
- RETURN CHR(cOrd);
- END tokToChar;
- (* ====================== END EXPORTS ======================= *)
- PROCEDURE NextCh;
- (* Return global variable ch *)
- BEGIN
- INC(bp); ch := charAt(bp);
- IF ch = asciiHT THEN
- INC(spaces,8); DEC(spaces,spaces MOD 8);
- ELSE
- INC(spaces);
- END;
- IF ch = EOL THEN INC(curLine); lineStart := bp; spaces := 0 END
- END NextCh;
- (* ==================================================================== *)
- PROCEDURE comment (): BOOLEAN;
- VAR
- level, startLine: INTEGER;
- oldLineStart : INTEGER;
- oldSpaces : INTEGER;
- BEGIN
- level := 1; startLine := curLine;
- oldLineStart := lineStart; oldSpaces := spaces;
- IF (ch = "(") THEN
- NextCh;
- IF (ch = "*") THEN
- NextCh;
- LOOP
- IF (ch = "*") THEN
- NextCh;
- IF (ch = ")") THEN
- DEC(level); NextCh;
- IF level = 0 THEN RETURN TRUE END
- END;
- ELSIF (ch = "(") THEN
- NextCh;
- IF (ch = "*") THEN INC(level); NextCh END;
- ELSIF ch = eof THEN RETURN FALSE
- ELSE NextCh END;
- END; (* LOOP *)
- ELSE
- IF ch = EOL THEN DEC(curLine); lineStart := oldLineStart END;
- DEC(bp, 2); NextCh; spaces := oldSpaces; RETURN FALSE
- END;
- END;
- RETURN FALSE;
- END comment;
- (* ==================================================================== *)
- PROCEDURE get() : Token;
- VAR
- state: INTEGER;
- sym : INTEGER;
- PROCEDURE equal (IN s: ARRAY OF CHAR): BOOLEAN;
- VAR
- i: INTEGER;
- q: INTEGER;
- BEGIN
- (* Assert: only called with literals ==> LEN(s$) = LEN(s)-1 *)
- IF nextLen # LEN(s)-1 THEN RETURN FALSE END;
- i := 1; q := bp0; INC(q);
- WHILE i < nextLen DO
- IF charAt(q) # s[i] THEN RETURN FALSE END;
- INC(i); INC(q)
- END;
- RETURN TRUE
- END equal;
- PROCEDURE CheckLiteral(VAR sym : INTEGER);
- BEGIN
- CASE charAt(bp0) OF
- "A": IF equal("ABSTRACT") THEN sym := Tok.ABSTRACTSym;
- ELSIF equal("ARRAY") THEN sym := Tok.ARRAYSym;
- END
- | "B": IF equal("BEGIN") THEN sym := Tok.BEGINSym;
- ELSIF equal("BY") THEN sym := Tok.BYSym;
- END
- | "C": IF equal("CASE") THEN sym := Tok.CASESym;
- ELSIF equal("CLOSE") THEN sym := Tok.CLOSESym;
- ELSIF equal("CONST") THEN sym := Tok.CONSTSym;
- END
- | "D": IF equal("DO") THEN sym := Tok.DOSym;
- ELSIF equal("DIV") THEN sym := Tok.DIVSym;
- ELSIF equal("DIV0") THEN sym := Tok.DIV0Sym;
- END
- | "E": IF equal("ELSE") THEN sym := Tok.ELSESym;
- ELSIF equal("ELSIF") THEN sym := Tok.ELSIFSym;
- ELSIF equal("EMPTY") THEN sym := Tok.EMPTYSym;
- ELSIF equal("END") THEN sym := Tok.ENDSym;
- ELSIF equal("EXIT") THEN sym := Tok.EXITSym;
- ELSIF equal("EXTENSIBLE") THEN sym := Tok.EXTENSIBLESym;
- ELSIF equal("ENUM") THEN sym := Tok.ENUMSym;
- ELSIF equal("EVENT") THEN sym := Tok.EVENTSym;
- END
- | "F": IF equal("FOR") THEN sym := Tok.FORSym;
- END
- | "I": IF equal("IF") THEN sym := Tok.IFSym;
- ELSIF equal("IMPORT") THEN sym := Tok.IMPORTSym;
- ELSIF equal("IN") THEN sym := Tok.INSym;
- ELSIF equal("IS") THEN sym := Tok.ISSym;
- ELSIF equal("INTERFACE") THEN sym := Tok.INTERFACESym;
- END
- | "L": IF equal("LIMITED") THEN sym := Tok.LIMITEDSym;
- ELSIF equal("LOOP") THEN sym := Tok.LOOPSym;
- END
- | "M": IF equal("MOD") THEN sym := Tok.MODSym;
- ELSIF equal("MODULE") THEN sym := Tok.MODULESym;
- END
- | "N": IF equal("NEW") THEN sym := Tok.NEWSym;
- ELSIF equal("NIL") THEN sym := Tok.NILSym;
- END
- | "O": IF equal("OF") THEN sym := Tok.OFSym;
- ELSIF equal("OR") THEN sym := Tok.ORSym;
- ELSIF equal("OUT") THEN sym := Tok.OUTSym;
- END
- | "P": IF equal("POINTER") THEN sym := Tok.POINTERSym;
- ELSIF equal("PROCEDURE") THEN sym := Tok.PROCEDURESym;
- END
- | "R": IF equal("RECORD") THEN sym := Tok.RECORDSym;
- ELSIF equal("REPEAT") THEN sym := Tok.REPEATSym;
- ELSIF equal("RETURN") THEN sym := Tok.RETURNSym;
- ELSIF equal("RESCUE") THEN sym := Tok.RESCUESym;
- ELSIF equal("REM0") THEN sym := Tok.REM0Sym;
- END
- | "S": IF equal("STATIC") THEN sym := Tok.STATICSym;
- END
- | "T": IF equal("THEN") THEN sym := Tok.THENSym;
- ELSIF equal("TO") THEN sym := Tok.TOSym;
- ELSIF equal("TYPE") THEN sym := Tok.TYPESym;
- END
- | "U": IF equal("UNTIL") THEN sym := Tok.UNTILSym;
- END
- | "V": IF equal("VAR") THEN sym := Tok.VARSym;
- ELSIF equal("VECTOR") THEN sym := Tok.VECTORSym;
- END
- | "W": IF equal("WHILE") THEN sym := Tok.WHILESym;
- ELSIF equal("WITH") THEN sym := Tok.WITHSym;
- END
- ELSE
- END
- END CheckLiteral;
- PROCEDURE mkToken(kind : INTEGER) : Token;
- VAR new : Token;
- BEGIN
- NEW(new);
- IF kind = Tok.idVariant THEN kind := Tok.identSym; new.dlr := TRUE END;
- new.sym := kind;
- new.lin := nextLine; new.col := nextCol;
- new.len := nextLen; new.pos := nextPos;
- RETURN new;
- END mkToken;
-
- BEGIN (*get*)
- WHILE (ch=' ') OR
- (ch >= CHR(9)) & (ch <= CHR(10)) OR
- (ch = CHR(13)) DO NextCh END;
- IF ((ch = "(")) & comment() THEN RETURN get() END;
- pos := nextPos; nextPos := bp;
- col := nextCol; nextCol := spaces;
- line := nextLine; nextLine := curLine;
- len := nextLen; nextLen := 0;
- apx := 0; state := start[ORD(ch)]; bp0 := bp;
- LOOP
- NextCh; INC(nextLen);
- CASE state OF
- (* ---------------------------------- *)
- 1: (* start of ordinary identifier *)
- IF (ch >= "0") & (ch <= "9") OR
- (ch >= "A") & (ch <= "Z") OR
- (ch >= "a") & (ch <= "z") OR
- (ch >= 0C0X) & (ch <= 0D6X) OR
- (ch >= 0D8X) & (ch <= 0F6X) OR
- (ch >= 0F8X) & (ch <= 0FFX) OR
- (ch = "_") THEN (* skip *)
- ELSIF ch = "@" THEN state := 45;
- ELSIF ch = "$" THEN state := 46;
- ELSE sym := Tok.identSym; CheckLiteral(sym); RETURN mkToken(sym);
- END;
- (* ---------------------------------- *)
- | 44:(* start of ` escaped identifier *)
- IF (ch >= "0") & (ch <= "9") OR
- (ch >= "A") & (ch <= "Z") OR
- (ch >= "a") & (ch <= "z") OR
- (ch >= 0C0X) & (ch <= 0D6X) OR
- (ch >= 0D8X) & (ch <= 0F6X) OR
- (ch >= 0F8X) & (ch <= 0FFX) OR
- (ch = "_") THEN (* skip *)
- ELSE
- SemError.Report(187, nextLine, spaces);
- RETURN mkToken(noSym);
- END;
- (* throw away the escape char *)
- INC(nextPos); INC(nextCol); DEC(nextLen);
- state := 45;
- (* ---------------------------------- *)
- | 45:(* rest of ` escaped identifier *)
- IF (ch >= "0") & (ch <= "9") OR
- (ch >= "A") & (ch <= "Z") OR
- (ch >= "a") & (ch <= "z") OR
- (ch = "@") OR
- (ch = "_") THEN (* skip *)
- ELSIF ch = "$" THEN state := 47;
- ELSE RETURN mkToken(Tok.idVariant); (* No check for reserved words *)
- END;
- (* ---------------------------------- *)
- | 46:(* check for $ at end of ident. *)
- IF (ch >= "0") & (ch <= "9") OR
- (ch >= "A") & (ch <= "Z") OR
- (ch >= "a") & (ch <= "z") OR
- (ch = "_") THEN state := 45; (* embedded "$" *)
- ELSE
- DEC(bp, 2); DEC(nextLen); NextCh;
- sym := Tok.identSym; CheckLiteral(sym); RETURN mkToken(sym);
- END;
- (* ---------------------------------- *)
- | 47:(* check for $ at end of idVar't *)
- IF (ch >= "0") & (ch <= "9") OR
- (ch >= "A") & (ch <= "Z") OR
- (ch >= "a") & (ch <= "z") OR
- (ch = "_") THEN state := 45; (* embedded "$" *)
- ELSE
- DEC(bp, 2); DEC(nextLen); NextCh;
- RETURN mkToken(Tok.idVariant); (* No check for reserved words *)
- END;
- (* ---------------------------------- *)
- | 49: (* !" ..." format string *)
- IF ch = '"' THEN state := 51;
- ELSIF ch = '\' THEN state := 50;
- END;
- | 50: (* Last char was '\' inside bangStr *)
- state := 49;
- | 51: RETURN mkToken(Tok.bangStrSym);
- (* ---------------------------------- *)
- | 2: RETURN mkToken(Tok.integerSym);
- | 3: DEC(bp, apx+1); DEC(nextLen, apx);
- NextCh; RETURN mkToken(Tok.integerSym);
- | 4: IF (ch >= "0") & (ch <= "9") THEN
- ELSIF (ch = "E") THEN state := 5;
- ELSE RETURN mkToken(Tok.realSym);
- END;
- | 5: IF (ch >= "0") & (ch <= "9") THEN state := 7;
- ELSIF (ch = "+") OR
- (ch = "-") THEN state := 6;
- ELSE RETURN mkToken(noSym);
- END;
- | 6: IF (ch >= "0") & (ch <= "9") THEN state := 7;
- ELSE RETURN mkToken(noSym);
- END;
- | 7: IF (ch >= "0") & (ch <= "9") THEN
- ELSE RETURN mkToken(Tok.realSym);
- END;
- | 8: RETURN mkToken(Tok.CharConstantSym);
- | 9: IF (ch <= CHR(9)) OR
- (ch >= CHR(11)) & (ch <= CHR(12)) OR
- (ch >= CHR(14)) & (ch <= "!") OR
- (ch >= "#") THEN
- ELSIF (ch = '"') THEN state := 10;
- ELSE RETURN mkToken(noSym);
- END;
- | 10: RETURN mkToken(Tok.stringSym);
- | 11: IF (ch <= CHR(9)) OR
- (ch >= CHR(11)) & (ch <= CHR(12)) OR
- (ch >= CHR(14)) & (ch <= "&") OR
- (ch>="(") THEN
- ELSIF (ch = "'") THEN state := 10;
- ELSE RETURN mkToken(noSym);
- END;
- | 12: IF (ch >= "0") & (ch <= "9") THEN
- ELSIF (ch >= "A") & (ch <= "F") THEN state := 13;
- ELSIF (ch = "H") OR
- (ch = "L") THEN state := 2;
- ELSIF (ch = ".") THEN state := 14; INC(apx)
- ELSIF (ch = "X") THEN state := 8;
- ELSE RETURN mkToken(Tok.integerSym);
- END;
- | 13: IF (ch >= "0") & (ch <= "9") OR
- (ch >= "A") & (ch <= "F") THEN
- ELSIF (ch = "H") OR
- (ch = "L") THEN state := 2;
- ELSIF (ch = "X") THEN state := 8;
- ELSE RETURN mkToken(noSym);
- END;
- | 14: IF (ch >= "0") & (ch <= "9") THEN state := 4; apx := 0
- ELSIF (ch = ".") THEN state := 3; INC(apx)
- ELSIF (ch = "E") THEN state := 5; apx := 0
- ELSE RETURN mkToken(Tok.realSym);
- END;
- | 15: RETURN mkToken(Tok.starSym);
- | 16: RETURN mkToken(Tok.minusSym);
- | 17: IF (ch = '"') THEN state := 49;
- ELSE RETURN mkToken(Tok.bangSym);
- END;
- | 18: IF (ch = ".") THEN state := 40;
- ELSE RETURN mkToken(Tok.pointSym);
- END;
- | 19: RETURN mkToken(Tok.equalSym);
- | 20: RETURN mkToken(Tok.commaSym);
- | 21: RETURN mkToken(Tok.lparenSym);
- | 22: RETURN mkToken(Tok.plusSym);
- | 23: RETURN mkToken(Tok.rparenSym);
- | 24: RETURN mkToken(Tok.semicolonSym);
- | 25: IF (ch = "=") THEN state := 41;
- ELSE RETURN mkToken(Tok.colonSym);
- END;
- | 26: RETURN mkToken(Tok.lbrackSym);
- | 27: RETURN mkToken(Tok.rbrackSym);
- | 28: RETURN mkToken(Tok.uparrowSym);
- | 29: RETURN mkToken(Tok.dollarSym);
- | 30: RETURN mkToken(Tok.hashSym);
- | 31: IF (ch = "=") THEN state := 32;
- ELSE RETURN mkToken(Tok.lessSym);
- END;
- | 32: RETURN mkToken(Tok.lessequalSym);
- | 33: IF (ch = "=") THEN state := 34;
- ELSE RETURN mkToken(Tok.greaterSym);
- END;
- | 34: RETURN mkToken(Tok.greaterequalSym);
- | 35: RETURN mkToken(Tok.slashSym);
- | 36: RETURN mkToken(Tok.andSym);
- | 37: RETURN mkToken(Tok.tildeSym);
- | 38: RETURN mkToken(Tok.lbraceSym);
- | 39: RETURN mkToken(Tok.rbraceSym);
- | 40: RETURN mkToken(Tok.pointpointSym);
- | 41: RETURN mkToken(Tok.colonequalSym);
- | 42: RETURN mkToken(Tok.barSym);
- | 43: ch := 0X; DEC(bp); RETURN mkToken(Tok.EOFSYM);
- ELSE RETURN mkToken(noSym); (*NextCh already done*)
- END
- END
- END get;
- (* ==================================================================== *)
- PROCEDURE SkipAndGetLine(i : INTEGER; (* indent to skip *)
- e : INTEGER; (* end file-pos *)
- VAR p : INTEGER; (* crnt file-pos *)
- OUT l : INTEGER; (* fetched length *)
- VAR s : ARRAY OF CHAR); (* output string *)
- VAR
- ch : CHAR;
- ix : INTEGER;
- sp : INTEGER;
- BEGIN
- sp := 0;
- ch := charAt(p); INC(p);
- (* skip i positions if possible *)
- WHILE (sp < i) & (ch <= " ") & (p <= e) & (ch # asciiLF) DO
- IF ch = asciiHT THEN INC(sp,8); DEC(sp,sp MOD 8) ELSE INC(sp) END;
- ch := charAt(p); INC(p);
- END;
- ix := 0;
- WHILE sp > i DO
- s[ix] := " "; INC(ix); DEC(sp);
- END;
- WHILE (p <= e) & (ch # asciiLF) DO
- s[ix] := ch; INC(ix);
- ch := charAt(p); INC(p);
- END;
- s[ix] := 0X; l := ix;
- END SkipAndGetLine;
- (* ==================================================================== *)
- PROCEDURE GetString (pos: INTEGER; len: INTEGER; OUT name: ARRAY OF CHAR);
- VAR
- i: INTEGER;
- p: INTEGER;
- BEGIN
- IF len >= LEN(name) THEN len := LEN(name)-1 END;
- p := pos; i := 0;
- WHILE i < len DO
- name[i] := charAt(p); INC(i); INC(p)
- END;
- name[len] := 0X;
- END GetString;
- (* ==================================================================== *)
- PROCEDURE charAt (pos: INTEGER): CHAR;
- VAR
- ch : CHAR;
- BEGIN
- IF pos >= inputLen THEN RETURN eof END;
- ch := buf[pos DIV LBlkSize][pos MOD LBlkSize];
- IF ch # eof THEN RETURN ch ELSE RETURN eof END
- END charAt;
- (* ==================================================================== *)
- PROCEDURE Reset;
- VAR
- len: INTEGER;
- i, read: INTEGER;
- BEGIN (*assert: src has been opened*)
- FOR i := 0 TO BlkNmbr - 1 DO savedBuf[i] := NIL END; bufSaved := FALSE;
- i := -1;
- inputLen := 0;
- REPEAT
- INC(i);
- (*
- * Conserve memory by not deallocating the buffer.
- * Reuse for later compilation, expanding if necessary.
- *)
- IF buf[i] = NIL THEN NEW(buf[i]) END;
- read := GPBinFiles.readNBytes(src, buf[i]^, BlkSize);
- INC(inputLen, read);
- UNTIL read < BlkSize;
- GPBinFiles.CloseFile(src);
- buf[i][read] := eofByt;
- curLine := 1; lineStart := -2; bp := -1;
- oldEols := 0; apx := 0; errors := 0; warnings := 0;
- spaces := 0; (* # new # *)
- NextCh;
- END Reset;
-
- PROCEDURE NewReadBuffer*(source : ARRAY OF POINTER TO ARRAY OF CHAR);
- VAR count, linIx, chrIx, index : INTEGER;
- lineP : POINTER TO ARRAY OF CHAR;
- theCh : CHAR;
- BEGIN
- IF ~bufSaved THEN
- count := 0;
- WHILE (count < BlkNmbr) & (buf[count] # NIL) DO
- savedBuf[count] := buf[count]; INC(count);
- END;
- END;
- bufSaved := TRUE;
- NEW(buf[0]);
- index := 0;
- FOR linIx := 0 TO LEN(source) - 1 DO
- lineP := source[linIx];
- chrIx := 0;
- IF lineP = NIL THEN theCh := 0X ELSE theCh := lineP[0] END;
- WHILE theCh # 0X DO
- buf[0][index] := USHORT(ORD(theCh)); INC(index); INC(chrIx);
- theCh := lineP[chrIx];
- END;
- buf[0][index] := ORD(ASCII.LF); INC(index);
- END;
- buf[0][index] := eofByt;
- (*
- * Initialize the scanner state.
- *)
- curLine := 1; lineStart := -2; bp := -1;
- oldEols := 0; apx := 0;
- spaces := 0; (* # new # *)
- NextCh;
- END NewReadBuffer;
-
- PROCEDURE RestoreFileBuffer*();
- VAR count : INTEGER;
- BEGIN
- count := 0;
- WHILE (count < BlkNmbr) & (savedBuf[count] # NIL) DO
- buf[count] := savedBuf[count]; INC(count);
- END;
- END RestoreFileBuffer;
- (* ==================================================================== *)
- BEGIN
- start[ 0] := 43; start[ 1] := 48; start[ 2] := 48; start[ 3] := 48;
- start[ 4] := 48; start[ 5] := 48; start[ 6] := 48; start[ 7] := 48;
- start[ 8] := 48; start[ 9] := 48; start[ 10] := 48; start[ 11] := 48;
- start[ 12] := 48; start[ 13] := 48; start[ 14] := 48; start[ 15] := 48;
- start[ 16] := 48; start[ 17] := 48; start[ 18] := 48; start[ 19] := 48;
- start[ 20] := 48; start[ 21] := 48; start[ 22] := 48; start[ 23] := 48;
- start[ 24] := 48; start[ 25] := 48; start[ 26] := 48; start[ 27] := 48;
- start[ 28] := 48; start[ 29] := 48; start[ 30] := 48; start[ 31] := 48;
- start[ 32] := 48; start[ 33] := 17; start[ 34] := 9; start[ 35] := 30; (* '!' = 33 => state 17 *)
- start[ 36] := 29; start[ 37] := 48; start[ 38] := 36; start[ 39] := 11; (* '%' = 37 => state 48 *)
- start[ 40] := 21; start[ 41] := 23; start[ 42] := 15; start[ 43] := 22;
- start[ 44] := 20; start[ 45] := 16; start[ 46] := 18; start[ 47] := 35;
- start[ 48] := 12; start[ 49] := 12; start[ 50] := 12; start[ 51] := 12;
- start[ 52] := 12; start[ 53] := 12; start[ 54] := 12; start[ 55] := 12;
- start[ 56] := 12; start[ 57] := 12; start[ 58] := 25; start[ 59] := 24;
- start[ 60] := 31; start[ 61] := 19; start[ 62] := 33; start[ 63] := 48;
- start[ 64] := 48; start[ 65] := 1; start[ 66] := 1; start[ 67] := 1;
- start[ 68] := 1; start[ 69] := 1; start[ 70] := 1; start[ 71] := 1;
- start[ 72] := 1; start[ 73] := 1; start[ 74] := 1; start[ 75] := 1;
- start[ 76] := 1; start[ 77] := 1; start[ 78] := 1; start[ 79] := 1;
- start[ 80] := 1; start[ 81] := 1; start[ 82] := 1; start[ 83] := 1;
- start[ 84] := 1; start[ 85] := 1; start[ 86] := 1; start[ 87] := 1;
- start[ 88] := 1; start[ 89] := 1; start[ 90] := 1; start[ 91] := 26;
- start[ 92] := 48; start[ 93] := 27; start[ 94] := 28;
- (* ------------------------------------------- *)
- (* Two special-case characters ... "_" and "`" *)
- (* ------------------------------------------- *)
- start[ 95] := 1; start[ 96] := 44;
- (* ------------------------------------------- *)
- start[ 97] := 1; start[ 98] := 1; start[ 99] := 1;
- start[100] := 1; start[101] := 1; start[102] := 1; start[103] := 1;
- start[104] := 1; start[105] := 1; start[106] := 1; start[107] := 1;
- start[108] := 1; start[109] := 1; start[110] := 1; start[111] := 1;
- start[112] := 1; start[113] := 1; start[114] := 1; start[115] := 1;
- start[116] := 1; start[117] := 1; start[118] := 1; start[119] := 1;
- start[120] := 1; start[121] := 1; start[122] := 1; start[123] := 38;
- start[124] := 42; start[125] := 39; start[126] := 37; start[127] := 48;
- start[128] := 48; start[129] := 48; start[130] := 48; start[131] := 48;
- start[132] := 48; start[133] := 48; start[134] := 48; start[135] := 48;
- start[136] := 48; start[137] := 48; start[138] := 48; start[139] := 48;
- start[140] := 48; start[141] := 48; start[142] := 48; start[143] := 48;
- start[144] := 48; start[145] := 48; start[148] := 48; start[147] := 48;
- start[148] := 48; start[149] := 48; start[150] := 48; start[151] := 48;
- start[152] := 48; start[153] := 48; start[154] := 48; start[155] := 48;
- start[156] := 48; start[157] := 48; start[158] := 48; start[159] := 48;
- start[160] := 48; start[161] := 48; start[162] := 48; start[163] := 48;
- start[164] := 48; start[165] := 48; start[166] := 48; start[167] := 48;
- start[168] := 48; start[169] := 48; start[170] := 48; start[171] := 48;
- start[172] := 48; start[173] := 48; start[174] := 48; start[175] := 48;
- start[176] := 48; start[177] := 48; start[178] := 48; start[179] := 48;
- start[180] := 48; start[181] := 48; start[182] := 48; start[183] := 48;
- start[184] := 48; start[185] := 48; start[186] := 48; start[187] := 48;
- start[188] := 48; start[189] := 48; start[190] := 48; start[191] := 48;
- (* ------------------------------------------- *)
- (* Latin-8 alphabetics start here ... *)
- (* ------------------------------------------- *)
- start[192] := 1; start[193] := 1; start[194] := 1; start[195] := 1;
- start[196] := 1; start[197] := 1; start[198] := 1; start[199] := 1;
- start[200] := 1; start[201] := 1; start[202] := 1; start[203] := 1;
- start[204] := 1; start[205] := 1; start[206] := 1; start[207] := 1;
- start[208] := 1; start[209] := 1; start[210] := 1; start[211] := 1;
- start[212] := 1; start[213] := 1; start[214] := 1;
- (* odd character out *)
- start[215] := 48;
- start[216] := 1; start[217] := 1; start[218] := 1; start[219] := 1;
- start[220] := 1; start[221] := 1; start[222] := 1; start[223] := 1;
- start[224] := 1; start[225] := 1; start[226] := 1; start[227] := 1;
- start[228] := 1; start[229] := 1; start[230] := 1; start[231] := 1;
- start[232] := 1; start[233] := 1; start[234] := 1; start[235] := 1;
- start[236] := 1; start[237] := 1; start[238] := 1; start[239] := 1;
- start[240] := 1; start[241] := 1; start[242] := 1; start[243] := 1;
- start[244] := 1; start[245] := 1; start[246] := 1;
- (* odd character out *)
- start[247] := 48;
- start[248] := 1; start[249] := 1; start[250] := 1; start[251] := 1;
- start[252] := 1; start[253] := 1; start[254] := 1; start[255] := 1;
- LBlkSize := BlkSize;
- END CPascalS.
|