BimboScanner.Mod 16 KB


  1. MODULE BimboScanner;
  2. IMPORT
  3. Trace, Texts, Streams, UTF8Strings, Strings;
  4. CONST
  5. Eot* = 0X;
  6. ObjectMarker = 020X;
  7. LF = 0AX;
  8. (* numtyp values *)
  9. char* = 1; integer* = 2; longinteger* = 3; real* = 4; longreal* = 5;
  10. MaxHDig* = 8; (* maximal hexadecimal longint length *)
  11. MaxHHDig* = 16; (* maximal hexadecimal hugeint length *)
  12. MaxRExp* = 38; (* maximal real exponent *)
  13. MaxLExp* = 308; (* maximal longreal exponent *)
  14. null* = 0; times* = 1; slash* = 2; div* = 3; mod* = 4; and* = 5;
  15. plus* = 6; minus* = 7; or* = 8; eql* = 9; neq* = 10; lss* = 11;
  16. leq* = 12; gtr* = 13; geq* = 14; in* = 15; is* = 16; arrow* = 17;
  17. period* = 18; comma* = 19; colon* = 20; upto* = 21; rparen* = 22;
  18. rbrak* = 23; rbrace* = 24; of* = 25; then* = 26; do* = 27; to* = 28;
  19. by* = 29; lparen* = 30; lbrak* = 31; lbrace* = 32; not* = 33;
  20. becomes* = 34; number* = 35; nil* = 36; true* = 37; false* = 38;
  21. string* = 39; ident* = 40; semicolon* = 41; bar* = 42; end* = 43;
  22. else* = 44; elsif* = 45; until* = 46; if* = 47; case* = 48; while* = 49;
  23. repeat* = 50; for* = 51; loop* = 52; with* = 53; exit* = 54;
  24. passivate* = 55; return* = 56; refines* = 57; implements* = 58;
  25. array* = 59; definition* = 60; object* = 61; record* = 62; pointer* = 63;
  26. begin* = 64; code* = 65; const* = 66; type* = 67; var* = 68;
  27. procedure* = 69; import* = 70; module* = 71; eof* = 72;
  28. comment* = 73; newLine* = 74; question* = 75; finally* = 76;
  29. VAR
  30. reservedChar-, ignoredChar, newChar-: ARRAY 256 OF BOOLEAN;
  31. TYPE
  (* Growable, 0X-terminated character buffer. Add has the parameter list expected of
     a stream sender, so GetWriter can hand out a Streams.Writer that appends to it. *)
  StringMaker = OBJECT
  VAR
    length : LONGINT;        (* number of characters appended so far *)
    data : Strings.String;   (* backing store; reallocated by Add when it runs full *)

    (* Allocate the backing store. Requests below 256 characters are raised to 256. *)
    PROCEDURE &Init(initialSize : LONGINT);
    BEGIN
      IF initialSize < 256 THEN initialSize := 256 END;
      NEW(data, initialSize); length := 0;
    END Init;

    (* Append len characters of buf, starting at index ofs, and keep the buffer 0X-terminated.
       propagate is accepted for signature compatibility and is not used.
       res is always set to Streams.Ok: appending cannot fail here, and the caller's
       result variable must not be left unassigned. *)
    PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: WORD);
    VAR i : LONGINT; n : Strings.String;
    BEGIN
      IF length + len + 1 >= LEN(data) THEN
        (* grow: at least double, plus room for the new data and the terminator *)
        NEW(n, LEN(data) * 2 + len + 1);
        FOR i := 0 TO length - 1 DO n[i] := data[i] END;
        data := n
      END;
      WHILE len > 0 DO
        data[length] := buf[ofs];
        INC(ofs); INC(length); DEC(len)
      END;
      data[length] := 0X;
      res := Streams.Ok
    END Add;

    (* Remove the last n characters.
       NOTE(review): when the remaining length is positive the terminator is written at
       data[length - 1], i.e. the 0X-terminated string ends one character before length.
       Scanner.Get calls Shorten(2) after a comment and depends on this exact behaviour,
       so it is kept as is. *)
    PROCEDURE Shorten(n : LONGINT);
    BEGIN
      DEC(length, n);
      IF length < 0 THEN length := 0 END;
      IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END
    END Shorten;

    (* Reset to the empty string; the backing store is kept. *)
    PROCEDURE Clear*;
    BEGIN
      data[0] := 0X;
      length := 0
    END Clear;

    (* Return a new writer (buffer size 256) whose output is appended through Add. *)
    PROCEDURE GetWriter*() : Streams.Writer;
    VAR w : Streams.Writer;
    BEGIN
      NEW(w, SELF.Add, 256);
      RETURN w
    END GetWriter;

    (* Return the current value of length. *)
    PROCEDURE GetLength*() : LONGINT;
    BEGIN
      RETURN length
    END GetLength;

    (* Return the backing store itself (not a copy). *)
    PROCEDURE GetString*() : Strings.String;
    BEGIN
      RETURN data
    END GetString;

  END StringMaker;
  80. Scanner* = OBJECT
  81. VAR
  82. buffer: Strings.String;
  83. pos-: LONGINT; (*pos in buffer*)
  84. ch-: CHAR; (**look-ahead *)
  85. str-: ARRAY 1024 OF CHAR;
  86. sym- : LONGINT;
  87. numtyp-: INTEGER; (* kind of the last number: char, integer, longinteger, real or longreal (1..5, see the numtyp constants) *)
  88. intval-: LONGINT; (* integer value or string length *)
  89. longintval-: HUGEINT;
  90. realval-: REAL;
  91. lrlval-: LONGREAL;
  92. numStartPos, numEndPos: LONGINT;
  93. lastpos-, curpos-, errpos-: LONGINT; (*pos in text*)
  94. isNummer: BOOLEAN;
  95. commentStr- : StringMaker;
  96. cw : Streams.Writer;
  (* Constructor: create the comment collector with an initial capacity of 1024
     characters and obtain the writer used to append to it. *)
  PROCEDURE &Init;
  BEGIN
    NEW(commentStr, 1024);
    cw := commentStr.GetWriter()
  END Init;
  (* Error hook. Intentionally empty: the error number n is ignored and scanning continues. *)
  PROCEDURE err(n: INTEGER);
  BEGIN
    (* nothing to do *)
  END err;
  (* Advance the look-ahead character ch.
     Takes the next character of buffer, or Eot once the buffer is exhausted.
     A scanner that has no buffer attached (buffer = NIL) reports Eot instead of
     trapping on the NIL dereference.
     curpos is advanced whenever newChar marks ch as the start of a character, so
     that it is not advanced for UTF-8 continuation bytes. *)
  PROCEDURE NextChar*;
  BEGIN
    IF (buffer # NIL) & (pos < LEN(buffer)) THEN
      ch := buffer[pos]; INC(pos)
    ELSE
      ch := Eot
    END;
    IF newChar[ORD(ch)] THEN INC(curpos) END
  END NextChar;
  (* Scan a quoted literal. On entry ch is the opening quote; the same character
     closes the literal. The contents are stored 0X-terminated in str.
     A control character inside the literal reports err(3); a literal that does not
     fit into str reports err(241). In both cases scanning of the literal stops.
     A literal of exactly one character is a character constant: sym becomes number
     and numtyp is set to char, so that numtyp does not keep the kind of a previously
     scanned number. Every other literal yields sym = string. *)
  PROCEDURE Str(VAR sym: LONGINT);
  VAR i: LONGINT; och: CHAR;
  BEGIN
    i := 0; och := ch;
    LOOP
      NextChar;
      IF ch = och THEN EXIT END;
      IF ch < " " THEN err(3); EXIT END;
      IF i = LEN(str)-1 THEN err(241); EXIT END;
      str[i] := ch; INC(i)
    END;
    NextChar; str[i] := 0X;
    IF i = 1 THEN
      sym := number; numtyp := char
    ELSE
      sym := string
    END
  END Str;
  (* Collect an identifier into str, beginning with the current look-ahead character.
     Characters are taken until a reserved character is seen or str is full; a full
     str reports err(240) and the last character is overwritten by the terminator.
     sym is always set to ident. *)
  PROCEDURE Identifier(VAR sym: LONGINT);
  VAR count: LONGINT;
  BEGIN
    count := 0;
    LOOP
      str[count] := ch; INC(count); NextChar;
      IF reservedChar[ORD(ch)] OR (count = LEN(str)) THEN EXIT END
    END;
    IF count = LEN(str) THEN
      err(240); DEC(count)
    END;
    str[count] := 0X;
    sym := ident;
    (* temporary code! delete when moving to ANY and adapt PCT *)
    IF str = "ANY" THEN COPY("PTR", str) END
  END Identifier;
  (* Scan a numeric literal; on entry ch is a decimal digit.
     Sets numtyp and, depending on the kind, intval, longintval, realval or lrlval:
       digits "X"      character constant (numtyp = char; the value is not computed here)
       digits "H"      hexadecimal integer (integer or longinteger, by digit count)
       digits          decimal integer (integer, or longinteger if it exceeds LONGINT)
       digits "." ...  real (exponent "E" or none) or longreal (exponent "D")
     If the digits are followed by "..", the first "." is consumed and ch is set to 7FX
     so that the caller can deliver the range symbol next.
     Local counters: i = characters of the mantissa read, m = significant digits
     (leading zeros skipped), n = digits stored in dig, d = position of the decimal
     point (0 = none). *)
  PROCEDURE Number;
  VAR i, m, n, d, e: INTEGER; dig: ARRAY 24 OF CHAR; f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN;

    (* Return 10 to the power e (e >= 0) by repeated squaring. *)
    PROCEDURE Ten(e: INTEGER): LONGREAL;
    VAR x, p: LONGREAL;
    BEGIN
      x := 1; p := 10;
      WHILE e > 0 DO
        IF ODD(e) THEN x := x*p END;
        e := e DIV 2;
        IF e > 0 THEN p := p*p END (* prevent overflow *)
      END;
      RETURN x
    END Ten;

    (* Value of digit ch. Letters are accepted only if hex is TRUE; otherwise err(2) and 0. *)
    PROCEDURE Ord(ch: CHAR; hex: BOOLEAN): INTEGER;
    BEGIN (* ("0" <= ch) & (ch <= "9") OR ("A" <= ch) & (ch <= "F") *)
      IF ch <= "9" THEN RETURN ORD(ch) - ORD("0")
      ELSIF hex THEN RETURN ORD(ch) - ORD("A") + 10
      ELSE err(2); RETURN 0
      END
    END Ord;

  BEGIN (* ("0" <= ch) & (ch <= "9") *)
    i := 0; m := 0; n := 0; d := 0; long := FALSE;
    LOOP (* read mantissa *)
      IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN
        IF (m > 0) OR (ch # "0") THEN (* ignore leading zeros *)
          IF n < LEN(dig) THEN dig[n] := ch; INC(n) END;
          INC(m)
        END;
        NextChar; INC(i)
      ELSIF ch = "." THEN
        NextChar;
        IF ch = "." THEN (* ellipsis *) ch := 7FX; EXIT
        ELSIF d = 0 THEN (* i > 0 *) d := i
        ELSE err(2)
        END
      ELSE EXIT
      END
    END; (* 0 <= n <= m <= i, 0 <= d <= i *)

    IF d = 0 THEN (* integer *)
      IF n = m THEN
        intval := 0; i := 0; longintval := 0;
        IF ch = "X" THEN (* character *)
          NextChar; numtyp := char
        ELSIF ch = "H" THEN (* hexadecimal *)
          NextChar;
          IF n <= MaxHDig THEN
            numtyp := integer;
            IF (n = MaxHDig) & (dig[0] > "7") THEN (* prevent overflow *) intval := -1 END;
            WHILE i < n DO intval := intval*10H + Ord(dig[i], TRUE); INC(i) END
          ELSIF n <= MaxHHDig THEN
            numtyp := longinteger;
            IF (n = MaxHHDig) & (dig[0] > "7") THEN (* prevent overflow *) longintval := -1 END;
            WHILE i < n DO longintval := Ord(dig[i], TRUE) + longintval*10H; INC(i) END
          ELSE err(203)
          END
        ELSE (* decimal *)
          numtyp := integer;
          (* Stop at the first digit that no longer fits into LONGINT. That digit is
             left in d and the remaining digits are left in dig[i..n-1] for the
             HUGEINT continuation below. Without the ~long guard the loop would read
             and drop all remaining digits. *)
          WHILE (i < n) & ~long DO
            d := Ord(dig[i], FALSE); INC(i);
            IF intval <= (MAX(LONGINT) - d) DIV 10 THEN intval := intval*10 + d
            ELSE long := TRUE
            END
          END;
          IF long THEN
            numtyp := longinteger; longintval := LONG(intval)*10+d;
            WHILE i < n DO
              d := Ord(dig[i], FALSE); INC(i);
              IF longintval*10+d >= 0 THEN longintval := longintval*10 + d
              ELSE err(203)
              END
            END
          END
        END
      ELSE err(203) (* more significant digits than dig can hold *)
      END
    ELSE (* fraction *)
      f := 0; e := 0; expCh := "E";
      WHILE n > 0 DO (* 0 <= f < 1 *) DEC(n); f := (Ord(dig[n], FALSE) + f)/10 END;
      IF (ch = "E") OR (ch = "D") THEN
        expCh := ch; NextChar; neg := FALSE;
        IF ch = "-" THEN neg := TRUE; NextChar
        ELSIF ch = "+" THEN NextChar
        END;
        IF ("0" <= ch) & (ch <= "9") THEN
          REPEAT
            n := Ord(ch, FALSE); NextChar;
            IF e <= (MAX(INTEGER) - n) DIV 10 THEN e := e*10 + n
            ELSE err(203)
            END
          UNTIL (ch < "0") OR ("9" < ch);
          IF neg THEN e := -e END
        ELSE err(2)
        END
      END;
      DEC(e, i-d-m); (* decimal point shift *)
      IF expCh = "E" THEN
        numtyp := real;
        IF (1-MaxRExp < e) & (e <= MaxRExp) THEN
          IF e < 0 THEN realval := SHORT(f / Ten(-e))
          ELSE realval := SHORT(f * Ten(e))
          END
        ELSE err(203)
        END
      ELSE
        numtyp := longreal;
        IF (1-MaxLExp < e) & (e <= MaxLExp) THEN
          IF e < 0 THEN lrlval := f / Ten(-e)
          ELSE lrlval := f * Ten(e)
          END
        ELSE err(203)
        END
      END
    END
  END Number;
  (* Copy the source text of the most recently scanned number into val, truncated to
     fit and always 0X-terminated. If the last symbol was not scanned by Number
     (isNummer = FALSE), val becomes the empty string. A zero-length val is left
     untouched. *)
  PROCEDURE GetNumAsString*(VAR val: ARRAY OF CHAR);
  VAR i, l: LONGINT;
  BEGIN
    (* i must be defined on every path: it is the index of the terminator below *)
    i := 0; l := LEN(val)-1;
    IF l < 0 THEN RETURN END; (* no room even for the terminator *)
    IF isNummer THEN
      WHILE (i < numEndPos-numStartPos) & (i < l) DO
        val[i] := buffer[numStartPos + i];
        INC(i)
      END
    END;
    val[i] := 0X
  END GetNumAsString;
  (* Deliver the next symbol in s.
     Characters marked in ignoredChar are skipped; reaching Eot while skipping yields eof.
     LF is not skipped and is delivered as newLine. Comments are delivered as the symbol
     comment, with their text collected in commentStr. Identifiers starting with an
     upper-case letter are compared against the keywords that begin with that letter.
     Numbers are scanned by Number; numStartPos/numEndPos record their extent in buffer
     for GetNumAsString. Any character without a case label of its own is handed to
     Identifier. *)
  PROCEDURE Get(VAR s: LONGINT);

    (* Read a comment, including nested ones, up to and including its closing bracket.
       Every character read is also written to cw. Do not read after end of file. *)
    PROCEDURE Comment;
    BEGIN
      NextChar; cw.Char(ch);
      LOOP
        LOOP
          WHILE ch = "(" DO
            NextChar; cw.Char(ch);
            IF ch = "*" THEN Comment END
          END;
          IF ch = "*" THEN NextChar; cw.Char(ch); EXIT END;
          IF ch = Eot THEN EXIT END;
          NextChar; cw.Char(ch)
        END;
        IF ch = ")" THEN NextChar; cw.Char(ch); EXIT END;
        IF ch = Eot THEN err(5); EXIT END
      END
    END Comment;

  BEGIN
    REPEAT
      WHILE ignoredChar[ORD(ch)] DO (* ignore control characters *)
        IF ch = Eot THEN
          s := eof; RETURN
        ELSE
          NextChar
        END
      END;
      lastpos := curpos - 1;
      errpos := curpos - 1;
      isNummer := FALSE;
      CASE ch OF (* ch > " " *)
      | LF: s := newLine; NextChar
      | 22X, 27X: Str(s)
      | "#": s := neq; NextChar
      | "&": s := and; NextChar
      | "(": NextChar;
          IF ch = "*" THEN
            (* Comment also records the closing bracket and the look-ahead character
               that follows it; Shorten trims them off the collected text again. *)
            commentStr.Clear; Comment; cw.Update; commentStr.Shorten(2);
            s := comment (* allow recursion without reentrancy *)
          ELSE
            s := lparen
          END
      | ")": s := rparen; NextChar
      | "*": s := times; NextChar
      | "+": s := plus; NextChar
      | ",": s := comma; NextChar
      | "-": s := minus; NextChar
      | ".": NextChar;
          IF ch = "." THEN NextChar; s := upto ELSE s := period END
      | "/": s := slash; NextChar
      | "0".."9":
          isNummer := TRUE; numStartPos := pos-1;
          Number;
          IF (ch = 7FX) & (pos >= 2) & (buffer[pos-1] = ".") THEN
            (* Number stopped at "..": it has consumed the first "." and replaced the
               second one (at buffer[pos-1]) by 7FX, so the number ends one character
               earlier than usual. *)
            numEndPos := pos-2
          ELSE
            numEndPos := pos-1
          END;
          s := number
      | ":": NextChar;
          IF ch = "=" THEN NextChar; s := becomes ELSE s := colon END
      | ";": s := semicolon; NextChar
      | "<": NextChar;
          IF ch = "=" THEN NextChar; s := leq ELSE s := lss END
      | "=": s := eql; NextChar
      | ">": NextChar;
          IF ch = "=" THEN NextChar; s := geq ELSE s := gtr END
      | "A": Identifier(s);
          IF str = "ARRAY" THEN s := array
          ELSIF str = "AWAIT" THEN s := passivate
          END
      | "B": Identifier(s);
          IF str = "BEGIN" THEN s := begin
          ELSIF str = "BY" THEN s := by
          END
      | "C": Identifier(s);
          IF str = "CONST" THEN s := const
          ELSIF str = "CASE" THEN s := case
          ELSIF str = "CODE" THEN s := code
          END
      | "D": Identifier(s);
          IF str = "DO" THEN s := do
          ELSIF str = "DIV" THEN s := div
          ELSIF str = "DEFINITION" THEN s := definition
          END
      | "E": Identifier(s);
          IF str = "END" THEN s := end
          ELSIF str = "ELSE" THEN s := else
          ELSIF str = "ELSIF" THEN s := elsif
          ELSIF str = "EXIT" THEN s := exit
          END
      | "F": Identifier(s);
          IF str = "FALSE" THEN s := false
          ELSIF str = "FOR" THEN s := for
          ELSIF str = "FINALLY" THEN s := finally
          END
      | "I": Identifier(s);
          IF str = "IF" THEN s := if
          ELSIF str = "IN" THEN s := in
          ELSIF str = "IS" THEN s := is
          ELSIF str = "IMPORT" THEN s := import
          ELSIF str = "IMPLEMENTS" THEN s := implements
          END
      | "L": Identifier(s);
          IF str = "LOOP" THEN s := loop END
      | "M": Identifier(s);
          IF str = "MOD" THEN s := mod
          ELSIF str = "MODULE" THEN s := module
          END
      | "N": Identifier(s);
          IF str = "NIL" THEN s := nil END
      | "O": Identifier(s);
          IF str = "OR" THEN s := or
          ELSIF str = "OF" THEN s := of
          ELSIF str = "OBJECT" THEN s := object
          END
      | "P": Identifier(s);
          IF str = "PROCEDURE" THEN s := procedure
          ELSIF str = "POINTER" THEN s := pointer
          END
      | "R": Identifier(s);
          IF str = "RECORD" THEN s := record
          ELSIF str = "REPEAT" THEN s := repeat
          ELSIF str = "RETURN" THEN s := return
          ELSIF str = "REFINES" THEN s := refines
          END
      | "T": Identifier(s);
          IF str = "THEN" THEN s := then
          ELSIF str = "TRUE" THEN s := true
          ELSIF str = "TO" THEN s := to
          ELSIF str = "TYPE" THEN s := type
          END
      | "U": Identifier(s);
          IF str = "UNTIL" THEN s := until END
      | "V": Identifier(s);
          IF str = "VAR" THEN s := var END
      | "W": Identifier(s);
          IF str = "WHILE" THEN s := while
          ELSIF str = "WITH" THEN s := with
          END
      | "G".."H", "J", "K", "Q", "S", "X".."Z": Identifier(s)
      | "[": s := lbrak; NextChar
      | "]": s := rbrak; NextChar
      | "^": s := arrow; NextChar
      | "a".."z": Identifier(s)
      | "{": s := lbrace; NextChar
      | "|": s := bar; NextChar
      | "}": s := rbrace; NextChar
      | "~": s := not; NextChar
      | "?": s := question; NextChar
      | 7FX: s := upto; NextChar (* left behind by Number for ".." *)
      ELSE Identifier(s) (* s := null; NextChar; *)
      END
    UNTIL s >= 0
  END Get;
  (* Advance to the next symbol; the result is stored in the field sym. *)
  PROCEDURE Next*;
  BEGIN
    Get(sym)
  END Next;
  422. END Scanner;
  (* Create a scanner over the contents of text t, starting at character position pos.
     The characters from pos to the end of the text are read under the text's read lock
     and stored UTF-8 encoded in the scanner's buffer. The returned scanner has its
     buffer position at 0 and a blank as look-ahead character, so the first symbol is
     obtained with Next. *)
  PROCEDURE InitWithText*(t: Texts.Text; pos: LONGINT): Scanner;
  VAR buffer: Strings.String; len, i, j, ch, first: LONGINT; r: Texts.TextReader;
    bytesPerChar: LONGINT;
    s : Scanner;
  BEGIN
    t.AcquireRead;
    len := t.GetLength();
    (* initial estimate of 2 bytes per character; the buffer is enlarged on demand below *)
    bytesPerChar := 2;
    NEW(buffer, len * bytesPerChar);
    NEW(r, t);
    r.SetPosition(pos);
    (* only the characters from pos onwards remain to be read; iterating len times
       regardless of pos would read past the end of the text *)
    first := pos;
    IF first < 0 THEN first := 0 END;
    j := 0;
    FOR i := first TO len-1 DO
      r.ReadCh(ch);
      WHILE ~UTF8Strings.EncodeChar(ch, buffer^, j) DO
        (* buffer too small *)
        INC(bytesPerChar);
        ExpandBuf(buffer, bytesPerChar * len)
      END
    END;
    t.ReleaseRead;
    NEW(s); s.buffer := buffer;
    s.pos := 0;
    s.ch := " ";
    RETURN s
  END InitWithText;
  (* Make sure oldBuf holds at least newSize characters. If it is already large enough
     nothing happens; otherwise a new array of newSize characters is allocated, the old
     contents are copied over and oldBuf is redirected to it. *)
  PROCEDURE ExpandBuf(VAR oldBuf: Strings.String; newSize: LONGINT);
  VAR grown: Strings.String; k, have: LONGINT;
  BEGIN
    have := LEN(oldBuf^);
    IF have < newSize THEN
      NEW(grown, newSize);
      k := 0;
      WHILE k < have DO
        grown[k] := oldBuf[k]; INC(k)
      END;
      oldBuf := grown
    END
  END ExpandBuf;
  (* Fill reservedChar: TRUE for every character that ends an identifier, i.e. all
     characters up to and including the blank (20X), the punctuation characters
     listed below, both quote characters and 7FX; FALSE for everything else. *)
  PROCEDURE InitReservedChars;
  VAR code, k: LONGINT; marks: ARRAY 32 OF CHAR;
  BEGIN
    (* control characters and blank (TAB, CR, ESC ...) *)
    FOR code := 0 TO LEN(reservedChar)-1 DO
      reservedChar[code] := code <= 20H
    END;
    (* operators and delimiters *)
    COPY("#&()*+,-./?:;<=>[]^{|}~$", marks);
    k := 0;
    WHILE marks[k] # 0X DO
      reservedChar[ORD(marks[k])] := TRUE; INC(k)
    END;
    reservedChar[22H] := TRUE; (* 22X = " *)
    reservedChar[27H] := TRUE; (* 27X = ' *)
    reservedChar[7FH] := TRUE  (* 7FX = del *)
  END InitReservedChars;
  (* Fill newChar: FALSE for the byte values 80H..0BFH, TRUE for all others.
     UTF-8 encoded characters with bits 10XXXXXX do not start a new unicode character. *)
  PROCEDURE InitNewChar;
  VAR code: LONGINT;
  BEGIN
    FOR code := 0 TO LEN(newChar)-1 DO
      newChar[code] := ~((80H <= code) & (code <= 0BFH))
    END
  END InitNewChar;
  (* Fill ignoredChar: TRUE for every character up to and including the blank,
     except LF; FALSE for all other characters. *)
  PROCEDURE InitIgnoredChar;
  VAR code: LONGINT;
  BEGIN
    FOR code := 0 TO LEN(ignoredChar)-1 DO
      IF (code > ORD(" ")) OR (code = ORD(LF)) THEN
        ignoredChar[code] := FALSE
      ELSE
        ignoredChar[code] := TRUE
      END
    END
  END InitIgnoredChar;
  BEGIN
    (* module initialisation: fill the three character class tables
       (the initialisers are independent of each other) *)
    InitIgnoredChar;
    InitNewChar;
    InitReservedChars
  END BimboScanner.