FoxScanner.Mod 51 KB


  1. MODULE FoxScanner; (** AUTHOR "fof & fn"; PURPOSE "Oberon Compiler: Scanner"; **)
  2. (* (c) fof ETH Zürich, 2009 *)
  3. IMPORT Streams, Strings, Diagnostics, Basic := FoxBasic, D := Debugging, Commands, StringPool;
  CONST
    (* switch for debugging output *)
    Trace = FALSE;

    (* overall scanner limitation: maximal number of characters of an identifier *)
    MaxIdentifierLength* = 128;

    (* parametrization of the numeric scanner *)
    MaxHexDigits* = 8; (* maximal hexadecimal longint length *)
    MaxHugeHexDigits* = 16; (* maximal hexadecimal hugeint length *)
    MaxRealExponent* = 38; (* maximal real exponent *)
    MaxLongrealExponent* = 308; (* maximal longreal exponent *)

    (* special characters recognized by the scanner *)
    EOT* = 0X;
    LF* = 0AX;
    CR* = 0DX;
    TAB* = 09X;
    ESC* = 1BX;
  TYPE
    (* dynamically allocated string as provided by module Strings *)
    StringType* = Strings.String;
    (* identifiers are represented by their index in the string pool *)
    IdentifierType* = StringPool.Index;
    (* fixed-size buffer: MaxIdentifierLength characters plus the terminating 0X *)
    IdentifierString* = ARRAY MaxIdentifierLength + 1 OF CHAR;
  19. CONST
  20. (** tokens *)
  21. (*
  22. note: order of tokens is important for the parser, do not modify without looking it up
  23. FoxProgTools.Enum --export --linefeed=6
  24. None
  25. (* RelationOps: Equal ... Is *)
  26. Equal DotEqual Unequal DotUnequal
  27. Less DotLess LessEqual DotLessEqual Greater DotGreater GreaterEqual DotGreaterEqual
  28. LessLessQ GreaterGreaterQ Questionmarks ExclamationMarks
  29. In Is
  30. (* MulOps: Times ... And *)
  31. Times TimesTimes DotTimes PlusTimes Slash Backslash DotSlash Div Mod And
  32. (* AddOps: Or ... Minus *)
  33. Or Plus Minus
  34. (* Prefix Unary Operators Plus ... Not *)
  35. Not
  36. (* expressions may start with Plus ... Identifier *)
  37. LeftParenthesis LeftBracket LeftBrace Number Character String Nil Imag True False Self Result New Identifier
  38. (* statements may start with Self ... Begin *)
  39. If Case While Repeat For Loop With Exit Await Return Begin
  40. (* symbols, expressions and statements cannot start with *)
  41. Semicolon Transpose RightBrace RightBracket RightParenthesis
  42. Questionmark ExclamationMark
  43. LessLess GreaterGreater
  44. Upto Arrow Period Comma Colon Of Then Do To By Becomes Bar End Else Elsif Until Finally
  45. (* declaration elements *)
  46. Code Const Type Var Out Procedure Operator Import Definition Module Cell CellNet Extern
  47. (* composite type symbols *)
  48. Array Object Record Pointer Enum Port Address Size Alias
  49. (* assembler constants *)
  50. Ln PC PCOffset
  51. (* number types *)
  52. Shortint Integer Longint Hugeint Real Longreal
  53. Comment EndOfText Escape
  54. ~
  55. *)
  56. None*= 0;
  57. (* RelationOps: Equal ... Is *)
  58. Equal*= 1; DotEqual*= 2; Unequal*= 3; DotUnequal*= 4; Less*= 5; DotLess*= 6;
  59. LessEqual*= 7; DotLessEqual*= 8; Greater*= 9; DotGreater*= 10; GreaterEqual*= 11; DotGreaterEqual*= 12;
  60. LessLessQ*= 13; GreaterGreaterQ*= 14; Questionmarks*= 15; ExclamationMarks*= 16; In*= 17; Is*= 18;
  61. (* MulOps: Times ... And *)
  62. Times*= 19; TimesTimes*= 20; DotTimes*= 21; PlusTimes*= 22; Slash*= 23; Backslash*= 24;
  63. DotSlash*= 25; Div*= 26; Mod*= 27; And*= 28;
  64. (* AddOps: Or ... Minus *)
  65. Or*= 29; Plus*= 30; Minus*= 31;
  66. (* Prefix Unary Operators Plus ... Not *)
  67. Not*= 32;
  68. (* expressions may start with Plus ... Identifier *)
  69. LeftParenthesis*= 33; LeftBracket*= 34; LeftBrace*= 35; Number*= 36; Character*= 37; String*= 38;
  70. Nil*= 39; Imag*= 40; True*= 41; False*= 42; Self*= 43; Result*= 44;
  71. New*= 45; Identifier*= 46;
  72. (* statementy may start with Self ... Begin *)
  73. If*= 47; Case*= 48; While*= 49; Repeat*= 50; For*= 51; Loop*= 52;
  74. With*= 53; Exit*= 54; Await*= 55; Return*= 56; Begin*= 57;
  75. (* symbols, expressions and statements cannot start with *)
  76. Semicolon*= 58; Transpose*= 59; RightBrace*= 60; RightBracket*= 61; RightParenthesis*= 62; Questionmark*= 63;
  77. ExclamationMark*= 64; LessLess*= 65; GreaterGreater*= 66; Upto*= 67; Arrow*= 68; Period*= 69;
  78. Comma*= 70; Colon*= 71; Of*= 72; Then*= 73; Do*= 74; To*= 75;
  79. By*= 76; Becomes*= 77; Bar*= 78; End*= 79; Else*= 80; Elsif*= 81;
  80. Until*= 82; Finally*= 83;
  81. (* declaration elements *)
  82. Code*= 84; Const*= 85; Type*= 86; Var*= 87; Out*= 88; Procedure*= 89;
  83. Operator*= 90; Import*= 91; Definition*= 92; Module*= 93; Cell*= 94; CellNet*= 95;
  84. Extern*= 96;
  85. (* composite type symbols *)
  86. Array*= 97; Object*= 98; Record*= 99; Pointer*= 100; Enum*= 101; Port*= 102;
  87. Address*= 103; Size*= 104; Alias*= 105;
  88. (* assembler constants *)
  89. Ln*= 106; PC*= 107; PCOffset*= 108;
  90. (* number types *)
  91. Shortint*= 109; Integer*= 110; Longint*= 111; Hugeint*= 112; Real*= 113; Longreal*= 114;
  92. Comment*= 115; EndOfText*= 116; Escape*= 117;
  93. SingleQuote = 27X; DoubleQuote* = 22X;
  94. Ellipsis = 7FX; (* used in Scanner.GetNumber to return with ".." when reading an interval like 3..5 *)
  95. Uppercase*=0;
  96. Lowercase*=1;
  97. Unknown*=2;
  98. TYPE
  99. (* keywords book keeping *)
  100. Keyword* = ARRAY 32 OF CHAR;
  (* Two-way keyword table: the inherited hash table maps a string pool index to a
     keyword index, the array "table" maps a keyword index back to its string pool index. *)
  KeywordTable* = OBJECT(Basic.HashTableInt); (* string -> index *)
  VAR table: POINTER TO ARRAY OF LONGINT;

    (* create a table for keyword indices 0 .. size-1; all reverse entries start out as -1 (unused) *)
    PROCEDURE &InitTable*(size: LONGINT);
    VAR slot: LONGINT;
    BEGIN
      Init(size);
      NEW(table, size);
      slot := 0;
      WHILE slot < size DO
        table[slot] := -1;
        INC(slot)
      END;
    END InitTable;

    (* keyword index registered for identifier, -1 if there is none *)
    PROCEDURE IndexByIdentifier*(identifier: IdentifierType): LONGINT;
    BEGIN
      IF ~Has(identifier) THEN RETURN -1 END;
      RETURN GetInt(identifier)
    END IndexByIdentifier;

    (* keyword index registered for name, -1 if there is none;
       the name is looked up via its string pool index *)
    PROCEDURE IndexByString*(CONST name: ARRAY OF CHAR): LONGINT;
    VAR poolIndex: LONGINT;
    BEGIN
      StringPool.GetIndex(name, poolIndex);
      IF ~Has(poolIndex) THEN RETURN -1 END;
      RETURN GetInt(poolIndex)
    END IndexByString;

    (* string pool index stored for keyword index; yields -1 for an unused slot *)
    PROCEDURE IdentifierByIndex*(index: LONGINT; VAR identifier: IdentifierType);
    BEGIN
      identifier := table[index]
    END IdentifierByIndex;

    (* keyword text stored for keyword index; yields the empty string for an unused slot *)
    PROCEDURE StringByIndex*(index: LONGINT; VAR name: ARRAY OF CHAR);
    VAR poolIndex: LONGINT;
    BEGIN
      poolIndex := table[index];
      IF poolIndex >= 0 THEN
        StringPool.GetString(poolIndex, name);
      ELSE
        name := ""
      END;
    END StringByIndex;

    (* register keyword "name" under keyword index "index" in both directions *)
    PROCEDURE PutString*(CONST name: ARRAY OF CHAR; index: LONGINT);
    VAR poolIndex: LONGINT;
    BEGIN
      StringPool.GetIndex(name, poolIndex);
      table[index] := poolIndex;
      PutInt(poolIndex, index);
    END PutString;

  END KeywordTable;
  TYPE
    (* token number, one of the token constants declared above *)
    Token* = LONGINT;
    Position* = Basic.Position;

    (**
      symbol: data structure for the data transfer of the last read input from the scanner to the parser
    **)
    Symbol* = RECORD
      position*: Position;
      token*: Token; (* token of symbol *)

      (* identifiers *)
      identifier*: IdentifierType; (* identifier *)
      identifierString*: IdentifierString; (* cache of identifier's string *)

      (* strings *)
      string*: StringType; (* string or identifier *)
      stringLength*: LONGINT; (* length of string, if stringLength = 2 then this may be interpreted as character and integer = ORD(ch) *)

      (* numbers and characters *)
      numberType*: LONGINT; (* Integer, HugeInteger, Real or Longreal *)
      integer*: LONGINT;
      hugeint*: HUGEINT; (*! unify longint and hugeint *)
      character*: CHAR;
      real*: LONGREAL;
    END;
  (* Growable character buffer. Characters are appended through Add (directly or via the
     writer returned by GetWriter); the buffer is always kept 0X-terminated. *)
  StringMaker* = OBJECT (* taken from TF's scanner *)
  VAR length : LONGINT; (* number of characters appended so far *)
    data : StringType; (* buffer, LEN(data) > length *)

    (* allocate the buffer; at least 256 characters are reserved *)
    PROCEDURE &Init*(initialSize : LONGINT);
    BEGIN
      IF initialSize < 256 THEN initialSize := 256 END;
      NEW(data, initialSize); length := 0;
    END Init;

    (* Append len characters of buf, starting at buf[ofs]. The signature is the one expected
       by the Streams.Writer created in GetWriter; propagate is not used here.
       res is set to Streams.Ok: appending to the in-memory buffer cannot fail. *)
    PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: LONGINT);
    VAR i, capacity : LONGINT; n: StringType;
    BEGIN
      IF length + len + 1 >= LEN(data) THEN
        (* grow geometrically so that many small appends do not copy the buffer each time;
           never allocate less than what this request needs *)
        capacity := 2 * LEN(data);
        IF capacity < LEN(data) + len + 1 THEN capacity := LEN(data) + len + 1 END;
        NEW(n, capacity); FOR i := 0 TO length - 1 DO n[i] := data[i] END;
        data := n
      END;
      WHILE len > 0 DO
        data[length] := buf[ofs];
        INC(ofs); INC(length); DEC(len)
      END;
      data[length] := 0X;
      res := Streams.Ok
    END Add;

    (* remove last n characters; afterwards the character at index length-1 is overwritten
       with 0X, i.e. the last counted character is expected to be the terminator *)
    PROCEDURE Shorten*(n : LONGINT);
    BEGIN
      DEC(length, n);
      IF length < 0 THEN length := 0 END;
      IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END
    END Shorten;

    (* reset to the empty string; the buffer itself is kept and reused *)
    PROCEDURE Clear*;
    BEGIN
      data[0] := 0X;
      length := 0
    END Clear;

    (* new writer (buffer size 256) whose output is appended to this string maker via Add *)
    PROCEDURE GetWriter*() : Streams.Writer;
    VAR w : Streams.Writer;
    BEGIN
      NEW(w, SELF.Add, 256);
      RETURN w
    END GetWriter;

    (* New reader over the current content.
       NOTE(review): the reader is created large enough for the whole content because
       Streams.StringReader presumably stores no more characters than the size it was
       created with - confirm against Streams.StringReader.Set. *)
    PROCEDURE GetReader*(): Streams.Reader;
    VAR r: Streams.StringReader; size: LONGINT;
    BEGIN
      size := length + 1;
      IF size < 256 THEN size := 256 END;
      NEW(r, size);
      r.Set(data^);
      RETURN r
    END GetReader;

    (* return the internal buffer itself (not a copy) and its length;
       the returned string changes with every later Add, Shorten or Clear *)
    PROCEDURE GetString*(VAR len: LONGINT) : StringType;
    BEGIN
      len := length;
      RETURN data
    END GetString;

    (* return a freshly allocated copy of the content and its length *)
    PROCEDURE GetStringCopy*(VAR len: LONGINT): StringType;
    VAR new: StringType;
    BEGIN
      len := length;
      NEW(new,len+1);
      COPY(data^,new^);
      RETURN new
    END GetStringCopy;

  END StringMaker;
  227. (** scanner reflects the following EBNF
  228. Symbol = String | Token | Number | Keyword | Identifier.
  229. Token = | '#' | '&' | '(' ['*' any '*' ')'] | ')' | '*'['*'] | '+'['*'] | ',' | '-' | '.' [ '.' | '*' | '/' | '=' | '#' | '>'['='] | '<' ['=']
  230. | '/' | ':' ['='] | ';' | '<' ['=' | '<' ['?'] ] | '=' | '>' [ '=' | '>' ['?']]
  231. | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '\' | '`' | '?' ['?'] | '!' ['!']
  232. Identifier = Letter {Letter | Digit | '_'}.
  233. Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z'.
  234. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
  235. String = '"' {Character} '"' | "'" {Character} "'".
  236. Character = Digit [HexDigit] 'X'.
  237. Number = Integer | Real.
  238. Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit}.
  239. Real = Digit {Digit} '.' {Digit} [ScaleFactor].
  240. ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
  241. HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
  242. **)
  243. Scanner* = OBJECT
  244. VAR
  245. (* helper state information *)
  246. source-: StringType;
  247. reader-: Streams.Reader; (* source *)
  248. diagnostics: Diagnostics.Diagnostics; (* error logging *)
  249. ch-: CHAR; (* look-ahead character *)
  250. position-: Position;
  251. (*
  252. position-: LONGINT; (* current position *)
  253. line-: LONGINT;
  254. *)
  255. error-: BOOLEAN; (* if error occurred during scanning *)
  256. firstIdentifier: BOOLEAN; (* support of lower vs. upper case keywords *)
  257. case-: LONGINT;
  258. stringWriter: Streams.Writer;
  259. stringMaker: StringMaker;
  260. useLineNumbers*: BOOLEAN;
  261. (*
  262. source: name of the source code for reference in error outputs
  263. reader: input stream
  264. position: reference position (offset) of the input stream , for error output
  265. diagnostics: error output object
  266. *)
  PROCEDURE & InitializeScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; pos: Position; diagnostics: Diagnostics.Diagnostics );
  BEGIN
    (* plain state *)
    error := FALSE;
    case := Unknown;
    firstIdentifier := TRUE;
    useLineNumbers := FALSE;
    SELF.diagnostics := diagnostics;
    SELF.reader := reader;

    (* string collection: a string maker and a writer appending to it *)
    NEW(stringMaker, 1024);
    stringWriter := stringMaker.GetWriter();

    (* private copy of the source name *)
    NEW(SELF.source, Strings.Length(source) + 1);
    COPY(source, SELF.source^);

    (* read the look-ahead character; the start position is decremented first
       because GetNextCharacter increments it again *)
    SELF.position := pos;
    DEC(SELF.position.start); (* one symbol lookahead *)
    ch := " ";
    IF reader # NIL THEN GetNextCharacter ELSE ch := EOT END;

    IF Trace THEN D.Str( "New scanner " ); D.Ln; END;
    (* without a source name the reader itself is recorded in the position *)
    IF source = "" THEN SELF.position.reader := reader END;
  END InitializeScanner;
  (* forget the keyword case convention: case becomes Unknown and firstIdentifier is set again *)
  PROCEDURE ResetCase*; (*! needs a better naming ! *)
  BEGIN
    case := Unknown;
    firstIdentifier := TRUE;
  END ResetCase;
  (* set the keyword case convention explicitly; c is one of Uppercase, Lowercase, Unknown *)
  PROCEDURE SetCase*(c: LONGINT);
  BEGIN
    SELF.case := c
  END SetCase;
  294. (** report an error that occurred during scanning **)
  PROCEDURE ErrorS(CONST msg: ARRAY OF CHAR);
  BEGIN
    (* hand the message text to the diagnostics for the current source and position,
       then mark the scanner as erroneous *)
    Basic.Error(SELF.diagnostics, SELF.source^, SELF.position, msg);
    SELF.error := TRUE
  END ErrorS;
  300. (** report an error that occurred during scanning **)
  PROCEDURE Error( code: INTEGER );
  BEGIN
    (* hand the error code (with an empty message text) to the diagnostics for the current
       source and position, then mark the scanner as erroneous *)
    Basic.ErrorC(SELF.diagnostics, SELF.source^, SELF.position, code, "");
    SELF.error := TRUE
  END Error;
  306. (** get next character, end of text results in ch = EOT **)
  PROCEDURE GetNextCharacter*;
  BEGIN
    reader.Char(ch);
    INC(position.start);
    IF ch = LF THEN
      (* line feed: count the line and remember where the next line starts *)
      INC(position.line);
      position.linepos := position.start + 1
    END;
    (*
    (* not necessary, as Streams returns 0X if reading failed, but in case Streams.Reader.Char is modified ... *)
    IF reader.res # Streams.Ok THEN ch := EOT END;
    *)
  END GetNextCharacter;
  316. (*
  317. The following is an implementation of the KMP algorithm used in order to traverse strings until some pattern occurs.
  318. It is not necessary for our implementation of string escape sequences, because the first character of the pattern does not occur in the pattern elsewhere
  319. I found the code useful and keep it here for the time being....
  320. (* generate a table to be able to quickly search for string containing overlaps - KMP algorithm *)
  321. PROCEDURE MakeOverlapTable*(CONST pattern: ARRAY OF CHAR; VAR table: ARRAY OF LONGINT);
  322. VAR i, cnd: LONGINT;
  323. BEGIN
  324. ASSERT(pattern[0] # 0X);
  325. (* if first character did not match: reset search *)
  326. table[0] := -1;
  327. (* if second character did not match: compare to first *)
  328. IF pattern[1] # 0X THEN
  329. table[1] := 0;
  330. END;
  331. (* for all other characters: switch back to previous overlay in pattern *)
  332. i := 2; cnd := 0;
  333. WHILE(pattern[i] # 0X) DO
  334. (* do patterns [i-cnd, i-1] match with pattern[0.. cnd] ? *)
  335. IF pattern[i-1] = pattern[cnd] THEN
  336. INC(cnd); table[i] := cnd; INC(i);
  337. (* no, switch back to last overlap, if possible *)
  338. ELSIF cnd > 0 THEN cnd := table[cnd]
  339. (* not possible: restart at beginning *)
  340. ELSE table[i] := 0; INC(i)
  341. END;
  342. END;
  343. END MakeOverlapTable;
  344. (* using KMP substring search algorithm consume and reproduce all characters of a string until endString *)
  345. PROCEDURE GetString(CONST endString: ARRAY OF CHAR);
  346. VAR escapePos: LONGINT; ech: CHAR; i: LONGINT; table: ARRAY 16 OF LONGINT;
  347. next: LONGINT;
  348. PROCEDURE Append(ch :CHAR);
  349. BEGIN
  350. IF ch = 0X THEN
  351. ErrorS("Unexpected end of text in string"); error := TRUE
  352. ELSE
  353. stringWriter.Char(ch)
  354. END;
  355. END Append;
  356. BEGIN
  357. MakeOverlapTable(endString, table);
  358. (* traverse *)
  359. escapePos := 0; ech := endString[0];
  360. GetNextCharacter;
  361. REPEAT
  362. IF ch = ech THEN
  363. INC(escapePos); ech := endString[escapePos];
  364. GetNextCharacter;
  365. ELSIF escapePos = 0 THEN (* frequent case *)
  366. Append(ch); GetNextCharacter;
  367. ELSE
  368. (* overlaps ? *)
  369. next := table[escapePos];
  370. IF next < 0 THEN next := 0 END;
  371. (* account for "forgotten" characters *)
  372. FOR i := 0 TO escapePos-1-next DO
  373. Append(endString[i]);
  374. END;
  375. (* to next overlapping ? *)
  376. escapePos := table[escapePos];
  377. (* no overlapping *)
  378. IF escapePos < 0 THEN
  379. Append(ch);
  380. escapePos := 0;
  381. GetNextCharacter;
  382. END;
  383. ech := endString[escapePos];
  384. END;
  385. UNTIL (ch = EOT) OR (ech = 0X);
  386. END GetString;
  387. *)
  388. (* simple case can be utilized when endString does not contain first character, which is the case for our string convention *)
  PROCEDURE ConsumeStringUntil(CONST endString: ARRAY OF CHAR; useControl: BOOLEAN);
  (* Append all characters up to (not including) the first occurrence of endString to
     stringWriter. On entry ch is the character in front of the string content; on exit ch
     is the character following endString.
     If useControl is set, the control sequences \\ \" \n (appends CR LF) and \t are
     translated; any other sequence is an error and ends the scan.
     Reaching the end of text before endString is an error; it is reported once, at the
     position where the string started. *)
  VAR escapePos: LONGINT; ech: CHAR; i: LONGINT; startPosition: LONGINT;
    failed: BOOLEAN; (* errors of this call only, unlike the sticky object field "error" *)
  CONST
    Control = '\';
    Delimiter = '"';
  BEGIN
    (* traverse *)
    escapePos := 0; ech := endString[0]; startPosition := position.start;
    failed := FALSE;
    GetNextCharacter;
    REPEAT
      IF ch = ech THEN
        (* one more character of endString matched *)
        INC(escapePos); ech := endString[escapePos];
        GetNextCharacter;
      ELSIF useControl & (ch = Control) THEN
        GetNextCharacter;
        IF (ch = Control) OR (ch = Delimiter) THEN
          stringWriter.Char(ch)
        ELSIF ch = 'n' THEN
          stringWriter.Char(CR); stringWriter.Char(LF);
        ELSIF ch = 't' THEN
          stringWriter.Char(TAB)
        ELSE
          ErrorS("Unknown control sequence"); failed := TRUE
        END;
        GetNextCharacter
      ELSIF escapePos = 0 THEN (* frequent case *)
        (* at end of text nothing is appended; the loop ends and the error is reported below *)
        IF ch # EOT THEN stringWriter.Char(ch); GetNextCharacter END;
      ELSE
        (* account for "forgotten" characters: the partial match was part of the content *)
        FOR i := 0 TO escapePos-1 DO
          stringWriter.Char(endString[i]);
        END;
        (* restart *)
        ech := endString[0]; escapePos := 0;
      END;
    UNTIL (ch = EOT) OR (ech = 0X) OR failed;
    (* ech = 0X means endString was matched completely: not an error even if the text ends right behind it *)
    IF (ech # 0X) & (ch = EOT) THEN position.start := startPosition; ErrorS("Unexpected end of text in string") END;
  END ConsumeStringUntil;
  (* Read an escaped string into symbol.string / symbol.stringLength.
     On entry ch is either the opening quote or a user-chosen escape character followed by
     the opening quote. The string ends with quote [escape] backslash. Control sequences
     are only translated when no escape character was given. *)
  PROCEDURE GetEscapedString(VAR symbol: Symbol);
  VAR terminator: ARRAY 4 OF CHAR; escape: CHAR;
  BEGIN
    (* backslash already consumed *)
    stringMaker.Clear;
    escape := 0X;
    IF ch # '"' THEN
      escape := ch; GetNextCharacter
    END;
    ASSERT((ch = '"') OR (ch = "'"));

    (* build the terminating sequence *)
    terminator[0] := ch;
    IF escape = 0X THEN
      terminator[1] := '\';
      terminator[2] := 0X
    ELSE
      terminator[1] := escape;
      terminator[2] := '\';
      terminator[3] := 0X
    END;
    ConsumeStringUntil(terminator, escape = 0X);

    (* terminate, flush and hand a copy to the symbol *)
    stringWriter.Char(0X);
    stringWriter.Update;
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
  END GetEscapedString;
  463. (** get a string starting at current position
  464. string = {'"' {Character} '"'} | {"'" {Character} "'"}.
  465. **)
  466. (* multiline indicates that a string may occupy more than one lines, either concatenated or via multi-strings " " " "
  467. *)
  PROCEDURE GetString(VAR symbol: Symbol; multiLine, multiString, useControl: BOOLEAN);
  (* On entry ch is the opening quote; the same character closes the string.
     multiLine: characters below " " (e.g. line breaks) are accepted inside the string.
     multiString: several strings separated only by blanks are concatenated.
     useControl: \\, \<quote>, \n (appends CR LF) and \t are translated.
     The result (including a terminating 0X) is stored in symbol.string / symbol.stringLength. *)
  CONST control = '\';
  VAR quote: CHAR; failed: BOOLEAN;

    (* append one character to the string; 0X is rejected as premature end of text *)
    PROCEDURE Append(c: CHAR);
    BEGIN
      IF c # 0X THEN
        stringWriter.Char(c)
      ELSE
        ErrorS("Unexpected end of text in string"); failed := TRUE
      END;
    END Append;

  BEGIN
    stringMaker.Clear;
    quote := ch; failed := FALSE;
    REPEAT
      (* one string part: everything up to the closing quote *)
      LOOP
        IF failed THEN EXIT END;
        GetNextCharacter;
        IF (ch = quote) OR (ch = EOT) THEN
          EXIT
        ELSIF useControl & (ch = control) THEN
          GetNextCharacter;
          IF (ch = control) OR (ch = quote) THEN
            Append(ch)
          ELSIF ch = 'n' THEN
            Append(CR); Append(LF);
          ELSIF ch = 't' THEN
            Append(TAB)
          ELSE
            ErrorS("Unknown control sequence")
          END;
        ELSIF ~multiLine & (ch < " ") THEN
          Error( Basic.StringIllegalCharacter ); EXIT
        ELSE
          Append(ch)
        END;
      END;
      IF ch = EOT THEN
        ErrorS("Unexpected end of text in string")
      ELSE
        (* step over the closing quote; for multi-strings also over blanks in front of a possible next part *)
        GetNextCharacter;
        IF multiString THEN SkipBlanks END;
      END;
    UNTIL ~multiString OR (ch # quote);
    stringWriter.Char(0X);
    stringWriter.Update;
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
  END GetString;
  514. (**
  515. Identifier = Letter {Letter | Digit | '_'} .
  516. Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
  517. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
  518. '_' is the underscore character
  519. **)
  PROCEDURE GetIdentifier( VAR symbol: Symbol );
  (* Read an identifier starting with the current character ch into symbol.identifierString
     and store its string pool index in symbol.identifier. Reading stops at the first
     reserved character.
     Identifiers of up to MaxIdentifierLength characters are accepted. A longer identifier
     is reported as Basic.IdentifierTooLong, cut to MaxIdentifierLength-1 characters, and
     its remaining characters are skipped so that they are not scanned as a second identifier. *)
  VAR i: LONGINT;
  BEGIN
    i := 0;
    REPEAT
      symbol.identifierString[i] := ch; INC( i ); GetNextCharacter
    UNTIL reservedCharacter[ORD( ch )] OR (i = MaxIdentifierLength);
    IF ~reservedCharacter[ORD( ch )] THEN
      (* stopped by the length limit while the identifier still continues *)
      Error( Basic.IdentifierTooLong ); DEC( i );
      (* EOT is tested explicitly so that the loop ends even if 0X is not marked as reserved *)
      WHILE (ch # EOT) & ~reservedCharacter[ORD( ch )] DO GetNextCharacter END
    END;
    symbol.identifierString[i] := 0X;
    StringPool.GetIndex(symbol.identifierString, symbol.identifier);
  END GetIdentifier;
  529. (**
  530. Number = Integer | Real.
  531. Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit}.
  532. Real = Digit {Digit} '.' {Digit} [ScaleFactor].
  533. ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
  534. HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
  535. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
  536. **)
  PROCEDURE GetNumber(VAR symbol: Symbol): Token;
  (* Scan a number or a character constant; on entry ch is a decimal digit.
     Returns Number, or Character for literals of the form <hex digits>X.
     Results are stored in symbol: numberType (Integer, Hugeint, Real or Longreal),
     integer / hugeint, real, character. For literals that are not written with the
     0x / 0b prefix, the literal's text is cached in symbol.identifierString.
     The character ' is accepted as a digit separator.
     If an integer is followed by "..", ch is set to Ellipsis on return. *)
  VAR i, nextInt, m, n, d, e, si: LONGINT;
    dig: ARRAY 24 OF CHAR;
    f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN;
    result: Token;
    hugeint, tenh, number: HUGEINT;
    digits: LONGINT;

    (** 10^e **)
    PROCEDURE Ten( e: LONGINT ): LONGREAL;
    VAR x, p: LONGREAL;
    BEGIN
      x := 1; p := 10;
      WHILE e > 0 DO
        IF ODD( e ) THEN x := x * p END;
        e := e DIV 2;
        IF e > 0 THEN p := p * p END (* prevent overflow *)
      END;
      RETURN x
    END Ten;

    (** return decimal number associated to character ch , error if none **)
    PROCEDURE Decimal( ch: CHAR ): LONGINT;
    BEGIN (* ("0" <= ch) & (ch <= "9") OR ("A" <= ch) & (ch <= "F") *)
      IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" ) ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0 END
    END Decimal;

    (** return hexadecimal number associated to character ch, error if none **)
    PROCEDURE Hexadecimal( ch: CHAR ): LONGINT;
    BEGIN
      IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" )
      ELSIF ch <= "F" THEN RETURN ORD( ch ) - ORD( "A" ) + 10
      ELSIF ch <= "f" THEN RETURN ORD( ch ) - ORD( "a" ) + 10
      ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0
      END
    END Hexadecimal;

    PROCEDURE IsHexDigit(ch: CHAR): BOOLEAN;
    BEGIN
      RETURN (ch >= "0") & (ch <= "9") OR (ch >= "a") & (ch <="f") OR (ch >= "A") & (ch <= "F")
    END IsHexDigit;

    PROCEDURE IsBinaryDigit(ch: CHAR): BOOLEAN;
    BEGIN
      RETURN (ch >= "0") & (ch <= "1")
    END IsBinaryDigit;

    (** cache character c of the literal in symbol.identifierString; characters beyond the
      capacity of the cache are dropped instead of being written past the end of the array **)
    PROCEDURE Remember( c: CHAR );
    BEGIN
      IF si < MaxIdentifierLength THEN symbol.identifierString[si] := c; INC( si ) END
    END Remember;

  BEGIN (* ("0" <= ch) & (ch <= "9") *)
    result := Number;
    i := 0; m := 0; n := 0; d := 0; si := 0; long := FALSE;
    number := 0; digits := 0; (* accumulators of the 0x / 0b branches must start at zero *)
    IF (ch = "0") THEN
      IF (reader.Peek() = "x") THEN (* hex number *)
        digits := 0;
        GetNextCharacter; GetNextCharacter;
        IF (ch = "'")& IsHexDigit(reader.Peek()) THEN GetNextCharacter END;
        WHILE IsHexDigit(ch) DO
          number := number * 10H + Hexadecimal(ch);
          INC(digits);
          GetNextCharacter;
          IF (ch = "'") & IsHexDigit(reader.Peek()) THEN GetNextCharacter END;
        END;
        symbol.hugeint := number;
        symbol.integer := SHORT(number);
        (* the number of written digits (leading zeros included) decides about the type *)
        IF (digits > MaxHexDigits) OR (digits = MaxHexDigits) & (number > MAX(LONGINT)) THEN
          symbol.numberType := Hugeint
        ELSE
          symbol.numberType := Integer
        END;
        RETURN result;
      ELSIF reader.Peek() = "b" THEN (* binary number *)
        digits := 0;
        GetNextCharacter; GetNextCharacter;
        IF (ch = "'") & IsBinaryDigit(reader.Peek()) THEN GetNextCharacter END;
        WHILE IsBinaryDigit(ch) DO
          number := number * 2;
          INC(digits);
          IF ch = "1" THEN INC(number) END;
          GetNextCharacter;
          IF (ch = "'") & IsBinaryDigit(reader.Peek()) THEN GetNextCharacter END;
        END;
        symbol.hugeint := number;
        symbol.integer := SHORT(number);
        IF digits > 32 THEN
          symbol.numberType := Hugeint
        ELSE
          symbol.numberType := Integer
        END;
        RETURN result;
      END;
    END;
    (* i: digits read, m: digits after leading zeros, n: digits stored in dig,
       d: number of digits in front of the decimal point (0: no decimal point seen) *)
    LOOP (* read mantissa *)
      IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN
        IF (m > 0) OR (ch # "0") THEN (* ignore leading zeros *)
          IF n < LEN( dig ) THEN dig[n] := ch; INC( n ) END;
          INC( m )
        END;
        Remember( ch ); GetNextCharacter; INC( i )
      ELSIF ch = "." THEN
        Remember( ch ); GetNextCharacter;
        IF ch = "." THEN ch := Ellipsis; EXIT
        ELSIF d = 0 THEN (* i > 0 *) d := i
        ELSE Error( Basic.NumberIllegalCharacter )
        END
      ELSIF ch = "'" THEN GetNextCharacter; (* ignore *)
      ELSE EXIT
      END
    END; (* 0 <= n <= m <= i, 0 <= d <= i *)
    IF d = 0 THEN (* integer *)
      IF n = m THEN
        symbol.integer := 0; i := 0; symbol.hugeint := 0;
        IF ch = "X" THEN (* character *)
          Remember( ch ); GetNextCharacter; result := Character;
          IF (n <= 2) THEN
            WHILE i < n DO symbol.integer := symbol.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
            symbol.character := CHR(symbol.integer);
          ELSE Error( Basic.NumberTooLarge )
          END
        ELSIF ch = "H" THEN (* hexadecimal *)
          Remember( ch ); GetNextCharacter;
          IF (n < MaxHexDigits) OR (n=MaxHexDigits) & (dig[0] <= "7") THEN (* otherwise the positive (!) number is not in the range of longints *)
            symbol.numberType := Integer;
            (* IF (n = MaxHexDigits) & (dig[0] > "7") THEN (* prevent overflow *) symbol.integer := -1 END; *)
            WHILE i < n DO symbol.integer := symbol.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
            symbol.hugeint := symbol.integer;
          ELSIF n <= MaxHugeHexDigits THEN
            symbol.numberType := Hugeint;
            IF (n = MaxHugeHexDigits) & (dig[0] > "7") THEN (* prevent overflow *) symbol.hugeint := -1 END;
            WHILE i < n DO symbol.hugeint := Hexadecimal( dig[i] ) + symbol.hugeint * 10H; INC( i ) END;
            symbol.integer :=SHORT(symbol.hugeint);
          ELSE
            symbol.numberType := Hugeint; (* to make parser able to go on *)
            Error( Basic.NumberTooLarge )
          END
        ELSE (* decimal *)
          symbol.numberType := Integer;
          (* NOTE(review): the ">=" below already classifies values from 10 * (MAX(LONGINT) DIV 10)
             up to MAX(LONGINT) as Hugeint although they fit into a LONGINT - confirm that this is intended *)
          WHILE (i < n) & ~long DO
            d := Decimal( dig[i] ); INC( i );
            IF symbol.integer >= MAX(LONGINT) DIV 10 THEN (* multiplication overflow *)long := TRUE END;
            nextInt := symbol.integer*10+d;
            IF nextInt >=0 THEN symbol.integer := nextInt ELSE (* overflow *) long := TRUE END;
          END;
          IF long THEN
            i := 0; (* restart computation , artificial limit because of compiler problems with hugeint *)
            hugeint := 0;
            tenh := 10; (* compiler does not like constants here ! *)
            symbol.numberType := Hugeint;
            WHILE i < n DO
              d := Decimal( dig[i] ); INC( i );
              IF hugeint > MAX(HUGEINT) DIV 10 THEN Error( Basic.NumberTooLarge) END;
              hugeint := hugeint * tenh + d;
              IF hugeint < 0 THEN Error( Basic.NumberTooLarge ) END
            END;
            symbol.hugeint := hugeint;
            symbol.integer := SHORT(symbol.hugeint);
          ELSE
            symbol.hugeint := symbol.integer;
          END
        END
      ELSE
        (* more significant digits than dig can hold *)
        symbol.numberType := Hugeint;
        Error( Basic.NumberTooLarge )
      END
    ELSE (* fraction *)
      f := 0; e := 0; expCh := "E";
      WHILE n > 0 DO (* 0 <= f < 1 *) DEC( n ); f := (Decimal( dig[n] ) + f) / 10 END;
      IF (ch = "E") OR (ch = "D") THEN
        expCh := ch; Remember( ch ); GetNextCharacter; neg := FALSE;
        IF ch = "-" THEN neg := TRUE; Remember( ch ); GetNextCharacter
        ELSIF ch = "+" THEN Remember( ch ); GetNextCharacter
        END;
        IF ("0" <= ch) & (ch <= "9") THEN
          REPEAT
            n := Decimal( ch ); Remember( ch ); GetNextCharacter;
            IF e <= (MAX( INTEGER ) - n) DIV 10 THEN e := e * 10 + n ELSE Error( Basic.NumberTooLarge ) END
          UNTIL (ch < "0") OR ("9" < ch);
          IF neg THEN e := -e END
        ELSE Error( Basic.NumberIllegalCharacter )
        END
      END;
      DEC( e, i - d - m ); (* decimal point shift *)
      IF expCh = "E" THEN
        symbol.numberType := Real;
        IF (1 - MaxRealExponent < e) & (e <= MaxRealExponent) THEN
          IF e < 0 THEN symbol.real := f / Ten( -e ) ELSE symbol.real := f * Ten( e ) END
        ELSE Error( Basic.NumberTooLarge )
        END
      ELSE
        symbol.numberType := Longreal;
        IF (1 - MaxLongrealExponent < e) & (e <= MaxLongrealExponent) THEN
          IF e < 0 THEN symbol.real := f / Ten( -e ) ELSE symbol.real := f * Ten( e ) END
        ELSE Error( Basic.NumberTooLarge )
        END
      END
    END;
    symbol.identifierString[si] := 0X; (* si <= MaxIdentifierLength is guaranteed by Remember *)
    RETURN result;
  END GetNumber;
  728. (** read / skip a comment **)
  PROCEDURE ReadComment(VAR symbol: Symbol);
  (* Read a (possibly nested) comment; on entry the opening of the comment has already been
     consumed and ch is the first character of the comment text.
     The text without the closing of the comment is stored in symbol.string /
     symbol.stringLength, symbol.token is set to Comment.
     Reaching the end of text inside the comment is reported as Basic.CommentNotClosed;
     in that case the text read so far is stored unchanged. *)
  VAR level: LONGINT;
  BEGIN
    stringMaker.Clear;
    level := 1;
    WHILE (level > 0) & (ch # EOT) DO
      IF ch = "(" THEN
        stringWriter.Char(ch);
        GetNextCharacter;
        IF ch = "*" THEN INC(level); stringWriter.Char(ch); GetNextCharacter; END;
      ELSIF ch = "*" THEN
        stringWriter.Char(ch);
        GetNextCharacter;
        IF ch =")" THEN DEC(level); stringWriter.Char(ch); GetNextCharacter; END;
      ELSE
        stringWriter.Char(ch);
        GetNextCharacter;
      END;
    END;
    stringWriter.Char(0X);
    stringWriter.Update;
    IF level > 0 THEN
      (* no closing was read, so there is nothing to cut off *)
      Error(Basic.CommentNotClosed)
    ELSE
      stringMaker.Shorten(2); (* remove comment closing *)
    END;
    symbol.token := Comment;
    (* hand out a copy: the string maker's own buffer is cleared when the next symbol is read *)
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
  END ReadComment;
  (* Copy the source text verbatim until the keyword END or WITH (written in the case
     convention of the scanner) or the end of the text is reached.
     startPos: end position of the symbol passed in; endPos: start position of the
     terminating keyword (or of the end of text).
     The copied text is stored in symbol.string / symbol.stringLength; the resulting token
     (End, With or EndOfText) is stored in symbol.token and returned. *)
  PROCEDURE SkipToEndOfCode*(VAR startPos,endPos: LONGINT; VAR symbol: Symbol): Token;
  VAR s: LONGINT;
  BEGIN
    ASSERT(case # Unknown);
    stringMaker.Clear;
    startPos := symbol.position.end;
    s := symbol.token;
    WHILE (s # EndOfText) & (s # End) & (s # With) DO
      symbol.position := position;
      endPos := position.start;
      CASE ch OF
      EOT:
        (* end of text: without this case the loop would never terminate, as ch stays EOT *)
        s := EndOfText
      | 'A' .. 'Z','a'..'z': s := Identifier;
        GetIdentifier(symbol);
        IF (case=Uppercase) & (symbol.identifierString = "END") OR (case=Lowercase) & (symbol.identifierString = "end") THEN
          s := End
        ELSIF (case = Uppercase) & (symbol.identifierString = "WITH") OR (case = Lowercase) & (symbol.identifierString = "with") THEN
          s := With
        ELSE
          stringWriter.String(symbol.identifierString);
        END;
      ELSE
        stringWriter.Char(ch);
        GetNextCharacter;
      END;
      symbol.position.end := position.start;
    END;
    stringWriter.Update;
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
    symbol.token := s;
    IF Trace THEN
      D.String("skip to end: "); D.Int(startPos,1); D.String(","); D.Int(endPos,1); D.Ln;
      OutSymbol(D.Log,symbol); D.Ln;
    END;
    RETURN s
  END SkipToEndOfCode;
  (* skip blanks and control characters; stops at ESC, at the end of text and at any character above " " *)
  PROCEDURE SkipBlanks;
  BEGIN
    WHILE (ch # EOT) & (ch # ESC) & (ch <= " ") DO (*ignore control characters*)
      GetNextCharacter
    END;
    IF Trace & (ch = EOT) THEN D.String("EOT"); D.Ln; END;
  END SkipBlanks;
  (** Get the next symbol.
      Skips blanks, then classifies the input starting at the current character ch.
      symbol.position is set to the span of the symbol and symbol.token to the recognized token.
      Returns TRUE as long as the scanner's error flag is not set. **)
  PROCEDURE GetNextSymbol*(VAR symbol: Symbol ): BOOLEAN;
  VAR tok, keyword: LONGINT;
  BEGIN
    SkipBlanks;
    symbol.position := position;
    stringMaker.Clear;
    CASE ch OF
    | EOT:
      tok := EndOfText
    | ESC:
      GetNextCharacter; tok := Escape
    | DoubleQuote:
      tok := String;
      GetString(symbol, TRUE, TRUE, FALSE)
    | SingleQuote:
      tok := String;
      GetString(symbol, FALSE, FALSE, FALSE)
      (* to be replaced by:
        tok := Character; GetString(symbol);
        IF symbol.stringLength #2 THEN (* stringlength = 1 for empty string '' *)
          Error(Basic.IllegalCharacterValue)
        END;
      *)
    | '#':
      GetNextCharacter; tok := Unequal
    | '&':
      GetNextCharacter; tok := And
    | '(':
      GetNextCharacter;
      IF ch = '*' THEN
        (* comment opening *)
        GetNextCharacter; ReadComment(symbol); tok := Comment
      ELSE
        tok := LeftParenthesis
      END
    | ')':
      GetNextCharacter; tok := RightParenthesis
    | '*':
      GetNextCharacter;
      IF ch = '*' THEN GetNextCharacter; tok := TimesTimes ELSE tok := Times END
    | '+':
      GetNextCharacter;
      IF ch = '*' THEN GetNextCharacter; tok := PlusTimes ELSE tok := Plus END
    | ',':
      GetNextCharacter; tok := Comma
    | '-':
      GetNextCharacter; tok := Minus
    | '.':
      (* period, range or one of the dot-prefixed operators *)
      GetNextCharacter;
      CASE ch OF
      | '.': GetNextCharacter; tok := Upto
      | '*': GetNextCharacter; tok := DotTimes
      | '/': GetNextCharacter; tok := DotSlash
      | '=': GetNextCharacter; tok := DotEqual
      | '#': GetNextCharacter; tok := DotUnequal
      | '>':
        GetNextCharacter;
        IF ch = '=' THEN GetNextCharacter; tok := DotGreaterEqual ELSE tok := DotGreater END
      | '<':
        GetNextCharacter;
        IF ch = '=' THEN GetNextCharacter; tok := DotLessEqual ELSE tok := DotLess END
      ELSE
        tok := Period
      END
    | '/':
      GetNextCharacter; tok := Slash
    | '0'..'9':
      tok := GetNumber(symbol)
    | ':':
      GetNextCharacter;
      IF ch = '=' THEN GetNextCharacter; tok := Becomes ELSE tok := Colon END
    | ';':
      GetNextCharacter; tok := Semicolon
    | '<':
      GetNextCharacter;
      IF ch = '=' THEN
        GetNextCharacter; tok := LessEqual
      ELSIF ch = '<' THEN
        GetNextCharacter;
        IF ch = '?' THEN GetNextCharacter; tok := LessLessQ ELSE tok := LessLess END
      ELSE
        tok := Less
      END
    | '=':
      GetNextCharacter; tok := Equal
    | '>':
      GetNextCharacter;
      IF ch = '=' THEN
        GetNextCharacter; tok := GreaterEqual
      ELSIF ch = '>' THEN
        GetNextCharacter;
        IF ch = '?' THEN GetNextCharacter; tok := GreaterGreaterQ ELSE tok := GreaterGreater END
      ELSE
        tok := Greater
      END
    | '[':
      GetNextCharacter; tok := LeftBracket
    | ']':
      GetNextCharacter; tok := RightBracket
    | '^':
      GetNextCharacter; tok := Arrow
    | '{':
      GetNextCharacter; tok := LeftBrace
    | '|':
      GetNextCharacter; tok := Bar
    | '}':
      GetNextCharacter; tok := RightBrace
    | '~':
      GetNextCharacter; tok := Not
    | '\':
      tok := Backslash;
      GetNextCharacter;
      (* a double quote directly after the backslash, or after one further non-blank character,
        starts an escaped string; OR is evaluated left to right, so the reader is only peeked
        when ch itself is not the double quote *)
      IF (ch = DoubleQuote) OR ((ch > " ") & (reader.Peek() = DoubleQuote)) THEN
        tok := String;
        GetEscapedString(symbol)
      END
    | '`':
      GetNextCharacter; tok := Transpose
    | '?':
      GetNextCharacter;
      IF ch = '?' THEN GetNextCharacter; tok := Questionmarks ELSE tok := Questionmark END
    | '!':
      GetNextCharacter;
      IF ch = '!' THEN GetNextCharacter; tok := ExclamationMarks ELSE tok := ExclamationMark END
    | Ellipsis:
      GetNextCharacter; tok := Upto
    | 'A'..'Z':
      tok := Identifier;
      GetIdentifier(symbol);
      IF (case = Uppercase) OR (case = Unknown) THEN
        keyword := keywordsUpper.IndexByIdentifier(symbol.identifier);
        IF keyword >= 0 THEN tok := keyword END;
        (* an upper-case MODULE / CELLNET fixes the keyword case *)
        IF (tok = Module) OR (tok = CellNet) THEN case := Uppercase END
      END
    | 'a'..'z':
      tok := Identifier;
      GetIdentifier(symbol);
      IF (case = Lowercase) OR (case = Unknown) THEN
        keyword := keywordsLower.IndexByIdentifier(symbol.identifier);
        IF keyword >= 0 THEN tok := keyword END;
        (* a lower-case module / cellnet fixes the keyword case *)
        IF (tok = Module) OR (tok = CellNet) THEN case := Lowercase END
      END;
      (* a first identifier that did not settle the case: fall back to upper-case keywords
        and treat the word as a plain identifier *)
      IF firstIdentifier & (tok # Module) & (tok # CellNet) & (case = Unknown) THEN
        case := Uppercase;
        tok := Identifier
      END
    ELSE
      tok := Identifier;
      GetIdentifier(symbol)
    END;
    firstIdentifier := FALSE;
    symbol.token := tok;
    symbol.position.end := position.start;
    IF Trace THEN OutSymbol(D.Log,symbol); D.Ln; END;
    RETURN ~error
  END GetNextSymbol;
  (** Clear the scanner's error flag. **)
  PROCEDURE ResetError*();
  BEGIN
    error := FALSE
  END ResetError;
  (** Set the diagnostics mode of the scanner (diagnostics = NIL ==> no report) and reset the error state.
      The previously installed diagnostics object is handed back in the VAR parameter.
      Intended for silent symbol peeking after the end of a module. *)
  PROCEDURE ResetErrorDiagnostics*(VAR diagnostics: Diagnostics.Diagnostics);
  VAR previous: Diagnostics.Diagnostics;
  BEGIN
    (* swap the installed diagnostics with the one passed in *)
    previous := SELF.diagnostics;
    SELF.diagnostics := diagnostics;
    diagnostics := previous;
    error := FALSE;
  END ResetErrorDiagnostics;
  922. END Scanner;
  (** snapshot of a scanner's state, filled by AssemblerScanner.GetContext and restored by AssemblerScanner.SetContext *)
  Context* = RECORD
    position: Position; (* scanner position at the time of the snapshot *)
    readerPosition: LONGINT; (* position of the underlying reader (reader.Pos()) *)
    ch: CHAR; (* current look-ahead character *)
  END;
  928. (** assembler scanner reflects the following EBNF
  929. Symbol = String | Token | Number | Identifier.
  930. Token = '\' | '#' | '(' ['*' any '*' ')'] | ')' | CR [LF] | LF | '*' | '+' | ',' | '-' | '~' | '.' | '/' | '%' | ':' | ';' | '=' | '[' | ']' | '{' | '}' | '!' | '^' | '$'['$'].
  931. String = '"' {Character} '"' | "'" {Character} "'".
  932. Identifier = '@' | Letter {'@' | '.' | Letter | Digit | '_'} .
  933. Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
  934. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
  935. BinaryDigit = '0' | '1' .
  936. Number = Integer | Real.
  937. Character = Digit [HexDigit] 'X'.
  938. Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit} | '0b' {BinaryDigit}.
  939. Real = Digit {Digit} '.' {Digit} [ScaleFactor].
  940. ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
  941. HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
  942. **)
  AssemblerScanner* = OBJECT (Scanner) (*! move to different module? unify with compiler scanner? *)
  VAR
    startContext-: Context; (* scanner state captured right after initialization *)

    (** initialize the underlying scanner and remember the initial context **)
    PROCEDURE &InitAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: Position; diagnostics: Diagnostics.Diagnostics );
    BEGIN
      InitializeScanner(source, reader, position, diagnostics);
      GetContext(startContext)
    END InitAssemblerScanner;

    (** store the current look-ahead character, position and reader position in context **)
    PROCEDURE GetContext*(VAR context: Context);
    BEGIN
      context.readerPosition := reader.Pos();
      context.position := position;
      context.ch := ch
    END GetContext;

    (** restore a state previously stored with GetContext **)
    PROCEDURE SetContext*(CONST context: Context);
    BEGIN
      reader.SetPos(context.readerPosition);
      position := context.position;
      ch := context.ch
    END SetContext;

    (** advance until the current character is CR, LF or EOT (the terminator itself is not consumed) **)
    PROCEDURE SkipToEndOfLine*;
    BEGIN
      WHILE ~((ch = EOT) OR (ch = CR) OR (ch = LF)) DO
        GetNextCharacter
      END
    END SkipToEndOfLine;

    (**
      note: in contrast to a regular identifier, an assembler scanner identifier may also contain periods and the '@'-symbol
      Identifier = '@' | Letter {'@' | '.' | Letter | Digit | '_'} .
      Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
      Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
      '_' is the underscore character

      The current character is always taken as first character; the result is stored
      0X-terminated in symbol.identifierString.
    **)
    PROCEDURE GetIdentifier( VAR symbol: Symbol );
    VAR
      len: LONGINT;

      (* TRUE for characters that may continue an assembler identifier *)
      PROCEDURE CharacterIsAllowed(character: CHAR): BOOLEAN;
      BEGIN
        RETURN
          ((character >= 'a') & (character <= 'z')) OR
          ((character >= 'A') & (character <= 'Z')) OR
          ((character >= '0') & (character <= '9')) OR
          (character = '@') OR (character = '.') OR (character = '_')
      END CharacterIsAllowed;

    BEGIN
      len := 0;
      REPEAT
        symbol.identifierString[len] := ch;
        INC( len );
        GetNextCharacter
      UNTIL (len = MaxIdentifierLength) OR ~CharacterIsAllowed(ch);
      IF len = MaxIdentifierLength THEN
        (* limit reached: report and overwrite the last stored character with the terminator *)
        Error( Basic.IdentifierTooLong );
        DEC( len )
      END;
      symbol.identifierString[len] := 0X;
    END GetIdentifier;

    (** get next symbol; comments are skipped, line ends are reported as Ln **)
    PROCEDURE GetNextSymbol*(VAR symbol: Symbol ): BOOLEAN;
    VAR tok: LONGINT;

      (* ignore control characters except line feeds; stops at CR, LF and EOT *)
      PROCEDURE SkipBlanks;
      BEGIN
        WHILE (ch <= ' ') & ~((ch = CR) OR (ch = LF) OR (ch = EOT)) DO
          GetNextCharacter
        END
      END SkipBlanks;

    BEGIN
      REPEAT
        SkipBlanks;
        symbol.position := position;
        CASE ch OF
        | EOT:
          tok := EndOfText
        | DoubleQuote:
          tok := String;
          GetString(symbol, TRUE, FALSE, TRUE)
        | SingleQuote:
          tok := Character;
          GetString(symbol, FALSE, FALSE, FALSE);
          symbol.character := symbol.string[0];
          IF symbol.stringLength # 2 THEN (* stringlength = 1 for empty string '' *)
            Error(Basic.IllegalCharacterValue)
          END
        | '\':
          tok := Backslash;
          GetNextCharacter;
          IF ch = DoubleQuote THEN
            tok := String;
            GetString(symbol, FALSE, FALSE, TRUE)
          END
        | '#':
          GetNextCharacter; tok := Unequal (* for the ARM assembler *)
        | '(':
          GetNextCharacter;
          IF ch = '*' THEN
            GetNextCharacter; ReadComment(symbol); tok := Comment
          ELSE
            tok := LeftParenthesis
          END
        | ')':
          GetNextCharacter; tok := RightParenthesis
        | CR:
          (* CR optionally followed by LF is one line end *)
          GetNextCharacter; tok := Ln;
          IF ch = LF THEN GetNextCharacter END
        | LF:
          (* LF optionally followed by CR is one line end *)
          GetNextCharacter; tok := Ln;
          IF ch = CR THEN GetNextCharacter END
        | '*':
          GetNextCharacter; tok := Times
        | '+':
          GetNextCharacter; tok := Plus
        | ',':
          GetNextCharacter; tok := Comma
        | '-':
          GetNextCharacter; tok := Minus
        | '~':
          GetNextCharacter; tok := Not
        | '.':
          GetNextCharacter; tok := Period
        | '/':
          GetNextCharacter; tok := Div
        | '%':
          GetNextCharacter; tok := Mod
        | '0'..'9':
          tok := GetNumber(symbol)
        | ':':
          GetNextCharacter; tok := Colon
        | ';':
          (* a semicolon starts a comment that extends to the end of the line *)
          tok := Comment;
          SkipToEndOfLine
        | '=':
          GetNextCharacter; tok := Equal
        | '[':
          GetNextCharacter; tok := LeftBracket
        | ']':
          GetNextCharacter; tok := RightBracket
        | '{':
          GetNextCharacter; tok := LeftBrace
        | '}':
          GetNextCharacter; tok := RightBrace
        | '!':
          GetNextCharacter; tok := ExclamationMark
        | '^':
          GetNextCharacter; tok := Arrow
        | 'A'..'Z', 'a'..'z', '@':
          (* the '@'-symbol also initiates an assembly scanner identifier *)
          tok := Identifier;
          GetIdentifier( symbol )
        | '$':
          GetNextCharacter;
          IF ch = '$' THEN GetNextCharacter; tok := PCOffset ELSE tok := PC END
        ELSE
          tok := None;
          GetNextCharacter
        END;
        symbol.position.end := position.start;
      UNTIL tok # Comment;
      symbol.token := tok;
      IF Trace THEN D.Ln; D.Str( "Scan at " ); D.Int( symbol.position.start,1 ); D.Str( ": " ); OutSymbol(D.Log,symbol); D.Update; END;
      RETURN ~error
    END GetNextSymbol;

  END AssemblerScanner;
  VAR
    (* reservedCharacter[ORD(c)] is TRUE iff c may not occur within an identifier; set up by InitReservedCharacters *)
    reservedCharacter: ARRAY 256 OF BOOLEAN;
    (* printable name of each token, indexed by token number; set up by InitTokens *)
    tokens-: ARRAY EndOfText+1 OF Keyword;
    (* keyword tables for lower-case and upper-case spelling; set up by InitKeywords *)
    keywordsLower, keywordsUpper: KeywordTable;
  (** Return a new scanner on a stream, error output via diagnostics.
      The scanner starts at the given stream position, in line 1 with line position 0. **)
  PROCEDURE NewScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): Scanner;
  VAR scanner: Scanner; origin: Position;
  BEGIN
    origin.line := 1;
    origin.linepos := 0;
    origin.start := position;
    origin.end := position;
    NEW( scanner, source, reader, origin, diagnostics );
    RETURN scanner;
  END NewScanner;
  (** Return a new assembler scanner on a stream, error output via diagnostics.
      The scanner starts at the given stream position, in line 1 with line position 0. **)
  PROCEDURE NewAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): AssemblerScanner;
  VAR scanner: AssemblerScanner; origin: Position;
  BEGIN
    origin.line := 1;
    origin.linepos := 0;
    origin.start := position;
    origin.end := position;
    NEW( scanner, source, reader, origin, diagnostics );
    RETURN scanner;
  END NewAssemblerScanner;
  (** Write a textual representation of symbol into str.
      - Identifier, Number: the symbol's identifierString
      - String, Comment: the symbol's string (str must be at least as long as it)
      - anything else: the keyword registered for the token in the given case, or "" if there is none **)
  PROCEDURE SymbolToString*(CONST symbol: Symbol; case: LONGINT; VAR str: ARRAY OF CHAR);
  VAR keyword: StringPool.Index;
  BEGIN
    IF (symbol.token = Identifier) OR (symbol.token = Number) THEN
      COPY(symbol.identifierString, str)
    ELSIF (symbol.token = String) OR (symbol.token = Comment) THEN
      ASSERT(LEN(str) >= LEN(symbol.string^));
      COPY(symbol.string^, str)
    ELSE
      GetKeyword(case, symbol.token, keyword);
      IF keyword < 0 THEN
        str := ""
      ELSE
        StringPool.GetString(keyword, str)
      END
    END;
  END SymbolToString;
  (** Debugging output: write position range, token name and a token-specific rendering of symbol to w. **)
  PROCEDURE OutSymbol*(w: Streams.Writer; CONST symbol: Symbol);
  VAR text: ARRAY 256 OF CHAR;
  BEGIN
    (* "<start>-<end>:<token name>" *)
    w.Int(symbol.position.start,1);
    w.String("-");
    w.Int(symbol.position.end,1);
    w.String(":");
    w.String(tokens[symbol.token]);

    (* numbers additionally show their number type *)
    IF symbol.token = Number THEN
      CASE symbol.numberType OF
      | Integer: w.String("(integer)")
      | Hugeint: w.String("(hugeint)")
      | Real: w.String("(real)")
      | Longreal: w.String("(longreal)")
      END;
    END;

    IF symbol.token = String THEN
      w.String(":");
      w.Char('"'); w.String(symbol.string^); w.Char('"');
    ELSIF symbol.token = Comment THEN
      w.String("(*"); w.String(symbol.string^); w.String("*)");
    ELSE
      SymbolToString(symbol, Uppercase, text);
      w.String(": ");
      w.String(text);
    END
  END OutSymbol;
  (** Reserved characters are the characters that may not occur within an identifier:
      everything except letters, digits and the underscore. **)
  PROCEDURE InitReservedCharacters;
  VAR code: LONGINT; c: CHAR;
  BEGIN
    FOR code := 0 TO LEN( reservedCharacter ) - 1 DO
      c := CHR(code);
      reservedCharacter[code] := ~(
        ((c >= 'a') & (c <= 'z')) OR
        ((c >= 'A') & (c <= 'Z')) OR
        ((c >= '0') & (c <= '9')) OR
        (c = '_'));
    END;
  END InitReservedCharacters;
  (* Get keyword by token: look up the identifier registered for token in the keyword table
    of the given case. case must be Uppercase or Lowercase. *)
  PROCEDURE GetKeyword*(case:LONGINT; token: LONGINT; VAR identifier: IdentifierType);
  BEGIN
    ASSERT((case = Uppercase) OR (case = Lowercase));
    IF case = Lowercase THEN
      keywordsLower.IdentifierByIndex(token, identifier);
    ELSE
      keywordsUpper.IdentifierByIndex(token, identifier);
    END;
  END GetKeyword;
  (** Fill the tokens table with the printable name of every token and
      check that no entry in 0 .. EndOfText was left empty. **)
  PROCEDURE InitTokens;
  VAR t: LONGINT;
  BEGIN
    tokens[None] := "None";

    (* relational operators *)
    tokens[Equal] := "Equal"; tokens[DotEqual] := "DotEqual";
    tokens[Unequal] := "Unequal"; tokens[DotUnequal] := "DotUnequal";
    tokens[Less] := "Less"; tokens[DotLess] := "DotLess";
    tokens[LessEqual] := "LessEqual"; tokens[DotLessEqual] := "DotLessEqual";
    tokens[Greater] := "Greater"; tokens[DotGreater] := "DotGreater";
    tokens[GreaterEqual] := "GreaterEqual"; tokens[DotGreaterEqual] := "DotGreaterEqual";
    tokens[LessLessQ] := "LessLessQ"; tokens[GreaterGreaterQ] := "GreaterGreaterQ";
    tokens[In] := "In"; tokens[Is] := "Is";

    (* multiplicative operators *)
    tokens[Times] := "Times"; tokens[TimesTimes] := "TimesTimes";
    tokens[DotTimes] := "DotTimes"; tokens[PlusTimes] := "PlusTimes";
    tokens[Slash] := "Slash"; tokens[Backslash] := "Backslash"; tokens[DotSlash] := "DotSlash";
    tokens[Div] := "Div"; tokens[Mod] := "Mod"; tokens[And] := "And";

    (* additive operators and negation *)
    tokens[Or] := "Or"; tokens[Plus] := "Plus"; tokens[Minus] := "Minus";
    tokens[Not] := "Not";

    (* expression starts *)
    tokens[LeftParenthesis] := "LeftParenthesis";
    tokens[LeftBracket] := "LeftBracket";
    tokens[LeftBrace] := "LeftBrace";
    tokens[Number] := "Number"; tokens[Character] := "Character"; tokens[String] := "String";
    tokens[Nil] := "Nil"; tokens[Imag] := "Imag";
    tokens[True] := "True"; tokens[False] := "False";
    tokens[Self] := "Self"; tokens[New] := "New"; tokens[Result] := "Result";
    tokens[Identifier] := "Identifier";

    (* statement starts *)
    tokens[If] := "If"; tokens[Case] := "Case"; tokens[While] := "While";
    tokens[Repeat] := "Repeat"; tokens[For] := "For"; tokens[Loop] := "Loop";
    tokens[With] := "With"; tokens[Exit] := "Exit"; tokens[Await] := "Await";
    tokens[Return] := "Return"; tokens[Begin] := "Begin";

    (* separators, closing brackets and further operators *)
    tokens[Semicolon] := "Semicolon"; tokens[Transpose] := "Transpose";
    tokens[RightBrace] := "RightBrace";
    tokens[RightBracket] := "RightBracket";
    tokens[RightParenthesis] := "RightParenthesis";
    tokens[Questionmark] := "Questionmark"; tokens[ExclamationMark] := "ExclamationMark";
    tokens[Questionmarks] := "Questionmarks"; tokens[ExclamationMarks] := "ExclamationMarks";
    tokens[LessLess] := "LessLess"; tokens[GreaterGreater] := "GreaterGreater";
    tokens[Upto] := "Upto"; tokens[Arrow] := "Arrow"; tokens[Period] := "Period";
    tokens[Comma] := "Comma"; tokens[Colon] := "Colon";

    (* structural keywords *)
    tokens[Of] := "Of"; tokens[Then] := "Then"; tokens[Do] := "Do";
    tokens[To] := "To"; tokens[By] := "By";
    tokens[Becomes] := "Becomes"; tokens[Bar] := "Bar";
    tokens[End] := "End"; tokens[Else] := "Else"; tokens[Elsif] := "Elsif";
    tokens[Extern] := "Extern"; tokens[Until] := "Until"; tokens[Finally] := "Finally";

    (* declarations *)
    tokens[Code] := "Code"; tokens[Const] := "Const"; tokens[Type] := "Type";
    tokens[Var] := "Var"; tokens[Out] := "Out";
    tokens[Procedure] := "Procedure"; tokens[Operator] := "Operator";
    tokens[Import] := "Import"; tokens[Definition] := "Definition";
    tokens[Module] := "Module"; tokens[Cell] := "Cell"; tokens[CellNet] := "CellNet";

    (* type constructors and built-in type keywords *)
    tokens[Array] := "Array"; tokens[Object] := "Object"; tokens[Record] := "Record";
    tokens[Pointer] := "Pointer"; tokens[Enum] := "Enum"; tokens[Port] := "Port";
    tokens[Address] := "Address"; tokens[Alias] := "Alias"; tokens[Size] := "Size";

    (* assembler scanner tokens *)
    tokens[Ln] := "Ln"; tokens[PC] := "PC"; tokens[PCOffset] := "PCOffset";

    (* number types *)
    tokens[Shortint] := "Shortint"; tokens[Integer] := "Integer";
    tokens[Longint] := "Longint"; tokens[Hugeint] := "Hugeint";
    tokens[Real] := "Real"; tokens[Longreal] := "Longreal";

    tokens[Comment] := "Comment";
    tokens[EndOfText] := "EndOfText";

    (* every token number must have received a name *)
    FOR t := 0 TO EndOfText DO
      ASSERT(tokens[t] # "")
    END;
  END InitTokens;
  (** Enter keywords in the list of keywords (both upper- and lowercase),
      register the operator / punctuation symbols in both tables and
      set the error messages for tokens that have no spelling. **)
  PROCEDURE InitKeywords;

    (* copy source to dest (including the terminating 0X), mapping 'a'..'z' to 'A'..'Z' *)
    PROCEDURE Upper(CONST source: ARRAY OF CHAR; VAR dest: ARRAY OF CHAR);
    VAR current: CHAR; index: LONGINT;
    BEGIN
      index := 0;
      REPEAT
        current := source[index];
        IF (current >= 'a') & (current <= 'z') THEN
          current := CHR(ORD(current) - ORD('a') + ORD('A'))
        END;
        dest[index] := current;
        INC(index);
      UNTIL current = 0X;
    END Upper;

    (* register name for token in the tables selected by case and as expected-symbol text *)
    PROCEDURE Enter1(CONST name: ARRAY OF CHAR; token: LONGINT; case: SET);
    BEGIN
      IF Lowercase IN case THEN keywordsLower.PutString(name,token) END;
      IF Uppercase IN case THEN keywordsUpper.PutString(name,token) END;
      Basic.SetErrorExpected(token,name);
    END Enter1;

    (* register a keyword given in lower case: as is in the lower-case table, capitalized in the upper-case table *)
    PROCEDURE Enter(CONST name: ARRAY OF CHAR; token: LONGINT);
    VAR capitalized: Keyword;
    BEGIN
      Enter1(name, token, {Lowercase});
      Upper(name, capitalized);
      Enter1(capitalized, token, {Uppercase});
    END Enter;

    (* register a symbol whose spelling does not depend on the case *)
    PROCEDURE EnterSymbol(CONST name: ARRAY OF CHAR; token: LONGINT);
    BEGIN
      Enter1(name, token, {Lowercase,Uppercase});
    END EnterSymbol;

  BEGIN
    NEW(keywordsUpper,EndOfText+1);
    NEW(keywordsLower,EndOfText+1);

    (* constructs and statements *)
    Enter("cell", Cell);
    Enter("cellnet", CellNet);
    Enter("await", Await);
    Enter("begin", Begin);
    Enter("by", By);
    Enter("const", Const);
    Enter("case", Case);
    Enter("code", Code);
    Enter("definition", Definition);
    Enter("do", Do);
    Enter("div", Div);
    Enter("end", End);
    Enter("enum", Enum);
    Enter("else", Else);
    Enter("elsif", Elsif);
    Enter("exit", Exit);
    Enter("extern", Extern);
    Enter("false", False);
    Enter("for", For);
    Enter("finally", Finally);
    Enter("if", If);
    Enter("imag", Imag);
    Enter("in", In);
    Enter("is", Is);
    Enter("import", Import);
    Enter("loop", Loop);
    Enter("module", Module);
    Enter("mod", Mod);
    Enter("nil", Nil);
    Enter("of", Of);
    Enter("or", Or);
    Enter("out", Out);
    Enter("operator", Operator);
    Enter("procedure", Procedure);
    Enter("port", Port);
    Enter("repeat", Repeat);
    Enter("return", Return);
    Enter("self", Self);
    Enter("new", New);
    Enter("result", Result);
    Enter("then", Then);
    Enter("true", True);
    Enter("to", To);
    Enter("type", Type);
    Enter("until", Until);
    Enter("var", Var);
    Enter("while", While);
    Enter("with", With);

    (* types *)
    Enter("array", Array);
    Enter("object", Object);
    Enter("pointer", Pointer);
    Enter("record", Record);
    Enter("address", Address);
    Enter("size", Size);
    Enter("alias", Alias);

    (* symbols *)
    EnterSymbol("#", Unequal);
    EnterSymbol("&", And);
    EnterSymbol("(", LeftParenthesis);
    EnterSymbol(")", RightParenthesis);
    EnterSymbol("*", Times);
    EnterSymbol("**", TimesTimes);
    EnterSymbol("+", Plus);
    EnterSymbol("+*", PlusTimes);
    EnterSymbol(",", Comma);
    EnterSymbol("-", Minus);
    EnterSymbol(".", Period);
    EnterSymbol("..", Upto);
    EnterSymbol(".*", DotTimes);
    EnterSymbol("./", DotSlash);
    EnterSymbol(".=", DotEqual);
    EnterSymbol(".#", DotUnequal);
    EnterSymbol(".>", DotGreater);
    EnterSymbol(".>=", DotGreaterEqual);
    EnterSymbol(".<", DotLess);
    EnterSymbol(".<=", DotLessEqual);
    EnterSymbol("/", Slash);
    EnterSymbol(":", Colon);
    EnterSymbol(":=", Becomes);
    EnterSymbol(";", Semicolon);
    EnterSymbol("<", Less);
    EnterSymbol("<=", LessEqual);
    EnterSymbol("=", Equal);
    EnterSymbol(">", Greater);
    EnterSymbol(">=", GreaterEqual);
    EnterSymbol("[", LeftBracket);
    EnterSymbol("]", RightBracket);
    EnterSymbol("^", Arrow);
    EnterSymbol("{", LeftBrace);
    EnterSymbol("|", Bar);
    EnterSymbol("}", RightBrace);
    EnterSymbol("~", Not);
    EnterSymbol("\", Backslash);
    EnterSymbol("`", Transpose);
    EnterSymbol("?", Questionmark);
    EnterSymbol("??", Questionmarks);
    EnterSymbol("!", ExclamationMark);
    EnterSymbol("!!", ExclamationMarks);
    EnterSymbol("<<", LessLess);
    EnterSymbol("<<?", LessLessQ);
    EnterSymbol(">>", GreaterGreater);
    EnterSymbol(">>?", GreaterGreaterQ);

    (* tokens without a fixed spelling get an explicit error message *)
    Basic.SetErrorMessage(Number,"missing number");
    Basic.SetErrorMessage(String,"missing string");
    Basic.SetErrorMessage(Character,"missing character");
    Basic.SetErrorMessage(Identifier,"missing identifier");
    Basic.SetErrorMessage(EndOfText,"unexpected symbol before end");
  END InitKeywords;
  (** Debugging / reporting: for every token number 0 .. EndOfText write one line of the form
      <number>: "<lower-case entry>", "<upper-case entry>"
      to the command's output stream. **)
  PROCEDURE ReportKeywords*(context: Commands.Context);
  VAR token: LONGINT;

    (* write the entry of table for index, enclosed in double quotes *)
    PROCEDURE Quoted(table: KeywordTable; index: LONGINT);
    VAR spelling: Keyword;
    BEGIN
      context.out.Char('"');
      table.StringByIndex(index, spelling);
      context.out.String(spelling);
      context.out.Char('"');
    END Quoted;

  BEGIN
    FOR token := 0 TO EndOfText DO
      context.out.Int(token,1);
      context.out.String(": ");
      Quoted(keywordsLower, token);
      context.out.String(", ");
      Quoted(keywordsUpper, token);
      context.out.Ln;
    END;
  END ReportKeywords;
  1417. (*
  1418. PROCEDURE TestScanner*(context: Commands.Context);
  1419. VAR filename: ARRAY 256 OF CHAR; reader: Streams.Reader; scanner: Scanner;sym: Symbol;
  1420. BEGIN
  1421. context.arg.SkipWhitespace; context.arg.String(filename);
  1422. reader := TextUtilities.GetTextReader(filename);
  1423. scanner := NewScanner(filename,reader,0,NIL);
  1424. REPEAT
  1425. IF scanner.GetNextSymbol(sym) THEN
  1426. OutSymbol(context.out,sym);context.out.Ln;
  1427. END;
  1428. UNTIL scanner.error OR (sym.token=EndOfText)
  1429. END TestScanner;
  1430. *)
  1431. BEGIN
  1432. InitReservedCharacters; InitTokens; InitKeywords
  1433. END FoxScanner.
  1434. FoxScanner.ReportKeywords
  1435. FoxScanner.TestScanner Test.Mod ~