FoxScanner.Mod 51 KB


  1. MODULE FoxScanner; (** AUTHOR "fof & fn"; PURPOSE "Oberon Compiler: Scanner"; **)
  2. (* (c) fof ETH Zürich, 2009 *)
  3. IMPORT Streams, Strings, Diagnostics, Basic := FoxBasic, D := Debugging, Commands, StringPool;
  CONST
    (* compile-time switch for debugging output *)
    Trace = FALSE;

    (* overall scanner limitation: longest identifier the scanner stores *)
    MaxIdentifierLength* = 128;

    (* parametrization of numeric scanner *)
    MaxHexDigits* = 8;            (* maximal hexadecimal longint length *)
    MaxHugeHexDigits* = 16;       (* maximal hexadecimal hugeint length *)
    MaxRealExponent* = 38;        (* maximal real exponent *)
    MaxLongrealExponent* = 308;   (* maximal longreal exponent *)

    (* scanner constants: characters with a special meaning for the scanner *)
    EOT* = 0X;     (* end of text *)
    LF* = 0AX;     (* line feed *)
    CR* = 0DX;     (* carriage return *)
    TAB* = 09X;    (* horizontal tabulator *)
    ESC* = 1BX;    (* escape *)

  TYPE
    StringType* = Strings.String;
    IdentifierType* = StringPool.Index;
    (* identifier text plus terminating 0X *)
    IdentifierString* = ARRAY MaxIdentifierLength + 1 OF CHAR;
    SubType* = SHORTINT;
  CONST
  (** symbols *)
  (* The numeric values below are generated from the list in the following comment
     (input of FoxProgTools.Enum). The parser relies on the relative order of the
     symbols, so regenerate the whole list instead of editing single values. *)
  (*
  note: order of symbols is important for the parser, do not modify without looking it up
  FoxProgTools.Enum --export --linefeed=6
  None
  (* RelationOps: Equal ... Is *)
  Equal DotEqual Unequal DotUnequal
  Less DotLess LessEqual DotLessEqual Greater DotGreater GreaterEqual DotGreaterEqual
  LessLessQ GreaterGreaterQ Questionmarks ExclamationMarks
  In Is
  (* MulOps: Times ... And *)
  Times TimesTimes DotTimes PlusTimes Slash Backslash DotSlash Div Mod And
  (* AddOps: Or ... Minus *)
  Or Plus Minus
  (* Prefix Unary Operators Plus ... Not *)
  Not
  (* expressions may start with Plus ... Identifier *)
  LeftParenthesis LeftBracket LeftBrace Number Character String Nil Imag True False Self Result New Identifier
  (* statements may start with Self ... Begin *)
  If Case While Repeat For Loop With Exit Await Return Ignore Begin
  (* symbols, expressions and statements cannot start with *)
  Semicolon Transpose RightBrace RightBracket RightParenthesis
  Questionmark ExclamationMark
  LessLess GreaterGreater
  Upto Arrow Period Comma Colon Of Then Do To By Becomes Bar End Else Elsif Until Finally
  (* declaration elements *)
  Code Const Type Var Out Procedure Operator Import Definition Module Cell CellNet Extern
  (* composite type symbols *)
  Array Object Record Pointer Enum Port Address Size Alias
  (* assembler constants *)
  Ln PC PCOffset
  (* number types *)
  Shortint Integer Longint Hugeint Real Longreal
  Comment EndOfText Escape
  ~
  *)
  None*= 0;
  (* RelationOps: Equal ... Is *)
  Equal*= 1; DotEqual*= 2; Unequal*= 3; DotUnequal*= 4; Less*= 5; DotLess*= 6;
  LessEqual*= 7; DotLessEqual*= 8; Greater*= 9; DotGreater*= 10; GreaterEqual*= 11; DotGreaterEqual*= 12;
  LessLessQ*= 13; GreaterGreaterQ*= 14; Questionmarks*= 15; ExclamationMarks*= 16; In*= 17; Is*= 18;
  (* MulOps: Times ... And *)
  Times*= 19; TimesTimes*= 20; DotTimes*= 21; PlusTimes*= 22; Slash*= 23; Backslash*= 24;
  DotSlash*= 25; Div*= 26; Mod*= 27; And*= 28;
  (* AddOps: Or ... Minus *)
  Or*= 29; Plus*= 30; Minus*= 31;
  (* Prefix Unary Operators Plus ... Not *)
  Not*= 32;
  (* expressions may start with Plus ... Identifier *)
  LeftParenthesis*= 33; LeftBracket*= 34; LeftBrace*= 35; Number*= 36; Character*= 37; String*= 38;
  Nil*= 39; Imag*= 40; True*= 41; False*= 42; Self*= 43; Result*= 44;
  New*= 45; Identifier*= 46;
  (* statements may start with Self ... Begin *)
  If*= 47; Case*= 48; While*= 49; Repeat*= 50; For*= 51; Loop*= 52;
  With*= 53; Exit*= 54; Await*= 55; Return*= 56; Ignore*= 57; Begin*= 58;
  (* symbols, expressions and statements cannot start with *)
  Semicolon*= 59; Transpose*= 60; RightBrace*= 61; RightBracket*= 62; RightParenthesis*= 63; Questionmark*= 64;
  ExclamationMark*= 65; LessLess*= 66; GreaterGreater*= 67; Upto*= 68; Arrow*= 69; Period*= 70;
  Comma*= 71; Colon*= 72; Of*= 73; Then*= 74; Do*= 75; To*= 76;
  By*= 77; Becomes*= 78; Bar*= 79; End*= 80; Else*= 81; Elsif*= 82;
  Until*= 83; Finally*= 84;
  (* declaration elements *)
  Code*= 85; Const*= 86; Type*= 87; Var*= 88; Out*= 89; Procedure*= 90;
  Operator*= 91; Import*= 92; Definition*= 93; Module*= 94; Cell*= 95; CellNet*= 96;
  Extern*= 97;
  (* composite type symbols *)
  Array*= 98; Object*= 99; Record*= 100; Pointer*= 101; Enum*= 102; Port*= 103;
  Address*= 104; Size*= 105; Alias*= 106;
  (* assembler constants *)
  Ln*= 107; PC*= 108; PCOffset*= 109;
  (* number types *)
  Shortint*= 110; Integer*= 111; Longint*= 112; Hugeint*= 113; Real*= 114; Longreal*= 115;
  Comment*= 116; EndOfText*= 117; Escape*= 118;
  (* string delimiters: 27X is the apostrophe, 22X the double quote *)
  SingleQuote = 27X; DoubleQuote* = 22X;
  Ellipsis = 7FX; (* used in Scanner.GetNumber to return with ".." when reading an interval like 3..5 *)
  (* values of Scanner.case: which keyword spelling the scanned text uses; Unknown until decided *)
  Uppercase*=0;
  Lowercase*=1;
  Unknown*=2;
  TYPE
    (* keywords book keeping *)
    Keyword* = ARRAY 32 OF CHAR;

    (** Two-way keyword map.
      The inherited hash table maps a string pool index (the keyword) to a keyword index;
      the array `table` maps a keyword index back to the string pool index, -1 meaning "no entry". *)
    KeywordTable* = OBJECT(Basic.HashTableInt); (* string -> index *)
    VAR table: POINTER TO ARRAY OF LONGINT;

      (** create a table with `size` keyword slots, all of them empty *)
      PROCEDURE &InitTable*(size: LONGINT);
      VAR slot: LONGINT;
      BEGIN
        Init(size);
        NEW(table, size);
        slot := 0;
        WHILE slot < size DO
          table[slot] := -1;
          INC(slot)
        END
      END InitTable;

      (** keyword index registered for `identifier`, -1 if there is none *)
      PROCEDURE IndexByIdentifier*(identifier: IdentifierType): LONGINT;
      VAR found: LONGINT;
      BEGIN
        found := -1;
        IF Has(identifier) THEN found := GetInt(identifier) END;
        RETURN found
      END IndexByIdentifier;

      (** keyword index registered for the string `name`, -1 if there is none *)
      PROCEDURE IndexByString*(CONST name: ARRAY OF CHAR): LONGINT;
      VAR poolIndex, found: LONGINT;
      BEGIN
        StringPool.GetIndex(name, poolIndex);
        found := -1;
        IF Has(poolIndex) THEN found := GetInt(poolIndex) END;
        RETURN found
      END IndexByString;

      (** string pool index stored for keyword `index` (-1 for an empty slot); `index` must be a valid slot *)
      PROCEDURE IdentifierByIndex*(index: LONGINT; VAR identifier: IdentifierType);
      BEGIN
        identifier := table[index]
      END IdentifierByIndex;

      (** text stored for keyword `index`, the empty string for an empty slot; `index` must be a valid slot *)
      PROCEDURE StringByIndex*(index: LONGINT; VAR name: ARRAY OF CHAR);
      VAR poolIndex: LONGINT;
      BEGIN
        poolIndex := table[index];
        IF poolIndex >= 0 THEN
          StringPool.GetString(poolIndex, name)
        ELSE
          name := ""
        END
      END StringByIndex;

      (** register `name` as keyword number `index` in both directions *)
      PROCEDURE PutString*(CONST name: ARRAY OF CHAR; index: LONGINT);
      VAR poolIndex: LONGINT;
      BEGIN
        StringPool.GetIndex(name, poolIndex);
        table[index] := poolIndex;
        PutInt(poolIndex, index)
      END PutString;

    END KeywordTable;
  TYPE
    Symbol* = LONGINT;
    Position* = Basic.Position;

    (**
      token: data structure for the data transfer of the last read input from the scanner to the parser
    **)
    Token* = RECORD
      position*: Position;
      (* symbol of token *)
      symbol*: Symbol;
      (* identifier *)
      identifier*: IdentifierType;
      (* cache of identifier's string *)
      identifierString*: IdentifierString;
      (* string or identifier *)
      string*: StringType;
      (* length of string, if stringLength = 2 then this may be interpreted as character and integer = ORD(ch) *)
      stringLength*: LONGINT;
      (* Integer, HugeInteger, Real or Longreal *)
      numberType*: SubType;
      integer*: LONGINT;
      (*! unify longint and hugeint *)
      hugeint*: HUGEINT;
      character*: CHAR;
      real*: LONGREAL;
    END;
  (** Growable character buffer that can be filled through a Streams.Writer (taken from TF's scanner).
    `data` holds the characters, `length` is the number of characters stored so far. *)
  StringMaker* = OBJECT
  VAR
    length: LONGINT;
    data: StringType;

    (** allocate the buffer; sizes below 256 are raised to 256 *)
    PROCEDURE &Init*(initialSize: LONGINT);
    BEGIN
      IF initialSize < 256 THEN initialSize := 256 END;
      NEW(data, initialSize); length := 0;
    END Init;

    (** Sender for the writer returned by GetWriter: appends buf[ofs .. ofs+len-1] and keeps the
      content 0X-terminated. `propagate` is ignored. `res` is always set to Streams.Ok. *)
    PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: WORD);
    VAR i: LONGINT; n: StringType;
    BEGIN
      IF length + len + 1 >= LEN(data) THEN
        (* grow geometrically so that many small appends do not copy the content quadratically;
           fall back to the exact requirement when one chunk is larger than the current buffer *)
        IF len + 1 > LEN(data) THEN
          NEW(n, LEN(data) + len + 1)
        ELSE
          NEW(n, 2 * LEN(data))
        END;
        FOR i := 0 TO length - 1 DO n[i] := data[i] END;
        data := n
      END;
      WHILE len > 0 DO
        data[length] := buf[ofs];
        INC(ofs); INC(length); DEC(len)
      END;
      data[length] := 0X;
      (* res is a VAR parameter of the sender delegate: report success explicitly instead of leaving it untouched *)
      res := Streams.Ok
    END Add;

    (** Remove the last n characters.
      `length` is reduced by n (not below 0) and 0X is written at data[length - 1], or at data[0]
      when nothing is left. The procedure therefore expects the last stored character to be a
      0X that was written through the writer, as Scanner.ReadComment does. *)
    PROCEDURE Shorten*(n : LONGINT);
    BEGIN
      DEC(length, n);
      IF length < 0 THEN length := 0 END;
      IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END
    END Shorten;

    (** discard the content, the buffer itself is kept *)
    PROCEDURE Clear*;
    BEGIN
      data[0] := 0X;
      length := 0
    END Clear;

    (** new writer (buffer size 256) that appends to this string maker via Add *)
    PROCEDURE GetWriter*() : Streams.Writer;
    VAR w : Streams.Writer;
    BEGIN
      NEW(w, SELF.Add, 256);
      RETURN w
    END GetWriter;

    (** new reader over the current content (up to the first 0X) *)
    PROCEDURE GetReader*(): Streams.Reader;
    VAR r: Streams.StringReader;
    BEGIN
      (* NOTE(review): the reader is sized to hold the whole content on the understanding that
         Streams.StringReader.Set copies at most as many characters as the reader's buffer holds,
         so a fixed 256 would cut longer strings - confirm against Streams *)
      IF length < 256 THEN NEW(r, 256) ELSE NEW(r, length + 1) END;
      r.Set(data^);
      RETURN r
    END GetReader;

    (** the internal buffer itself (not a copy: later changes of the string maker show through); len receives `length` *)
    PROCEDURE GetString*(VAR len: LONGINT) : StringType;
    BEGIN
      len := length;
      RETURN data
    END GetString;

    (** a freshly allocated copy of the content up to the first 0X; len receives `length` *)
    PROCEDURE GetStringCopy*(VAR len: LONGINT): StringType;
    VAR new: StringType;
    BEGIN
      len := length;
      NEW(new,len+1);
      COPY(data^,new^);
      RETURN new
    END GetStringCopy;

  END StringMaker;
  228. (** scanner reflects the following EBNF
  229. Token = String | Symbol | Number | Keyword | Identifier.
  230. Symbol = | '#' | '&' | '(' ['*' any '*' ')'] | ')' | '*'['*'] | '+'['*'] | ',' | '-' | '.' [ '.' | '*' | '/' | '=' | '#' | '>'['='] | '<' ['=']
  231. | '/' | ':' ['='] | ';' | '<' ['=' | '<' ['?'] ] | '=' | '>' [ '=' | '>' ['?']]
  232. | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '\' | '`' | '?' ['?'] | '!' ['!']
  233. Identifier = Letter {Letter | Digit | '_'}.
  234. Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z'.
  235. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
  236. String = '"' {Character} '"' | "'" {Character} "'".
  237. Character = Digit [HexDigit] 'X'.
  238. Number = Integer | Real.
  239. Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit}.
  240. Real = Digit {Digit} '.' {Digit} [ScaleFactor].
  241. ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
  242. HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
  243. **)
  244. Scanner* = OBJECT
  245. VAR
  246. (* helper state information *)
  247. source-: StringType;
  248. reader-: Streams.Reader; (* source *)
  249. diagnostics: Diagnostics.Diagnostics; (* error logging *)
  250. ch-: CHAR; (* look-ahead character *)
  251. position-: Position; (* current position *)
  252. error-: BOOLEAN; (* if error occured during scanning *)
  253. firstIdentifier: BOOLEAN; (* support of lower vs. upper case keywords *)
  254. case-: LONGINT;
  255. stringWriter: Streams.Writer;
  256. stringMaker: StringMaker;
  257. useLineNumbers*: BOOLEAN;
  (**
    Set up the scanner and read the first look-ahead character.
    source: name of the source code for reference in error outputs (a private copy is kept)
    reader: input stream; NIL is treated like an empty input (ch = EOT)
    pos: reference position (offset) of the input stream, for error output
    diagnostics: error output object
  *)
  PROCEDURE & InitializeScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; pos: Position; diagnostics: Diagnostics.Diagnostics );
  BEGIN
    (* error and keyword-case state *)
    error := FALSE;
    case := Unknown;
    firstIdentifier := TRUE;

    (* collaborators *)
    SELF.diagnostics := diagnostics;
    SELF.reader := reader;

    (* buffer that collects strings and comments *)
    NEW(stringMaker, 1024);
    stringWriter := stringMaker.GetWriter();

    (* keep an own copy of the source name *)
    NEW(SELF.source, Strings.Length(source) + 1);
    COPY(source, SELF.source^);

    (* the first GetNextCharacter increments position.start again: one token lookahead *)
    ch := " ";
    position := pos;
    DEC(position.start, 1);
    IF reader # NIL THEN GetNextCharacter ELSE ch := EOT END;

    IF Trace THEN D.Str( "New scanner " ); D.Ln; END;
    (* without a source name the reader is recorded in the position *)
    IF source = "" THEN position.reader := reader END;
    useLineNumbers := FALSE;
  END InitializeScanner;
  (** forget the detected keyword case and treat the next identifier as the first one again *)
  PROCEDURE ResetCase*; (*! needs a better naming ! *)
  BEGIN
    case := Unknown;
    firstIdentifier := TRUE
  END ResetCase;
  (** set the keyword case; the module declares the values Uppercase, Lowercase and Unknown for it *)
  PROCEDURE SetCase*(c: LONGINT);
  BEGIN
    case := c
  END SetCase;
  (** report the error message `msg` at the current position and mark the scanner as erroneous **)
  PROCEDURE ErrorS(CONST msg: ARRAY OF CHAR);
  BEGIN
    Basic.Error(diagnostics, source^, position, msg);
    error := TRUE
  END ErrorS;
  (** report the error with number `code` (no additional text) at the current position and mark the scanner as erroneous **)
  PROCEDURE Error( code: INTEGER );
  BEGIN
    Basic.ErrorC(diagnostics, source^, position, code, "");
    error := TRUE
  END Error;
  (** read the next look-ahead character into ch and advance the position; end of text results in ch = EOT **)
  PROCEDURE GetNextCharacter*;
  BEGIN
    reader.Char(ch);
    INC(position.start);
    IF ch = LF THEN
      (* a new line begins behind the line feed *)
      position.linepos := position.start + 1;
      INC(position.line)
    END
    (* reader.res is not checked: according to the original author's note Streams returns 0X
       if reading failed; should Streams.Reader.Char ever change, add
       IF reader.res # Streams.Ok THEN ch := EOT END; here *)
  END GetNextCharacter;
  313. (*
  314. The following is an implementation of the KMP algorithm used in order to traverse strings until some pattern occurs.
  315. It is not necessary for our implementation of string escape sequences, because the first character of the pattern does not occur in the pattern elsewhere
  316. I found the code useful and keep it here for the time being....
  317. (* generate a table to be able to quickly search for string containing overlaps - KMP algorithm *)
  318. PROCEDURE MakeOverlapTable*(CONST pattern: ARRAY OF CHAR; VAR table: ARRAY OF LONGINT);
  319. VAR i, cnd: LONGINT;
  320. BEGIN
  321. ASSERT(pattern[0] # 0X);
  322. (* if first character did not match: reset search *)
  323. table[0] := -1;
  324. (* if second character did not match: compare to first *)
  325. IF pattern[1] # 0X THEN
  326. table[1] := 0;
  327. END;
  328. (* for all other characters: switch back to previous overlay in pattern *)
  329. i := 2; cnd := 0;
  330. WHILE(pattern[i] # 0X) DO
  331. (* do patterns [i-cnd, i-1] match with pattern[0.. cnd] ? *)
  332. IF pattern[i-1] = pattern[cnd] THEN
  333. INC(cnd); table[i] := cnd; INC(i);
  334. (* no, switch back to last overlap, if possible *)
  335. ELSIF cnd > 0 THEN cnd := table[cnd]
  336. (* not possible: restart at beginning *)
  337. ELSE table[i] := 0; INC(i)
  338. END;
  339. END;
  340. END MakeOverlapTable;
  341. (* using KMP substring search algorithm consume and reproduce all characters of a string until endString *)
  342. PROCEDURE GetString(CONST endString: ARRAY OF CHAR);
  343. VAR escapePos: LONGINT; ech: CHAR; i: LONGINT; table: ARRAY 16 OF LONGINT;
  344. next: LONGINT;
  345. PROCEDURE Append(ch :CHAR);
  346. BEGIN
  347. IF ch = 0X THEN
  348. ErrorS("Unexpected end of text in string"); error := TRUE
  349. ELSE
  350. stringWriter.Char(ch)
  351. END;
  352. END Append;
  353. BEGIN
  354. MakeOverlapTable(endString, table);
  355. (* traverse *)
  356. escapePos := 0; ech := endString[0];
  357. GetNextCharacter;
  358. REPEAT
  359. IF ch = ech THEN
  360. INC(escapePos); ech := endString[escapePos];
  361. GetNextCharacter;
  362. ELSIF escapePos = 0 THEN (* frequent case *)
  363. Append(ch); GetNextCharacter;
  364. ELSE
  365. (* overlaps ? *)
  366. next := table[escapePos];
  367. IF next < 0 THEN next := 0 END;
  368. (* account for "forgotten" characters *)
  369. FOR i := 0 TO escapePos-1-next DO
  370. Append(endString[i]);
  371. END;
  372. (* to next overlapping ? *)
  373. escapePos := table[escapePos];
  374. (* no overlapping *)
  375. IF escapePos < 0 THEN
  376. Append(ch);
  377. escapePos := 0;
  378. GetNextCharacter;
  379. END;
  380. ech := endString[escapePos];
  381. END;
  382. UNTIL (ch = EOT) OR (ech = 0X);
  383. END GetString;
  384. *)
  (** Consume characters up to and including the terminator `endString` and write everything
    in front of the terminator to stringWriter.
    The simple restart strategy used on a partial match is sufficient when endString does not
    contain its first character again, which is the case for our string convention.
    endString: 0X-terminated, non-empty terminator sequence
    useControl: when TRUE, a backslash introduces a control sequence: backslash and double quote
      stand for themselves, 'n' yields CR LF, 't' yields TAB; anything else is reported as
      "Unknown control sequence" and ends the scan of this string
    If the text ends before the terminator is complete, "Unexpected end of text in string" is
    reported once, at the position where the string started. *)
  PROCEDURE ConsumeStringUntil(CONST endString: ARRAY OF CHAR; useControl: BOOLEAN);
  CONST
    Control = '\';
    Delimiter = '"';
  VAR escapePos: LONGINT; ech: CHAR; i: LONGINT; startPosition: LONGINT;
    failed: BOOLEAN; (* local to this string: the scanner-wide `error` flag stays set after any earlier error and must not end the loop *)
  BEGIN
    (* traverse *)
    failed := FALSE;
    escapePos := 0; ech := endString[0]; startPosition := position.start;
    GetNextCharacter;
    (* ech = 0X: terminator complete; ch = EOT: input exhausted *)
    WHILE (ch # EOT) & (ech # 0X) & ~failed DO
      IF ch = ech THEN
        (* one more character of the terminator matched *)
        INC(escapePos); ech := endString[escapePos];
        GetNextCharacter
      ELSIF useControl & (ch = Control) THEN
        GetNextCharacter;
        IF (ch = Control) OR (ch = Delimiter) THEN
          stringWriter.Char(ch)
        ELSIF ch = 'n' THEN
          stringWriter.Char(CR); stringWriter.Char(LF)
        ELSIF ch = 't' THEN
          stringWriter.Char(TAB)
        ELSIF ch # EOT THEN (* at end of text only the end-of-text error below is reported *)
          ErrorS("Unknown control sequence"); failed := TRUE
        END;
        GetNextCharacter
      ELSIF escapePos = 0 THEN (* frequent case *)
        stringWriter.Char(ch); GetNextCharacter
      ELSE
        (* partial terminator match failed: account for "forgotten" characters *)
        FOR i := 0 TO escapePos-1 DO
          stringWriter.Char(endString[i])
        END;
        (* restart, the current character is examined again *)
        ech := endString[0]; escapePos := 0
      END
    END;
    (* a string whose terminator is the very last thing in the text is complete (ech = 0X) *)
    IF (ch = EOT) & (ech # 0X) THEN
      position.start := startPosition; ErrorS("Unexpected end of text in string")
    END
  END ConsumeStringUntil;
  (** Read an escaped string into token.string / token.stringLength; the introducing backslash has
    already been consumed.
    If the current character is not a double quote it is taken as an additional escape character
    and the quote has to follow it. The string then ends at the sequence quote, escape character,
    backslash and control sequences are not interpreted; without an escape character it ends at
    quote, backslash and control sequences are interpreted. *)
  PROCEDURE GetEscapedString(VAR token: Token);
  VAR terminator: ARRAY 4 OF CHAR; escape: CHAR; last: LONGINT;
  BEGIN
    stringMaker.Clear;
    escape := 0X;
    IF ch # '"' THEN
      escape := ch; GetNextCharacter
    END;
    ASSERT((ch = '"') OR (ch = "'"));

    (* terminator = quote [escape] backslash *)
    terminator[0] := ch; last := 1;
    IF escape # 0X THEN
      terminator[last] := escape; INC(last)
    END;
    terminator[last] := '\';
    terminator[last + 1] := 0X;

    ConsumeStringUntil(terminator, escape = 0X);

    stringWriter.Char(0X);
    stringWriter.Update;
    token.string := stringMaker.GetStringCopy(token.stringLength)
  END GetEscapedString;
  (** get a string starting at current position (ch is the opening quote character)
    string = {'"' {Character} '"'} | {"'" {Character} "'"}.
    multiLine: characters below " " (e.g. line breaks) are allowed inside the string; otherwise
      they are reported as Basic.StringIllegalCharacter and end the string
    multiString: string parts that follow each other, separated by blanks only, are concatenated
    useControl: a backslash introduces a control sequence: backslash and the quote character stand
      for themselves, 'n' yields CR LF, 't' yields TAB
    The result is delivered in token.string and token.stringLength.
  **)
  PROCEDURE GetString(VAR token: Token; multiLine, multiString, useControl: BOOLEAN);
  CONST escapeIntro = '\';
  VAR quote: CHAR; failed: BOOLEAN;

    (* write c to the string; 0X is never part of a string and is reported as an error *)
    PROCEDURE Append(c: CHAR);
    BEGIN
      IF c # 0X THEN
        stringWriter.Char(c)
      ELSE
        ErrorS("Unexpected end of text in string"); failed := TRUE
      END
    END Append;

  BEGIN
    stringMaker.Clear;
    quote := ch; failed := FALSE;
    REPEAT
      (* one string part, up to the closing quote *)
      LOOP
        IF failed THEN EXIT END;
        GetNextCharacter;
        IF (ch = quote) OR (ch = EOT) THEN
          EXIT
        ELSIF useControl & (ch = escapeIntro) THEN
          GetNextCharacter;
          IF (ch = escapeIntro) OR (ch = quote) THEN
            Append(ch)
          ELSIF ch = 'n' THEN
            Append(CR); Append(LF)
          ELSIF ch = 't' THEN
            Append(TAB)
          ELSE
            ErrorS("Unknown control sequence")
          END
        ELSIF ~multiLine & (ch < " ") THEN
          Error( Basic.StringIllegalCharacter ); EXIT
        ELSE
          Append(ch)
        END
      END;
      IF ch # EOT THEN
        (* step over the closing quote; a further part may follow behind blanks *)
        GetNextCharacter;
        IF multiString THEN SkipBlanks END
      ELSE
        ErrorS("Unexpected end of text in string")
      END
    UNTIL ~multiString OR (ch # quote);
    stringWriter.Char(0X);
    stringWriter.Update;
    token.string := stringMaker.GetStringCopy(token.stringLength)
  END GetString;
  (**
    Identifier = Letter {Letter | Digit | '_'} .
    Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
    Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
    '_' is the underscore character

    Reads the identifier starting at ch into token.identifierString and stores its string pool
    index in token.identifier. Reading stops at the first character marked in reservedCharacter.
    When MaxIdentifierLength characters have been read, Basic.IdentifierTooLong is reported, the
    text is cut to MaxIdentifierLength-1 characters and the rest of the identifier is skipped.
  **)
  PROCEDURE GetIdentifier( VAR token: Token );
  VAR i: LONGINT;
  BEGIN
    i := 0;
    REPEAT token.identifierString[i] := ch; INC( i ); GetNextCharacter UNTIL reservedCharacter[ORD( ch )] OR (i = MaxIdentifierLength);
    IF i = MaxIdentifierLength THEN
      Error( Basic.IdentifierTooLong ); DEC( i );
      (* consume the tail, otherwise it would be scanned as a second, unrelated identifier;
         the EOT test guarantees termination independently of the reservedCharacter table *)
      WHILE ~reservedCharacter[ORD( ch )] & (ch # EOT) DO GetNextCharacter END
    END;
    token.identifierString[i] := 0X;
    StringPool.GetIndex(token.identifierString, token.identifier);
  END GetIdentifier;
  (**
    Number = Integer | Real.
    Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit}.
    Real = Digit {Digit} '.' {Digit} [ScaleFactor].
    ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
    HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
    Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .

    Scans the number (or character constant such as 0DX) starting at the digit in ch.
    Besides the forms above, a '0b' prefix introduces a binary number and an apostrophe may be
    used as digit separator.
    Returns Number or Character. The value is stored in token.integer / token.hugeint /
    token.real / token.character, its kind in token.numberType. For the forms without prefix the
    scanned text is kept in token.identifierString (cut to MaxIdentifierLength characters).
    When a number is directly followed by "..", ch is set to Ellipsis on return.
  **)
  PROCEDURE GetNumber(VAR token: Token): Symbol;
  VAR i, m, n, d, e, si: LONGINT;
    (* i: digits read, m: digits without leading zeros, n: digits kept in dig,
       d: number of digits in front of the decimal point (0: no decimal point seen),
       e: exponent, si: characters kept in token.identifierString *)
    dig: ARRAY 24 OF CHAR;
    f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN;
    result: Symbol;
    hugeint, tenh, number: HUGEINT;
    digits: LONGINT;

    (** 10^e **)
    PROCEDURE Ten( e: LONGINT ): LONGREAL;
    VAR x, p: LONGREAL;
    BEGIN
      x := 1; p := 10;
      WHILE e > 0 DO
        IF ODD( e ) THEN x := x * p END;
        e := e DIV 2;
        IF e > 0 THEN p := p * p END (* prevent overflow *)
      END;
      RETURN x
    END Ten;

    (** return decimal number associated to character ch , error if none **)
    PROCEDURE Decimal( ch: CHAR ): LONGINT;
    BEGIN (* ("0" <= ch) & (ch <= "9") OR ("A" <= ch) & (ch <= "F") *)
      IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" ) ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0 END
    END Decimal;

    (** return hexadecimal number associated to character ch, error if none **)
    PROCEDURE Hexadecimal( ch: CHAR ): LONGINT;
    BEGIN
      IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" )
      ELSIF ch <= "F" THEN RETURN ORD( ch ) - ORD( "A" ) + 10
      ELSIF ch <= "f" THEN RETURN ORD( ch ) - ORD( "a" ) + 10
      ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0
      END
    END Hexadecimal;

    PROCEDURE IsHexDigit(ch: CHAR): BOOLEAN;
    BEGIN
      RETURN (ch >= "0") & (ch <= "9") OR (ch >= "a") & (ch <="f") OR (ch >= "A") & (ch <= "F")
    END IsHexDigit;

    PROCEDURE IsBinaryDigit(ch: CHAR): BOOLEAN;
    BEGIN
      RETURN (ch >= "0") & (ch <= "1")
    END IsBinaryDigit;

    (** keep c as part of the scanned text; characters beyond the capacity of
      token.identifierString are dropped instead of indexing past the end of the array **)
    PROCEDURE Keep( c: CHAR );
    BEGIN
      IF si < MaxIdentifierLength THEN token.identifierString[si] := c; INC( si ) END
    END Keep;

  BEGIN (* ("0" <= ch) & (ch <= "9") *)
    result := Number;
    i := 0; m := 0; n := 0; d := 0; si := 0; long := FALSE;
    IF (ch = "0") THEN
      IF (reader.Peek() = "x") THEN (* hex number *)
        digits := 0; number := 0; (* the accumulator must start at zero *)
        GetNextCharacter; GetNextCharacter;
        IF (ch = "'")& IsHexDigit(reader.Peek()) THEN GetNextCharacter END;
        WHILE IsHexDigit(ch) DO
          number := number * 10H + Hexadecimal(ch);
          INC(digits);
          GetNextCharacter;
          IF (ch = "'") & IsHexDigit(reader.Peek()) THEN GetNextCharacter END;
        END;
        token.hugeint := number;
        token.integer := SHORT(number);
        IF (digits > MaxHexDigits) OR (digits = MaxHexDigits) & (number > MAX(LONGINT)) THEN
          token.numberType := Hugeint
        ELSE
          token.numberType := Integer
        END;
        RETURN result;
      ELSIF reader.Peek() = "b" THEN (* binary number *)
        digits := 0; number := 0; (* the accumulator must start at zero *)
        GetNextCharacter; GetNextCharacter;
        IF (ch = "'") & IsBinaryDigit(reader.Peek()) THEN GetNextCharacter END;
        WHILE IsBinaryDigit(ch) DO
          number := number * 2;
          INC(digits);
          IF ch = "1" THEN INC(number) END;
          GetNextCharacter;
          IF (ch = "'") & IsBinaryDigit(reader.Peek()) THEN GetNextCharacter END;
        END;
        token.hugeint := number;
        token.integer := SHORT(number);
        (* same rule as for the 0x form: a value that does not fit into a positive longint is a hugeint *)
        IF (digits > 32) OR (number > MAX(LONGINT)) THEN
          token.numberType := Hugeint
        ELSE
          token.numberType := Integer
        END;
        RETURN result;
      END;
    END;
    LOOP (* read mantissa *)
      IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN
        IF (m > 0) OR (ch # "0") THEN (* ignore leading zeros *)
          IF n < LEN( dig ) THEN dig[n] := ch; INC( n ) END;
          INC( m )
        END;
        Keep( ch ); GetNextCharacter; INC( i )
      ELSIF ch = "." THEN
        Keep( ch ); GetNextCharacter;
        IF ch = "." THEN ch := Ellipsis; EXIT
        ELSIF d = 0 THEN (* i > 0 *) d := i
        ELSE Error( Basic.NumberIllegalCharacter )
        END
      ELSIF ch = "'" THEN GetNextCharacter; (* ignore *)
      ELSE EXIT
      END
    END; (* 0 <= n <= m <= i, 0 <= d <= i *)
    IF d = 0 THEN (* integer *)
      IF n = m THEN (* all significant digits fit into dig *)
        token.integer := 0; i := 0; token.hugeint := 0;
        IF ch = "X" THEN (* character *)
          Keep( ch ); GetNextCharacter; result := Character;
          IF (n <= 2) THEN
            WHILE i < n DO token.integer := token.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
            token.character := CHR(token.integer);
          ELSE Error( Basic.NumberTooLarge )
          END
        ELSIF ch = "H" THEN (* hexadecimal *)
          Keep( ch ); GetNextCharacter;
          IF (n < MaxHexDigits) OR (n=MaxHexDigits) & (dig[0] <= "7") THEN (* otherwise the positive (!) number is not in the range of longints *)
            token.numberType := Integer;
            (* IF (n = MaxHexDigits) & (dig[0] > "7") THEN (* prevent overflow *) token.integer := -1 END; *)
            WHILE i < n DO token.integer := token.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
            token.hugeint := token.integer;
          ELSIF n <= MaxHugeHexDigits THEN
            token.numberType := Hugeint;
            IF (n = MaxHugeHexDigits) & (dig[0] > "7") THEN (* prevent overflow *) token.hugeint := -1 END;
            WHILE i < n DO token.hugeint := Hexadecimal( dig[i] ) + token.hugeint * 10H; INC( i ) END;
            token.integer :=SHORT(token.hugeint);
          ELSE
            token.numberType := Hugeint; (* to make parser able to go on *)
            Error( Basic.NumberTooLarge )
          END
        ELSE (* decimal *)
          token.numberType := Integer;
          WHILE (i < n) & ~long DO
            d := Decimal( dig[i] ); INC( i );
            (* exact range test in front of the multiplication: every value up to and including
               MAX(LONGINT) stays a longint and the multiplication can never overflow *)
            IF (token.integer > MAX(LONGINT) DIV 10) OR (token.integer = MAX(LONGINT) DIV 10) & (d > MAX(LONGINT) MOD 10) THEN
              long := TRUE
            ELSE
              token.integer := token.integer * 10 + d
            END
          END;
          IF long THEN
            i := 0; (* restart computation , artificial limit because of compiler problems with hugeint *)
            hugeint := 0;
            tenh := 10; (* compiler does not like constants here ! *)
            token.numberType := Hugeint;
            WHILE i < n DO
              d := Decimal( dig[i] ); INC( i );
              IF hugeint > MAX(HUGEINT) DIV 10 THEN Error( Basic.NumberTooLarge) END;
              hugeint := hugeint * tenh + d;
              IF hugeint < 0 THEN Error( Basic.NumberTooLarge ) END
            END;
            token.hugeint := hugeint;
            token.integer := SHORT(token.hugeint);
          ELSE
            token.hugeint := token.integer;
          END
        END
      ELSE (* more significant digits than dig can hold *)
        token.numberType := Hugeint;
        Error( Basic.NumberTooLarge )
      END
    ELSE (* fraction *)
      f := 0; e := 0; expCh := "E";
      WHILE n > 0 DO (* 0 <= f < 1 *) DEC( n ); f := (Decimal( dig[n] ) + f) / 10 END;
      IF (ch = "E") OR (ch = "D") THEN
        expCh := ch; Keep( ch ); GetNextCharacter; neg := FALSE;
        IF ch = "-" THEN neg := TRUE; Keep( ch ); GetNextCharacter
        ELSIF ch = "+" THEN Keep( ch ); GetNextCharacter
        END;
        IF ("0" <= ch) & (ch <= "9") THEN
          REPEAT
            n := Decimal( ch ); Keep( ch ); GetNextCharacter;
            IF e <= (MAX( INTEGER ) - n) DIV 10 THEN e := e * 10 + n ELSE Error( Basic.NumberTooLarge ) END
          UNTIL (ch < "0") OR ("9" < ch);
          IF neg THEN e := -e END
        ELSE Error( Basic.NumberIllegalCharacter )
        END
      END;
      DEC( e, i - d - m ); (* decimal point shift *)
      IF expCh = "E" THEN
        token.numberType := Real;
        IF (1 - MaxRealExponent < e) & (e <= MaxRealExponent) THEN
          IF e < 0 THEN token.real := f / Ten( -e ) ELSE token.real := f * Ten( e ) END;
          token.real := REAL (token.real);
        ELSE Error( Basic.NumberTooLarge )
        END
      ELSE
        token.numberType := Longreal;
        IF (1 - MaxLongrealExponent < e) & (e <= MaxLongrealExponent) THEN
          IF e < 0 THEN token.real := f / Ten( -e ) ELSE token.real := f * Ten( e ) END
        ELSE Error( Basic.NumberTooLarge )
        END
      END
    END;
    token.identifierString[si] := 0X; (* si <= MaxIdentifierLength, see Keep *)
    RETURN result;
  END GetNumber;
  (** Read a comment whose opening delimiter has already been consumed.
    Nested comments are followed; the text between the outermost delimiters is delivered in
    token.string (the buffer of the string maker itself, not a copy) and token.symbol is set to
    Comment. If the text ends inside the comment, Basic.CommentNotClosed is reported and the
    text read so far is delivered unshortened. **)
  PROCEDURE ReadComment(VAR token: Token);
  VAR level: LONGINT;
  BEGIN
    stringMaker.Clear;
    level := 1;
    WHILE (level > 0) & (ch # EOT) DO
      IF ch = "(" THEN
        stringWriter.Char(ch);
        GetNextCharacter;
        IF ch = "*" THEN INC(level); stringWriter.Char(ch); GetNextCharacter; END;
      ELSIF ch = "*" THEN
        stringWriter.Char(ch);
        GetNextCharacter;
        IF ch =")" THEN DEC(level); stringWriter.Char(ch); GetNextCharacter; END;
      ELSE
        stringWriter.Char(ch);
        GetNextCharacter;
      END;
    END;
    IF level > 0 THEN
      Error(Basic.CommentNotClosed)
    END;
    stringWriter.Char(0X);
    stringWriter.Update;
    (* remove the two characters of the closing delimiter; an unclosed comment has none,
       shortening it would cut off two characters of the comment text *)
    IF level = 0 THEN stringMaker.Shorten(2) END;
    token.symbol := Comment;
    token.string := stringMaker.GetString(token.stringLength);
  END ReadComment;
  (** Skip the text of a code section.
    Characters are collected until the keyword END or WITH (in the spelling selected by `case`),
    a '#' that is the first visible character of a line, or the end of the text is found.
    startPos: receives token.position.end as found on entry
    endPos: receives the start position of the element that ended the skipping
    token: token.string receives a copy of the skipped text, token.symbol the result
    Returns End, With, Unequal (for '#') or EndOfText. **)
  PROCEDURE SkipToEndOfCode*(VAR startPos,endPos: LONGINT; VAR token: Token): Symbol;
  VAR s: LONGINT; newline: BOOLEAN;
  BEGIN
    ASSERT(case # Unknown);
    stringMaker.Clear;
    startPos := token.position.end;
    s := Code; newline := FALSE;
    WHILE (s # EndOfText) & (s # End) & (s # With) & (s # Unequal) DO
      token.position := position;
      endPos := position.start;
      IF (ch >= 'A') & (ch <= 'Z') OR (ch >= 'a') & (ch <= 'z') THEN
        newline := FALSE;
        GetIdentifier(token);
        IF (case=Uppercase) & (token.identifierString = "END") OR (case=Lowercase) & (token.identifierString = "end") THEN
          s := End
        ELSIF (case = Uppercase) & (token.identifierString = "WITH") OR (case = Lowercase) & (token.identifierString = "with") THEN
          s := With
        ELSE
          stringWriter.String(token.identifierString);
        END;
      ELSIF (ch = '#') & newline THEN
        s := Unequal;
        GetNextCharacter;
      ELSIF ch = EOT THEN
        (* end of text inside the code section: stop here, otherwise the loop would read EOT forever *)
        s := EndOfText
      ELSE
        (* newline: only blanks have been seen since the last line break *)
        IF ch > ' ' THEN newline := FALSE;
        ELSIF (ch = CR) OR (ch = LF) THEN newline := TRUE;
        END;
        stringWriter.Char(ch);
        GetNextCharacter;
      END;
      token.position.end := position.start;
    END;
    stringWriter.Update;
    token.string := stringMaker.GetStringCopy(token.stringLength);
    token.symbol := s;
    IF Trace THEN
      D.String("skip to end: "); D.Int(startPos,1); D.String(","); D.Int(endPos,1); D.Ln;
      PrintToken(D.Log,token); D.Ln;
    END;
    RETURN s
  END SkipToEndOfCode;
  (** skip blanks and control characters; stops at ESC, at the end of the text and at any character above " " *)
  PROCEDURE SkipBlanks;
  BEGIN
    (* EOT is itself below " ", so it needs its own stop condition *)
    WHILE (ch <= " ") & (ch # ESC) & (ch # EOT) DO
      GetNextCharacter
    END;
    IF Trace & (ch = EOT) THEN D.String("EOT"); D.Ln; END;
  END SkipBlanks;
  (** Scan the next token of the source text.
      Blanks are skipped first; then token.position, token.symbol and, depending on the symbol, the
      identifier / string / number fields of token are filled in by the respective helper.
      Returns TRUE as long as the scanner's error flag is not set. **)
  PROCEDURE GetNextToken*(VAR token: Token ): BOOLEAN;
  VAR sym, keyword: LONGINT;
  BEGIN
    SkipBlanks;
    token.position := position;
    stringMaker.Clear;
    CASE ch OF (* after SkipBlanks: EOT, ESC or a character above blank *)
    | EOT: sym := EndOfText
    | ESC: GetNextCharacter; sym := Escape
    | DoubleQuote:
      sym := String; GetString(token, TRUE, TRUE, FALSE)
    | SingleQuote:
      (* currently scanned as a string; the original source notes that this is meant to become a Character token *)
      sym := String; GetString(token, FALSE, FALSE, FALSE)
    | '#': GetNextCharacter; sym := Unequal
    | '&': GetNextCharacter; sym := And
    | '(':
      GetNextCharacter;
      IF ch = '*' THEN
        GetNextCharacter; ReadComment(token); sym := Comment
      ELSE
        sym := LeftParenthesis
      END
    | ')': GetNextCharacter; sym := RightParenthesis
    | '*':
      GetNextCharacter;
      IF ch = '*' THEN GetNextCharacter; sym := TimesTimes ELSE sym := Times END
    | '+':
      GetNextCharacter;
      IF ch = '*' THEN GetNextCharacter; sym := PlusTimes ELSE sym := Plus END
    | ',': GetNextCharacter; sym := Comma
    | '-': GetNextCharacter; sym := Minus
    | '.':
      (* a period may start one of the element-wise operators or the range symbol *)
      GetNextCharacter;
      CASE ch OF
      | '.': GetNextCharacter; sym := Upto
      | '*': GetNextCharacter; sym := DotTimes
      | '/': GetNextCharacter; sym := DotSlash
      | '=': GetNextCharacter; sym := DotEqual
      | '#': GetNextCharacter; sym := DotUnequal
      | '>':
        GetNextCharacter;
        IF ch = '=' THEN GetNextCharacter; sym := DotGreaterEqual ELSE sym := DotGreater END
      | '<':
        GetNextCharacter;
        IF ch = '=' THEN GetNextCharacter; sym := DotLessEqual ELSE sym := DotLess END
      ELSE
        sym := Period
      END
    | '/': GetNextCharacter; sym := Slash
    | '0'..'9': sym := GetNumber(token)
    | ':':
      GetNextCharacter;
      IF ch = '=' THEN GetNextCharacter; sym := Becomes ELSE sym := Colon END
    | ';': GetNextCharacter; sym := Semicolon
    | '<':
      GetNextCharacter;
      IF ch = '=' THEN
        GetNextCharacter; sym := LessEqual
      ELSIF ch = '<' THEN
        GetNextCharacter;
        IF ch = '?' THEN GetNextCharacter; sym := LessLessQ ELSE sym := LessLess END
      ELSE
        sym := Less
      END
    | '=': GetNextCharacter; sym := Equal
    | '>':
      GetNextCharacter;
      IF ch = '=' THEN
        GetNextCharacter; sym := GreaterEqual
      ELSIF ch = '>' THEN
        GetNextCharacter;
        IF ch = '?' THEN GetNextCharacter; sym := GreaterGreaterQ ELSE sym := GreaterGreater END
      ELSE
        sym := Greater
      END
    | '[': GetNextCharacter; sym := LeftBracket
    | ']': GetNextCharacter; sym := RightBracket
    | '^': GetNextCharacter; sym := Arrow
    | '{': GetNextCharacter; sym := LeftBrace
    | '|': GetNextCharacter; sym := Bar
    | '}': GetNextCharacter; sym := RightBrace
    | '~': GetNextCharacter; sym := Not
    | '\':
      GetNextCharacter;
      (* a double quote directly after the backslash, or after one further visible character, starts an escaped string *)
      IF (ch = DoubleQuote) OR (ch > " ") & (reader.Peek() = DoubleQuote) THEN
        sym := String; GetEscapedString(token)
      ELSE
        sym := Backslash
      END
    | '`': GetNextCharacter; sym := Transpose
    | '?':
      GetNextCharacter;
      IF ch = '?' THEN GetNextCharacter; sym := Questionmarks ELSE sym := Questionmark END
    | '!':
      GetNextCharacter;
      IF ch = '!' THEN GetNextCharacter; sym := ExclamationMarks ELSE sym := ExclamationMark END
    | Ellipsis: GetNextCharacter; sym := Upto
    | 'A'..'Z':
      sym := Identifier; GetIdentifier(token);
      IF (case = Uppercase) OR (case = Unknown) THEN
        keyword := keywordsUpper.IndexByIdentifier(token.identifier);
        IF keyword >= 0 THEN sym := keyword END;
        (* an uppercase MODULE / CELLNET keyword fixes the keyword case *)
        IF (sym = Module) OR (sym = CellNet) THEN case := Uppercase END
      END
    | 'a'..'z':
      sym := Identifier; GetIdentifier(token);
      IF (case = Lowercase) OR (case = Unknown) THEN
        keyword := keywordsLower.IndexByIdentifier(token.identifier);
        IF keyword >= 0 THEN sym := keyword END;
        (* a lowercase module / cellnet keyword fixes the keyword case *)
        IF (sym = Module) OR (sym = CellNet) THEN case := Lowercase END
      END;
      (* a first lowercase word that is neither module nor cellnet: fall back to uppercase keywords and report an identifier *)
      IF firstIdentifier & (sym # Module) & (sym # CellNet) & (case = Unknown) THEN
        case := Uppercase; sym := Identifier
      END
    ELSE
      (* any other character is handed to the identifier scanner *)
      sym := Identifier; GetIdentifier(token)
    END;
    firstIdentifier := FALSE;
    token.symbol := sym;
    token.position.end := position.start;
    IF Trace THEN PrintToken(D.Log,token); D.Ln END;
    RETURN ~error
  END GetNextToken;
  (** Clear the scanner's error flag. **)
  PROCEDURE ResetError*();
  BEGIN
    error := FALSE
  END ResetError;
  (** Exchange the scanner's diagnostics object with the one passed in and clear the error flag.
      On return, diagnostics holds the object that was installed before the call.
      Passing NIL silences reporting; intended for silent token peeking after the end of a module. *)
  PROCEDURE ResetErrorDiagnostics*(VAR diagnostics: Diagnostics.Diagnostics);
  VAR previous: Diagnostics.Diagnostics;
  BEGIN
    previous := SELF.diagnostics;
    SELF.diagnostics := diagnostics;
    diagnostics := previous;
    error := FALSE
  END ResetErrorDiagnostics;
  926. END Scanner;
  927. Context*=RECORD (* snapshot of scanner state; filled by AssemblerScanner.GetContext and restored by AssemblerScanner.SetContext *)
  928. position: Position; (* the scanner's position field at the time of capture *)
  929. readerPosition : LONGINT; (* value of reader.Pos() at the time of capture *)
  930. ch: CHAR; (* the scanner's current character at the time of capture *)
  931. END;
  932. (** assembler scanner reflects the following EBNF
  933. Token = String | Symbol | Number | Identifier.
  934. Symbol = '\' | '#' | '(' ['*' any '*' ')'] | ')' | CR [LF] | LF | '*' | '+' | ',' | '-' | '~' | '.' | '/' | '%' | ':' | ';' | '=' | '[' | ']' | '{' | '}' | '!' | '^' | '$'['$'].
  935. String = '"' {Character} '"' | "'" {Character} "'".
  936. Identifier = '@' | Letter {'@' | '.' | Letter | Digit | '_'} .
  937. Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
  938. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
  939. BinaryDigit = '0' | '1' .
  940. Number = Integer | Real.
  941. Character = Digit [HexDigit] 'X'.
  942. Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit} | '0b' {BinaryDigit}.
  943. Real = Digit {Digit} '.' {Digit} [ScaleFactor].
  944. ScaleFactor = ('E' | 'D') ['+' | '-'] Digit {Digit}.
  945. HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
  946. **)
  AssemblerScanner* = OBJECT (Scanner) (*! move to different module? unify with compiler scanner? *)
  VAR
    startContext-: Context; (* scanner state captured at the end of initialization *)

    (** Initialize the underlying scanner and record the initial state in startContext. **)
    PROCEDURE &InitAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: Position; diagnostics: Diagnostics.Diagnostics );
    BEGIN
      InitializeScanner(source, reader, position, diagnostics);
      GetContext(startContext)
    END InitAssemblerScanner;

    (** Store the current character, the position and the reader position in context. **)
    PROCEDURE GetContext*(VAR context: Context);
    BEGIN
      context.readerPosition := reader.Pos();
      context.position := position;
      context.ch := ch
    END GetContext;

    (** Reposition the reader and restore the current character and position from context. **)
    PROCEDURE SetContext*(CONST context: Context);
    BEGIN
      reader.SetPos(context.readerPosition);
      position := context.position;
      ch := context.ch
    END SetContext;

    (** Advance until the current character is CR, LF or EOT; that character is not consumed. **)
    PROCEDURE SkipToEndOfLine*;
    BEGIN
      WHILE ~((ch = EOT) OR (ch = CR) OR (ch = LF)) DO
        GetNextCharacter
      END
    END SkipToEndOfLine;

    (**
      Read an assembler identifier into token.identifierString.
      Unlike a regular identifier it may contain periods and '@':
        Identifier = '@' | Letter {'@' | '.' | Letter | Digit | '_'} .
      The current character is always taken as the first character; the caller decides whether it may start an identifier.
      Reports Basic.IdentifierTooLong once MaxIdentifierLength characters have been read.
    **)
    PROCEDURE GetIdentifier( VAR token: Token );
    VAR length: LONGINT;

      (* TRUE for characters that may continue an assembler identifier *)
      PROCEDURE MayContinue(c: CHAR): BOOLEAN;
      BEGIN
        RETURN ('a' <= c) & (c <= 'z') OR ('A' <= c) & (c <= 'Z') OR ('0' <= c) & (c <= '9')
          OR (c = '@') OR (c = '.') OR (c = '_')
      END MayContinue;

    BEGIN
      length := 0;
      REPEAT
        token.identifierString[length] := ch;
        INC(length);
        GetNextCharacter
      UNTIL (length = MaxIdentifierLength) OR ~MayContinue(ch);
      IF length = MaxIdentifierLength THEN
        Error( Basic.IdentifierTooLong );
        DEC(length)
      END;
      token.identifierString[length] := 0X
    END GetIdentifier;

    (** Scan the next assembler token into token, skipping comments.
        Line ends are reported as Ln tokens; ';' starts a comment that runs to the end of the line.
        Returns TRUE as long as the scanner's error flag is not set. **)
    PROCEDURE GetNextToken*(VAR token: Token ): BOOLEAN;
    VAR sym: LONGINT;

      (* skip blanks and control characters, but stop at line ends and at the end of the text *)
      PROCEDURE SkipBlanks;
      BEGIN
        WHILE (ch # EOT) & (ch # CR) & (ch # LF) & (ch <= ' ') DO
          GetNextCharacter
        END
      END SkipBlanks;

    BEGIN
      REPEAT
        SkipBlanks;
        token.position := position;
        CASE ch OF (* after SkipBlanks: EOT, CR, LF or a character above blank *)
        | EOT: sym := EndOfText
        | DoubleQuote:
          sym := String; GetString(token, TRUE, FALSE, TRUE)
        | SingleQuote:
          sym := Character; GetString(token, FALSE, FALSE, FALSE);
          token.character := token.string[0];
          IF token.stringLength # 2 THEN (* stringlength = 1 for empty string '' *)
            Error(Basic.IllegalCharacterValue)
          END
        | '\':
          GetNextCharacter;
          IF ch = DoubleQuote THEN
            sym := String; GetString(token, FALSE, FALSE, TRUE)
          ELSE
            sym := Backslash
          END
        | '#': GetNextCharacter; sym := Unequal (* for the ARM assembler *)
        | '(':
          GetNextCharacter;
          IF ch = '*' THEN
            GetNextCharacter; ReadComment(token); sym := Comment
          ELSE
            sym := LeftParenthesis
          END
        | ')': GetNextCharacter; sym := RightParenthesis
        | CR:
          (* CR, optionally followed by LF, is one line end *)
          GetNextCharacter; sym := Ln;
          IF ch = LF THEN GetNextCharacter END
        | LF:
          (* LF, optionally followed by CR, is one line end *)
          GetNextCharacter; sym := Ln;
          IF ch = CR THEN GetNextCharacter END
        | '*': GetNextCharacter; sym := Times
        | '+': GetNextCharacter; sym := Plus
        | ',': GetNextCharacter; sym := Comma
        | '-': GetNextCharacter; sym := Minus
        | '~': GetNextCharacter; sym := Not
        | '.': GetNextCharacter; sym := Period
        | '/': GetNextCharacter; sym := Div
        | '%': GetNextCharacter; sym := Mod
        | '0'..'9': sym := GetNumber(token)
        | ':': GetNextCharacter; sym := Colon
        | ';': sym := Comment; SkipToEndOfLine
        | '=': GetNextCharacter; sym := Equal
        | '[': GetNextCharacter; sym := LeftBracket
        | ']': GetNextCharacter; sym := RightBracket
        | '{': GetNextCharacter; sym := LeftBrace
        | '}': GetNextCharacter; sym := RightBrace
        | '!': GetNextCharacter; sym := ExclamationMark
        | '^': GetNextCharacter; sym := Arrow
        | 'A'..'Z', 'a'..'z', '@':
          (* letters and the '@'-token initiate an assembly scanner identifier *)
          sym := Identifier; GetIdentifier(token)
        | '$':
          GetNextCharacter;
          IF ch = '$' THEN GetNextCharacter; sym := PCOffset ELSE sym := PC END
        ELSE
          GetNextCharacter; sym := None
        END;
        token.position.end := position.start
      UNTIL sym # Comment;
      token.symbol := sym;
      IF Trace THEN
        D.Ln; D.Str( "Scan at " ); D.Int( token.position.start,1 ); D.Str( ": " ); PrintToken(D.Log,token); D.Update
      END;
      RETURN ~error
    END GetNextToken;

  END AssemblerScanner;
  1060. VAR
  1061. reservedCharacter: ARRAY 256 OF BOOLEAN; (* indexed by character code; InitReservedCharacters sets FALSE for letters, digits and '_', TRUE otherwise *)
  1062. symbols-: ARRAY EndOfText+1 OF Keyword; (* printable name of each symbol, filled by InitSymbols *)
  1063. keywordsLower, keywordsUpper: KeywordTable; (* keyword tables for lowercase and uppercase spelling, created and filled by InitKeywords *)
  (** Create a scanner reading from reader; errors are reported via diagnostics.
      The scanner starts at offset position, counted as line 1 with line position 0. **)
  PROCEDURE NewScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): Scanner;
  VAR scanner: Scanner; origin: Position;
  BEGIN
    origin.line := 1;
    origin.linepos := 0;
    origin.start := position;
    origin.end := position;
    NEW( scanner, source, reader, origin, diagnostics );
    RETURN scanner
  END NewScanner;
  (** Create an assembler scanner reading from reader; errors are reported via diagnostics.
      The scanner starts at offset position, counted as line 1 with line position 0. **)
  PROCEDURE NewAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): AssemblerScanner;
  VAR scanner: AssemblerScanner; origin: Position;
  BEGIN
    origin.line := 1;
    origin.linepos := 0;
    origin.start := position;
    origin.end := position;
    NEW( scanner, source, reader, origin, diagnostics );
    RETURN scanner
  END NewAssemblerScanner;
  (** Write the textual form of token to str.
      - Identifier, Number: the token's identifierString.
      - String, Comment: the token's string; str must be able to hold token.stringLength characters.
      - any other symbol: the keyword registered for that symbol in the given case
        (Uppercase or Lowercase, as required by GetKeyword), or the empty string if there is none. **)
  PROCEDURE TokenToString*(CONST token: Token; case: LONGINT; VAR str: ARRAY OF CHAR);
  VAR id: StringPool.Index;
  BEGIN
    CASE token.symbol OF
    | Identifier, Number:
      COPY(token.identifierString, str)
    | String, Comment:
      (* Compare against the length of the content, not against LEN(token.string^):
         the latter is the size of the array the string is stored in and may be larger than the
         string itself, which made the check fail for strings that fit.
         NOTE(review): relies on token.stringLength counting the terminating 0X, as the scanner's
         comments state ("stringlength = 1 for empty string") - confirm against GetString. *)
      ASSERT(LEN(str) >= token.stringLength);
      COPY(token.string^, str)
    ELSE
      GetKeyword(case, token.symbol, id);
      IF id < 0 THEN str := "" ELSE StringPool.GetString(id, str) END
    END
  END TokenToString;
  (** Debugging output: write token to w as
      start-end:SymbolName, followed by the number type for numbers, and then either the quoted string,
      the comment text, or the token text as produced by TokenToString for uppercase keywords. **)
  PROCEDURE PrintToken*(w: Streams.Writer; CONST token: Token);
  VAR str: ARRAY 256 OF CHAR;
  BEGIN
    w.Int(token.position.start,1); w.String("-"); w.Int(token.position.end,1); w.String(":");
    w.String(symbols[token.symbol]);
    IF token.symbol = Number THEN
      CASE token.numberType OF
      | Integer: w.String("(integer)")
      | Hugeint: w.String("(hugeint)")
      | Real: w.String("(real)")
      | Longreal: w.String("(longreal)")
      ELSE
        (* a CASE without ELSE aborts on an unlisted value; a debug printer should show the value instead *)
        w.String("(number type "); w.Int(token.numberType,1); w.String(")")
      END
    END;
    IF token.symbol = String THEN
      w.String(":"); w.Char('"'); w.String(token.string^); w.Char('"')
    ELSIF token.symbol = Comment THEN
      w.String("(*"); w.String(token.string^); w.String("*)")
    ELSE
      TokenToString(token, Uppercase, str); w.String(": "); w.String(str)
    END
  END PrintToken;
  (** Fill the reservedCharacter table. Reserved characters are the characters that may not occur
      within an identifier: everything except letters, digits and the underscore. **)
  PROCEDURE InitReservedCharacters;
  VAR code: LONGINT; c: CHAR;
  BEGIN
    FOR code := 0 TO LEN( reservedCharacter ) - 1 DO
      c := CHR(code);
      reservedCharacter[code] :=
        ~( ('a' <= c) & (c <= 'z') OR ('A' <= c) & (c <= 'Z') OR ('0' <= c) & (c <= '9') OR (c = '_') )
    END
  END InitReservedCharacters;
  (* Look up the keyword identifier registered for symbol, using the uppercase or the lowercase
     keyword table as selected by case. case must be Uppercase or Lowercase. *)
  PROCEDURE GetKeyword*(case:LONGINT; symbol: LONGINT; VAR identifier: IdentifierType);
  BEGIN
    IF case # Uppercase THEN
      ASSERT(case = Lowercase);
      keywordsLower.IdentifierByIndex(symbol, identifier)
    ELSE
      keywordsUpper.IdentifierByIndex(symbol, identifier)
    END
  END GetKeyword;
  (** Fill the symbols table with a printable name for every symbol from None to EndOfText
      and verify that no entry was left empty. **)
  PROCEDURE InitSymbols;
  VAR index: LONGINT;
  BEGIN
    symbols[None] := "None";

    (* relational operators *)
    symbols[Equal] := "Equal"; symbols[DotEqual] := "DotEqual";
    symbols[Unequal] := "Unequal"; symbols[DotUnequal] := "DotUnequal";
    symbols[Less] := "Less"; symbols[DotLess] := "DotLess";
    symbols[LessEqual] := "LessEqual"; symbols[DotLessEqual] := "DotLessEqual";
    symbols[Greater] := "Greater"; symbols[DotGreater] := "DotGreater";
    symbols[GreaterEqual] := "GreaterEqual"; symbols[DotGreaterEqual] := "DotGreaterEqual";
    symbols[LessLessQ] := "LessLessQ"; symbols[GreaterGreaterQ] := "GreaterGreaterQ";
    symbols[In] := "In"; symbols[Is] := "Is";

    (* multiplicative operators *)
    symbols[Times] := "Times"; symbols[TimesTimes] := "TimesTimes";
    symbols[DotTimes] := "DotTimes"; symbols[PlusTimes] := "PlusTimes";
    symbols[Slash] := "Slash"; symbols[Backslash] := "Backslash"; symbols[DotSlash] := "DotSlash";
    symbols[Div] := "Div"; symbols[Mod] := "Mod"; symbols[And] := "And";

    (* additive operators and negation *)
    symbols[Or] := "Or"; symbols[Plus] := "Plus"; symbols[Minus] := "Minus";
    symbols[Not] := "Not";

    (* symbols that may start an expression *)
    symbols[LeftParenthesis] := "LeftParenthesis"; symbols[LeftBracket] := "LeftBracket"; symbols[LeftBrace] := "LeftBrace";
    symbols[Number] := "Number"; symbols[Character] := "Character"; symbols[String] := "String";
    symbols[Nil] := "Nil"; symbols[Imag] := "Imag"; symbols[True] := "True"; symbols[False] := "False";
    symbols[Self] := "Self"; symbols[New] := "New"; symbols[Result] := "Result";
    symbols[Identifier] := "Identifier";

    (* symbols that may start a statement *)
    symbols[If] := "If"; symbols[Case] := "Case"; symbols[While] := "While"; symbols[Repeat] := "Repeat";
    symbols[For] := "For"; symbols[Loop] := "Loop"; symbols[With] := "With"; symbols[Exit] := "Exit";
    symbols[Await] := "Await"; symbols[Return] := "Return"; symbols[Ignore] := "Ignore"; symbols[Begin] := "Begin";

    (* separators, closing brackets and further punctuation *)
    symbols[Semicolon] := "Semicolon"; symbols[Transpose] := "Transpose";
    symbols[RightBrace] := "RightBrace"; symbols[RightBracket] := "RightBracket"; symbols[RightParenthesis] := "RightParenthesis";
    symbols[Questionmark] := "Questionmark"; symbols[ExclamationMark] := "ExclamationMark";
    symbols[Questionmarks] := "Questionmarks"; symbols[ExclamationMarks] := "ExclamationMarks";
    symbols[LessLess] := "LessLess"; symbols[GreaterGreater] := "GreaterGreater";
    symbols[Upto] := "Upto"; symbols[Arrow] := "Arrow"; symbols[Period] := "Period";
    symbols[Comma] := "Comma"; symbols[Colon] := "Colon";

    (* remaining keywords *)
    symbols[Of] := "Of"; symbols[Then] := "Then"; symbols[Do] := "Do"; symbols[To] := "To"; symbols[By] := "By";
    symbols[Becomes] := "Becomes"; symbols[Bar] := "Bar";
    symbols[End] := "End"; symbols[Else] := "Else"; symbols[Elsif] := "Elsif"; symbols[Extern] := "Extern";
    symbols[Until] := "Until"; symbols[Finally] := "Finally"; symbols[Code] := "Code";
    symbols[Const] := "Const"; symbols[Type] := "Type"; symbols[Var] := "Var"; symbols[Out] := "Out";
    symbols[Procedure] := "Procedure"; symbols[Operator] := "Operator";
    symbols[Import] := "Import"; symbols[Definition] := "Definition";
    symbols[Module] := "Module"; symbols[Cell] := "Cell"; symbols[CellNet] := "CellNet";

    (* type constructors and type-related keywords *)
    symbols[Array] := "Array"; symbols[Object] := "Object"; symbols[Record] := "Record";
    symbols[Pointer] := "Pointer"; symbols[Enum] := "Enum"; symbols[Port] := "Port";
    symbols[Address] := "Address"; symbols[Alias] := "Alias"; symbols[Size] := "Size";

    (* symbols produced by the assembler scanner *)
    symbols[Ln] := "Ln"; symbols[PC] := "PC"; symbols[PCOffset] := "PCOffset";

    (* number types *)
    symbols[Shortint] := "Shortint"; symbols[Integer] := "Integer"; symbols[Longint] := "Longint";
    symbols[Hugeint] := "Hugeint"; symbols[Real] := "Real"; symbols[Longreal] := "Longreal";

    symbols[Comment] := "Comment";
    symbols[EndOfText] := "EndOfText";

    (* every symbol must have received a name *)
    index := 0;
    WHILE index <= EndOfText DO
      ASSERT(symbols[index] # "");
      INC(index)
    END
  END InitSymbols;
  (** Create the lowercase and uppercase keyword tables and register all keywords and operator
      symbols in them. For each entry the expected-symbol text used in error messages is registered
      as well; finally the messages for missing numbers, strings, characters and identifiers are set. **)
  PROCEDURE InitKeywords;

    (* copy source to dest, including the terminating 0X, converting 'a'..'z' to capitals *)
    PROCEDURE Upper(CONST source: ARRAY OF CHAR; VAR dest: ARRAY OF CHAR);
    VAR current: CHAR; at: LONGINT;
    BEGIN
      at := 0;
      LOOP
        current := source[at];
        IF ('a' <= current) & (current <= 'z') THEN
          current := CHR(ORD('A') + (ORD(current) - ORD('a')))
        END;
        dest[at] := current;
        IF current = 0X THEN EXIT END;
        INC(at)
      END
    END Upper;

    (* register name for symbol in the tables selected by case and as the expected text for symbol *)
    PROCEDURE Enter1(CONST name: ARRAY OF CHAR; symbol: LONGINT; case: SET);
    BEGIN
      IF Lowercase IN case THEN
        keywordsLower.PutString(name, symbol)
      END;
      IF Uppercase IN case THEN
        keywordsUpper.PutString(name, symbol)
      END;
      Basic.SetErrorExpected(symbol, name)
    END Enter1;

    (* register a keyword: name as given in the lowercase table, its capitalized form in the uppercase table *)
    PROCEDURE Enter(CONST name: ARRAY OF CHAR; symbol: LONGINT);
    VAR capitals: Keyword;
    BEGIN
      Upper(name, capitals);
      Enter1(name, symbol, {Lowercase});
      Enter1(capitals, symbol, {Uppercase})
    END Enter;

    (* register an operator or punctuation symbol, spelled identically in both tables *)
    PROCEDURE EnterSymbol(CONST name: ARRAY OF CHAR; symbol: LONGINT);
    BEGIN
      Enter1(name, symbol, {Lowercase, Uppercase})
    END EnterSymbol;

  BEGIN
    NEW(keywordsUpper, EndOfText+1);
    NEW(keywordsLower, EndOfText+1);

    (* constructs and statements *)
    Enter("cell", Cell);
    Enter("cellnet", CellNet);
    Enter("await", Await);
    Enter("begin", Begin);
    Enter("by", By);
    Enter("const", Const);
    Enter("case", Case);
    Enter("code", Code);
    Enter("definition", Definition);
    Enter("do", Do);
    Enter("div", Div);
    Enter("end", End);
    Enter("enum", Enum);
    Enter("else", Else);
    Enter("elsif", Elsif);
    Enter("exit", Exit);
    Enter("extern", Extern);
    Enter("false", False);
    Enter("for", For);
    Enter("finally", Finally);
    Enter("if", If);
    Enter("ignore", Ignore);
    Enter("imag", Imag);
    Enter("in", In);
    Enter("is", Is);
    Enter("import", Import);
    Enter("loop", Loop);
    Enter("module", Module);
    Enter("mod", Mod);
    Enter("nil", Nil);
    Enter("of", Of);
    Enter("or", Or);
    Enter("out", Out);
    Enter("operator", Operator);
    Enter("procedure", Procedure);
    Enter("port", Port);
    Enter("repeat", Repeat);
    Enter("return", Return);
    Enter("self", Self);
    Enter("new", New);
    Enter("result", Result);
    Enter("then", Then);
    Enter("true", True);
    Enter("to", To);
    Enter("type", Type);
    Enter("until", Until);
    Enter("var", Var);
    Enter("while", While);
    Enter("with", With);

    (* types *)
    Enter("array", Array);
    Enter("object", Object);
    Enter("pointer", Pointer);
    Enter("record", Record);
    Enter("address", Address);
    Enter("size", Size);
    Enter("alias", Alias);

    (* tokens *)
    EnterSymbol("#", Unequal);
    EnterSymbol("&", And);
    EnterSymbol("(", LeftParenthesis);
    EnterSymbol(")", RightParenthesis);
    EnterSymbol("*", Times);
    EnterSymbol("**", TimesTimes);
    EnterSymbol("+", Plus);
    EnterSymbol("+*", PlusTimes);
    EnterSymbol(",", Comma);
    EnterSymbol("-", Minus);
    EnterSymbol(".", Period);
    EnterSymbol("..", Upto);
    EnterSymbol(".*", DotTimes);
    EnterSymbol("./", DotSlash);
    EnterSymbol(".=", DotEqual);
    EnterSymbol(".#", DotUnequal);
    EnterSymbol(".>", DotGreater);
    EnterSymbol(".>=", DotGreaterEqual);
    EnterSymbol(".<", DotLess);
    EnterSymbol(".<=", DotLessEqual);
    EnterSymbol("/", Slash);
    EnterSymbol(":", Colon);
    EnterSymbol(":=", Becomes);
    EnterSymbol(";", Semicolon);
    EnterSymbol("<", Less);
    EnterSymbol("<=", LessEqual);
    EnterSymbol("=", Equal);
    EnterSymbol(">", Greater);
    EnterSymbol(">=", GreaterEqual);
    EnterSymbol("[", LeftBracket);
    EnterSymbol("]", RightBracket);
    EnterSymbol("^", Arrow);
    EnterSymbol("{", LeftBrace);
    EnterSymbol("|", Bar);
    EnterSymbol("}", RightBrace);
    EnterSymbol("~", Not);
    EnterSymbol("\", Backslash);
    EnterSymbol("`", Transpose);
    EnterSymbol("?", Questionmark);
    EnterSymbol("??", Questionmarks);
    EnterSymbol("!", ExclamationMark);
    EnterSymbol("!!", ExclamationMarks);
    EnterSymbol("<<", LessLess);
    EnterSymbol("<<?", LessLessQ);
    EnterSymbol(">>", GreaterGreater);
    EnterSymbol(">>?", GreaterGreaterQ);

    (* messages for symbols that have no fixed spelling *)
    Basic.SetErrorMessage(Number, "missing number");
    Basic.SetErrorMessage(String, "missing string");
    Basic.SetErrorMessage(Character, "missing character");
    Basic.SetErrorMessage(Identifier, "missing identifier");
    Basic.SetErrorMessage(EndOfText, "unexpected token before end")
  END InitKeywords;
  (** Debugging / reporting: for every symbol number from 0 to EndOfText write one line of the form
      number: "lowercase keyword", "uppercase keyword" to the output of the command context. **)
  PROCEDURE ReportKeywords*(context: Commands.Context);
  VAR symbol: LONGINT; spelling: Keyword;

    (* write text enclosed in double quotes *)
    PROCEDURE Quoted(CONST text: ARRAY OF CHAR);
    BEGIN
      context.out.Char('"');
      context.out.String(text);
      context.out.Char('"')
    END Quoted;

  BEGIN
    FOR symbol := 0 TO EndOfText DO
      context.out.Int(symbol,1); context.out.String(": ");
      keywordsLower.StringByIndex(symbol, spelling);
      Quoted(spelling);
      context.out.String(", ");
      keywordsUpper.StringByIndex(symbol, spelling);
      Quoted(spelling);
      context.out.Ln
    END
  END ReportKeywords;
  1423. (*
  1424. PROCEDURE TestScanner*(context: Commands.Context);
  1425. VAR filename: ARRAY 256 OF CHAR; reader: Streams.Reader; scanner: Scanner;token: Token;
  1426. BEGIN
  1427. context.arg.SkipWhitespace; context.arg.String(filename);
  1428. reader := TextUtilities.GetTextReader(filename);
  1429. scanner := NewScanner(filename,reader,0,NIL);
  1430. REPEAT
  1431. IF scanner.GetNextToken(token) THEN
  1432. PrintToken(context.out,token);context.out.Ln;
  1433. END;
  1434. UNTIL scanner.error OR (token.symbol=EndOfText)
  1435. END TestScanner;
  1436. *)
  1437. BEGIN (* module initialization: fill the reserved-character table, the symbol names and the keyword tables *)
  1438. InitReservedCharacters; InitSymbols; InitKeywords
  1439. END FoxScanner.
  1440. FoxScanner.ReportKeywords
  1441. FoxScanner.TestScanner Test.Mod ~