FoxScanner.Mod 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607
  1. MODULE FoxScanner; (** AUTHOR "fof & fn"; PURPOSE "Oberon Compiler: Scanner"; **)
  2. (* (c) fof ETH Zürich, 2009 *)
  3. IMPORT Streams, Strings, Diagnostics, Basic := FoxBasic, D := Debugging, Commands, StringPool;
  4. CONST
  5. Trace = FALSE; (* enables debugging output through the Debugging module *)
  6. (* overall scanner limitations *)
  7. MaxIdentifierLength* = 128; (* IdentifierString holds this many characters plus the terminating 0X *)
  8. (* parametrization of numeric scanner: *)
  9. MaxHexDigits* = 8; (* maximal hexadecimal longint length *)
  10. MaxHugeHexDigits* = 16; (* maximal hexadecimal hugeint length *)
  11. MaxRealExponent* = 38; (* maximal real exponent *)
  12. MaxLongrealExponent* = 308; (* maximal longreal exponent *)
  13. (* scanner constants: control characters; the look-ahead character is EOT at the end of the text *)
  14. EOT* = 0X; LF* = 0AX; CR* = 0DX; TAB* = 09X; ESC* = 1BX;
  15. TYPE
  16. StringType* = Strings.String;
  17. IdentifierType *= StringPool.Index; (* identifiers are represented by their string pool index *)
  18. IdentifierString*= ARRAY MaxIdentifierLength+1 OF CHAR; (* identifier text including the terminating 0X *)
  19. CONST
  20. (** tokens *)
  21. (*
  22. note: order of tokens is important for the parser, do not modify without looking it up
  23. FoxProgTools.Enum --export --linefeed=6
  24. None
  25. (* RelationOps: Equal ... Is *)
  26. Equal DotEqual Unequal DotUnequal
  27. Less DotLess LessEqual DotLessEqual Greater DotGreater GreaterEqual DotGreaterEqual
  28. LessLessQ GreaterGreaterQ Questionmarks ExclamationMarks
  29. In Is
  30. (* MulOps: Times ... And *)
  31. Times TimesTimes DotTimes PlusTimes Slash Backslash DotSlash Div Mod And
  32. (* AddOps: Or ... Minus *)
  33. Or Plus Minus
  34. (* Prefix Unary Operators Plus ... Not *)
  35. Not
  36. (* expressions may start with Plus ... Identifier *)
  37. LeftParenthesis LeftBracket LeftBrace Number Character String Nil Imag True False Self Result New Identifier
  38. (* statements may start with Self ... Begin *)
  39. If Case While Repeat For Loop With Exit Await Return Begin
  40. (* symbols, expressions and statements cannot start with *)
  41. Semicolon Transpose RightBrace RightBracket RightParenthesis
  42. Questionmark ExclamationMark
  43. LessLess GreaterGreater
  44. Upto Arrow Period Comma Colon Of Then Do To By Becomes Bar End Else Elsif Until Finally
  45. (* declaration elements *)
  46. Code Const Type Var Out Procedure Operator Import Definition Module Cell CellNet Extern
  47. (* composite type symbols *)
  48. Array Object Record Pointer Enum Port Address Size Alias
  49. (* assembler constants *)
  50. Ln PC PCOffset
  51. (* number types *)
  52. Shortint Integer Longint Hugeint Real Longreal
  53. Comment EndOfText Escape
  54. ~
  55. *)
  56. None*= 0;
  57. (* RelationOps: Equal ... Is *)
  58. Equal*= 1; DotEqual*= 2; Unequal*= 3; DotUnequal*= 4; Less*= 5; DotLess*= 6;
  59. LessEqual*= 7; DotLessEqual*= 8; Greater*= 9; DotGreater*= 10; GreaterEqual*= 11; DotGreaterEqual*= 12;
  60. LessLessQ*= 13; GreaterGreaterQ*= 14; Questionmarks*= 15; ExclamationMarks*= 16; In*= 17; Is*= 18;
  61. (* MulOps: Times ... And *)
  62. Times*= 19; TimesTimes*= 20; DotTimes*= 21; PlusTimes*= 22; Slash*= 23; Backslash*= 24;
  63. DotSlash*= 25; Div*= 26; Mod*= 27; And*= 28;
  64. (* AddOps: Or ... Minus *)
  65. Or*= 29; Plus*= 30; Minus*= 31;
  66. (* Prefix Unary Operators Plus ... Not *)
  67. Not*= 32;
  68. (* expressions may start with Plus ... Identifier *)
  69. LeftParenthesis*= 33; LeftBracket*= 34; LeftBrace*= 35; Number*= 36; Character*= 37; String*= 38;
  70. Nil*= 39; Imag*= 40; True*= 41; False*= 42; Self*= 43; Result*= 44;
  71. New*= 45; Identifier*= 46;
  72. (* statements may start with Self ... Begin *)
  73. If*= 47; Case*= 48; While*= 49; Repeat*= 50; For*= 51; Loop*= 52;
  74. With*= 53; Exit*= 54; Await*= 55; Return*= 56; Begin*= 57;
  75. (* symbols that expressions and statements cannot start with *)
  76. Semicolon*= 58; Transpose*= 59; RightBrace*= 60; RightBracket*= 61; RightParenthesis*= 62; Questionmark*= 63;
  77. ExclamationMark*= 64; LessLess*= 65; GreaterGreater*= 66; Upto*= 67; Arrow*= 68; Period*= 69;
  78. Comma*= 70; Colon*= 71; Of*= 72; Then*= 73; Do*= 74; To*= 75;
  79. By*= 76; Becomes*= 77; Bar*= 78; End*= 79; Else*= 80; Elsif*= 81;
  80. Until*= 82; Finally*= 83;
  81. (* declaration elements *)
  82. Code*= 84; Const*= 85; Type*= 86; Var*= 87; Out*= 88; Procedure*= 89;
  83. Operator*= 90; Import*= 91; Definition*= 92; Module*= 93; Cell*= 94; CellNet*= 95;
  84. Extern*= 96;
  85. (* composite type symbols *)
  86. Array*= 97; Object*= 98; Record*= 99; Pointer*= 100; Enum*= 101; Port*= 102;
  87. Address*= 103; Size*= 104; Alias*= 105;
  88. (* assembler constants *)
  89. Ln*= 106; PC*= 107; PCOffset*= 108;
  90. (* number types; Integer, Hugeint, Real and Longreal are the values stored in Symbol.numberType by GetNumber *)
  91. Shortint*= 109; Integer*= 110; Longint*= 111; Hugeint*= 112; Real*= 113; Longreal*= 114;
  92. Comment*= 115; EndOfText*= 116; Escape*= 117;
  93. SingleQuote = 27X; DoubleQuote* = 22X; (* the characters ' and " *)
  94. Ellipsis = 7FX; (* used in Scanner.GetNumber to return with ".." when reading an interval like 3..5 *)
  95. Uppercase*=0; (* Uppercase, Lowercase and Unknown are the values of Scanner.case *)
  96. Lowercase*=1;
  97. Unknown*=2; (* value of Scanner.case after initialization and after ResetCase *)
  TYPE
    (* keywords book keeping *)
    Keyword* = ARRAY 32 OF CHAR;

    (**
      Two-way keyword map.
      The inherited integer hash table maps string pool index -> keyword index,
      the array "table" maps keyword index -> string pool index (-1 = no entry).
    **)
    KeywordTable* = OBJECT(Basic.HashTableInt); (* string -> index *)
    VAR table: POINTER TO ARRAY OF LONGINT;

      (** create a table for keyword indices 0 .. size-1; all reverse entries start out as -1 **)
      PROCEDURE &InitTable*(size: LONGINT);
      VAR i: LONGINT;
      BEGIN
        Init(size); NEW(table,size); FOR i := 0 TO size-1 DO table[i] := -1; END;
      END InitTable;

      (** keyword index registered for the given string pool index, -1 if there is none **)
      PROCEDURE IndexByIdentifier*(identifier: IdentifierType): LONGINT;
      BEGIN
        IF Has(identifier) THEN
          RETURN GetInt(identifier)
        ELSE (* do not modify index *)
          RETURN -1
        END;
      END IndexByIdentifier;

      (** keyword index registered for the given name, -1 if there is none **)
      PROCEDURE IndexByString*(CONST name: ARRAY OF CHAR): LONGINT;
      VAR stringPoolIndex: LONGINT;
      BEGIN
        StringPool.GetIndex(name,stringPoolIndex);
        IF Has(stringPoolIndex) THEN
          RETURN GetInt(stringPoolIndex)
        ELSE (* do not modify index *)
          RETURN -1
        END;
      END IndexByString;

      (** string pool index stored for keyword "index"; -1 if nothing was stored or index is outside the table **)
      PROCEDURE IdentifierByIndex*(index: LONGINT; VAR identifier: IdentifierType);
      BEGIN
        (* an out-of-range index is answered like an empty slot instead of raising an index trap *)
        IF (index >= 0) & (index < LEN(table)) THEN
          identifier := table[index]
        ELSE
          identifier := -1
        END
      END IdentifierByIndex;

      (** name stored for keyword "index"; the empty string if nothing was stored or index is outside the table **)
      PROCEDURE StringByIndex*(index: LONGINT; VAR name: ARRAY OF CHAR);
      VAR stringPoolIndex: LONGINT;
      BEGIN
        IF (index >= 0) & (index < LEN(table)) THEN
          stringPoolIndex := table[index]
        ELSE
          stringPoolIndex := -1
        END;
        IF stringPoolIndex < 0 THEN
          name := ""
        ELSE
          StringPool.GetString(stringPoolIndex,name);
        END;
      END StringByIndex;

      (** register "name" under keyword "index" in both directions; index must lie within the table **)
      PROCEDURE PutString*(CONST name: ARRAY OF CHAR; index: LONGINT);
      VAR stringPoolIndex: LONGINT;
      BEGIN
        StringPool.GetIndex(name,stringPoolIndex);
        table[index] := stringPoolIndex;
        PutInt(stringPoolIndex,index);
      END PutString;

    END KeywordTable;
  149. TYPE
  150. Token*=LONGINT; (* one of the token constants None .. Escape *)
  151. Position*= Basic.Position;
  152. (**
  153. symbol: data structure for the data transfer of the last read input from the scanner to the parser
  154. **)
  155. Symbol*= RECORD
  156. position*: Position;
  157. token*: Token; (* token of symbol *)
  158. identifier*: IdentifierType; (* string pool index of the identifier, set by GetIdentifier *)
  159. identifierString*: IdentifierString; (* cache of identifier's string; GetNumber stores the text of a numeric literal here *)
  160. string*: StringType; (* string or identifier *)
  161. stringLength*: LONGINT; (* length of string, if stringLength = 2 then this may be interpreted as character and integer = ORD(ch) *)
  162. numberType*: LONGINT; (* Integer, Hugeint, Real or Longreal *)
  163. integer*: LONGINT; (* value of an integer or character literal; for Hugeint literals the value shortened with SHORT *)
  164. hugeint*: HUGEINT; (*! unify longint and hugeint *)
  165. character*: CHAR; (* value of a character literal such as 0DX *)
  166. real*: LONGREAL; (* value of a Real or Longreal literal *)
  167. END;
  (**
    Growable character buffer that can be filled through a Streams.Writer (see GetWriter).
    Invariant kept by Add and Clear: data[length] = 0X.
  **)
  StringMaker* = OBJECT (* taken from TF's scanner *)
  VAR length : LONGINT;
    data : StringType;

    (** allocate the buffer; sizes below 256 are raised to 256 **)
    PROCEDURE &Init*(initialSize : LONGINT);
    BEGIN
      IF initialSize < 256 THEN initialSize := 256 END;
      NEW(data, initialSize); length := 0;
    END Init;

    (**
      Append len characters of buf, starting at ofs. Has the parameter list required by the writer
      created in GetWriter; "propagate" is not used. res is always set to Streams.Ok.
    **)
    PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: LONGINT);
    VAR i : LONGINT; n: StringType;
    BEGIN
      IF length + len + 1 >= LEN(data) THEN
        (* grow geometrically so that filling a long string does not copy the buffer on every call *)
        NEW(n, 2 * LEN(data) + len + 1); FOR i := 0 TO length - 1 DO n[i] := data[i] END;
        data := n
      END;
      WHILE len > 0 DO
        data[length] := buf[ofs];
        INC(ofs); INC(length); DEC(len)
      END;
      data[length] := 0X;
      res := Streams.Ok (* the result was left unassigned before; appending to memory cannot fail *)
    END Add;

    (* remove last n characters *)
    (* NOTE(review): for length > 0 the 0X is written at data[length-1], i.e. over the last remaining
      character. This matches callers that count a trailing 0X in length (the scanner writes Char(0X)
      before taking the string) - confirm before changing. *)
    PROCEDURE Shorten*(n : LONGINT);
    BEGIN
      DEC(length, n);
      IF length < 0 THEN length := 0 END;
      IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END
    END Shorten;

    (** make the buffer empty; the allocated memory is kept **)
    PROCEDURE Clear*;
    BEGIN
      data[0] := 0X;
      length := 0
    END Clear;

    (** new writer (buffer size 256) that appends to this string maker through Add **)
    PROCEDURE GetWriter*() : Streams.Writer;
    VAR w : Streams.Writer;
    BEGIN
      NEW(w, SELF.Add, 256);
      RETURN w
    END GetWriter;

    (** new reader over the current content, up to the first 0X **)
    PROCEDURE GetReader*(): Streams.Reader;
    VAR r: Streams.StringReader; size: LONGINT;
    BEGIN
      (* The reader used to be created with a fixed size of 256. The string reader keeps the text in
        its own buffer, so longer content was presumably cut off at 256 characters - verify against
        Streams.StringReader.Set. The size now follows the content and never drops below 256. *)
      size := length + 1;
      IF size < 256 THEN size := 256 END;
      NEW(r, size);
      r.Set(data^);
      RETURN r
    END GetReader;

    (** the internal buffer itself (not a copy); len receives the current length **)
    PROCEDURE GetString*(VAR len: LONGINT) : StringType;
    BEGIN
      len := length;
      RETURN data
    END GetString;

    (** freshly allocated copy of the content with length+1 elements; COPY stops at the first 0X **)
    PROCEDURE GetStringCopy*(VAR len: LONGINT): StringType;
    VAR new: StringType;
    BEGIN
      len := length;
      NEW(new,len+1);
      COPY(data^,new^);
      RETURN new
    END GetStringCopy;

  END StringMaker;
  228. (** scanner reflects the following EBNF
  229. Symbol = String | Token | Number | Keyword | Identifier.
  230. Token = | '#' | '&' | '(' ['*' any '*' ')'] | ')' | '*'['*'] | '+'['*'] | ',' | '-' | '.' [ '.' | '*' | '/' | '=' | '#' | '>'['='] | '<' ['='] ]
  231. | '/' | ':' ['='] | ';' | '<' ['=' | '<' ['?'] ] | '=' | '>' [ '=' | '>' ['?']]
  232. | '[' | ']' | '^' | '{' | '|' | '}' | '~' | '\' | '`' | '?' ['?'] | '!' ['!']
  233. Identifier = Letter {Letter | Digit | '_'}.
  234. Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z'.
  235. Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .
  236. String = '"' {Character} '"' | "'" {Character} "'".
  237. Character = Digit [HexDigit] 'X'.
  238. Number = Integer | Real.
  239. Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit}.
  240. Real = Digit {Digit} '.' {Digit} [ScaleFactor].
  241. ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
  242. HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
  243. **)
  244. Scanner* = OBJECT
  245. VAR
  246. (* helper state information *)
  247. source-: StringType; (* name of the source code, passed to diagnostics with every error *)
  248. reader-: Streams.Reader; (* source *)
  249. diagnostics: Diagnostics.Diagnostics; (* error logging, may be NIL *)
  250. ch-: CHAR; (* look-ahead character *)
  251. position-: Position; (* start is advanced with every character read, line with every LF *)
  252. (*
  253. position-: LONGINT; (* current position *)
  254. line-: LONGINT;
  255. *)
  256. error-: BOOLEAN; (* if error occurred during scanning *)
  257. firstIdentifier: BOOLEAN; (* support of lower vs. upper case keywords *)
  258. case-: LONGINT; (* Uppercase, Lowercase or Unknown *)
  259. stringWriter: Streams.Writer; (* writer that appends to stringMaker *)
  260. stringMaker: StringMaker; (* collects the characters of the string that is being scanned *)
  261. useLineNumbers*: BOOLEAN; (* if TRUE, the position is appended to error messages *)
  262. (*
  263. parameters of InitializeScanner:
  264. source: name of the source code for reference in error outputs
  265. reader: input stream
  266. position: reference position (offset) of the input stream, for error output
  267. diagnostics: error output object
  268. *)
  (**
    Set up the scanner and read the first look-ahead character.
      source: name of the source code for reference in error outputs
      reader: input stream; with reader = NIL the scanner starts at end of text (ch = EOT)
      position: reference position (offset) of the input stream, for error output
      diagnostics: error output object, may be NIL
  **)
  PROCEDURE & InitializeScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics );
  BEGIN
    NEW(stringMaker,1024);
    stringWriter := stringMaker.GetWriter();
    error := FALSE;
    NEW(SELF.source, Strings.Length(source)+1);
    COPY (source, SELF.source^);
    SELF.reader := reader;
    SELF.diagnostics := diagnostics;
    case := Unknown;
    firstIdentifier := TRUE;
    useLineNumbers := FALSE;
    (* The position has to be set up before the first character is read. It used to be assigned
      afterwards, which threw away the line increment of a text starting with LF, so that all
      following line numbers were one too small. *)
    SELF.position.line := 1;
    SELF.position.linepos := 0;
    IF reader = NIL THEN
      SELF.position.start := position;
      ch := EOT
    ELSE
      SELF.position.start := position - 1; (* GetNextCharacter increments it to "position" *)
      GetNextCharacter
    END;
    IF Trace THEN D.Str( "New scanner " ); D.Ln; END;
  END InitializeScanner;
  (* set case back to Unknown and mark the next identifier as the first one again *)
  PROCEDURE ResetCase*; (*! needs a better naming ! *)
  BEGIN
    case := Unknown;
    firstIdentifier := TRUE
  END ResetCase;
  (* set the keyword case to c; the constants Uppercase, Lowercase and Unknown are the defined values *)
  PROCEDURE SetCase*(c: LONGINT);
  BEGIN
    SELF.case := c
  END SetCase;
  (**
    Report a scanning error given as text and set the error flag.
    The message goes to diagnostics (if there is one) under the code Diagnostics.Invalid;
    with useLineNumbers the current position is appended to the text.
  **)
  PROCEDURE ErrorS(CONST msg: ARRAY OF CHAR);
  VAR text: ARRAY 256 OF CHAR;
  BEGIN
    error := TRUE;
    IF diagnostics = NIL THEN RETURN END;
    COPY(msg, text);
    IF useLineNumbers THEN Basic.AppendPosition(text, position) END;
    diagnostics.Error(source^, position.start, Diagnostics.Invalid, text)
  END ErrorS;
  (**
    Report a scanning error given as error code and set the error flag.
    The message text is obtained from Basic.GetErrorMessage; with useLineNumbers the
    current position is appended to it.
  **)
  PROCEDURE Error( code: INTEGER );
  VAR text: ARRAY 256 OF CHAR;
  BEGIN
    error := TRUE;
    IF diagnostics = NIL THEN RETURN END;
    Basic.GetErrorMessage(code,"",text);
    IF useLineNumbers THEN
      Basic.AppendPosition(text, position)
    END;
    diagnostics.Error(source^, position.start, code, text)
  END Error;
  (**
    Read the next look-ahead character into ch and advance position.start by one.
    Reading an LF starts a new line: position.line is incremented and position.linepos is
    set to the new position.start. At the end of the text ch = EOT.
  **)
  PROCEDURE GetNextCharacter*;
  BEGIN
    reader.Char(ch);
    INC(position.start);
    IF ch = LF THEN
      position.linepos := position.start;
      INC(position.line)
    END
    (* No check of reader.res here: Streams returns 0X if reading failed. Should Streams.Reader.Char
      ever change in that respect, add: IF reader.res # Streams.Ok THEN ch := EOT END *)
  END GetNextCharacter;
  329. (*
  330. The following is an implementation of the KMP algorithm used in order to traverse strings until some pattern occurs.
  331. It is not necessary for our implementation of string escape sequences, because the first character of the pattern does not occur in the pattern elsewhere
  332. I found the code useful and keep it here for the time being....
  333. (* generate a table to be able to quickly search for string containing overlaps - KMP algorithm *)
  334. PROCEDURE MakeOverlapTable*(CONST pattern: ARRAY OF CHAR; VAR table: ARRAY OF LONGINT);
  335. VAR i, cnd: LONGINT;
  336. BEGIN
  337. ASSERT(pattern[0] # 0X);
  338. (* if first character did not match: reset search *)
  339. table[0] := -1;
  340. (* if second character did not match: compare to first *)
  341. IF pattern[1] # 0X THEN
  342. table[1] := 0;
  343. END;
  344. (* for all other characters: switch back to previous overlay in pattern *)
  345. i := 2; cnd := 0;
  346. WHILE(pattern[i] # 0X) DO
  347. (* do patterns [i-cnd, i-1] match with pattern[0.. cnd] ? *)
  348. IF pattern[i-1] = pattern[cnd] THEN
  349. INC(cnd); table[i] := cnd; INC(i);
  350. (* no, switch back to last overlap, if possible *)
  351. ELSIF cnd > 0 THEN cnd := table[cnd]
  352. (* not possible: restart at beginning *)
  353. ELSE table[i] := 0; INC(i)
  354. END;
  355. END;
  356. END MakeOverlapTable;
  357. (* using KMP substring search algorithm consume and reproduce all characters of a string until endString *)
  358. PROCEDURE GetString(CONST endString: ARRAY OF CHAR);
  359. VAR escapePos: LONGINT; ech: CHAR; i: LONGINT; table: ARRAY 16 OF LONGINT;
  360. next: LONGINT;
  361. PROCEDURE Append(ch :CHAR);
  362. BEGIN
  363. IF ch = 0X THEN
  364. ErrorS("Unexpected end of text in string"); error := TRUE
  365. ELSE
  366. stringWriter.Char(ch)
  367. END;
  368. END Append;
  369. BEGIN
  370. MakeOverlapTable(endString, table);
  371. (* traverse *)
  372. escapePos := 0; ech := endString[0];
  373. GetNextCharacter;
  374. REPEAT
  375. IF ch = ech THEN
  376. INC(escapePos); ech := endString[escapePos];
  377. GetNextCharacter;
  378. ELSIF escapePos = 0 THEN (* frequent case *)
  379. Append(ch); GetNextCharacter;
  380. ELSE
  381. (* overlaps ? *)
  382. next := table[escapePos];
  383. IF next < 0 THEN next := 0 END;
  384. (* account for "forgotten" characters *)
  385. FOR i := 0 TO escapePos-1-next DO
  386. Append(endString[i]);
  387. END;
  388. (* to next overlapping ? *)
  389. escapePos := table[escapePos];
  390. (* no overlapping *)
  391. IF escapePos < 0 THEN
  392. Append(ch);
  393. escapePos := 0;
  394. GetNextCharacter;
  395. END;
  396. ech := endString[escapePos];
  397. END;
  398. UNTIL (ch = EOT) OR (ech = 0X);
  399. END GetString;
  400. *)
  (* simple case can be utilized when endString does not contain first character, which is the case for our string convention *)
  (**
    Copy the characters of the input to stringWriter until endString has been read completely;
    endString itself is consumed but not copied. Reading starts with the character following the
    current look-ahead character.
      useControl: interpret the sequences \\ and \" (the character itself), \n (CR LF) and \t (TAB);
        an unknown sequence is reported and ends the scan.
    Reaching the end of the text is reported once, at the position where the scan started.
  **)
  PROCEDURE ConsumeStringUntil(CONST endString: ARRAY OF CHAR; useControl: BOOLEAN);
  VAR escapePos: LONGINT; ech: CHAR; i: LONGINT; startPosition: LONGINT;
    failed: BOOLEAN; (* an error occurred within this scan *)
  CONST
    Control = '\';
    Delimiter = '"';
  BEGIN
    (* traverse *)
    escapePos := 0; ech := endString[0]; startPosition := position.start;
    (* Only errors of this scan end the loop. The loop used to test the scanner-wide error flag,
      which stays set after any earlier error and then stopped the scan after a single step. *)
    failed := FALSE;
    GetNextCharacter;
    (* EOT is tested before a character is processed, so that the end of the text is reported
      exactly once (below) instead of additionally when trying to append the 0X *)
    WHILE (ch # EOT) & (ech # 0X) & ~failed DO
      IF ch = ech THEN
        (* next character of endString matched *)
        INC(escapePos); ech := endString[escapePos];
        GetNextCharacter
      ELSIF useControl & (ch = Control) THEN
        GetNextCharacter;
        IF (ch = Control) OR (ch = Delimiter) THEN
          stringWriter.Char(ch)
        ELSIF ch = 'n' THEN
          stringWriter.Char(CR); stringWriter.Char(LF)
        ELSIF ch = 't' THEN
          stringWriter.Char(TAB)
        ELSE
          ErrorS("Unknown control sequence"); failed := TRUE
        END;
        GetNextCharacter
      ELSIF escapePos = 0 THEN (* frequent case *)
        stringWriter.Char(ch); GetNextCharacter
      ELSE
        (* account for "forgotten" characters: the partial match was ordinary string content *)
        FOR i := 0 TO escapePos-1 DO
          stringWriter.Char(endString[i])
        END;
        (* restart; the current character is examined again in the next iteration *)
        ech := endString[0]; escapePos := 0
      END
    END;
    IF ch = EOT THEN position.start := startPosition; ErrorS("Unexpected end of text in string") END;
  END ConsumeStringUntil;
  (**
    Read an escaped string into symbol.string / symbol.stringLength.
    On entry the backslash has been consumed already and ch is either the opening quote or an
    escape character that is followed by the opening quote. The string extends up to
    quote [escape character] backslash. Control sequences are interpreted only if there is no
    escape character.
  **)
  PROCEDURE GetEscapedString(VAR symbol: Symbol);
  VAR terminator: ARRAY 4 OF CHAR; escape: CHAR; next: LONGINT;
  BEGIN
    stringMaker.Clear;
    escape := 0X;
    IF ch # '"' THEN
      escape := ch; GetNextCharacter
    END;
    ASSERT((ch = '"') OR (ch = "'"));
    (* compose the terminating sequence *)
    terminator[0] := ch; next := 1;
    IF escape # 0X THEN
      terminator[next] := escape; INC(next)
    END;
    terminator[next] := '\';
    terminator[next + 1] := 0X;
    ConsumeStringUntil(terminator, escape = 0X);
    (* the writer is buffered: terminate and flush before taking the result *)
    stringWriter.Char(0X);
    stringWriter.Update;
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
  END GetEscapedString;
  (** get a string starting at current position
    string = {'"' {Character} '"'} | {"'" {Character} "'"}.

    On entry ch is the opening quote; the string ends at the next occurrence of the same character.
      multiLine: characters below " " (such as line breaks) are allowed within the string
      multiString: several strings separated by blanks only are concatenated: " " " "
      useControl: interpret \\ and \<quote> (the character itself), \n (CR LF) and \t (TAB)
    The result is stored in symbol.string and symbol.stringLength.
  **)
  PROCEDURE GetString(VAR symbol: Symbol; multiLine, multiString, useControl: BOOLEAN);
  VAR och: CHAR; (* the opening quote *)
    failed: BOOLEAN; (* set by Append; local state of this scan, not the error flag of the scanner *)
  CONST control = '\';

    PROCEDURE Append(ch :CHAR);
    BEGIN
      IF ch = 0X THEN
        ErrorS("Unexpected end of text in string"); failed := TRUE
      ELSE
        stringWriter.Char(ch)
      END;
    END Append;

  BEGIN
    stringMaker.Clear;
    och := ch; failed := FALSE;
    REPEAT
      (* one quoted part *)
      LOOP
        IF failed THEN EXIT END;
        GetNextCharacter;
        IF (ch = och) OR (ch = EOT) THEN EXIT END;
        IF useControl & (ch = control) THEN
          GetNextCharacter;
          IF (ch = control) OR (ch = och) THEN
            Append(ch)
          ELSIF ch = 'n' THEN
            Append(CR); Append(LF);
          ELSIF ch = 't' THEN
            Append(TAB)
          ELSE
            ErrorS("Unknown control sequence")
          END;
        ELSE
          IF ~multiLine & (ch < " ") THEN Error( Basic.StringIllegalCharacter ); EXIT END;
          Append(ch)
        END;
      END;
      IF ch = EOT THEN
        ErrorS("Unexpected end of text in string")
      ELSE
        (* skip the closing quote (or the illegal character) *)
        GetNextCharacter;
        IF multiString THEN SkipBlanks END;
      END;
    UNTIL ~multiString OR (ch # och);
    (* the writer is buffered: terminate and flush before taking the result *)
    stringWriter.Char(0X);
    stringWriter.Update;
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
  END GetString;
  (**
    Identifier = Letter {Letter | Digit | '_'} .
    Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
    Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.

    '_' is the underscore character

    Reads the identifier starting with the look-ahead character, up to the next reserved character,
    into symbol.identifierString and stores its string pool index in symbol.identifier.
    An identifier of MaxIdentifierLength or more characters is reported as too long and
    cut to MaxIdentifierLength-1 characters.
  **)
  PROCEDURE GetIdentifier( VAR symbol: Symbol );
  VAR i: LONGINT; tooLong: BOOLEAN;
  BEGIN
    i := 0; tooLong := FALSE;
    REPEAT
      IF i < MaxIdentifierLength - 1 THEN
        symbol.identifierString[i] := ch; INC( i )
      ELSE
        (* The character is consumed without being stored. Scanning used to stop here, so the
          rest of an identifier that was too long was scanned as a second identifier. *)
        tooLong := TRUE
      END;
      GetNextCharacter
    UNTIL reservedCharacter[ORD( ch )] OR (ch = EOT);
    IF tooLong THEN Error( Basic.IdentifierTooLong ) END;
    symbol.identifierString[i] := 0X;
    StringPool.GetIndex(symbol.identifierString, symbol.identifier);
  END GetIdentifier;
  (**
    Number = Integer | Real.
    Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit}.
    Real = Digit {Digit} '.' {Digit} [ScaleFactor].
    ScaleFactor = ('E' | 'D') ['+' | '-'] digit {digit}.
    HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
    Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' .

    Reads the numeric literal starting with the look-ahead character (a digit).
    Returns Character for a literal like 0DX (value in symbol.character and symbol.integer),
    Number otherwise. For numbers symbol.numberType is set to Integer, Hugeint, Real or Longreal and
    the value is stored in symbol.integer / symbol.hugeint or symbol.real.
    The text of the literal is kept in symbol.identifierString, cut to MaxIdentifierLength characters.
    If the number is followed by "..", ch is set to Ellipsis on return.
  **)
  PROCEDURE GetNumber(VAR symbol: Symbol): Token;
  VAR i, nextInt, m, n, d, e, si: LONGINT;
    dig: ARRAY 24 OF CHAR;
    f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN;
    result: Token;
    hugeint, tenh, number: HUGEINT;
    digits, significant: LONGINT;

    (** 10^e **)
    PROCEDURE Ten( e: LONGINT ): LONGREAL;
    VAR x, p: LONGREAL;
    BEGIN
      x := 1; p := 10;
      WHILE e > 0 DO
        IF ODD( e ) THEN x := x * p END;
        e := e DIV 2;
        IF e > 0 THEN p := p * p END (* prevent overflow *)
      END;
      RETURN x
    END Ten;

    (** return decimal number associated to character ch , error if none **)
    PROCEDURE Decimal( ch: CHAR ): LONGINT;
    BEGIN (* ("0" <= ch) & (ch <= "9") OR ("A" <= ch) & (ch <= "F") *)
      IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" ) ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0 END
    END Decimal;

    (** return hexadecimal number associated to character ch, error if none **)
    PROCEDURE Hexadecimal( ch: CHAR ): LONGINT;
    BEGIN
      IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" )
      ELSIF ch <= "F" THEN RETURN ORD( ch ) - ORD( "A" ) + 10
      ELSIF ch <= "f" THEN RETURN ORD( ch ) - ORD( "a" ) + 10
      ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0
      END
    END Hexadecimal;

    (** append c to the text of the literal in symbol.identifierString; characters beyond
      MaxIdentifierLength are dropped instead of being written behind the end of the array **)
    PROCEDURE Keep( c: CHAR );
    BEGIN
      IF si < MaxIdentifierLength THEN symbol.identifierString[si] := c; INC( si ) END
    END Keep;

  BEGIN (* ("0" <= ch) & (ch <= "9") *)
    result := Number;
    i := 0; m := 0; n := 0; d := 0; si := 0; long := FALSE;
    IF (ch = "0") & (reader.Peek() = "x") THEN (* hex number *)
      digits := 0; significant := 0;
      number := 0; (* was used without initialization *)
      Keep( ch ); GetNextCharacter; (* "0" *)
      Keep( ch ); GetNextCharacter; (* "x" *)
      WHILE (ch >= "0") & (ch <= "9") OR (ch >= "a") & (ch <="f") OR (ch >= "A") & (ch <= "F") DO
        IF (significant > 0) OR (ch # "0") THEN INC( significant ) END; (* leading zeros do not count *)
        number := number * 10H + Hexadecimal(ch);
        INC(digits);
        Keep( ch ); GetNextCharacter;
      END;
      symbol.identifierString[si] := 0X;
      (* more than MaxHugeHexDigits significant digits do not fit into a hugeint *)
      IF significant > MaxHugeHexDigits THEN Error( Basic.NumberTooLarge ) END;
      symbol.hugeint := number;
      symbol.integer := SHORT(number);
      IF digits > MaxHexDigits THEN
        symbol.numberType := Hugeint
      ELSE
        symbol.numberType := Integer
      END;
      RETURN result;
    END;
    (* i: number of digits read, m: number of digits without leading zeros,
      n: number of digits stored in dig, d: number of digits before the decimal point (0: no point) *)
    LOOP (* read mantissa *)
      IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN
        IF (m > 0) OR (ch # "0") THEN (* ignore leading zeros *)
          IF n < LEN( dig ) THEN dig[n] := ch; INC( n ) END;
          INC( m )
        END;
        Keep( ch ); GetNextCharacter; INC( i )
      ELSIF ch = "." THEN
        GetNextCharacter;
        (* ".." ends the number; its first "." is not part of the literal and is not recorded *)
        IF ch = "." THEN ch := Ellipsis; EXIT END;
        Keep( "." );
        IF d = 0 THEN (* i > 0 *) d := i
        ELSE Error( Basic.NumberIllegalCharacter )
        END
      ELSE EXIT
      END
    END; (* 0 <= n <= m <= i, 0 <= d <= i *)
    IF d = 0 THEN (* integer *)
      IF n = m THEN
        symbol.integer := 0; i := 0; symbol.hugeint := 0;
        IF ch = "X" THEN (* character *)
          Keep( ch ); GetNextCharacter; result := Character;
          IF (n <= 2) THEN
            WHILE i < n DO symbol.integer := symbol.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
            symbol.character := CHR(symbol.integer);
          ELSE Error( Basic.NumberTooLarge )
          END
        ELSIF ch = "H" THEN (* hexadecimal *)
          Keep( ch ); GetNextCharacter;
          IF (n < MaxHexDigits) OR (n=MaxHexDigits) & (dig[0] <= "7") THEN (* otherwise the positive (!) number is not in the range of longints *)
            symbol.numberType := Integer;
            WHILE i < n DO symbol.integer := symbol.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
            symbol.hugeint := symbol.integer;
          ELSIF n <= MaxHugeHexDigits THEN
            symbol.numberType := Hugeint;
            IF (n = MaxHugeHexDigits) & (dig[0] > "7") THEN (* prevent overflow *) symbol.hugeint := -1 END;
            WHILE i < n DO symbol.hugeint := Hexadecimal( dig[i] ) + symbol.hugeint * 10H; INC( i ) END;
            symbol.integer :=SHORT(symbol.hugeint);
          ELSE
            symbol.numberType := Hugeint; (* to make parser able to go on *)
            Error( Basic.NumberTooLarge )
          END
        ELSE (* decimal *)
          symbol.numberType := Integer;
          WHILE (i < n) & ~long DO
            d := Decimal( dig[i] ); INC( i );
            IF symbol.integer >= MAX(LONGINT) DIV 10 THEN (* multiplication overflow *)long := TRUE END;
            nextInt := symbol.integer*10+d;
            IF nextInt >=0 THEN symbol.integer := nextInt ELSE (* overflow *) long := TRUE END;
          END;
          IF long THEN
            i := 0; (* restart computation , artificial limit because of compiler problems with hugeint *)
            hugeint := 0;
            tenh := 10; (* compiler does not like constants here ! *)
            symbol.numberType := Hugeint;
            WHILE i < n DO
              d := Decimal( dig[i] ); INC( i );
              IF hugeint >= MAX(HUGEINT) DIV 10 THEN Error( Basic.NumberTooLarge) END;
              hugeint := hugeint * tenh + d;
              IF hugeint < 0 THEN Error( Basic.NumberTooLarge ) END
            END;
            symbol.hugeint := hugeint;
            symbol.integer := SHORT(symbol.hugeint);
          ELSE
            symbol.hugeint := symbol.integer;
          END
        END
      ELSE (* more significant digits than dig can hold *)
        symbol.numberType := Hugeint;
        Error( Basic.NumberTooLarge )
      END
    ELSE (* fraction *)
      f := 0; e := 0; expCh := "E";
      WHILE n > 0 DO (* 0 <= f < 1 *) DEC( n ); f := (Decimal( dig[n] ) + f) / 10 END;
      IF (ch = "E") OR (ch = "D") THEN
        expCh := ch; Keep( ch ); GetNextCharacter; neg := FALSE;
        IF ch = "-" THEN neg := TRUE; Keep( ch ); GetNextCharacter
        ELSIF ch = "+" THEN Keep( ch ); GetNextCharacter
        END;
        IF ("0" <= ch) & (ch <= "9") THEN
          REPEAT
            n := Decimal( ch ); Keep( ch ); GetNextCharacter;
            IF e <= (MAX( INTEGER ) - n) DIV 10 THEN e := e * 10 + n ELSE Error( Basic.NumberTooLarge ) END
          UNTIL (ch < "0") OR ("9" < ch);
          IF neg THEN e := -e END
        ELSE Error( Basic.NumberIllegalCharacter )
        END
      END;
      DEC( e, i - d - m ); (* decimal point shift *)
      IF expCh = "E" THEN
        symbol.numberType := Real;
        IF (1 - MaxRealExponent < e) & (e <= MaxRealExponent) THEN
          IF e < 0 THEN symbol.real := f / Ten( -e ) ELSE symbol.real := f * Ten( e ) END
        ELSE Error( Basic.NumberTooLarge )
        END
      ELSE
        symbol.numberType := Longreal;
        IF (1 - MaxLongrealExponent < e) & (e <= MaxLongrealExponent) THEN
          IF e < 0 THEN symbol.real := f / Ten( -e ) ELSE symbol.real := f * Ten( e ) END
        ELSE Error( Basic.NumberTooLarge )
        END
      END
    END;
    symbol.identifierString[si] := 0X;
    RETURN result;
  END GetNumber;
  (** Read a comment, including nested comments, and deliver it in symbol as a Comment token.
      The loop starts at nesting level 1, i.e. the comment opener is expected to have been
      consumed by the caller already. Every character up to and including the matching
      closer is collected via stringWriter.
      symbol.token is set to Comment, symbol.string / symbol.stringLength receive the text.
      Reports Basic.CommentNotClosed if the input ends while a comment is still open. **)
  PROCEDURE ReadComment(VAR symbol: Symbol);
  VAR level: LONGINT;
  BEGIN
    stringMaker.Clear;
    level := 1;
    WHILE (level > 0) & (ch # EOT) DO
      IF ch = "(" THEN
        stringWriter.Char(ch);
        GetNextCharacter;
        (* an opening parenthesis directly followed by a star opens a nested comment *)
        IF ch = "*" THEN INC(level); stringWriter.Char(ch); GetNextCharacter END;
      ELSIF ch = "*" THEN
        stringWriter.Char(ch);
        GetNextCharacter;
        (* a star directly followed by a closing parenthesis closes the innermost comment *)
        IF ch = ")" THEN DEC(level); stringWriter.Char(ch); GetNextCharacter END;
      ELSE
        stringWriter.Char(ch);
        GetNextCharacter;
      END;
    END;
    IF level > 0 THEN
      Error(Basic.CommentNotClosed)
    END;
    stringWriter.Char(0X);
    stringWriter.Update;
    (* Only strip the two closing characters when a closer was actually read. For an
       unterminated comment there is nothing to strip, and shortening would cut off two
       characters of the comment text itself. *)
    IF level = 0 THEN
      stringMaker.Shorten(2); (* remove comment closing *)
    END;
    symbol.token := Comment;
    symbol.string := stringMaker.GetString(symbol.stringLength);
  END ReadComment;
  (** Skip raw text (e.g. an inline code section) until the keyword END or WITH is found, or
      the input ends.
      Identifiers are scanned as a whole and compared against the keyword in the currently
      active case; everything else is copied character by character into the string buffer.
      startPos receives the end position of the incoming symbol (or the current line when
      useLineNumbers is set), endPos the start position of the last item inspected. endPos is
      left untouched when the incoming symbol already is EndOfText, End or With.
      symbol.string receives a copy of the skipped text, symbol.token the returned token.
      Returns End, With or EndOfText (or the incoming token if the loop never ran). **)
  PROCEDURE SkipToEndOfCode*(VAR startPos,endPos: LONGINT; VAR symbol: Symbol): Token;
  VAR s: LONGINT;
  BEGIN
    ASSERT(case # Unknown);
    stringMaker.Clear;
    startPos := symbol.position.end;
    IF useLineNumbers THEN startPos := position.line END;
    s := symbol.token;
    WHILE (s # EndOfText) & (s # End) & (s # With) DO
      symbol.position := position;
      endPos := position.start;
      CASE ch OF
        'A' .. 'Z','a'..'z':
          s := Identifier;
          GetIdentifier(symbol);
          IF (case=Uppercase) & (symbol.identifierString = "END") OR (case=Lowercase) & (symbol.identifierString = "end") THEN
            s := End
          ELSIF (case = Uppercase) & (symbol.identifierString = "WITH") OR (case = Lowercase) & (symbol.identifierString = "with") THEN
            s := With
          ELSE
            stringWriter.String(symbol.identifierString);
          END;
      | EOT:
          (* end of input without a terminating keyword: stop here, otherwise the loop
             would keep appending the end-of-text character forever *)
          s := EndOfText
      ELSE
        stringWriter.Char(ch);
        GetNextCharacter;
      END;
      symbol.position.end := position.start;
    END;
    stringWriter.Update;
    symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
    symbol.token := s;
    IF Trace THEN
      D.String("skip to end: "); D.Int(startPos,1); D.String(","); D.Int(endPos,1); D.Ln;
      OutSymbol(D.Log,symbol); D.Ln;
    END;
    RETURN s
  END SkipToEndOfCode;
  (** Advance past blanks and control characters (everything up to and including the space
      character). Stops without consuming at ESC and at end of text. **)
  PROCEDURE SkipBlanks;
  BEGIN
    LOOP
      (* anything printable, or ESC, ends the run of blanks *)
      IF (ch > " ") OR (ch = ESC) THEN EXIT END;
      IF ch = EOT THEN
        IF Trace THEN D.String("EOT"); D.Ln; END;
        EXIT
      END;
      GetNextCharacter
    END
  END SkipBlanks;
  (** Scan the next symbol from the input.
      Blanks are skipped first. symbol.position is taken from the scanner position before the
      symbol is read and symbol.position.end from the position reached afterwards;
      symbol.token receives the recognised token.
      Returns TRUE as long as the scanner's error flag is not set. **)
  PROCEDURE GetNextSymbol*(VAR symbol: Symbol ): BOOLEAN;
  VAR tok, keyword: LONGINT;
  BEGIN
    SkipBlanks;
    symbol.position := position;
    stringMaker.Clear;
    CASE ch OF (* ch > " " *)
      EOT: tok := EndOfText
    | ESC: tok := Escape; GetNextCharacter
    | DoubleQuote:
        tok := String; GetString(symbol, TRUE, TRUE, FALSE)
    | SingleQuote:
        (* single-quoted text is delivered as a String token as well; the original source
           carries a note that this is meant to become a Character token with a length check *)
        tok := String; GetString(symbol, FALSE, FALSE, FALSE)
    | '#': tok := Unequal; GetNextCharacter
    | '&': tok := And; GetNextCharacter
    | '(':
        GetNextCharacter;
        IF ch = '*' THEN
          (* comment opener: consume the star and read the comment body *)
          GetNextCharacter; ReadComment(symbol); tok := Comment
        ELSE
          tok := LeftParenthesis
        END
    | ')': tok := RightParenthesis; GetNextCharacter
    | '*':
        GetNextCharacter; tok := Times;
        IF ch = '*' THEN tok := TimesTimes; GetNextCharacter END
    | '+':
        GetNextCharacter; tok := Plus;
        IF ch = '*' THEN tok := PlusTimes; GetNextCharacter END
    | ',': tok := Comma; GetNextCharacter
    | '-': tok := Minus; GetNextCharacter
    | '.':
        (* a period may start one of several element-wise operators or a range *)
        GetNextCharacter;
        CASE ch OF
          '.': GetNextCharacter; tok := Upto
        | '*': GetNextCharacter; tok := DotTimes
        | '/': GetNextCharacter; tok := DotSlash
        | '=': GetNextCharacter; tok := DotEqual
        | '#': GetNextCharacter; tok := DotUnequal
        | '>':
            GetNextCharacter; tok := DotGreater;
            IF ch = '=' THEN tok := DotGreaterEqual; GetNextCharacter END
        | '<':
            GetNextCharacter; tok := DotLess;
            IF ch = '=' THEN tok := DotLessEqual; GetNextCharacter END
        ELSE
          tok := Period
        END
    | '/': tok := Slash; GetNextCharacter
    | '0'..'9': tok := GetNumber(symbol)
    | ':':
        GetNextCharacter; tok := Colon;
        IF ch = '=' THEN tok := Becomes; GetNextCharacter END
    | ';': tok := Semicolon; GetNextCharacter
    | '<':
        GetNextCharacter; tok := Less;
        IF ch = '=' THEN
          tok := LessEqual; GetNextCharacter
        ELSIF ch = '<' THEN
          GetNextCharacter; tok := LessLess;
          IF ch = '?' THEN tok := LessLessQ; GetNextCharacter END
        END
    | '=': tok := Equal; GetNextCharacter
    | '>':
        GetNextCharacter; tok := Greater;
        IF ch = '=' THEN
          tok := GreaterEqual; GetNextCharacter
        ELSIF ch = '>' THEN
          GetNextCharacter; tok := GreaterGreater;
          IF ch = '?' THEN tok := GreaterGreaterQ; GetNextCharacter END
        END
    | '[': tok := LeftBracket; GetNextCharacter
    | ']': tok := RightBracket; GetNextCharacter
    | '^': tok := Arrow; GetNextCharacter
    | '{': tok := LeftBrace; GetNextCharacter
    | '|': tok := Bar; GetNextCharacter
    | '}': tok := RightBrace; GetNextCharacter
    | '~': tok := Not; GetNextCharacter
    | '\':
        tok := Backslash; GetNextCharacter;
        (* a backslash followed by a double quote, or by one printable character and then a
           double quote, introduces an escaped string *)
        IF (ch = DoubleQuote) OR (ch > " ") & (reader.Peek() = DoubleQuote) THEN
          tok := String;
          GetEscapedString(symbol)
        END
    | '`': tok := Transpose; GetNextCharacter
    | '?':
        tok := Questionmark; GetNextCharacter;
        IF ch = '?' THEN tok := Questionmarks; GetNextCharacter END
    | '!':
        tok := ExclamationMark; GetNextCharacter;
        IF ch = '!' THEN tok := ExclamationMarks; GetNextCharacter END
    | Ellipsis:
        tok := Upto; GetNextCharacter
    | 'A'..'Z':
        tok := Identifier; GetIdentifier( symbol );
        IF (case = Uppercase) OR (case = Unknown) THEN
          keyword := keywordsUpper.IndexByIdentifier(symbol.identifier);
          IF keyword >= 0 THEN tok := keyword END;
          (* the module / cellnet keyword fixes the keyword case for the rest of the text *)
          IF (tok = Module) OR (tok = CellNet) THEN case := Uppercase END
        END
    | 'a'..'z':
        tok := Identifier; GetIdentifier( symbol );
        IF (case = Lowercase) OR (case = Unknown) THEN
          keyword := keywordsLower.IndexByIdentifier(symbol.identifier);
          IF keyword >= 0 THEN tok := keyword END;
          IF (tok = Module) OR (tok = CellNet) THEN case := Lowercase END
        END;
        (* a lowercase first identifier that is no module / cellnet keyword: fall back to
           uppercase keywords and treat the word as a plain identifier *)
        IF firstIdentifier & (tok # Module) & (tok # CellNet) & (case = Unknown) THEN
          case := Uppercase; tok := Identifier
        END
    ELSE
      tok := Identifier; GetIdentifier( symbol )
    END;
    firstIdentifier := FALSE;
    symbol.token := tok;
    symbol.position.end := position.start;
    IF Trace THEN OutSymbol(D.Log,symbol); D.Ln; END;
    RETURN ~error
  END GetNextSymbol;
  (** clear the scanner's error flag **)
  PROCEDURE ResetError*();
  BEGIN
    error := FALSE;
  END ResetError;
  (** set the diagnostics mode of the scanner (diagnostics = NIL ==> no report) and reset the error state;
      intended for silent symbol peeking after the end of a module.
      The diagnostics object is swapped: on return the VAR parameter holds the scanner's
      previous diagnostics, so a second call with it restores the former state. **)
  PROCEDURE ResetErrorDiagnostics*(VAR diagnostics: Diagnostics.Diagnostics);
  VAR previous: Diagnostics.Diagnostics;
  BEGIN
    error := FALSE;
    previous := SELF.diagnostics;
    SELF.diagnostics := diagnostics;
    diagnostics := previous;
  END ResetErrorDiagnostics;
  912. END Scanner;
  (** snapshot of the scanning state, filled by AssemblerScanner.GetContext and
      re-applied by AssemblerScanner.SetContext **)
  Context* = RECORD
    position: Position; (* scanner position at the time of the snapshot *)
    readerPosition: LONGINT; (* value of reader.Pos() at the time of the snapshot *)
    ch: CHAR; (* current look-ahead character *)
  END;
  (** assembler scanner reflects the following EBNF
    Symbol = String | Token | Number | Identifier.
    Token = '\' | '#' | '(' ['*' any '*' ')'] | ')' | CR [LF] | LF | '*' | '+' | ',' | '-' | '~' | '.' | '/' | '%' | ':' | ';' | '=' | '[' | ']' | '{' | '}' | '!' | '^' | '$'['$'].
    String = '"' {Character} '"' | "'" {Character} "'".
    Identifier = '@' | Letter {'@' | '.' | Letter | Digit | '_'} .
    Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
    Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
    Number = Integer | Real.
    Character = Digit [HexDigit] 'X'.
    Integer = Digit {Digit} | Digit {HexDigit} 'H' | '0x' {HexDigit | LowerHexDigit} | '0b' {'0' | '1'}.
    Real = Digit {Digit} '.' {Digit} [ScaleFactor].
    ScaleFactor = ('E' | 'D') ['+' | '-'] Digit {Digit}.
    HexDigit = Digit | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'.
    LowerHexDigit = 'a' | 'b' | 'c' | 'd' | 'e' | 'f'.
    A semicolon starts a comment that extends to the end of the line.
  **)
  AssemblerScanner* = OBJECT (Scanner) (*! move to different module? unify with compiler scanner? *)
  VAR
    startContext-: Context; (* scanning state captured right after initialisation *)

    (** initialise the underlying scanner and remember the initial scanning state in startContext **)
    PROCEDURE &InitAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics );
    BEGIN
      InitializeScanner(source,reader,position,diagnostics);
      GetContext(startContext);
    END InitAssemblerScanner;

    (** store the current look-ahead character, scanner position and reader position in context **)
    PROCEDURE GetContext*(VAR context: Context);
    BEGIN
      context.ch := ch;
      context.position := position;
      context.readerPosition := reader.Pos();
    END GetContext;

    (** re-apply a state previously captured with GetContext: the reader is repositioned and
        the look-ahead character and scanner position are restored **)
    PROCEDURE SetContext*(CONST context: Context);
    BEGIN
      reader.SetPos(context.readerPosition);
      ch := context.ch;
      position := context.position;
    END SetContext;

    (** advance to the next CR, LF or end of text without consuming it **)
    PROCEDURE SkipToEndOfLine*;
    BEGIN
      WHILE (ch # EOT) & (ch # CR) & (ch # LF) DO
        GetNextCharacter
      END;
    END SkipToEndOfLine;

    (**
      note: in contrast to a regular identifier, an assembler scanner identifier may also contain periods and the '@'-symbol
      Identifier = '@' | Letter {'@' | '.' | Letter | Digit | '_'} .
      Letter = 'A' | 'B' | .. | 'Z' | 'a' | 'b' | .. | 'z' .
      Digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'.
      '_' is the underscore character

      The current character is always taken as the first character of the identifier.
      Reports Basic.IdentifierTooLong when MaxIdentifierLength characters have been read; the
      stored text is then cut so that the terminating 0X still fits.
    **)
    PROCEDURE GetIdentifier( VAR symbol: Symbol );
    VAR
      i: LONGINT;

      PROCEDURE CharacterIsAllowed(character: CHAR): BOOLEAN;
      BEGIN
        CASE character OF
        | 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '@', '.', '_': RETURN TRUE
        ELSE RETURN FALSE
        END;
      END CharacterIsAllowed;

    BEGIN
      i := 0;
      REPEAT
        symbol.identifierString[i] := ch; INC( i ); GetNextCharacter
      UNTIL ~CharacterIsAllowed(ch) OR (i = MaxIdentifierLength);
      IF i = MaxIdentifierLength THEN Error( Basic.IdentifierTooLong ); DEC( i ) END;
      symbol.identifierString[i] := 0X;
    END GetIdentifier;

    (** Scan a number.
        Handles the assembler-specific forms 0x... (hexadecimal, digits in either case) and
        0b... (binary) here and delegates every other number to the inherited GetNumber.
        For the two local forms symbol.hugeint and symbol.integer receive the value and
        symbol.numberType is Hugeint when more than MaxHexDigits hex digits (more than 32
        binary digits) were written, Integer otherwise. Leading zeros count as digits.
        Digits beyond the capacity of HUGEINT are not diagnosed.
        Returns Number for the local forms, otherwise whatever the inherited GetNumber returns. **)
    PROCEDURE GetNumber(VAR symbol: Symbol): Token;
    VAR number: HUGEINT; result: Token; digits: LONGINT;

      (** TRUE for '0'..'9', 'a'..'f' and 'A'..'F' **)
      PROCEDURE IsHexDigit( c: CHAR ): BOOLEAN;
      BEGIN
        RETURN (c >= "0") & (c <= "9") OR (c >= "a") & (c <= "f") OR (c >= "A") & (c <= "F")
      END IsHexDigit;

      (** return hexadecimal number associated to character ch, error if none **)
      PROCEDURE Hexadecimal( ch: CHAR ): LONGINT;
      BEGIN
        IF (ch >= "0") & (ch <= "9") THEN RETURN ORD( ch ) - ORD( "0" )
        ELSIF (ch >= "a") & (ch <= "f") THEN RETURN ORD( ch ) - ORD( "a" ) + 10
        ELSIF (ch >= "A") & (ch <= "F") THEN RETURN ORD( ch ) - ORD( "A" ) + 10
        ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0
        END
      END Hexadecimal;

    BEGIN
      result := Number;
      (* the accumulator must start at zero; it was previously used uninitialised *)
      number := 0;
      digits := 0;
      IF (ch = "0") & (reader.Peek() = "x") THEN (* hex number *)
        GetNextCharacter; GetNextCharacter; (* skip the 0x prefix *)
        WHILE IsHexDigit(ch) DO
          number := number * 10H + Hexadecimal(ch);
          INC(digits);
          GetNextCharacter;
        END;
        symbol.hugeint := number;
        symbol.integer := SHORT(number);
        IF digits > MaxHexDigits THEN
          symbol.numberType := Hugeint
        ELSE
          symbol.numberType := Integer
        END;
      ELSIF (ch = "0") & (reader.Peek() = "b") THEN (* binary number *)
        GetNextCharacter; GetNextCharacter; (* skip the 0b prefix *)
        WHILE (ch >= "0") & (ch <= "1") DO
          number := number * 2;
          INC(digits);
          IF ch = "1" THEN INC(number) END;
          GetNextCharacter;
        END;
        symbol.hugeint := number;
        symbol.integer := SHORT(number);
        IF digits > 32 THEN
          symbol.numberType := Hugeint
        ELSE
          symbol.numberType := Integer
        END;
      ELSE
        RETURN GetNumber^(symbol)
      END;
      RETURN result
    END GetNumber;

    (** Get the next assembler symbol.
        Unlike the compiler scanner, line ends are significant and delivered as Ln tokens
        (a CR LF or LF CR pair counts as one line end). Comments - both the bracketed form
        and the semicolon-to-end-of-line form - are skipped. Characters without a meaning
        yield the token None.
        Returns TRUE as long as the scanner's error flag is not set. **)
    PROCEDURE GetNextSymbol*(VAR symbol: Symbol ): BOOLEAN;
    VAR s: LONGINT;

      PROCEDURE SkipBlanks;
      BEGIN
        WHILE (ch <= ' ') & (ch # CR) & (ch # LF) & (ch # EOT) DO (* ignore control characters except line feeds *)
          GetNextCharacter
        END;
      END SkipBlanks;

    BEGIN
      REPEAT
        SkipBlanks;
        symbol.position := position;
        CASE ch OF (* ch > ' ' *)
        | EOT: s := EndOfText;
        | DoubleQuote:
          s := String; GetString(symbol, TRUE, FALSE, TRUE);
        | SingleQuote:
          s := Character; GetString(symbol, FALSE, FALSE, FALSE); symbol.character := symbol.string[0];
          IF symbol.stringLength # 2 THEN (* stringlength = 1 for empty string '' *)
            Error(Basic.IllegalCharacterValue)
          END;
        | '\': s := Backslash; GetNextCharacter;
          IF ch = DoubleQuote THEN s := String; GetString(symbol, FALSE, FALSE, TRUE) END;
        | '#': s := Unequal; GetNextCharacter; (* for the ARM assembler *)
        | '(': GetNextCharacter;
          IF ch = '*' THEN GetNextCharacter; ReadComment(symbol); s := Comment; ELSE s := LeftParenthesis END
        | ')': s := RightParenthesis; GetNextCharacter
        | CR: GetNextCharacter; s := Ln; IF ch = LF THEN GetNextCharacter END;
        | LF: GetNextCharacter; s := Ln; IF ch = CR THEN GetNextCharacter END;
        | '*': s := Times; GetNextCharacter;
        | '+': s := Plus; GetNextCharacter;
        | ',': s := Comma; GetNextCharacter
        | '-': s := Minus; GetNextCharacter
        | '~': s := Not; GetNextCharacter
        | '.': s := Period; GetNextCharacter
        | '/': s := Div; GetNextCharacter
        | '%': s := Mod; GetNextCharacter
        | '0'..'9': s := GetNumber(symbol);
        | ':': s := Colon; GetNextCharacter;
        | ';': s := Comment; SkipToEndOfLine; (* line comment: dropped by the enclosing loop *)
        | '=': s := Equal; GetNextCharacter
        | '[': s := LeftBracket; GetNextCharacter
        | ']': s := RightBracket; GetNextCharacter
        | '{': s := LeftBrace; GetNextCharacter
        | '}': s := RightBrace; GetNextCharacter
        | '!': s := ExclamationMark; GetNextCharacter;
        | '^': s := Arrow; GetNextCharacter;
        | 'A'..'Z': s := Identifier; GetIdentifier( symbol );
        | 'a'..'z': s := Identifier; GetIdentifier( symbol );
        | '@': s := Identifier; GetIdentifier( symbol ); (* the '@'-symbol initiates an assembly scanner identifier *)
        | '$': GetNextCharacter;
          IF ch = '$' THEN s := PCOffset; GetNextCharacter ELSE s := PC; END
        ELSE s := None; GetNextCharacter;
        END;
        symbol.position.end := position.start;
      UNTIL s # Comment;
      symbol.token := s;
      IF Trace THEN D.Ln; D.Str( "Scan at " ); D.Int( symbol.position.start,1 ); D.Str( ": " ); OutSymbol(D.Log,symbol); D.Update; END;
      RETURN ~error
    END GetNextSymbol;

  END AssemblerScanner;
  VAR
    (* TRUE for every character code that may not occur within an identifier; filled by InitReservedCharacters *)
    reservedCharacter: ARRAY 256 OF BOOLEAN;
    (* printable name of each token, indexed by token number; filled by InitTokens *)
    tokens-: ARRAY EndOfText+1 OF Keyword;
    (* keyword lookup tables for lowercase and uppercase spelling; filled by InitKeywords *)
    keywordsLower, keywordsUpper: KeywordTable;
  (** create a scanner reading from a stream; errors are reported via diagnostics **)
  PROCEDURE NewScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): Scanner;
  VAR scanner: Scanner;
  BEGIN
    NEW( scanner, source, reader, position, diagnostics );
    RETURN scanner
  END NewScanner;
  (** create an assembler scanner reading from a stream; errors are reported via diagnostics **)
  PROCEDURE NewAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): AssemblerScanner;
  VAR assemblerScanner: AssemblerScanner;
  BEGIN
    NEW( assemblerScanner, source, reader, position, diagnostics );
    RETURN assemblerScanner
  END NewAssemblerScanner;
  (** Render a symbol as text in str.
      Identifiers and numbers yield their source spelling, strings and comments their
      content; every other token yields its keyword in the requested case, or the empty
      string when GetKeyword delivers a negative index.
      For strings and comments str must be at least as long as the symbol's string array. **)
  PROCEDURE SymbolToString*(CONST symbol: Symbol; case: LONGINT; VAR str: ARRAY OF CHAR);
  VAR id: StringPool.Index;
  BEGIN
    IF (symbol.token = Identifier) OR (symbol.token = Number) THEN
      COPY(symbol.identifierString, str)
    ELSIF (symbol.token = String) OR (symbol.token = Comment) THEN
      ASSERT(LEN(str) >= LEN(symbol.string^));
      COPY(symbol.string^, str)
    ELSE
      GetKeyword(case, symbol.token, id);
      IF id >= 0 THEN StringPool.GetString(id, str) ELSE str := "" END
    END
  END SymbolToString;
  (** Debugging output: write a one-line description of symbol to w.
      Format: start and end position, token name, for numbers the number type, then the
      string / comment content or the textual form produced by SymbolToString. **)
  PROCEDURE OutSymbol*(w: Streams.Writer; CONST symbol: Symbol);
  VAR str: ARRAY 256 OF CHAR;
  BEGIN
    w.Int(symbol.position.start,1); w.String("-"); w.Int(symbol.position.end,1); w.String(":");
    w.String(tokens[symbol.token]);
    IF symbol.token = Number THEN
      CASE symbol.numberType OF
        Integer: w.String("(integer)")
      | Hugeint: w.String("(hugeint)")
      | Real: w.String("(real)")
      | Longreal: w.String("(longreal)")
      ELSE
        (* a CASE without ELSE traps on an unlisted value; debug output should not bring
           the scanner down because of an unexpected number type *)
        w.String("(unknown number type)")
      END;
    END;
    IF symbol.token = String THEN
      w.String(":"); w.Char('"'); w.String(symbol.string^); w.Char('"');
    ELSIF symbol.token = Comment THEN
      w.String("(*"); w.String(symbol.string^); w.String("*)");
    ELSE
      SymbolToString(symbol, Uppercase, str); w.String(": "); w.String(str);
    END
  END OutSymbol;
  (** reserved characters are the characters that may not occur within an identifier:
      everything except letters, digits and the underscore **)
  PROCEDURE InitReservedCharacters;
  VAR code: LONGINT; c: CHAR;
  BEGIN
    FOR code := 0 TO LEN( reservedCharacter ) - 1 DO
      c := CHR(code);
      reservedCharacter[code] :=
        ~( (c >= 'a') & (c <= 'z') OR (c >= 'A') & (c <= 'Z') OR (c >= '0') & (c <= '9') OR (c = '_') )
    END
  END InitReservedCharacters;
  (* get keyword by token: look up the identifier registered for token in the table selected
     by case, which must be Uppercase or Lowercase *)
  PROCEDURE GetKeyword*(case:LONGINT; token: LONGINT; VAR identifier: IdentifierType);
  BEGIN
    IF case # Uppercase THEN
      ASSERT(case = Lowercase);
      keywordsLower.IdentifierByIndex(token,identifier)
    ELSE
      keywordsUpper.IdentifierByIndex(token,identifier)
    END
  END GetKeyword;
  (** fill the tokens table with a printable name for every token number and verify that
      no entry in the range 0 .. EndOfText has been left empty **)
  PROCEDURE InitTokens;
  VAR t: LONGINT;
  BEGIN
    tokens[None] := "None";

    (* relations *)
    tokens[Equal] := "Equal";
    tokens[DotEqual] := "DotEqual";
    tokens[Unequal] := "Unequal";
    tokens[DotUnequal] := "DotUnequal";
    tokens[Less] := "Less";
    tokens[DotLess] := "DotLess";
    tokens[LessEqual] := "LessEqual";
    tokens[DotLessEqual] := "DotLessEqual";
    tokens[Greater] := "Greater";
    tokens[DotGreater] := "DotGreater";
    tokens[GreaterEqual] := "GreaterEqual";
    tokens[DotGreaterEqual] := "DotGreaterEqual";
    tokens[LessLessQ] := "LessLessQ";
    tokens[GreaterGreaterQ] := "GreaterGreaterQ";
    tokens[In] := "In";
    tokens[Is] := "Is";

    (* arithmetic and logical operators *)
    tokens[Times] := "Times";
    tokens[TimesTimes] := "TimesTimes";
    tokens[DotTimes] := "DotTimes";
    tokens[PlusTimes] := "PlusTimes";
    tokens[Slash] := "Slash";
    tokens[Backslash] := "Backslash";
    tokens[DotSlash] := "DotSlash";
    tokens[Div] := "Div";
    tokens[Mod] := "Mod";
    tokens[And] := "And";
    tokens[Or] := "Or";
    tokens[Plus] := "Plus";
    tokens[Minus] := "Minus";
    tokens[Not] := "Not";

    (* opening brackets *)
    tokens[LeftParenthesis] := "LeftParenthesis";
    tokens[LeftBracket] := "LeftBracket";
    tokens[LeftBrace] := "LeftBrace";

    (* literals and designators *)
    tokens[Number] := "Number";
    tokens[Character] := "Character";
    tokens[String] := "String";
    tokens[Nil] := "Nil";
    tokens[Imag] := "Imag";
    tokens[True] := "True";
    tokens[False] := "False";
    tokens[Self] := "Self";
    tokens[New] := "New";
    tokens[Result] := "Result";
    tokens[Identifier] := "Identifier";

    (* statements *)
    tokens[If] := "If";
    tokens[Case] := "Case";
    tokens[While] := "While";
    tokens[Repeat] := "Repeat";
    tokens[For] := "For";
    tokens[Loop] := "Loop";
    tokens[With] := "With";
    tokens[Exit] := "Exit";
    tokens[Await] := "Await";
    tokens[Return] := "Return";
    tokens[Begin] := "Begin";

    (* separators, closing brackets and further symbols *)
    tokens[Semicolon] := "Semicolon";
    tokens[Transpose] := "Transpose";
    tokens[RightBrace] := "RightBrace";
    tokens[RightBracket] := "RightBracket";
    tokens[RightParenthesis] := "RightParenthesis";
    tokens[Questionmark] := "Questionmark";
    tokens[ExclamationMark] := "ExclamationMark";
    tokens[Questionmarks] := "Questionmarks";
    tokens[ExclamationMarks] := "ExclamationMarks";
    tokens[LessLess] := "LessLess";
    tokens[GreaterGreater] := "GreaterGreater";
    tokens[Upto] := "Upto";
    tokens[Arrow] := "Arrow";
    tokens[Period] := "Period";
    tokens[Comma] := "Comma";
    tokens[Colon] := "Colon";
    tokens[Of] := "Of";
    tokens[Then] := "Then";
    tokens[Do] := "Do";
    tokens[To] := "To";
    tokens[By] := "By";
    tokens[Becomes] := "Becomes";
    tokens[Bar] := "Bar";
    tokens[End] := "End";
    tokens[Else] := "Else";
    tokens[Elsif] := "Elsif";
    tokens[Extern] := "Extern";
    tokens[Until] := "Until";
    tokens[Finally] := "Finally";

    (* declarations *)
    tokens[Code] := "Code";
    tokens[Const] := "Const";
    tokens[Type] := "Type";
    tokens[Var] := "Var";
    tokens[Out] := "Out";
    tokens[Procedure] := "Procedure";
    tokens[Operator] := "Operator";
    tokens[Import] := "Import";
    tokens[Definition] := "Definition";
    tokens[Module] := "Module";
    tokens[Cell] := "Cell";
    tokens[CellNet] := "CellNet";

    (* type constructors *)
    tokens[Array] := "Array";
    tokens[Object] := "Object";
    tokens[Record] := "Record";
    tokens[Pointer] := "Pointer";
    tokens[Enum] := "Enum";
    tokens[Port] := "Port";
    tokens[Address] := "Address";
    tokens[Alias] := "Alias";
    tokens[Size] := "Size";

    (* assembler scanner tokens *)
    tokens[Ln] := "Ln";
    tokens[PC] := "PC";
    tokens[PCOffset] := "PCOffset";

    (* number types *)
    tokens[Shortint] := "Shortint";
    tokens[Integer] := "Integer";
    tokens[Longint] := "Longint";
    tokens[Hugeint] := "Hugeint";
    tokens[Real] := "Real";
    tokens[Longreal] := "Longreal";

    tokens[Comment] := "Comment";
    tokens[EndOfText] := "EndOfText";

    (* every token number must have received a name *)
    FOR t := 0 TO EndOfText DO
      ASSERT(tokens[t] # "")
    END;
  END InitTokens;
  (** enter keywords in the list of keywords (both upper- and lowercase), register the
      operator symbols and set the messages reported for missing tokens **)
  PROCEDURE InitKeywords;

    (* copy source to dest, including the terminating 0X, mapping 'a'..'z' to 'A'..'Z' *)
    PROCEDURE Upper(CONST source: ARRAY OF CHAR; VAR dest: ARRAY OF CHAR);
    VAR c: CHAR; i: LONGINT;
    BEGIN
      i := 0;
      LOOP
        c := source[i];
        IF ('a' <= c) & (c <= 'z') THEN c := CHR(ORD(c) - ORD('a') + ORD('A')) END;
        dest[i] := c;
        IF c = 0X THEN EXIT END;
        INC(i)
      END
    END Upper;

    (* enter name for token into the tables selected by cases and record name as the
       text expected for token in error messages *)
    PROCEDURE EnterInTables(CONST name: ARRAY OF CHAR; token: LONGINT; cases: SET);
    BEGIN
      IF Lowercase IN cases THEN keywordsLower.PutString(name,token) END;
      IF Uppercase IN cases THEN keywordsUpper.PutString(name,token) END;
      Basic.SetErrorExpected(token,name);
    END EnterInTables;

    (* enter a keyword given in lowercase: as is into the lowercase table, capitalised
       into the uppercase table *)
    PROCEDURE Enter(CONST name: ARRAY OF CHAR; token: LONGINT);
    VAR capitalised: Keyword;
    BEGIN
      Upper(name,capitalised);
      EnterInTables(name,token,{Lowercase});
      EnterInTables(capitalised,token,{Uppercase});
    END Enter;

    (* enter a symbol that reads the same in both keyword cases *)
    PROCEDURE EnterSymbol(CONST name: ARRAY OF CHAR; token: LONGINT);
    BEGIN
      EnterInTables(name,token,{Lowercase,Uppercase});
    END EnterSymbol;

  BEGIN
    NEW(keywordsUpper,EndOfText+1);
    NEW(keywordsLower,EndOfText+1);

    (* constructs and statements *)
    Enter("cell", Cell);
    Enter("cellnet", CellNet);
    Enter("await", Await);
    Enter("begin", Begin);
    Enter("by", By);
    Enter("const", Const);
    Enter("case", Case);
    Enter("code", Code);
    Enter("definition", Definition);
    Enter("do", Do);
    Enter("div", Div);
    Enter("end", End);
    Enter("enum", Enum);
    Enter("else", Else);
    Enter("elsif", Elsif);
    Enter("exit", Exit);
    Enter("extern", Extern);
    Enter("false", False);
    Enter("for", For);
    Enter("finally", Finally);
    Enter("if", If);
    Enter("imag", Imag);
    Enter("in", In);
    Enter("is", Is);
    Enter("import", Import);
    Enter("loop", Loop);
    Enter("module", Module);
    Enter("mod", Mod);
    Enter("nil", Nil);
    Enter("of", Of);
    Enter("or", Or);
    Enter("out", Out);
    Enter("operator", Operator);
    Enter("procedure", Procedure);
    Enter("port", Port);
    Enter("repeat", Repeat);
    Enter("return", Return);
    Enter("self", Self);
    Enter("new", New);
    Enter("result", Result);
    Enter("then", Then);
    Enter("true", True);
    Enter("to", To);
    Enter("type", Type);
    Enter("until", Until);
    Enter("var", Var);
    Enter("while", While);
    Enter("with", With);

    (* types *)
    Enter("array", Array);
    Enter("object", Object);
    Enter("pointer", Pointer);
    Enter("record", Record);
    Enter("address", Address);
    Enter("size", Size);
    Enter("alias", Alias);

    (* symbols *)
    EnterSymbol("#", Unequal);
    EnterSymbol("&", And);
    EnterSymbol("(", LeftParenthesis);
    EnterSymbol(")", RightParenthesis);
    EnterSymbol("*", Times);
    EnterSymbol("**", TimesTimes);
    EnterSymbol("+", Plus);
    EnterSymbol("+*", PlusTimes);
    EnterSymbol(",", Comma);
    EnterSymbol("-", Minus);
    EnterSymbol(".", Period);
    EnterSymbol("..", Upto);
    EnterSymbol(".*", DotTimes);
    EnterSymbol("./", DotSlash);
    EnterSymbol(".=", DotEqual);
    EnterSymbol(".#", DotUnequal);
    EnterSymbol(".>", DotGreater);
    EnterSymbol(".>=", DotGreaterEqual);
    EnterSymbol(".<", DotLess);
    EnterSymbol(".<=", DotLessEqual);
    EnterSymbol("/", Slash);
    EnterSymbol(":", Colon);
    EnterSymbol(":=", Becomes);
    EnterSymbol(";", Semicolon);
    EnterSymbol("<", Less);
    EnterSymbol("<=", LessEqual);
    EnterSymbol("=", Equal);
    EnterSymbol(">", Greater);
    EnterSymbol(">=", GreaterEqual);
    EnterSymbol("[", LeftBracket);
    EnterSymbol("]", RightBracket);
    EnterSymbol("^", Arrow);
    EnterSymbol("{", LeftBrace);
    EnterSymbol("|", Bar);
    EnterSymbol("}", RightBrace);
    EnterSymbol("~", Not);
    EnterSymbol("\", Backslash);
    EnterSymbol("`", Transpose);
    EnterSymbol("?", Questionmark);
    EnterSymbol("??", Questionmarks);
    EnterSymbol("!", ExclamationMark);
    EnterSymbol("!!", ExclamationMarks);
    EnterSymbol("<<", LessLess);
    EnterSymbol("<<?", LessLessQ);
    EnterSymbol(">>", GreaterGreater);
    EnterSymbol(">>?", GreaterGreaterQ);

    (* messages for tokens that have no fixed spelling *)
    Basic.SetErrorMessage(Number,"missing number");
    Basic.SetErrorMessage(String,"missing string");
    Basic.SetErrorMessage(Character,"missing character");
    Basic.SetErrorMessage(Identifier,"missing identifier");
    Basic.SetErrorMessage(EndOfText,"unexpected symbol before end");
  END InitKeywords;
  (** debugging / reporting: for every token number from 0 to EndOfText write one line with
      the token number and its lowercase and uppercase keyword, each in double quotes **)
  PROCEDURE ReportKeywords*(context: Commands.Context);
  VAR token: LONGINT; name: Keyword; out: Streams.Writer;
  BEGIN
    out := context.out;
    FOR token := 0 TO EndOfText DO
      out.Int(token,1); out.String(": ");
      (* lowercase spelling *)
      out.Char('"');
      keywordsLower.StringByIndex(token,name);
      out.String(name);
      out.Char('"');
      out.String(", ");
      (* uppercase spelling *)
      out.Char('"');
      keywordsUpper.StringByIndex(token,name);
      out.String(name);
      out.Char('"');
      out.Ln;
    END;
  END ReportKeywords;
  1456. (*
  1457. PROCEDURE TestScanner*(context: Commands.Context);
  1458. VAR filename: ARRAY 256 OF CHAR; reader: Streams.Reader; scanner: Scanner;sym: Symbol;
  1459. BEGIN
  1460. context.arg.SkipWhitespace; context.arg.String(filename);
  1461. reader := TextUtilities.GetTextReader(filename);
  1462. scanner := NewScanner(filename,reader,0,NIL);
  1463. REPEAT
  1464. IF scanner.GetNextSymbol(sym) THEN
  1465. OutSymbol(context.out,sym);context.out.Ln;
  1466. END;
  1467. UNTIL scanner.error OR (sym.token=EndOfText)
  1468. END TestScanner;
  1469. *)
  BEGIN
    (* module initialisation: build the character class, token name and keyword tables *)
    InitReservedCharacters;
    InitTokens;
    InitKeywords
  END FoxScanner.
  1473. FoxScanner.ReportKeywords
  1474. FoxScanner.TestScanner Test.Mod ~