CSS2Scanner.Mod 11 KB


  1. MODULE CSS2Scanner; (** Stefan Walthert *)
  2. (** AUTHOR "swalthert"; PURPOSE ""; *)
  3. IMPORT
  4. KernelLog, Strings, Streams, Files, DynamicStrings;
  5. CONST
  6. (** Scanner: Tokens *)
  7. Null = -2;
  8. Invalid* = -1;
  9. Ident* = 0;
  10. AtKeyword* = 1; (** '@'ident *)
  11. String* = 2; (** '"'chars'"' | "'"chars"'" *)
  12. Hash* = 3; (** '#'name *)
  13. Important* = 4; (** '!important' *)
  14. Number* = 5; (** number (cf. Scanner.numType) *)
  15. Percentage* = 6; (** num'%' *)
  16. Dimension* = 7; (** num ident *)
  17. URI* = 8; (** 'url('string')' | 'url('chars')' *)
  18. Function* = 9; (** ident'(' *)
  19. UnicodeRange* = 10; (** *)
  20. Cdo* = 11; (** '<!--' *)
  21. Cdc* = 12; (** '-->' *)
  22. Slash* = 13; (** '/' *)
  23. Comma* = 14; (** ',' *)
  24. Greater* = 15; (** '>' *)
  25. Plus* = 16; (** '+' *)
  26. Minus* = 17; (** '-' *)
  27. Asterisk* = 18; (** '*' *)
  28. Semicolon* = 19; (** ';' *)
  29. Colon* = 20; (** ':' *)
  30. Dot* = 21; (** '.' *)
  31. BracketOpen* = 22; (** '[' *)
  32. BracketClose* = 23; (** ']' *)
  33. ParenOpen* = 24; (** '(' *)
  34. ParenClose* = 25; (** ')' *)
  35. BraceOpen* = 26; (** '{' *)
  36. BraceClose* = 27; (** '}' *)
  37. Equal* = 28; (** '=' *)
  38. Includes* = 29; (** '~=' *)
  39. Dashmatch* = 30; (** '|=' *)
  40. Eof* = 31; (** *)
  41. (** real or integer number *)
  42. Undefined* = 0;
  43. Integer* = 1; (** integer number *)
  44. Real* = 2; (** real number *)
  45. TYPE
  46. Scanner* = OBJECT
  47. VAR
  48. sym-: LONGINT;
  49. numberType-: SHORTINT;
  50. intVal-: LONGINT;
  51. realVal-: LONGREAL;
  52. line-, row-, pos: LONGINT;
  53. reportError*: PROCEDURE (pos, line, row: LONGINT; msg: ARRAY OF CHAR);
  54. nextCh: CHAR;
  55. dynstr: DynamicStrings.DynamicString;
  56. f: Files.File;
  57. r: Files.Reader;
  (** Prepares the scanner for reading file f and fetches the first character.
    When f is NIL, sym is set to Invalid and no reader is opened; all other fields are
    still initialised so that Error, GetStr and Scan do not dereference NIL
    (Scan then simply yields Eof because nextCh stays 0X). *)
  PROCEDURE & Init*(f: Files.File);
  BEGIN
    (* state needed by Scan / Error / GetStr, independent of whether a file was supplied *)
    reportError := DefaultReportError;
    numberType := Undefined; intVal := 0; realVal := 0.0;
    (* row starts at 0: the NextCh call that fetches the first character increments it to 1,
      matching the "row := 1" used for the first character after a line break *)
    line := 1; row := 0; pos := 0;
    nextCh := 0X;
    NEW(dynstr);
    SELF.f := f;
    IF f = NIL THEN
      r := NIL; sym := Invalid
    ELSE
      sym := Null;
      Files.OpenReader(r, f, 0);
      NextCh()
    END
  END Init;
  (* Reports msg together with the current position, line and row through the installed
    reportError handler. reportError is an exported procedure variable, so a client may have
    set it to NIL; in that case the module's default reporter is used instead of trapping. *)
  PROCEDURE Error(msg: ARRAY OF CHAR);
  BEGIN
    IF reportError # NIL THEN
      reportError(GetPos(), line, row, msg)
    ELSE
      DefaultReportError(GetPos(), line, row, msg)
    END
  END Error;
  (* Advances to the next input character and keeps pos, line and row up to date.
    When the reader is no longer in state Streams.Ok, nextCh becomes 0X and sym is set to Eof.
    A line break is counted once for LF, for a lone CR, and for a CR LF pair
    (the pair is counted when its LF is left behind). *)
  PROCEDURE NextCh;
  VAR prev: CHAR;
  BEGIN
    prev := nextCh;
    IF r.res # Streams.Ok THEN
      nextCh := 0X; sym := Eof
    ELSE
      r.Char(nextCh); INC(pos)
    END;
    (* the character just left behind decides whether the new one starts a line *)
    IF (prev = 0AX) OR ((prev = DynamicStrings.CR) & (nextCh # 0AX)) THEN
      INC(line); row := 1
    ELSE
      INC(row)
    END
  END NextCh;
  (* Consumes input characters for as long as IsWhiteSpace accepts the current one. *)
  PROCEDURE SkipWhiteSpace;
  BEGIN
    LOOP
      IF ~IsWhiteSpace(nextCh) THEN EXIT END;
      NextCh()
    END
  END SkipWhiteSpace;
  (* Skips a comment. On entry nextCh is the '*' of the opening "/*"; on exit nextCh is the
    first character after the closing "*/". If the input ends first, "unclosed comment" is
    reported once and the procedure returns. *)
  PROCEDURE ScanComment;
  BEGIN
    NextCh(); (* step over the '*' that opened the comment *)
    LOOP
      WHILE (nextCh # '*') & (sym # Eof) DO
        NextCh()
      END;
      IF sym = Eof THEN
        Error("unclosed comment"); EXIT (* leave the loop: there is nothing more to read *)
      END;
      (* swallow a whole run of '*' so that a comment ending in "**/" is recognised *)
      WHILE nextCh = '*' DO
        NextCh()
      END;
      IF nextCh = '/' THEN
        NextCh(); EXIT
      END
    END
  END ScanComment;
  (* Scans one escape sequence; on entry nextCh is the backslash.
    - "\" followed by 1..6 hex digits: the digits form a Unicode code point which is appended
      to dynstr at index i in UTF-8; one white space character directly after fewer than six
      digits belongs to the escape and is skipped.
    - "\" followed by any other character: that character is appended literally, except that an
      escaped line break inside a string (isString = TRUE) contributes nothing.
    i is advanced by the number of bytes written. A code point of 0 or above 10FFFFH is
    reported as an error and nothing is written. *)
  PROCEDURE ScanEscape(isString: BOOLEAN; VAR i: LONGINT);
  VAR val: LONGINT; n: SHORTINT; hexstr: ARRAY 7 OF CHAR; newline: BOOLEAN; ch: CHAR;
  BEGIN
    NextCh(); (* step over the backslash *)
    IF IsDigit(nextCh) OR (('a' <= nextCh) & (nextCh <= 'f')) OR (('A' <= nextCh) & (nextCh <= 'F')) THEN
      n := 0;
      WHILE (IsDigit(nextCh) OR (('a' <= nextCh) & (nextCh <= 'f')) OR (('A' <= nextCh) & (nextCh <= 'F'))) & (n < 6) DO
        (* HexStrToInt is fed upper-case digits only; CAP is applied to letters only *)
        IF ('a' <= nextCh) & (nextCh <= 'f') THEN hexstr[n] := CAP(nextCh) ELSE hexstr[n] := nextCh END;
        NextCh(); INC(n)
      END;
      hexstr[n] := 0X;
      HexStrToInt(hexstr, val);
      IF IsWhiteSpace(nextCh) & (n # 6) THEN NextCh() END; (* skip space after escape digits (if less than 6 digits) *)
      (* encode the code point as UTF-8 *)
      IF (val <= 0) OR (val > 10FFFFH) THEN
        Error("invalid escape")
      ELSIF val < 80H THEN
        dynstr.Put(CHR(val), i); INC(i)
      ELSIF val < 800H THEN
        dynstr.Put(CHR(0C0H + val DIV 40H), i);
        dynstr.Put(CHR(80H + val MOD 40H), i + 1);
        INC(i, 2)
      ELSIF val < 10000H THEN
        dynstr.Put(CHR(0E0H + val DIV 1000H), i);
        dynstr.Put(CHR(80H + (val DIV 40H) MOD 40H), i + 1);
        dynstr.Put(CHR(80H + val MOD 40H), i + 2);
        INC(i, 3)
      ELSE
        dynstr.Put(CHR(0F0H + val DIV 40000H), i);
        dynstr.Put(CHR(80H + (val DIV 1000H) MOD 40H), i + 1);
        dynstr.Put(CHR(80H + (val DIV 40H) MOD 40H), i + 2);
        dynstr.Put(CHR(80H + val MOD 40H), i + 3);
        INC(i, 4)
      END
    ELSE
      ch := nextCh;
      newline := (ch = 0AX) OR (ch = 0DX);
      NextCh();
      (* 0X means the input ended right after the backslash: nothing to append *)
      IF (ch # 0X) & (~isString OR ~newline) THEN
        dynstr.Put(ch, i); INC(i)
      END
    END
  END ScanEscape;
  (* Scans an identifier (nmstart followed by any number of nmchar) into dynstr and sets
    sym to Ident. If the current character cannot start an identifier, "{nmstart} expected"
    is reported and neither dynstr nor sym is changed. *)
  PROCEDURE ScanIdent;
  VAR i: LONGINT;
  BEGIN
    IF IsNmStart(nextCh) THEN (* the start set excludes digits, as the error message states *)
      i := 0;
      REPEAT
        IF IsEscape(nextCh) THEN
          ScanEscape(FALSE, i)
        ELSE
          dynstr.Put(nextCh, i); INC(i);
          NextCh()
        END
      UNTIL ~IsNmChar(nextCh);
      dynstr.Put(0X, i); sym := Ident
    ELSE
      Error("{nmstart} expected")
    END
  END ScanIdent;
  (* Collects a (possibly empty) run of name characters into dynstr, terminates it with 0X
    and sets sym to Ident. Escapes are delegated to ScanEscape. *)
  PROCEDURE ScanName;
  VAR len: LONGINT;
  BEGIN
    len := 0;
    LOOP
      IF ~IsNmChar(nextCh) THEN EXIT END;
      IF ~IsEscape(nextCh) THEN
        dynstr.Put(nextCh, len); INC(len);
        NextCh()
      ELSE
        ScanEscape(FALSE, len)
      END
    END;
    dynstr.Put(0X, len);
    sym := Ident
  END ScanName;
  (* Scans a quoted string into dynstr. On entry nextCh is expected to be the opening quote
    (" or '); the other quote character may occur inside the string. Scanning stops at the
    first character that is not allowed inside the string or at end of input; if that
    character is not the matching closing quote, "quote expected" is reported. The
    terminating character is consumed in either case. *)
  PROCEDURE ScanString;
  VAR len: LONGINT; opening, otherQuote: CHAR;
  BEGIN
    opening := nextCh;
    IF opening = '"' THEN otherQuote := "'"
    ELSIF opening = "'" THEN otherQuote := '"'
    ELSE Error("quote expected")
    END;
    NextCh();
    len := 0;
    LOOP
      IF sym = Eof THEN EXIT END;
      IF IsEscape(nextCh) THEN
        ScanEscape(TRUE, len)
      ELSIF (nextCh = 9X) OR (nextCh = ' ') OR (nextCh = '!')
        OR (('#' <= nextCh) & (nextCh <= '&')) OR (('(' <= nextCh) & (nextCh <= '~'))
        OR (nextCh = otherQuote) OR IsNonAscii(nextCh) THEN
        dynstr.Put(nextCh, len); INC(len);
        NextCh()
      ELSE
        EXIT
      END
    END;
    IF nextCh # opening THEN Error("quote expected") END;
    dynstr.Put(0X, len);
    NextCh()
  END ScanString;
  (* Scans an unquoted URL body into dynstr and terminates it with 0X. Accepted are '!',
    '#'..'&', '*'..'~', non-ASCII characters and escapes; scanning stops at the first other
    character (which is left in nextCh) or at end of input. *)
  PROCEDURE ScanURL;
  VAR len: LONGINT;
  BEGIN
    len := 0;
    LOOP
      IF sym = Eof THEN EXIT END;
      IF IsEscape(nextCh) THEN
        ScanEscape(FALSE, len)
      ELSIF (nextCh = '!') OR (('#' <= nextCh) & (nextCh <= '&'))
        OR (('*' <= nextCh) & (nextCh <= '~')) OR IsNonAscii(nextCh) THEN
        dynstr.Put(nextCh, len); INC(len);
        NextCh()
      ELSE
        EXIT
      END
    END;
    dynstr.Put(0X, len)
  END ScanURL;
  (* Scans an unsigned number starting at nextCh.
    - digits only: sym = Number, numberType = Integer, value in intVal
    - digits '.' digits (integer part may be empty): sym = Number, numberType = Real, value in realVal
    - a '.' that is not followed by a digit: sym = Dot (NOTE: digits scanned before such a '.' are dropped)
    A directly following identifier turns the token into Dimension (unit text in dynstr),
    a following '%' into Percentage.
    The real value is accumulated in LONGREAL so long digit runs cannot overflow; an integer
    that does not fit into LONGINT is reported and clamped to MAX(LONGINT). *)
  PROCEDURE ScanNumber;
  VAR d, whole: LONGINT; wholeR, fracNum, fracDen: LONGREAL; overflow: BOOLEAN;
  BEGIN
    whole := 0; wholeR := 0.0; overflow := FALSE;
    WHILE IsDigit(nextCh) & (sym # Eof) DO
      d := ORD(nextCh) - ORD('0');
      wholeR := 10 * wholeR + d;
      IF whole <= (MAX(LONGINT) - d) DIV 10 THEN
        whole := 10 * whole + d
      ELSE
        overflow := TRUE
      END;
      NextCh()
    END;
    IF nextCh = '.' THEN
      NextCh();
      IF ~IsDigit(nextCh) THEN sym := Dot; RETURN END;
      fracNum := 0.0; fracDen := 1.0;
      WHILE IsDigit(nextCh) & (sym # Eof) DO
        d := ORD(nextCh) - ORD('0');
        fracNum := 10 * fracNum + d; fracDen := 10 * fracDen;
        NextCh()
      END;
      realVal := wholeR + fracNum / fracDen;
      sym := Number; numberType := Real
    ELSE
      IF overflow THEN
        Error("integer too large"); whole := MAX(LONGINT)
      END;
      intVal := whole;
      sym := Number; numberType := Integer
    END;
    IF IsNmStart(nextCh) THEN
      ScanIdent(); sym := Dimension
    ELSIF nextCh = '%' THEN
      NextCh(); sym := Percentage
    END
  END ScanNumber;
  (** Reads the next token and stores its kind in sym. Token text (identifiers, strings,
    URLs, hash names, dimension units) is available through GetStr, numeric values through
    numberType / intVal / realVal. White space and comments are skipped. After a lexical
    error has been reported, scanning continues with the following input; an unrecognised
    character is consumed so that the scanner always makes progress. *)
  PROCEDURE Scan*;
  VAR s: Strings.String; msg: ARRAY 22 OF CHAR;
  BEGIN
    dynstr.Put(0X, 0); sym := Null;
    numberType := Undefined; intVal := 0; realVal := 0.0; (* reset all fields *)
    REPEAT
      SkipWhiteSpace();
      CASE nextCh OF
      | 0X: sym := Eof
      | 'a' .. 'z', 'A' .. 'Z', '\': ScanIdent();
          IF nextCh = '(' THEN (* ident directly followed by '(' is a function or url(...) *)
            NextCh();
            s := GetStr();
            IF s^ = 'url' THEN
              SkipWhiteSpace();
              IF (nextCh = '"') OR (nextCh = "'") THEN
                ScanString()
              ELSE
                ScanURL()
              END;
              SkipWhiteSpace();
              IF nextCh = ')' THEN
                NextCh(); sym := URI
              ELSE
                Error("')' expected")
              END
            ELSE
              sym := Function
            END
          END
      | '!': NextCh(); SkipWhiteSpace();
          ScanIdent(); s := GetStr();
          IF s^ = 'important' THEN
            sym := Important
          ELSE
            Error("'!important' expected")
          END
      | '+': NextCh();
          IF IsDigit(nextCh) OR (nextCh = '.') THEN
            ScanNumber()
          ELSE
            sym := Plus
          END
      | '-': NextCh();
          IF nextCh = '-' THEN
            NextCh();
            IF nextCh = '>' THEN
              NextCh(); sym := Cdc
            ELSE
              Error("'-->' expected")
            END
          ELSE
            sym := Minus
          END;
      | '0' .. '9', '.' : ScanNumber()
      | '@': NextCh(); ScanIdent(); sym := AtKeyword
      | '#': NextCh(); ScanName(); sym := Hash
      | '*': NextCh(); sym := Asterisk
      | '<': NextCh();
          IF nextCh = '!' THEN
            NextCh();
            IF nextCh = '-' THEN
              NextCh();
              IF nextCh = '-' THEN
                NextCh(); sym := Cdo
              ELSE
                Error("'<!--' expected")
              END
            ELSE
              Error("'<!--' expected")
            END
          ELSE
            Error("'<!--' expected")
          END
      | '/': NextCh();
          IF nextCh = '*' THEN
            ScanComment(); sym := Null (* a comment is not a token: keep scanning *)
          ELSE
            sym := Slash
          END
      | '>': NextCh(); sym := Greater
      | '~': NextCh();
          IF nextCh = '=' THEN
            NextCh(); sym := Includes
          ELSE
            Error("'~=' expected")
          END
      | '|': NextCh();
          IF nextCh = '=' THEN
            NextCh(); sym := Dashmatch
          ELSE
            Error("'|=' expected")
          END
      | '=': NextCh(); sym := Equal
      | '"', "'": ScanString(); sym := String
      | '[': NextCh(); sym := BracketOpen
      | ']': NextCh(); sym := BracketClose
      | '(': NextCh(); sym := ParenOpen
      | ')': NextCh(); sym := ParenClose
      | '{': NextCh(); sym := BraceOpen
      | '}': NextCh(); sym := BraceClose
      | ',': NextCh(); sym := Comma
      | ';': NextCh(); sym := Semicolon
      | ':': NextCh(); sym := Colon
      ELSE
        (* "unknown character" occupies msg[0..16]; append " 'c'" and the terminator *)
        msg := "unknown character"; msg[17] := " "; msg[18] := "'"; msg[19] := nextCh; msg[20] := "'"; msg[21] := 0X;
        Error(msg);
        NextCh() (* consume the offending character, otherwise the loop never advances *)
      END
    UNTIL sym # Null
  END Scan;
  (** Returns the text collected in dynstr, as produced by dynstr.ToArrOfChar. *)
  PROCEDURE GetStr*(): Strings.String;
  VAR text: Strings.String;
  BEGIN
    text := dynstr.ToArrOfChar();
    RETURN text
  END GetStr;
  (** Returns pos, the number of characters fetched from the reader so far. *)
  PROCEDURE GetPos*(): LONGINT;
  VAR current: LONGINT;
  BEGIN
    current := pos;
    RETURN current
  END GetPos;
  356. END Scanner;
  (* TRUE for the CSS2 white space characters: space, tab, line feed, form feed and
    carriage return. *)
  PROCEDURE IsWhiteSpace(ch: CHAR): BOOLEAN;
  BEGIN
    RETURN (ch = 020X) OR (ch = 9X) OR (ch = 0DX) OR (ch = 0AX) OR (ch = 0CX)
  END IsWhiteSpace;
  (* TRUE for every character above 7FX, i.e. for every byte of a multi-byte UTF-8
    sequence, so that such text is accepted wherever the CSS2 grammar allows "nonascii". *)
  PROCEDURE IsNonAscii(ch: CHAR): BOOLEAN;
  BEGIN
    RETURN ch > 7FX
  END IsNonAscii;
  (* TRUE exactly when ch is the backslash that introduces an escape. *)
  PROCEDURE IsEscape(ch: CHAR): BOOLEAN;
  BEGIN
    RETURN ORD(ch) = ORD('\')
  END IsEscape;
  (* TRUE when ch may begin a name: a letter, '-', a non-ASCII character or the start of
    an escape. *)
  PROCEDURE IsNmStart(ch: CHAR): BOOLEAN;
  BEGIN
    CASE ch OF
    | 'a' .. 'z', 'A' .. 'Z', '-': RETURN TRUE
    ELSE
      RETURN IsNonAscii(ch) OR IsEscape(ch)
    END
  END IsNmStart;
  (* TRUE when ch may occur inside a name: a letter, '-', a digit, a non-ASCII character
    or the start of an escape. *)
  PROCEDURE IsNmChar(ch: CHAR): BOOLEAN;
  BEGIN
    CASE ch OF
    | 'a' .. 'z', 'A' .. 'Z', '-', '0' .. '9': RETURN TRUE
    ELSE
      RETURN IsNonAscii(ch) OR IsEscape(ch)
    END
  END IsNmChar;
  (* TRUE for the decimal digits '0' to '9'. *)
  PROCEDURE IsDigit(ch: CHAR): BOOLEAN;
  BEGIN
    CASE ch OF
    | '0' .. '9': RETURN TRUE
    ELSE
      RETURN FALSE
    END
  END IsDigit;
  (* Converts the hexadecimal number at the start of str to val. Leading characters up to
    and including " " are skipped; conversion stops at the first character that is not a
    hex digit. Both upper and lower case digits A-F / a-f are accepted. An empty digit
    sequence yields 0. Halts with 99 if the value does not fit into LONGINT. *)
  PROCEDURE HexStrToInt(VAR str: ARRAY OF CHAR; VAR val: LONGINT);
  VAR i, d: LONGINT; ch: CHAR;
  BEGIN
    i := 0; ch := str[0];
    WHILE (ch # 0X) & (ch <= " ") DO
      INC(i); ch := str[i]
    END;
    val := 0;
    WHILE (("0" <= ch) & (ch <= "9")) OR (("A" <= ch) & (ch <= "F")) OR (("a" <= ch) & (ch <= "f")) DO
      IF ch <= "9" THEN d := ORD(ch) - ORD("0")
      ELSIF ch <= "F" THEN d := ORD(ch) - ORD("A") + 10
      ELSE d := ORD(ch) - ORD("a") + 10
      END;
      INC(i); ch := str[i];
      IF val <= ((MAX(LONGINT) - d) DIV 10H) THEN
        val := 10H * val + d
      ELSE
        HALT(99) (* value would exceed MAX(LONGINT) *)
      END
    END
  END HexStrToInt;
  (* Default error handler: writes one kernel log entry consisting of two tab characters,
    the position, the line, the row and the message text. *)
  PROCEDURE DefaultReportError(pos, line, row: LONGINT; msg: ARRAY OF CHAR);
  BEGIN
    KernelLog.Enter;
    KernelLog.Char(9X); KernelLog.Char(9X);
    KernelLog.String("pos ");
    KernelLog.Int(pos, 6);
    KernelLog.String(", line ");
    KernelLog.Int(line, 0);
    KernelLog.String(", row ");
    KernelLog.Int(row, 0);
    KernelLog.String(" ");
    KernelLog.String(msg);
    KernelLog.Exit
  END DefaultReportError;
  408. END CSS2Scanner.