2
0

FoxDocumentationScanner.Mod 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. MODULE FoxDocumentationScanner; (** AUTHOR ""; PURPOSE ""; *)
  2. IMPORT Streams, Diagnostics, D := Debugging;
  3. CONST
  (* scanner constants: the characters the scanner treats specially *)
  EOT* = 0X; (* end of text; GetNextSymbol delivers Token.EndOfText when it is the look-ahead character *)
  LF* = 0AX; (* line feed *)
  CR* = 0DX; (* carriage return *)
  (* when TRUE, GetNextSymbol writes every delivered symbol to the debugging log *)
  Trace = FALSE;
  7. TYPE
  (* kinds of symbols delivered by Scanner.GetNextSymbol; the order of the enumerators is significant and must be kept *)
  Token* = ENUM
    (* text structure *)
    EndOfText*, NewLine*,
    (* "|=" and "|" *)
    Header*, Pipe*,
    (* inline markup delimiters "/", "*" and "_" *)
    LeftItalic*, RightItalic*, LeftBold*, RightBold*, LeftUnderline*, RightUnderline*,
    (* runs of "[" and "]", runs of "=" and "#" at the start of a line *)
    LinkBegin*, LinkEnd*, Heading*, Number*,
    (* "#" used as delimiter; Description is a first-in-line string that ended with ":" *)
    LeftDescription*, RightDescription*, Description*,
    (* runs of "*", "-", "{", "}", "<" and ">" *)
    Bullet*, Line*, CodeBegin*, CodeEnd*, LabelBegin*, LabelEnd*,
    (* runs of "\" and "@"; String is any other literal text *)
    LineBreak*, Section*, String*
  END;
  (* dynamically allocated character buffer holding the text of a symbol *)
  String* = POINTER TO ARRAY OF CHAR;

  (* one scanned symbol, filled in by Scanner.GetNextSymbol *)
  Symbol* = RECORD
    (* scanner position taken at the beginning of GetNextSymbol, before blanks are skipped *)
    position*: LONGINT;
    (* kind of the symbol *)
    token*: Token;
    (* number of repeated marker characters counted by the scanner, 0 if none were counted *)
    level*: LONGINT;
    (* collected characters, NIL if none were collected *)
    string*: String;
    (* number of characters collected in string *)
    stringLength*: LONGINT;
    (* TRUE if blanks (other than line ends) were skipped directly before the symbol *)
    hasWhitespace*: BOOLEAN;
  END;
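  (** The scanner splits documentation text into symbols whose kinds are listed in Token.
  NOTE: an EBNF of the accepted syntax was announced here, but none is given. **)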
  Scanner* = OBJECT
  VAR
    (* helper state information *)
    reader: Streams.Reader; (* source; NIL is treated as empty text *)
    diagnostics: Diagnostics.Diagnostics; (* error logging; stored by the initializer, not used inside this object *)
    ch: CHAR; (* look-ahead character *)
    position-: LONGINT; (* current position: the offset given to the initializer, incremented for each further character read *)
    (* token of the symbol delivered by the previous call of GetNextSymbol; NewLine makes the next symbol "first in line".
      NOTE(review): never set by the initializer, so the first symbol of a text is presumably not treated as first in line - confirm that this is intended *)
    prevToken: Token;

    (*
      reader: input stream
      position: reference position (offset) of the input stream, for error output
      diagnostics: error output object
    *)
    PROCEDURE & InitializeScanner*(reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics);
    BEGIN
      SELF.reader := reader;
      SELF.diagnostics := diagnostics;
      ch := " "; (* any value different from EOT, GetNextCharacter asserts this *)
      IF reader = NIL THEN ch := EOT ELSE GetNextCharacter END;
      (* assigned last: reading the first character above has incremented the field *)
      SELF.position := position;
    END InitializeScanner;

    (** get next character, end of text results in ch = EOT **)
    PROCEDURE GetNextCharacter;
    BEGIN
      ASSERT(ch # EOT); (* must not be called once the end of the text has been reached *)
      reader.Char(ch); INC(position);
    END GetNextCharacter;

    (* return the character following the look-ahead character without consuming it *)
    PROCEDURE Peek(): CHAR;
    BEGIN
      RETURN reader.Peek()
    END Peek;

    (* return if the look-ahead character terminates a literal that is currently being read *)
    PROCEDURE BreaksLiteral(): BOOLEAN;
    BEGIN
      CASE ch OF
      "*", "_", "/","#":
        IF IsWhitespace(Peek(),TRUE) THEN RETURN TRUE END; (* right of bold, underline, italics and description *)
      |"]",">","\":
        IF ch = Peek() THEN RETURN TRUE END; (* right of link or label *)
      |"|": RETURN TRUE (* pipe in link or table *)
      ELSE
        RETURN FALSE
      END;
      RETURN FALSE
    END BreaksLiteral;

    (* return if ch is a letter or an underscore *)
    PROCEDURE IdentifierStart(ch: CHAR): BOOLEAN;
    BEGIN
      CASE ch OF
      | 'a' .. 'z', 'A' .. 'Z', '_': RETURN TRUE
      ELSE RETURN FALSE
      END;
    END IdentifierStart;

    (* append ch to the string of symbol; the buffer is allocated on first use and grown when full *)
    PROCEDURE AppendCharacter(VAR symbol: Symbol; ch: CHAR);
    VAR s: String; i: LONGINT;
    BEGIN
      IF symbol.string = NIL THEN NEW(symbol.string,32); symbol.stringLength := 0; END;
      IF symbol.stringLength = LEN(symbol.string)-1 THEN
        (* only the terminator slot is left: move the content to a larger buffer *)
        s := symbol.string;
        NEW(symbol.string, symbol.stringLength*2);
        FOR i := 0 TO symbol.stringLength DO
          symbol.string[i] := s[i];
        END;
      END;
      symbol.string[symbol.stringLength] := ch;
      INC(symbol.stringLength);
      (* terminate explicitly; the index is valid because one slot is always kept free above *)
      symbol.string[symbol.stringLength] := 0X;
    END AppendCharacter;

    (* append ch to the string of symbol number times *)
    PROCEDURE AppendCharacters(VAR symbol: Symbol; ch: CHAR; number: LONGINT);
    BEGIN
      WHILE number > 0 DO
        AppendCharacter(symbol,ch); DEC(number)
      END;
    END AppendCharacters;

    (* set the token of symbol and append characters up to the next blank, control character, end of text
      or character that breaks a literal. If the look-ahead character already is a blank, a control character
      or EOT, nothing is consumed: such a character never belongs to a literal and GetNextCharacter
      must not be called at the end of the text. *)
    PROCEDURE ReadLiteral(VAR symbol: Symbol; token: Token);
    BEGIN
      symbol.token := token;
      IF ch <= " " THEN RETURN END;
      REPEAT (* consumes at least one character *)
        AppendCharacter(symbol, ch);
        GetNextCharacter;
      UNTIL (ch <= " ") OR BreaksLiteral();
    END ReadLiteral;

    (* as ReadLiteral, but the literal is terminated only by a blank, a control character or the end of text *)
    PROCEDURE ReadLiteralWS(VAR symbol: Symbol; token: Token);
    BEGIN
      symbol.token := token;
      IF ch <= " " THEN RETURN END;
      REPEAT (* consumes at least one character *)
        AppendCharacter(symbol, ch);
        GetNextCharacter;
      UNTIL (ch <= " ");
    END ReadLiteralWS;

    (* consume a run of the character this and store its length in symbol.level.
      If the length lies within min .. max the symbol receives the given token and no string.
      Otherwise the run is the beginning of ordinary text: the consumed characters are put back
      into the string and the symbol is continued as a Token.String literal. *)
    PROCEDURE ReadCharacters(this: CHAR; min,max: LONGINT; VAR symbol: Symbol; token: Token);
    BEGIN
      symbol.level := 0;
      WHILE (ch = this) DO INC(symbol.level); GetNextCharacter END;
      (* both bounds have to hold; a disjunction would accept every run *)
      IF (symbol.level >= min) & (symbol.level <= max) THEN
        symbol.token := token;
      ELSE
        AppendCharacters(symbol, this, symbol.level);
        ReadLiteral(symbol, Token.String);
      END;
    END ReadCharacters;

    (* return if ch is a blank or control character; EOT never counts, CR and LF only count if includeNewLine is set *)
    PROCEDURE IsWhitespace(ch: CHAR; includeNewLine: BOOLEAN): BOOLEAN;
    BEGIN
      RETURN (ch <= " ") & (ch # EOT) & (includeNewLine OR (ch # CR) & (ch # LF))
    END IsWhitespace;

    (** get next symbol.
      symbol.token is read before it is overwritten: if it still is Token.CodeBegin from the previous call,
      the text up to the closing "}}}" is delivered as one symbol. All other fields are reset. **)
    PROCEDURE GetNextSymbol*(VAR symbol: Symbol);
    VAR prev: CHAR; firstInLine: BOOLEAN;

      (* skip blanks and control characters except line ends; prev keeps the last skipped character *)
      PROCEDURE SkipBlanks;
      BEGIN
        WHILE IsWhitespace(ch,FALSE) DO (*ignore control characters*)
          prev := ch;
          GetNextCharacter
        END;
      END SkipBlanks;

      (* collect the text of a code section up to, but excluding, the terminating "}}}".
        Delivers Token.CodeEnd with the code in symbol.string, or Token.EndOfText if the text ends without terminator. *)
      PROCEDURE ReadCode;
      VAR ending: LONGINT; terminated: BOOLEAN;
      BEGIN
        ending := 0; terminated := FALSE;
        (* pre-checked loop: the text may already be at its end, GetNextCharacter must not be called then *)
        WHILE (ch # EOT) & ~terminated DO
          AppendCharacter(symbol, ch);
          IF ch = "}" THEN
            INC(ending);
            GetNextCharacter
          ELSIF (ending = 3) & (ch # "'") & (ch # '"') (* to allow for referencing '}}}' in descriptions of the documentation *) THEN
            terminated := TRUE
          ELSE
            ending := 0;
            GetNextCharacter
          END;
        END;
        IF terminated THEN
          symbol.token := Token.CodeEnd;
          (* remove "}}}" and the appended, but not consumed, character that followed it *)
          DEC(symbol.stringLength,4);
          symbol.string[symbol.stringLength] := 0X
        ELSIF ending = 3 THEN
          (* "}}}" directly followed by the end of the text: only the terminator has to be removed *)
          symbol.token := Token.CodeEnd;
          DEC(symbol.stringLength,3);
          symbol.string[symbol.stringLength] := 0X
        ELSE
          symbol.token := Token.EndOfText
        END;
      END ReadCode;

      (* return if the current character is preceded by a whitespace and next character is not a whitespace, such as in the beginning of *b o l d* *)
      PROCEDURE IsLeft(): BOOLEAN;
      BEGIN
        RETURN IsWhitespace(prev,FALSE) & ~IsWhitespace(Peek(),FALSE)
      END IsLeft;

      (* return if the current character is preceded by a non-whitespace and next character is a whitespace, such as at the end of *b o l d* *)
      PROCEDURE IsRight(): BOOLEAN;
      BEGIN
        RETURN ~IsWhitespace(prev,TRUE) & IsWhitespace(Peek(),TRUE)
      END IsRight;

    BEGIN
      symbol.stringLength := 0;
      symbol.string := NIL;
      symbol.level := 0;
      symbol.position := position;
      prev := ch;
      SkipBlanks;
      firstInLine := prevToken = Token.NewLine;
      symbol.hasWhitespace := IsWhitespace(prev, FALSE);
      IF symbol.token = Token.CodeBegin THEN
        ReadCode;
        RETURN (* prevToken is left unchanged and nothing is traced on this path *)
      END;
      CASE ch OF (* ch > " " *)
      EOT: symbol.token := Token.EndOfText
      | CR: symbol.token := Token.NewLine; GetNextCharacter; IF ch = LF THEN GetNextCharacter END;
      | LF: symbol.token := Token.NewLine; GetNextCharacter; IF ch = CR THEN GetNextCharacter END;
      | '|': GetNextCharacter;
        IF (ch = "=") THEN symbol.token := Token.Header; GetNextCharacter ELSE symbol.token := Token.Pipe END;
      | '/':
        IF IsLeft() THEN symbol.token := Token.LeftItalic; GetNextCharacter;
        ELSIF IsRight() THEN symbol.token := Token.RightItalic; GetNextCharacter;
        ELSE ReadLiteral( symbol, Token.String)
        END
      | '_':
        IF IsLeft() THEN symbol.token := Token.LeftUnderline;GetNextCharacter;
        ELSIF IsRight() THEN symbol.token := Token.RightUnderline;GetNextCharacter;
        ELSE ReadLiteral( symbol, Token.String)
        END
      | '[': ReadCharacters (ch, 2, 2, symbol, Token.LinkBegin);
      | ']': ReadCharacters (ch, 2, 2, symbol, Token.LinkEnd);
      | '=':
        IF firstInLine THEN
          ReadCharacters (ch, 1, 3, symbol, Token.Heading);
          (* a name directly attached to the heading marker is collected in the string of the symbol *)
          IF (symbol.token = Token.Heading) & IdentifierStart(ch) THEN
            WHILE ~IsWhitespace(ch,TRUE) & (ch#EOT) DO
              AppendCharacter(symbol,ch);
              GetNextCharacter;
            END;
          END;
        ELSE ReadLiteral(symbol, Token.String);
        END;
      | '#':
        IF firstInLine THEN (* number *)
          ReadCharacters(ch, 1, 3, symbol, Token.Number);
          (* an accepted run followed by a blank stays a Number *)
          IF (symbol.token = Token.Number) & ~IsWhitespace(ch,FALSE) THEN
            IF symbol.level = 1 THEN
              symbol.token := Token.LeftDescription;
            ELSE
              (* ordinary text: keep the consumed characters as part of the literal *)
              AppendCharacters(symbol, '#', symbol.level);
              ReadLiteral(symbol, Token.String);
            END;
          END;
        ELSE
          IF IsLeft() THEN symbol.token := Token.LeftDescription; symbol.level := 1; GetNextCharacter;
          ELSIF IsRight() THEN symbol.token := Token.RightDescription;GetNextCharacter;
          ELSE ReadLiteral(symbol, Token.String);
          END;
        END;
      | '*':
        IF firstInLine THEN
          ReadCharacters(ch, 1, 3, symbol, Token.Bullet);
          (* an accepted run followed by a blank stays a Bullet *)
          IF (symbol.token = Token.Bullet) & ~IsWhitespace(ch,FALSE) THEN
            IF symbol.level = 1 THEN
              symbol.token := Token.LeftBold;
            ELSE
              AppendCharacters(symbol, '*', symbol.level);
              ReadLiteral(symbol, Token.String);
            END;
          END;
        ELSE
          IF IsLeft() THEN symbol.token := Token.LeftBold;GetNextCharacter;
          ELSIF IsRight() THEN symbol.token := Token.RightBold;GetNextCharacter;
          ELSE ReadLiteral(symbol, Token.String);
          END;
        END;
      | '-':
        IF firstInLine THEN ReadCharacters (ch, 4, MAX(LONGINT), symbol, Token.Line)
        ELSE ReadLiteral(symbol, Token.String);
        END;
      | '{': ReadCharacters (ch, 3, 3, symbol, Token.CodeBegin);
      | '}': ReadCharacters (ch, 3, 3, symbol, Token.CodeEnd);
      | '<':
        ReadCharacters (ch, 2, 2, symbol, Token.LabelBegin);
        IF IsWhitespace(ch, TRUE) & (symbol.level = 2) THEN
          AppendCharacters(symbol,'<',2);
          symbol.token := Token.String
        END;
      | '>':
        ReadCharacters (ch, 2, 2, symbol, Token.LabelEnd);
        IF IsWhitespace(prev, FALSE) & (symbol.level = 2) THEN
          AppendCharacters(symbol,'>',2);
          ReadLiteral(symbol, Token.String);
        END;
      | '\':
        ReadCharacters (ch, 2, 2, symbol, Token.LineBreak);
      | '@': ReadCharacters (ch, 1, 10, symbol, Token.Section);
        (* a name directly attached to the section marker is collected in the string of the symbol *)
        IF (symbol.token = Token.Section) & IdentifierStart(ch) THEN
          WHILE ~IsWhitespace(ch,TRUE) & (ch#EOT) DO
            AppendCharacter(symbol,ch);
            GetNextCharacter;
          END;
        END;
      ELSE
        ReadLiteral(symbol,Token.String)
      END;
      prevToken := symbol.token;
      (* a first-in-line string ending with a colon is delivered as Description, without the colon *)
      IF (firstInLine) & (symbol.token = Token.String) & (symbol.stringLength>0) & (symbol.string[symbol.stringLength-1] = ":") THEN
        DEC(symbol.stringLength);
        symbol.string[symbol.stringLength] := 0X;
        symbol.token := Token.Description;
      END;
      IF Trace THEN DumpSymbol(D.Log, symbol); D.Ln END;
    END GetNextSymbol;

  END Scanner;
  (* write "token: " followed by the name of the token of symbol to w.
    Every enumerator of Token has a label and unknown values are caught by ELSE,
    because a CASE statement without matching label and without ELSE aborts the program. *)
  PROCEDURE DumpSymbol(w: Streams.Writer; CONST symbol: Symbol);
  BEGIN
    w.String("token: ");
    CASE symbol.token OF
    Token.EndOfText: w.String("EndOfText");
    |Token.NewLine: w.String("NewLine");
    |Token.Header: w.String("Header");
    |Token.Pipe: w.String("Pipe");
    |Token.LeftItalic: w.String("LeftItalic");
    |Token.RightItalic: w.String("RightItalic");
    |Token.LeftBold: w.String("LeftBold");
    |Token.RightBold: w.String("RightBold");
    |Token.LeftUnderline: w.String("LeftUnderline");
    |Token.RightUnderline: w.String("RightUnderline");
    |Token.LinkBegin: w.String("LinkBegin");
    |Token.LinkEnd: w.String("LinkEnd");
    |Token.Heading: w.String("Heading");
    |Token.Number: w.String("Number");
    |Token.LeftDescription: w.String("LeftDescription");
    |Token.RightDescription: w.String("RightDescription");
    |Token.Description: w.String("Description");
    |Token.Bullet: w.String("Bullet");
    |Token.Line: w.String("Line");
    |Token.CodeBegin: w.String("CodeBegin");
    |Token.CodeEnd: w.String("CodeEnd");
    |Token.LabelBegin: w.String("LabelBegin");
    |Token.LabelEnd: w.String("LabelEnd");
    |Token.LineBreak: w.String("LineBreak");
    |Token.Section: w.String("Section");
    |Token.String: w.String("String");
    ELSE w.String("(unknown)");
    END;
  END DumpSymbol;
  315. END FoxDocumentationScanner.