MODULE XMLScanner;	(** AUTHOR "swalthert"; PURPOSE "XML scanner"; *)

IMPORT
	KernelLog, Streams, Strings, DynamicStrings;

CONST
	(* String pooling settings.
		Presumably these are the element numbers of the SET handed to Scanner.SetStringPooling, one per
		kind of string the scanner can produce - confirm against the code that reads stringPooling. *)
	Str_ElementName* = 1;
	Str_AttributeName* = 2;
	Str_CharRef* = 10;
	Str_EntityRef* = 11;
	Str_EntityValue* = 12;
	Str_AttributeValue* = 13;
	Str_Comment* = 20;
	Str_ProcessingInstruction* = 21;
	Str_CDataSection* = 22;
	Str_SystemLiteral* = 23;
	Str_PublicLiteral* = 24;
	Str_CharData* = 25;
	Str_Other* = 30;

	(** Scanner: Tokens (values of Scanner.sym) *)
	(* NOTE(review): token values 2, 3, 6, 7 and 9 are not declared in this copy, and several delimiter
		comments below were empty quotes. It looks like markup-like text was stripped from the file;
		the affected comments are reworded in plain words and should be confirmed against the upstream source. *)
	Invalid* = -1;	(** no valid token; also set by Scanner.Error *)
	TagElemStartOpen* = 0;	(** '<' *)
	TagElemEndOpen* = 1;	(** presumably the end-tag opener: less-than sign followed by '/' (literal was missing - confirm) *)
	TagEmptyElemClose* = 4;	(** '/>' *)
	TagXMLDeclOpen* = 5;	(** presumably the XML declaration opener: less-than sign followed by '?xml' (literal was missing - confirm) *)
	TagCondSectOpen* = 8;	(** presumably the conditional section opener: less-than sign followed by '![' (literal was missing - confirm) *)
	BracketOpen* = 10;	(** '[' *)
	BracketClose* = 11;	(** ']' *)
	ParenOpen* = 12;	(** '(' *)
	ParenClose* = 13;	(** ')' *)
	Comment* = 14;	(** a comment (delimiter literals were missing from this comment), chars := GetStr() *)
	CDataSect* = 15;	(** a CDATA section (delimiter literals were missing from this comment), chars := GetStr() *)
	CharRef* = 16;	(** '&#' number ';' or '&#x' hexnumber ';', number, hexnumber := GetStr() *)
	EntityRef* = 17;	(** '&' name ';', name := GetStr() *)
	ParamEntityRef* = 18;	(** '%' name ';', name := GetStr() *)
	CharData* = 19;	(** chars := GetStr() *)
	Literal* = 20;	(** '"'chars'"' or "'"chars"'", chars := GetStr() *)
	Name* = 21;	(** Name ::= (Letter | '_' | ':') {NameChar}
							NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
							chars := GetStr() *)
	Nmtoken* = 22;	(** Nmtoken ::= NameChar {NameChar}, chars := GetStr() *)
	PoundName* = 23;	(** '#'name, name := GetStr() *)
	Question* = 24;	(** '?'
*)
	Asterisk* = 25;	(** '*' *)
	Plus* = 26;	(** '+' *)
	Or* = 27;	(** '|' *)
	Comma* = 28;	(** ',' *)
	Percent* = 29;	(** '%' *)
	Equal* = 30;	(** '=' *)
	Eof* = 31;	(** end of input; assigned to sym by Scanner.NextCh once the reader result is no longer Streams.Ok *)

	LF = 0AX;	(* line feed *)
	CR = 0DX;	(* carriage return *)

TYPE
	String = Strings.String;

	(** Character-level scanner that reads from a Streams.Reader, keeps one character of look-ahead
		and tracks the current line and column for error reporting. *)
	Scanner* = OBJECT
		VAR
			sym-: SHORTINT;	(** current token *)
			line-, col-, oldpos, pos: LONGINT;	(* line starts at 1; col is reset to 0 after a line break; pos counts characters fetched from r *)
			reportError*: PROCEDURE {DELEGATE} (pos, line, row: LONGINT; CONST msg: ARRAY OF CHAR);	(** error callback; Error passes col as the row argument *)
			nextCh: CHAR;	(* look-ahead *)
			prevWasCR: BOOLEAN;	(* TRUE iff the character consumed before nextCh was CR; lets NextCh count CR LF as one line break *)
			dynstr: DynamicStrings.DynamicString;	(* buffer for CharData, Literal, Name, CharRef, EntityRef and ParamEntityRef *)
			r : Streams.Reader;
			stringPool : DynamicStrings.Pool;	(* NIL exactly when stringPooling is empty, see SetStringPooling *)
			stringPooling : SET;

		(** Initialize scanner to read from the given ascii file.
			Installs DefaultReportError as error callback, resets the position counters, disables string
			pooling and fetches the first look-ahead character. *)
		PROCEDURE & Init*(r: Streams.Reader);
		BEGIN
			reportError := DefaultReportError;
			SELF.r := r;
			NEW(dynstr);
			line := 1; pos := 0; col := 0;
			prevWasCR := FALSE;
			stringPool := NIL; stringPooling := {};
			NextCh();
		END Init;

		(** Select the kinds of strings that are pooled.
			An empty set releases the pool; a non-empty set allocates a pool unless one already exists. *)
		PROCEDURE SetStringPooling*(stringPooling : SET);
		BEGIN
			SELF.stringPooling := stringPooling;
			IF (stringPooling = {}) THEN
				stringPool := NIL;
			ELSIF (stringPool = NIL) THEN
				NEW(stringPool);
			END;
			(* invariant: a pool exists if and only if pooling is enabled for at least one kind *)
			ASSERT((stringPool = NIL) = (stringPooling = {}));
		END SetStringPooling;

		(* Mark the current token as Invalid and hand the message to the error callback
			together with GetPos(), the current line and the current column. *)
		PROCEDURE Error(CONST msg: ARRAY OF CHAR);
		BEGIN
			sym := Invalid;
			reportError(GetPos(), line, col, msg)
		END Error;

		(* Advance the look-ahead by one character.
			The line/column bookkeeping is driven by the character being left behind (the old nextCh).
			A CR LF pair counts as a single line break: the line is incremented when the CR is left behind,
			and the LF that directly follows it only resets the column. A lone CR or a lone LF still counts
			as one line break each.
			When the reader result is not Streams.Ok, nextCh becomes 0X and sym becomes Eof. *)
		PROCEDURE NextCh;
		BEGIN
			IF nextCh = CR THEN
				INC(line); col := 0;
				prevWasCR := TRUE
			ELSIF nextCh = LF THEN
				(* the preceding CR, if any, has already accounted for this line break *)
				IF ~prevWasCR THEN INC(line) END;
				col := 0;
				prevWasCR := FALSE
			ELSE
				INC(col);
				prevWasCR := FALSE
			END;
			IF r.res # Streams.Ok THEN
				nextCh := 0X; sym := Eof
			ELSE
				nextCh := r.Get(); INC(pos);
			END
		END NextCh;

		(* Collect characters into dynstr until the look-ahead equals ch; ch itself is neither stored nor consumed.
			If the input ends first, sym is set to Invalid. *)
		PROCEDURE ReadTillChar(ch: CHAR);
		BEGIN
			dynstr.Clear;
			WHILE (nextCh # ch) & (sym # Eof) DO
				dynstr.AppendCharacter(nextCh); NextCh();
			END;
			IF sym = Eof THEN sym := Invalid END
		END ReadTillChar;

		(* Consume look-ahead characters for as long as IsWhiteSpace accepts them and the input has not ended. *)
		PROCEDURE SkipWhiteSpaces;
		BEGIN
			WHILE IsWhiteSpace(nextCh) & (sym # Eof) DO
				NextCh()
			END
		END SkipWhiteSpaces;

		(* Scan a PoundName token into dynstr.
			The current look-ahead character is stored unconditionally (presumably the '#' - confirm against
			the caller), followed by every subsequent letter, digit, '.', '-', '_' or ':'.
			Result: sym = PoundName, or Invalid if the end of input was reached while scanning. *)
		PROCEDURE ScanPoundName;
		BEGIN
			dynstr.Clear;
			dynstr.AppendCharacter(nextCh);
			NextCh();
			(* no explicit Eof test needed: at end of input NextCh sets nextCh to 0X, which fails every comparison *)
			WHILE (('a' <= nextCh) & (nextCh <= 'z')) OR (('A' <= nextCh) & (nextCh <= 'Z')) OR
					(('0' <= nextCh) & (nextCh <= '9')) OR (nextCh = '.') OR (nextCh = '-') OR (nextCh = '_') OR
					(nextCh = ':') DO
				dynstr.AppendCharacter(nextCh);
				NextCh();
			END;
			IF sym # Eof THEN sym := PoundName ELSE sym := Invalid END
		END ScanPoundName;

		(* Scan a Name or an Nmtoken into dynstr, after skipping leading white space.
			The first character decides the token kind: a digit, '.' or '-' starts an Nmtoken;
			a letter, '_' or ':' starts a Name; anything else yields Invalid without consuming input.
			Possible results:
				Name
				Nmtoken
				Invalid (also when the end of input is reached while scanning) *)
		PROCEDURE ScanNm;
		BEGIN
			SkipWhiteSpaces();
			IF (('0' <= nextCh) & (nextCh <= '9')) OR (nextCh = '.') OR (nextCh = '-') THEN
				sym := Nmtoken
			ELSIF (('a' <= nextCh) & (nextCh <= 'z')) OR (('A' <= nextCh) & (nextCh <= 'Z')) OR (nextCh = '_') OR (nextCh = ':') THEN
				sym := Name
			ELSE
				sym := Invalid; RETURN
			END;
			dynstr.Clear;
			dynstr.AppendCharacter(nextCh);
			NextCh();
			WHILE ((('a' <= nextCh) & (nextCh <= 'z')) OR (('A' <= nextCh) & (nextCh <= 'Z')) OR
					(('0' <= nextCh) & (nextCh <= '9')) OR (nextCh = '.') OR (nextCh = '-') OR (nextCh = '_') OR
					(nextCh = ':')) & (sym # Eof) DO
				dynstr.AppendCharacter(nextCh);
				NextCh();
			END;
			IF sym = Eof THEN sym := Invalid END
		END ScanNm;

		(* Scan Comment after comment open tag '