ContextualDependency.Mod 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459
  1. MODULE ContextualDependency; (** AUTHOR "gubsermi"; PURPOSE "Resolve context dependency rules"; *)
  2. IMPORT
  3. Files, KernelLog, Configuration, Texts, Strings,
  4. XML, XMLObjects, XMLParser, XMLScanner;
  CONST
    (* Compile-time switch guarding the KernelLog trace output of the context analyzers *)
    RangeDebug = FALSE;

    (* Names of the XML tags used in a language context file *)
    RootTag = "LanguageContext";
    CharacterTag = "Character";
    PropertiesTag = "Properties";
    ContextTag = "Context";
    RangeTag = "Range";
    SizeTag = "Size";
    ResultTag = "result";
    (* tags for the fixed context positions around a character *)
    SecondleftTag = "secondleft";
    LeftTag = "left";
    RightTag = "right";
    SecondrightTag = "secondright";
    (* tags for the generic (whole line) contexts *)
    GenericLeftTag = "genericLeft";
    GenericRightTag = "genericRight";
    (* NOTE: BeforeTag/AfterTag carry the same strings as LeftTag/RightTag *)
    BeforeTag = "left";
    AfterTag = "right";

    (* Names of the XML attributes used in a language context file *)
    LanguageAttribute = "language";
    BaseAttribute = "base";
    CodeAttribute = "code";
    ValueAttribute = "value";
    RangeAttribute = "range";
    SizeAttribute = "size";
    OffsetAttribute = "offset";
    NegativeAttribute = "neg";
    LowAttribute = "low";
    HighAttribute = "high";
  33. TYPE
  (* A Unicode string: an open (dynamically sized) array of Texts.Char32 characters, referenced through a pointer.
     AnalyzeWholeContext and CheckWholeContext receive a complete line in this form. *)
  String = POINTER TO ARRAY OF Texts.Char32;
  (* One node of the range tree.
     Leaf node: target # NIL; all four bounds describe the leaf's own range (both left bounds hold the low end,
     both right bounds hold the high end) and depth is 1.
     Inner node: target = NIL; the bounds summarize the ranges found in the two subtrees. *)
  RangeNode = POINTER TO RECORD
    (* left and right subtree *)
    leftNode, rightNode : RangeNode;
    (* lowest (outer) and highest (inner) position covered by the left subtree, followed by
       lowest (inner) and highest (outer) position covered by the right subtree *)
    leftOuterBound, leftInnerBound, rightInnerBound, rightOuterBound : LONGINT;
    (* depth of the subtree rooted at this node *)
    depth : LONGINT;
    (* context analyzer responsible for this node's range; NIL marks an inner node *)
    target : RangedContextAnalyzer;
  END;
  (* The range tree gives fast access to the context analyzer that is responsible for a character position.
     Every leaf stores one (non-overlapping) range together with its analyzer; every inner node has exactly two
     descendants and remembers which positions its left and its right subtree cover.
     Invariants kept by all procedures below:
     - an inner node's bounds always describe its current subtrees (also after rotations),
     - an inner node's depth is 1 + the larger depth of its two subtrees, a leaf's depth is 1. *)
  RangeTree = OBJECT
  VAR
    (* root of the tree; last is the most recently found leaf and is probed first by Search *)
    root, last : RangeNode;

    (* Creates a leaf for the target's range [rangeLow, rangeHigh] and inserts it into the tree.
       A NIL target is ignored: a leaf without target would be indistinguishable from an inner node.
       A range that overlaps an already stored range is silently dropped. *)
    PROCEDURE AddRange(target : RangedContextAnalyzer);
    VAR
      newNode : RangeNode;
      dummyLeft, dummyRight : LONGINT;
    BEGIN
      IF target = NIL THEN RETURN END;
      NEW(newNode);
      newNode.leftOuterBound := target.rangeLow;
      newNode.leftInnerBound := target.rangeLow;
      newNode.rightInnerBound := target.rangeHigh;
      newNode.rightOuterBound := target.rangeHigh;
      newNode.target := target;
      newNode.depth := 1;
      (* the bounds handed back by the root have no parent that could use them *)
      dummyLeft := -1;
      dummyRight := -1;
      RecursiveAdd(newNode,root,dummyLeft,dummyRight);
    END AddRange;

    (* Descends from thisNode to the place where newNode belongs and links it in.
       On return, left and right hold the lowest and highest position covered by the subtree that is now
       stored in thisNode, so that the caller can refresh its own bounds. *)
    PROCEDURE RecursiveAdd(newNode : RangeNode; VAR thisNode : RangeNode; VAR left, right : LONGINT);
    VAR
      newIntermediateNode : RangeNode;
    BEGIN
      IF thisNode = NIL THEN
        (* empty slot (normally only the root of an empty tree): the new leaf becomes the subtree *)
        thisNode := newNode;
        left := newNode.leftOuterBound;
        right := newNode.rightOuterBound;
      ELSIF thisNode.target = NIL THEN
        (* inner node: descend into the subtree the new range can belong to *)
        IF newNode.rightOuterBound < thisNode.rightInnerBound THEN
          (* the new range ends before the right subtree starts *)
          RecursiveAdd(newNode,thisNode.leftNode,left,right);
          thisNode.leftOuterBound := left;
          thisNode.leftInnerBound := right;
          right := thisNode.rightOuterBound;
        ELSIF newNode.leftOuterBound > thisNode.leftInnerBound THEN
          (* the new range starts after the left subtree ends *)
          RecursiveAdd(newNode,thisNode.rightNode,left,right);
          thisNode.rightOuterBound := right;
          thisNode.rightInnerBound := left;
          left := thisNode.leftOuterBound;
        ELSE
          (* the new range reaches into both subtrees, i.e. it overlaps stored ranges: it is dropped.
             The unchanged bounds must still be reported, otherwise the caller would store the
             dummy values it passed in. *)
          left := thisNode.leftOuterBound;
          right := thisNode.rightOuterBound;
        END;
        (* the subtree may have grown by one level *)
        thisNode.depth := MAX(GetNodeDepth(thisNode.leftNode),GetNodeDepth(thisNode.rightNode)) + 1;
      ELSIF newNode.rightOuterBound < thisNode.leftOuterBound THEN
        (* leaf found and the new range lies completely below it: a new inner node takes the leaf's place,
           with the new leaf as left and the old leaf as right descendant *)
        NEW(newIntermediateNode);
        newIntermediateNode.leftNode := newNode;
        newIntermediateNode.rightNode := thisNode;
        newIntermediateNode.leftOuterBound := newNode.leftOuterBound;
        newIntermediateNode.leftInnerBound := newNode.rightOuterBound;
        newIntermediateNode.rightInnerBound := thisNode.leftOuterBound;
        newIntermediateNode.rightOuterBound := thisNode.rightOuterBound;
        newIntermediateNode.depth := thisNode.depth + 1;
        thisNode := newIntermediateNode;
        left := newIntermediateNode.leftOuterBound;
        right := newIntermediateNode.rightOuterBound;
      ELSIF newNode.leftOuterBound > thisNode.rightOuterBound THEN
        (* leaf found and the new range lies completely above it: the old leaf becomes the left and the
           new leaf the right descendant of a new inner node *)
        NEW(newIntermediateNode);
        newIntermediateNode.leftNode := thisNode;
        newIntermediateNode.rightNode := newNode;
        newIntermediateNode.leftOuterBound := thisNode.leftOuterBound;
        newIntermediateNode.leftInnerBound := thisNode.rightOuterBound;
        newIntermediateNode.rightInnerBound := newNode.leftOuterBound;
        newIntermediateNode.rightOuterBound := newNode.rightOuterBound;
        newIntermediateNode.depth := thisNode.depth + 1;
        thisNode := newIntermediateNode;
        left := newIntermediateNode.leftOuterBound;
        right := newIntermediateNode.rightOuterBound;
      ELSE
        (* overlapping ranges are not allowed: the new range is dropped, but the leaf's bounds are
           reported so that the caller's bounds stay intact *)
        left := thisNode.leftOuterBound;
        right := thisNode.rightOuterBound;
      END;
    END RecursiveAdd;

    (* Returns the context analyzer whose range includes this position, or NIL if there is none *)
    PROCEDURE Search(position : LONGINT) : RangedContextAnalyzer;
    BEGIN
      (* consecutive lookups often hit the same range, so the last found leaf is probed first *)
      IF (last # NIL) & (position >= last.leftOuterBound) & (position <= last.rightOuterBound) THEN
        RETURN last.target;
      END;
      RETURN RecursiveSearch(position,root);
    END Search;

    (* Searches the subtree rooted at thisNode for a leaf whose range includes this position.
       A successful search remembers the leaf in last. Returns NIL if no stored range fits. *)
    PROCEDURE RecursiveSearch(position : LONGINT; thisNode : RangeNode) : RangedContextAnalyzer;
    BEGIN
      (* walk down the inner nodes, always following the subtree whose bounds enclose the position *)
      WHILE (thisNode # NIL) & (thisNode.target = NIL) DO
        IF (position >= thisNode.leftOuterBound) & (position <= thisNode.leftInnerBound) THEN
          thisNode := thisNode.leftNode;
        ELSIF (position >= thisNode.rightInnerBound) & (position <= thisNode.rightOuterBound) THEN
          thisNode := thisNode.rightNode;
        ELSE
          (* the position lies outside of both subtrees *)
          RETURN NIL;
        END;
      END;
      (* the bounds of an inner node may enclose gaps between ranges, so the leaf is checked once more *)
      IF (thisNode # NIL) & (position >= thisNode.leftOuterBound) & (position <= thisNode.rightOuterBound) THEN
        last := thisNode;
        RETURN thisNode.target;
      END;
      RETURN NIL;
    END RecursiveSearch;

    (* Rebalances the tree with one bottom-up pass of AVL-style rotations. Meant to be called after a
       series of AddRange calls. *)
    PROCEDURE CompleteBalancing;
    VAR
      dummyInt : LONGINT;
    BEGIN
      dummyInt := RecursiveBalancing(root);
    END CompleteBalancing;

    (* Balances the subtree rooted at thisNode and returns its depth after balancing.
       The depth of every visited node is stored in the node, so that the rotation decisions below
       never look at outdated values. *)
    PROCEDURE RecursiveBalancing(VAR thisNode : RangeNode) : LONGINT;
    VAR
      leftDepth, rightDepth : LONGINT;
    BEGIN
      IF thisNode = NIL THEN
        (* empty subtree *)
        RETURN 0;
      ELSIF thisNode.target # NIL THEN
        (* a leaf never needs a rotation and has a constant depth *)
        RETURN 1;
      END;
      (* inner node: the descendants are balanced first *)
      leftDepth := RecursiveBalancing(thisNode.leftNode);
      rightDepth := RecursiveBalancing(thisNode.rightNode);
      thisNode.depth := MAX(leftDepth,rightDepth) + 1;
      IF leftDepth > rightDepth + 1 THEN
        (* left-heavy; if the surplus sits in the left descendant's right subtree, a double rotation is needed *)
        IF GetNodeDepth(thisNode.leftNode.leftNode) < GetNodeDepth(thisNode.leftNode.rightNode) THEN
          RotateLeft(thisNode.leftNode);
        END;
        RotateRight(thisNode);
      ELSIF leftDepth + 1 < rightDepth THEN
        (* right-heavy; mirror image of the case above *)
        IF GetNodeDepth(thisNode.rightNode.leftNode) > GetNodeDepth(thisNode.rightNode.rightNode) THEN
          RotateRight(thisNode.rightNode);
        END;
        RotateLeft(thisNode);
      END;
      (* the rotations keep the depth fields up to date *)
      RETURN thisNode.depth;
    END RecursiveBalancing;

    (* Rotates the subtree rooted at thisNode to the right: its left descendant becomes the new root of
       the subtree. Bounds and depths of the two nodes that change their descendants are recomputed.
       Nothing happens if the left descendant is missing or a leaf. *)
    PROCEDURE RotateRight(VAR thisNode : RangeNode);
    VAR
      oldRoot, pivot : RangeNode;
    BEGIN
      IF thisNode = NIL THEN RETURN END;
      oldRoot := thisNode;
      pivot := oldRoot.leftNode;
      IF (pivot = NIL) OR (pivot.target # NIL) THEN RETURN END;
      (* the pivot's right subtree moves over and becomes the old root's left subtree *)
      oldRoot.leftNode := pivot.rightNode;
      oldRoot.leftOuterBound := pivot.rightInnerBound;
      oldRoot.leftInnerBound := pivot.rightOuterBound;
      (* the old root becomes the pivot's right subtree, which therefore covers the old root's whole range *)
      pivot.rightNode := oldRoot;
      pivot.rightInnerBound := oldRoot.leftOuterBound;
      pivot.rightOuterBound := oldRoot.rightOuterBound;
      oldRoot.depth := MAX(GetNodeDepth(oldRoot.leftNode),GetNodeDepth(oldRoot.rightNode)) + 1;
      pivot.depth := MAX(GetNodeDepth(pivot.leftNode),oldRoot.depth) + 1;
      thisNode := pivot;
    END RotateRight;

    (* Rotates the subtree rooted at thisNode to the left: its right descendant becomes the new root of
       the subtree. Bounds and depths of the two nodes that change their descendants are recomputed.
       Nothing happens if the right descendant is missing or a leaf. *)
    PROCEDURE RotateLeft(VAR thisNode : RangeNode);
    VAR
      oldRoot, pivot : RangeNode;
    BEGIN
      IF thisNode = NIL THEN RETURN END;
      oldRoot := thisNode;
      pivot := oldRoot.rightNode;
      IF (pivot = NIL) OR (pivot.target # NIL) THEN RETURN END;
      (* the pivot's left subtree moves over and becomes the old root's right subtree *)
      oldRoot.rightNode := pivot.leftNode;
      oldRoot.rightInnerBound := pivot.leftOuterBound;
      oldRoot.rightOuterBound := pivot.leftInnerBound;
      (* the old root becomes the pivot's left subtree, which therefore covers the old root's whole range *)
      pivot.leftNode := oldRoot;
      pivot.leftOuterBound := oldRoot.leftOuterBound;
      pivot.leftInnerBound := oldRoot.rightOuterBound;
      oldRoot.depth := MAX(GetNodeDepth(oldRoot.leftNode),GetNodeDepth(oldRoot.rightNode)) + 1;
      pivot.depth := MAX(oldRoot.depth,GetNodeDepth(pivot.rightNode)) + 1;
      thisNode := pivot;
    END RotateLeft;

    (* Returns the depth stored in the node; a missing node has depth 0 *)
    PROCEDURE GetNodeDepth(thisNode : RangeNode) : LONGINT;
    BEGIN
      IF thisNode = NIL THEN
        RETURN 0;
      ELSE
        RETURN thisNode.depth;
      END;
    END GetNodeDepth;

  END RangeTree;
  (* A range of characters, given by a lower and an upper bound, plus a validity flag. The flag tells
     whether a character must (TRUE) or must not (FALSE) lie inside the range. *)
  Range = OBJECT
  VAR
    lowerBound, upperBound : LONGINT;
    valid : BOOLEAN;

    (* A fresh range is flagged valid and has both bounds set to -1 *)
    PROCEDURE &Init*;
    BEGIN
      valid := TRUE;
      upperBound := -1;
      lowerBound := -1;
    END Init;

  END Range;
  (* An open array of ranges *)
  RangeArray = POINTER TO ARRAY OF Range;

  (* The ranges that restrict one context position. RangeOK accepts a character if it lies in one of the
     posRanges and in none of the negRanges. *)
  Ranges = POINTER TO RECORD
    posRanges, negRanges : RangeArray;
  END;
  (* One context rule. It holds the restrictions for every kind of context; which of them are looked at
     depends on the mode of the Ranged Context Analyzer that applies the rule. *)
  GenericContext = POINTER TO RECORD
    (* restrictions for the characters one and two positions before the affected character *)
    lastRanges : Ranges;
    secondlastRanges : Ranges;
    (* restrictions for the characters one and two positions after the affected character *)
    nextRanges : Ranges;
    secondnextRanges : Ranges;
    (* whole-line restrictions: entry 0 belongs to the nearest neighbour, entry 1 to the one behind it, ... *)
    wholeLastRanges : POINTER TO ARRAY OF Ranges;
    wholeNextRanges : POINTER TO ARRAY OF Ranges;
    (* the character that replaces the affected character if the rule applies *)
    resultingChar : Texts.Char32;
  END;
  (* One entry of the Context Cache: a context rule (value) stored for a character (key). Entries that
     fall into the same bucket are chained through next. *)
  ContextCacheElement = OBJECT
  VAR
    key : Texts.Char32;
    value : GenericContext;
    next : ContextCacheElement;

    (* Creates an unchained entry for the given key/value pair *)
    PROCEDURE &Init*(key : Texts.Char32; value : GenericContext);
    BEGIN
      next := NIL;
      SELF.value := value;
      SELF.key := key;
    END Init;

  END ContextCacheElement;
  (* A hashmap that stores the context rules of characters, so that a rule has to be read from the XML
     file only once. A character may own several rules; they are stored as separate entries with the same
     key and are handed out one by one by consecutive Lookup calls. *)
  ContextCache = OBJECT
  VAR
    (* the buckets; every bucket is a linked list of entries *)
    internalCache: POINTER TO ARRAY OF ContextCacheElement;
    (* number of buckets, always >= 1 *)
    cacheSize : LONGINT;
    (* where a continued Lookup resumes; NIL if there is nothing to resume *)
    nextElement : ContextCacheElement;

    (* Creates the internal storage with hashMapSize buckets.
       Sizes below 1 are raised to 1: Lookup and Insert compute 'char MOD cacheSize', which must never
       divide by zero, and an array of negative length cannot be allocated. *)
    PROCEDURE &Init*(hashMapSize : LONGINT);
    BEGIN
      IF hashMapSize < 1 THEN
        cacheSize := 1;
      ELSE
        cacheSize := hashMapSize;
      END;
      nextElement := NIL;
      NEW(internalCache,cacheSize);
    END Init;

    (* Returns the next rule stored for char, or NIL if there is none (left).
       done is an in/out parameter:
       - in: TRUE starts a new search; FALSE continues the previous search behind the entry found last,
       - out: FALSE if the bucket holds further entries behind the returned one (which need not belong
         to char), TRUE if the search is exhausted. *)
    PROCEDURE Lookup(char : Texts.Char32; VAR done : BOOLEAN) : GenericContext;
    VAR
      bucket : LONGINT;
      currentElement : ContextCacheElement;
    BEGIN
      IF ~done & (nextElement # NIL) THEN
        (* continue the last search *)
        currentElement := nextElement;
      ELSE
        (* start at the head of the bucket the character hashes to *)
        bucket := char MOD cacheSize;
        currentElement := internalCache[bucket];
      END;
      (* walk along the linked list until an entry for the character shows up *)
      WHILE currentElement # NIL DO
        IF currentElement.key = char THEN
          nextElement := currentElement.next;
          done := nextElement = NIL;
          RETURN currentElement.value;
        END;
        currentElement := currentElement.next;
      END;
      done := TRUE;
      nextElement := NIL;
      RETURN NIL;
    END Lookup;

    (* Stores a further context rule for the given character. Existing rules of the character are kept. *)
    PROCEDURE Insert(char : Texts.Char32; value : GenericContext);
    VAR
      newElement : ContextCacheElement;
      bucket : LONGINT;
    BEGIN
      NEW(newElement,char,value);
      (* the new entry becomes the head of its bucket's list *)
      bucket := char MOD cacheSize;
      newElement.next := internalCache[bucket];
      internalCache[bucket] := newElement;
    END Insert;

  END ContextCache;
  356. (* A ranged context analyzer applies previously defined rules on a certain set of characters. The rules are defined in an XML-file and
  357. are loaded lazily into the object. *)
  358. RangedContextAnalyzer = OBJECT
  359. VAR
  360. language : XML.String;
  361. rangeLow, rangeHigh : LONGINT;
  362. closeContext, wideContext, wholeContext : BOOLEAN;
  363. contextCache : ContextCache;
  364. contextFile : XML.Document;
  (* Initializes the context analyzer for the inclusive character range [rangeLow, rangeHigh].
     context : the parsed XML document holding the rules (may be NIL)
     language : the language the rules must be declared for
     mode : "close", "wide" or "whole"; any other value - or NIL - turns the analyzer off *)
  PROCEDURE &Init*(context : XML.Document; rangeLow, rangeHigh : LONGINT; language, mode : Strings.String);
  VAR
    cacheSize : LONGINT;
  BEGIN
    (* one bucket per character of the range. The range is inclusive, so a range of a single character
       needs one bucket, and the cache must never be created with less than one bucket because it
       hashes with 'char MOD size'. *)
    cacheSize := rangeHigh - rangeLow + 1;
    IF cacheSize < 1 THEN cacheSize := 1 END;
    NEW(contextCache,cacheSize);
    contextFile := context;
    SELF.rangeLow := rangeLow;
    SELF.rangeHigh := rangeHigh;
    SELF.language := language;
    IF mode = NIL THEN
      (* no mode given: nothing to dereference, the analyzer stays switched off *)
      SetNoContext;
    ELSIF mode^ = "close" THEN
      SetCloseContext;
    ELSIF mode^ = "wide" THEN
      SetWideContext;
    ELSIF mode^ = "whole" THEN
      SetWholeContext;
    ELSE
      SetNoContext;
    END;
  END Init;
  (* Switches to 'close' mode: only the direct neighbourhood (one character in each direction) of the
     affected character is analyzed. The other two mode flags are cleared. *)
  PROCEDURE SetCloseContext;
  BEGIN
    wholeContext := FALSE;
    wideContext := FALSE;
    closeContext := TRUE;
  END SetCloseContext;
  (* Switches to 'wide' mode: two characters in each direction of the affected character are analyzed.
     Only wideContext is set; closeContext is cleared here, so whether 'Close Context' rules are still
     considered in this mode depends on the code that evaluates the flags. *)
  PROCEDURE SetWideContext;
  BEGIN
    wholeContext := FALSE;
    closeContext := FALSE;
    wideContext := TRUE;
  END SetWideContext;
  (* Switches to 'whole' mode: the whole line around the affected character is analyzed. The flags of
     the 'Close Context' and 'Wide Context' modes are cleared. *)
  PROCEDURE SetWholeContext;
  BEGIN
    wideContext := FALSE;
    closeContext := FALSE;
    wholeContext := TRUE;
  END SetWholeContext;
  (* Switches the context analyzer off by clearing all three mode flags *)
  PROCEDURE SetNoContext;
  BEGIN
    wholeContext := FALSE;
    wideContext := FALSE;
    closeContext := FALSE;
  END SetNoContext;
  (* Analyzes the close context of a character: thisChar is the affected character, lastChar and nextChar
     are its direct neighbours. Returns the resulting character of the first cached rule whose
     restrictions are met, or thisChar itself if no rule applies.
     If the cache holds no rule for thisChar, the rules are loaded from the XML file once and the cache
     is searched again. The reload is attempted a single time per call, so the procedure also terminates
     if loading does not add any rule for thisChar. *)
  PROCEDURE AnalyzeCloseContext(thisChar, lastChar, nextChar : Texts.Char32) : Texts.Char32;
  VAR
    done,contextFound,validContextFound,xmlLoaded : BOOLEAN;
    thisContext : GenericContext;
  BEGIN
    xmlLoaded := FALSE;
    LOOP
      (* done = TRUE makes the first Lookup start a new search *)
      done := TRUE;
      contextFound := FALSE;
      validContextFound := FALSE;
      (* run through all cached rules of this character until one applies *)
      REPEAT
        IF RangeDebug THEN
          KernelLog.String("looking for "); KernelLog.Hex(thisChar,4); KernelLog.Ln;
        END;
        thisContext := contextCache.Lookup(thisChar,done);
        IF thisContext # NIL THEN
          IF RangeDebug THEN
            KernelLog.String("found a context..."); KernelLog.Ln;
          END;
          (* a rule exists for this character; check whether its restrictions are met *)
          IF CheckCloseContext(lastChar,nextChar,thisContext) THEN
            IF RangeDebug THEN
              KernelLog.String("... which is valid."); KernelLog.Ln;
            END;
            (* stop searching, thisContext is the rule to apply *)
            done := TRUE;
            validContextFound := TRUE;
          ELSE
            IF RangeDebug THEN
              KernelLog.String("... which is invalid."); KernelLog.Ln;
            END;
          END;
          contextFound := TRUE;
        END;
      UNTIL done;
      IF contextFound OR xmlLoaded THEN EXIT END;
      (* nothing cached for this character yet: load its rules from the XML file and search once more *)
      IF RangeDebug THEN
        KernelLog.String("No contexts found. Getting contexts from XML file"); KernelLog.Ln;
      END;
      GetContextsFromXML(thisChar);
      xmlLoaded := TRUE;
    END;
    IF (thisContext = NIL) OR ~validContextFound THEN
      (* no rule applies: the character stays as it is *)
      IF RangeDebug THEN
        KernelLog.String("No change done.."); KernelLog.Ln;
      END;
      RETURN thisChar;
    ELSE
      (* a rule applies: its result replaces the character *)
      IF RangeDebug THEN
        KernelLog.String("replacing "); KernelLog.Hex(thisChar,4);
        KernelLog.String(" with "); KernelLog.Hex(thisContext.resultingChar,4);
        KernelLog.Ln;
      END;
      RETURN thisContext.resultingChar;
    END;
  END AnalyzeCloseContext;
  (* Analyzes the wide context of a character: thisChar is the affected character, the other parameters
     are the two characters before and the two characters after it. Returns the resulting character of
     the first cached rule whose restrictions are met, or thisChar itself if no rule applies.
     If the cache holds no rule for thisChar, the rules are loaded from the XML file once and the cache
     is searched again. The reload is attempted a single time per call, so the procedure also terminates
     if loading does not add any rule for thisChar. *)
  PROCEDURE AnalyzeWideContext(thisChar, secondlastChar, lastChar, nextChar, secondnextChar : Texts.Char32) : Texts.Char32;
  VAR
    done,contextFound,validContextFound,xmlLoaded : BOOLEAN;
    thisContext : GenericContext;
  BEGIN
    xmlLoaded := FALSE;
    LOOP
      (* done = TRUE makes the first Lookup start a new search *)
      done := TRUE;
      contextFound := FALSE;
      validContextFound := FALSE;
      (* run through all cached rules of this character until one applies *)
      REPEAT
        thisContext := contextCache.Lookup(thisChar,done);
        IF thisContext # NIL THEN
          IF CheckWideContext(secondlastChar,lastChar,nextChar,secondnextChar,thisContext) THEN
            (* stop searching, thisContext is the rule to apply *)
            done := TRUE;
            validContextFound := TRUE;
          END;
          contextFound := TRUE;
        END;
      UNTIL done;
      IF contextFound OR xmlLoaded THEN EXIT END;
      (* nothing cached for this character yet: load its rules from the XML file and search once more *)
      GetContextsFromXML(thisChar);
      xmlLoaded := TRUE;
    END;
    IF (thisContext = NIL) OR ~validContextFound THEN
      (* no rule applies: the character stays as it is *)
      RETURN thisChar;
    ELSE
      (* a rule applies: its result replaces the character *)
      RETURN thisContext.resultingChar;
    END;
  END AnalyzeWideContext;
  (* Analyzes the whole context (the complete line) of the character at position thisPos of line.
     Returns the resulting character of the first cached rule whose restrictions are met, or the
     unchanged character if no rule applies. The NULL character is returned if there is no line or if
     thisPos lies outside of it.
     If the cache holds no rule for the character, the rules are loaded from the XML file once and the
     cache is searched again. The reload is attempted a single time per call, so the procedure also
     terminates if loading does not add any rule for the character. *)
  PROCEDURE AnalyzeWholeContext(thisPos : LONGINT; line : String) : Texts.Char32;
  VAR
    thisChar : Texts.Char32;
    done,contextFound,validContextFound,xmlLoaded : BOOLEAN;
    thisContext : GenericContext;
  BEGIN
    (* without a line, or with a position outside of it, there is no character to analyze *)
    IF (line = NIL) OR (thisPos < 0) OR (thisPos > LEN(line) - 1) THEN
      RETURN 0H;
    END;
    thisChar := line[thisPos];
    xmlLoaded := FALSE;
    LOOP
      (* done = TRUE makes the first Lookup start a new search *)
      done := TRUE;
      contextFound := FALSE;
      validContextFound := FALSE;
      (* run through all cached rules of this character until one applies *)
      REPEAT
        thisContext := contextCache.Lookup(thisChar,done);
        IF thisContext # NIL THEN
          IF CheckWholeContext(thisPos,line,thisContext) THEN
            (* stop searching, thisContext is the rule to apply *)
            done := TRUE;
            validContextFound := TRUE;
          END;
          contextFound := TRUE;
        END;
      UNTIL done;
      IF contextFound OR xmlLoaded THEN EXIT END;
      (* nothing cached for this character yet: load its rules from the XML file and search once more *)
      GetContextsFromXML(thisChar);
      xmlLoaded := TRUE;
    END;
    IF (thisContext = NIL) OR ~validContextFound THEN
      (* no rule applies: the character stays as it is *)
      RETURN thisChar;
    ELSE
      (* a rule applies: its result replaces the character *)
      RETURN thisContext.resultingChar;
    END;
  END AnalyzeWholeContext;
  (* Tells whether the close context of a rule is met: lastChar must pass the rule's lastRanges and
     nextChar must pass its nextRanges. A missing rule (NIL) is defined to be met. *)
  PROCEDURE CheckCloseContext(lastChar, nextChar : Texts.Char32; context : GenericContext) : BOOLEAN;
  BEGIN
    IF context # NIL THEN
      (* the following character only matters if the preceding one passed *)
      IF ~RangeOK(lastChar,context.lastRanges) THEN
        RETURN FALSE;
      END;
      RETURN RangeOK(nextChar,context.nextRanges);
    END;
    RETURN TRUE;
  END CheckCloseContext;
  (* Tells whether the wide context of a rule is met: the second last, the last, the next and the second
     next character must each pass the ranges the rule defines for that position. The positions are
     checked in this order and the check stops at the first failure. A missing rule (NIL) is defined to
     be met. *)
  PROCEDURE CheckWideContext(secondlastChar, lastChar, nextChar, secondnextChar : Texts.Char32; context : GenericContext) : BOOLEAN;
  BEGIN
    IF context = NIL THEN
      RETURN TRUE;
    ELSIF ~RangeOK(secondlastChar,context.secondlastRanges) THEN
      RETURN FALSE;
    ELSIF ~RangeOK(lastChar,context.lastRanges) THEN
      RETURN FALSE;
    ELSIF ~RangeOK(nextChar,context.nextRanges) THEN
      RETURN FALSE;
    ELSE
      RETURN RangeOK(secondnextChar,context.secondnextRanges);
    END;
  END CheckWideContext;
  (* Tells whether the whole-line context of a rule is met for the character at thisPos of line.
     Entry j of wholeLastRanges restricts the character j+1 positions before thisPos, entry j of
     wholeNextRanges the character j+1 positions after it. Positions beyond the ends of the line and
     positions without an entry are not restricted.
     A missing rule (NIL) is defined to be met; a rule that defines neither preceding nor succeeding
     ranges is defined to fail. *)
  PROCEDURE CheckWholeContext(thisPos : LONGINT; line : String; context : GenericContext) : BOOLEAN;
  VAR
    linePos, rule : LONGINT;
  BEGIN
    IF context = NIL THEN
      RETURN TRUE;
    ELSIF (context.wholeLastRanges = NIL) & (context.wholeNextRanges = NIL) THEN
      RETURN FALSE;
    END;
    (* walk backwards from the character in front of thisPos, one rule entry per character *)
    IF context.wholeLastRanges # NIL THEN
      rule := 0;
      linePos := thisPos - 1;
      WHILE (linePos >= 0) & (rule < LEN(context.wholeLastRanges)) DO
        IF ~RangeOK(line[linePos],context.wholeLastRanges[rule]) THEN
          RETURN FALSE;
        END;
        INC(rule);
        DEC(linePos);
      END;
    END;
    (* walk forwards from the character behind thisPos, one rule entry per character *)
    IF context.wholeNextRanges # NIL THEN
      rule := 0;
      linePos := thisPos + 1;
      WHILE (linePos < LEN(line)) & (rule < LEN(context.wholeNextRanges)) DO
        IF ~RangeOK(line[linePos],context.wholeNextRanges[rule]) THEN
          RETURN FALSE;
        END;
        INC(rule);
        INC(linePos);
      END;
    END;
    RETURN TRUE;
  END CheckWholeContext;
  (* Tells whether thisChar satisfies the passed ranges:
     - it must lie in at least one of the positive ranges, if any are defined, and
     - it must not lie in any of the negative ranges.
     Missing restrictions never reject a character: ranges = NIL, a missing or empty list of positive
     ranges and a missing list of negative ranges all count as 'no restriction'. Unset (NIL) entries of
     the lists are skipped. *)
  PROCEDURE RangeOK(thisChar : Texts.Char32; ranges : Ranges) : BOOLEAN;
  VAR
    i : LONGINT;
    rangeOK : BOOLEAN;
  BEGIN
    IF ranges = NIL THEN RETURN TRUE END;
    (* positive ranges: a missing list is treated exactly like an empty one *)
    IF (ranges.posRanges # NIL) & (LEN(ranges.posRanges) > 0) THEN
      rangeOK := FALSE;
      i := 0;
      WHILE ~rangeOK & (i < LEN(ranges.posRanges)) DO
        IF ranges.posRanges[i] # NIL THEN
          rangeOK := (ranges.posRanges[i].lowerBound <= thisChar) & (thisChar <= ranges.posRanges[i].upperBound);
        END;
        INC(i);
      END;
      IF ~rangeOK THEN
        (* positive ranges are defined, but none of them contains the character *)
        RETURN FALSE;
      END;
    END;
    (* negative ranges: the first one that contains the character rejects it *)
    IF ranges.negRanges # NIL THEN
      i := 0;
      WHILE i < LEN(ranges.negRanges) DO
        IF (ranges.negRanges[i] # NIL) &
          (ranges.negRanges[i].lowerBound <= thisChar) & (thisChar <= ranges.negRanges[i].upperBound) THEN
          RETURN FALSE;
        END;
        INC(i);
      END;
    END;
    RETURN TRUE;
  END RangeOK;
  (* Parses the XML rule document of this Ranged Context Analyzer and puts every context rule defined
     for thisChar into the context cache (lazy loading: rules of other characters are not parsed).
     Only range tags that match the analyzer's mode (closeContext / wideContext / wholeContext) are
     considered; a context contributes to the cache only if at least one such tag was found in it.
     If the character has no usable context at all, a reflexive context is cached instead.
     Nothing is cached when contextFile is NIL or its root tag / language attribute do not match.
     The document is expected to carry the attributes dereferenced below (language, base, code, size,
     offset); a missing one results in a NIL dereference. *)
  PROCEDURE GetContextsFromXML(thisChar : Texts.Char32);
  VAR
    newContext : GenericContext;
    beforeRanges, afterRanges : Ranges;
    root : XML.Element;
    tagName, languageAttribute, baseAttribute, rangeAttribute, offsetAttribute, resultAttribute, sizeAttribute : XML.String;
    charElements, contextElements, rangeElements, genericBeforeElements, genericAfterElements : XMLObjects.Enumerator;
    charElement, contextElement, rangeElement, genericBeforeElement, genericAfterElement : ANY;
    contextFound, validContext, charFound : BOOLEAN;
    charString : XML.String;
    charCode : Texts.Char32;
    res : WORD;
    base16, neg : BOOLEAN;
    genericRangeSize, offset : LONGINT;

    (* TRUE iff the element carries the negative attribute with the value "!" *)
    PROCEDURE IsNegated(element : XML.Element) : BOOLEAN;
    VAR negAttribute : XML.String;
    BEGIN
      negAttribute := element.GetAttributeValue(NegativeAttribute);
      RETURN (negAttribute # NIL) & (negAttribute^ = "!");
    END IsNegated;

  BEGIN
    res := 0;
    (* contextFound records whether ANY context of this character was cached. It must not be reset
       per context, otherwise a trailing context without applicable rules would trigger the
       reflexive fallback although real contexts exist. *)
    contextFound := FALSE;
    charFound := FALSE;
    validContext := FALSE;
    IF contextFile # NIL THEN
      root := contextFile.GetRoot();
      tagName := root.GetName();
      languageAttribute := root.GetAttributeValue(LanguageAttribute);
      (* Check if the language attribute is correct *)
      IF (tagName^ = RootTag) & (languageAttribute^ = language^) THEN
        (* Check if character codes are stored in hexadecimal format *)
        baseAttribute := root.GetAttributeValue(BaseAttribute);
        base16 := baseAttribute^ = "Hex";
        charElements := root.GetContents();
        (* iterate through the character tags to find rules for the given character *)
        WHILE ~charFound & charElements.HasMoreElements() DO
          charElement := charElements.GetNext();
          WITH charElement : XML.Element DO
            tagName := charElement.GetName();
            IF tagName^ = CharacterTag THEN
              (* get the target character *)
              charString := charElement.GetAttributeValue(CodeAttribute);
              IF base16 THEN
                Strings.HexStrToInt(charString^,charCode,res);
              ELSE
                Strings.StrToInt(charString^,charCode);
              END;
              (* load only the context rules for the currently considered character (lazy loading) *)
              IF (res >= 0) & (charCode = thisChar) THEN
                charFound := TRUE;
                IF RangeDebug THEN
                  KernelLog.String("range for "); KernelLog.Hex(charCode,4); KernelLog.Ln;
                END;
                contextElements := charElement.GetContents();
                (* iterate through all context rules *)
                WHILE contextElements.HasMoreElements() DO
                  contextElement := contextElements.GetNext();
                  WITH contextElement : XML.Element DO
                    tagName := contextElement.GetName();
                    IF tagName^ = ContextTag THEN
                      NEW(newContext);
                      validContext := FALSE;
                      rangeElements := contextElement.GetContents();
                      IF RangeDebug THEN
                        KernelLog.String("Insert ranges: "); KernelLog.Ln;
                      END;
                      (* iterate through all ranges of the context. Neglect the rules that do not apply
                         to the current analyzer's mode *)
                      WHILE rangeElements.HasMoreElements() DO
                        rangeElement := rangeElements.GetNext();
                        WITH rangeElement : XML.Element DO
                          tagName := rangeElement.GetName();
                          (* parse the ranges for the second last character *)
                          IF (tagName^ = SecondleftTag) & wideContext THEN
                            rangeAttribute := rangeElement.GetAttributeValue(RangeAttribute);
                            ParseRangeString(rangeAttribute,base16,IsNegated(rangeElement),newContext.secondlastRanges);
                            validContext := TRUE;
                          (* parse the ranges for the last character *)
                          ELSIF (tagName^ = LeftTag) & (wideContext OR closeContext) THEN
                            IF RangeDebug THEN KernelLog.String("last: ") END;
                            rangeAttribute := rangeElement.GetAttributeValue(RangeAttribute);
                            ParseRangeString(rangeAttribute,base16,IsNegated(rangeElement),newContext.lastRanges);
                            validContext := TRUE;
                          (* parse the ranges for the next character *)
                          ELSIF (tagName^ = RightTag) & (closeContext OR wideContext) THEN
                            IF RangeDebug THEN KernelLog.String("next: ") END;
                            rangeAttribute := rangeElement.GetAttributeValue(RangeAttribute);
                            ParseRangeString(rangeAttribute,base16,IsNegated(rangeElement),newContext.nextRanges);
                            validContext := TRUE;
                          (* parse the ranges for the second next character *)
                          ELSIF (tagName^ = SecondrightTag) & wideContext THEN
                            rangeAttribute := rangeElement.GetAttributeValue(RangeAttribute);
                            ParseRangeString(rangeAttribute,base16,IsNegated(rangeElement),newContext.secondnextRanges);
                            validContext := TRUE;
                          (* get the ranges for all preceding characters *)
                          ELSIF (tagName^ = GenericLeftTag) & wholeContext THEN
                            (* get the distance of the farthest context rule *)
                            sizeAttribute := rangeElement.GetAttributeValue(SizeAttribute);
                            Strings.StrToInt(sizeAttribute^,genericRangeSize);
                            NEW(newContext.wholeLastRanges,genericRangeSize);
                            (* NOTE(review): the negation flag is taken from the enclosing generic tag, not from
                               the individual before-tag - confirm against the rule file format *)
                            neg := IsNegated(rangeElement);
                            genericBeforeElements := rangeElement.GetContents();
                            (* parse through all ranges *)
                            WHILE genericBeforeElements.HasMoreElements() DO
                              genericBeforeElement := genericBeforeElements.GetNext();
                              WITH genericBeforeElement : XML.Element DO
                                tagName := genericBeforeElement.GetName();
                                IF tagName^ = BeforeTag THEN
                                  rangeAttribute := genericBeforeElement.GetAttributeValue(RangeAttribute);
                                  offsetAttribute := genericBeforeElement.GetAttributeValue(OffsetAttribute);
                                  Strings.StrToInt(offsetAttribute^,offset);
                                  IF offset < 1 THEN offset := 1; END;
                                  (* an offset beyond the declared size cannot be stored; skip it instead of
                                     trapping on the array index *)
                                  IF offset <= genericRangeSize THEN
                                    NEW(beforeRanges);
                                    ParseRangeString(rangeAttribute,base16,neg,beforeRanges);
                                    newContext.wholeLastRanges[offset-1] := beforeRanges;
                                  END;
                                END;
                              END;
                            END;
                            validContext := TRUE;
                          (* get the ranges for all succeeding character *)
                          ELSIF (tagName^ = GenericRightTag) & wholeContext THEN
                            (* get the distance of the farthest context rule *)
                            sizeAttribute := rangeElement.GetAttributeValue(SizeAttribute);
                            Strings.StrToInt(sizeAttribute^,genericRangeSize);
                            NEW(newContext.wholeNextRanges,genericRangeSize);
                            (* NOTE(review): negation flag read from the enclosing generic tag, see above *)
                            neg := IsNegated(rangeElement);
                            genericAfterElements := rangeElement.GetContents();
                            (* parse through all ranges *)
                            WHILE genericAfterElements.HasMoreElements() DO
                              genericAfterElement := genericAfterElements.GetNext();
                              WITH genericAfterElement : XML.Element DO
                                tagName := genericAfterElement.GetName();
                                IF tagName^ = AfterTag THEN
                                  rangeAttribute := genericAfterElement.GetAttributeValue(RangeAttribute);
                                  offsetAttribute := genericAfterElement.GetAttributeValue(OffsetAttribute);
                                  Strings.StrToInt(offsetAttribute^,offset);
                                  IF offset < 1 THEN offset := 1; END;
                                  (* skip offsets that do not fit into the declared size *)
                                  IF offset <= genericRangeSize THEN
                                    NEW(afterRanges);
                                    ParseRangeString(rangeAttribute,base16,neg,afterRanges);
                                    newContext.wholeNextRanges[offset-1] := afterRanges;
                                  END;
                                END;
                              END;
                            END;
                            validContext := TRUE;
                          (* parse the tag specifying the resulting character *)
                          ELSIF tagName^ = ResultTag THEN
                            resultAttribute := rangeElement.GetAttributeValue(CodeAttribute);
                            IF base16 THEN
                              Strings.HexStrToInt(resultAttribute^,newContext.resultingChar,res);
                            ELSE
                              Strings.StrToInt(resultAttribute^,newContext.resultingChar);
                            END;
                            (* a Zero defines the Zero-Width Space, which is used to simulate
                               the deletion of the target character *)
                            IF newContext.resultingChar = 0 THEN
                              newContext.resultingChar := 200BH;
                            END;
                            IF RangeDebug THEN
                              KernelLog.String("resulting in: ");
                              KernelLog.Hex(newContext.resultingChar,4);
                              KernelLog.Ln;
                            END;
                          END;
                        END;
                      END;
                      (* Insert the found context into the context cache *)
                      IF validContext THEN
                        contextCache.Insert(thisChar,newContext);
                        contextFound := TRUE;
                      END;
                    END;
                  END
                END;
              END;
            END;
          END;
        END;
        (* If no context was defined for this character, insert a reflexive context into the cache *)
        IF ~contextFound THEN
          IF RangeDebug THEN
            KernelLog.String("inserting reflexive context"); KernelLog.Ln;
          END;
          contextCache.Insert(thisChar,GetReflexiveContext(thisChar));
        END;
      END;
    END;
  END GetContextsFromXML;
  (* Parses a range string and appends the ranges it describes to ranges.
     Format as handled below: ranges are separated by ';', lower and upper bound of a range are
     separated by '-', a range consisting of a single number has identical bounds, 'O' stands for the
     beginning of a line (both bounds -1), a '!' at the very start or directly after a ';' marks the
     range as not valid, and a leading 'X' means "any character" (nothing is stored).
     rangeString - text to parse; NIL or zero length is treated as "nothing to add"
     base16      - TRUE if the numbers are hexadecimal, otherwise decimal
     neg         - TRUE: append to ranges.negRanges, FALSE: append to ranges.posRanges
     ranges      - target; allocated here if NIL, already stored ranges are kept *)
  PROCEDURE ParseRangeString(rangeString : XML.String; base16, neg : BOOLEAN; VAR ranges : Ranges);
  VAR
    numberOfRanges, i, j, k: LONGINT; res : WORD;
    tempString : XML.String;
    range : Range;
    theseRanges, oldRanges : RangeArray;

    (* Completes the current range from the number text collected in tempString, logs it in debug
       mode and stores it at theseRanges[k] *)
    PROCEDURE CloseRange;
    BEGIN
      (* 'O' defines the beginning of a line *)
      IF tempString^ = "O" THEN
        range.lowerBound := -1;
        range.upperBound := -1;
      ELSE
        IF base16 THEN
          Strings.HexStrToInt(tempString^,range.upperBound,res);
        ELSE
          Strings.StrToInt(tempString^,range.upperBound);
        END;
        (* if no lower bound has been defined for this range, the range consists of but one character *)
        IF range.lowerBound < 0 THEN
          range.lowerBound := range.upperBound;
        END;
      END;
      IF RangeDebug THEN
        KernelLog.String("["); KernelLog.Hex(range.lowerBound,4);
        KernelLog.String(","); KernelLog.Hex(range.upperBound,4);
        KernelLog.String("]");
        IF ~range.valid THEN KernelLog.String(" (!)") END;
        KernelLog.Ln;
      END;
      theseRanges[k] := range;
    END CloseRange;

  BEGIN
    (* A missing or empty range attribute defines no range; without this guard the code below
       would dereference NIL or index past the end of the result array *)
    IF (rangeString = NIL) OR (LEN(rangeString) = 0) THEN RETURN END;
    (* If the first character is an 'X', any character is valid. Therefore the string needs no more parsing. *)
    IF rangeString[0] = "X" THEN
      IF RangeDebug THEN
        KernelLog.String("any character"); KernelLog.Ln;
      END;
      RETURN;
    END;
    NEW(tempString,LEN(rangeString));
    (* Count the defined ranges in the string: one more than the number of separators *)
    numberOfRanges := 1;
    FOR i := 0 TO LEN(rangeString) - 1 DO
      IF rangeString[i] = ";" THEN
        INC(numberOfRanges);
      END;
    END;
    IF ranges = NIL THEN
      NEW(ranges);
    END;
    (* old ranges need to be memorized so as not to delete them in the end *)
    IF neg THEN
      oldRanges := ranges.negRanges;
    ELSE
      oldRanges := ranges.posRanges;
    END;
    (* old ranges are copied to the beginning of the new ranges; afterwards k is the index of the
       first free slot *)
    k := 0;
    IF oldRanges # NIL THEN
      NEW(theseRanges,numberOfRanges+LEN(oldRanges));
      WHILE k < LEN(oldRanges) DO
        theseRanges[k] := oldRanges[k];
        INC(k);
      END;
    ELSE
      NEW(theseRanges,numberOfRanges);
    END;
    NEW(range);
    j := 0;
    FOR i := 0 TO LEN(rangeString) - 1 DO
      (* Store the current character; control characters are overwritten again below *)
      tempString[j] := rangeString[i];
      (* If a '!' stands right before a range, the range is called invalid, which means that a certain character must not
         lie in the defined range *)
      IF rangeString[i] = "!" THEN
        IF (i = 0) OR (rangeString[i-1] = ";") THEN
          range.valid := FALSE;
        END;
      (* The '-' is the border between the lower and the upper bound of the range *)
      ELSIF rangeString[i] = "-" THEN
        tempString[j] := 0X;
        IF base16 THEN
          Strings.HexStrToInt(tempString^,range.lowerBound,res);
        ELSE
          Strings.StrToInt(tempString^,range.lowerBound);
        END;
        j := 0;
      (* The ';' marks the end of a range. *)
      ELSIF rangeString[i] = ";" THEN
        tempString[j] := 0X;
        (* save the range and carry on with the next range in the string *)
        CloseRange;
        NEW(range);
        INC(k);
        j := 0;
      ELSE
        INC(j);
      END;
    END;
    (* Since the last range need not to be delimited by a ';', it needs additional treatment after the string is parsed *)
    CloseRange;
    IF neg THEN
      ranges.negRanges := theseRanges;
    ELSE
      ranges.posRanges := theseRanges;
    END;
  END ParseRangeString;
  (* Builds a context whose resulting character is the character itself, i.e. a rule that leaves
     the targeted character unchanged. No ranges are set on the returned context. *)
  PROCEDURE GetReflexiveContext(thisChar : Texts.Char32) : GenericContext;
  VAR
    identity : GenericContext;
  BEGIN
    NEW(identity);
    identity.resultingChar := thisChar;
    RETURN identity;
  END GetReflexiveContext;
  1002. END RangedContextAnalyzer;
  (* The Context Analyzer parses a given line and delegates the analysis of each character to the
     Ranged Context Analyzer that is registered for it in the module-level range tree 'ranges'.
     The tree itself is allocated by this object's initializer. *)
  ContextAnalyzer = OBJECT

    (* Initializes the tree for the available Ranged Context Analyzers. *)
    PROCEDURE &Init*;
    BEGIN
      NEW(ranges);
    END Init;

    (* Parses a given line and analyzes it character-wise for defined context rules.
       line  - reader on the text to analyze; its position and direction are changed
       start - first position to analyze; negative values mean 0
       end   - last position to analyze (inclusive); negative values mean the last position of the text
       Returns a reader on a newly created text that holds the (possibly replaced) characters.
       If end lies before start, the returned text is empty. *)
    PROCEDURE AnalyzeLine(line : Texts.TextReader; start, end : LONGINT) : Texts.TextReader;
    VAR
      lineCache : String;
      ch,ch1,ch2,ch3,ch4 : Texts.Char32;
      i, length : LONGINT;
      analyzer : RangedContextAnalyzer;
      newText : Texts.Text;
      oneCharString : Texts.PUCS32String;
      newTextReader : Texts.TextReader;
    BEGIN
      line.text.AcquireRead;
      (* Set default values for negative start and end positions *)
      IF start < 0 THEN
        start := 0;
      END;
      IF end < 0 THEN
        end := line.text.GetLength() - 1;
      END;
      (* never request a negative array length *)
      length := end - start + 1;
      IF length < 0 THEN
        length := 0;
      END;
      NEW(lineCache,length);
      line.SetPosition(start);
      line.SetDirection(1);
      (* Put the whole line into a faster accessible array *)
      FOR i := 0 TO length - 1 DO
        line.ReadCh(ch);
        lineCache[i] := ch;
      END;
      line.text.ReleaseRead;

      NEW(newText);
      (* one character plus terminating zero *)
      NEW(oneCharString,2);
      oneCharString[1] := 0H;
      newText.AcquireWrite;
      (* Iterate through all characters of the line *)
      FOR i := 0 TO length - 1 DO
        (* gather the context of the character; positions outside of the line are reported as -1 *)
        IF i >= 2 THEN ch1 := lineCache[i-2] ELSE ch1 := -1 END;
        IF i >= 1 THEN ch2 := lineCache[i-1] ELSE ch2 := -1 END;
        ch := lineCache[i];
        IF i + 1 < length THEN ch3 := lineCache[i+1] ELSE ch3 := -1 END;
        IF i + 2 < length THEN ch4 := lineCache[i+2] ELSE ch4 := -1 END;
        (* Search the range tree for a suitable analyzer *)
        analyzer := ranges.Search(ch);
        IF analyzer # NIL THEN
          IF RangeDebug THEN
            KernelLog.String("==> "); KernelLog.Hex(ch,4); KernelLog.Ln;
          END;
          (* Call the analyzer's correct procedure to analyze the character's context *)
          IF analyzer.closeContext THEN
            ch := analyzer.AnalyzeCloseContext(ch,ch2,ch3);
          ELSIF analyzer.wideContext THEN
            ch := analyzer.AnalyzeWideContext(ch,ch1,ch2,ch3,ch4);
          ELSIF analyzer.wholeContext THEN
            ch := analyzer.AnalyzeWholeContext(i,lineCache);
          END;
          IF RangeDebug THEN
            KernelLog.String("<== "); KernelLog.Hex(ch,4); KernelLog.Ln;
          END;
        END;
        (* Insert the new character into the resulting text *)
        oneCharString[0] := ch;
        newText.InsertUCS32(i,oneCharString^);
      END;
      newText.ReleaseWrite;
      (* store the new text in a special text reader *)
      NEW(newTextReader,newText);
      RETURN newTextReader;
    END AnalyzeLine;

  END ContextAnalyzer;
  VAR
    (* tree of the registered Ranged Context Analyzers; allocated in ContextAnalyzer.Init *)
    ranges : RangeTree;
    (* the analyzer instance behind the exported AnalyzeLine; created in the module body *)
    contextAnalyzer : ContextAnalyzer;
  (* Registers a certain set of context rules defined in an XML file. The new analyzer is stored in the
     range tree that was created by the Context Analyzer previously.
     language    - language the rule file must declare in its root tag
     contextFile - parsed rule document
     The first properties tag of the document supplies the affected character range and the context
     mode. Nothing is registered if the document, the tree or the language is missing, if the root
     tag does not match, or if the properties do not contain a complete range. *)
  PROCEDURE RegisterRangedAnalyzer(language : Strings.String; contextFile : XML.Document);
  VAR
    newAnalyzer : RangedContextAnalyzer;
    root : XML.Element;
    charElements, propertyElements : XMLObjects.Enumerator;
    charElement, propertyElement : ANY;
    tagName, languageAttribute, baseAttribute : XML.String;
    base16, propertyFound, rangeFound : BOOLEAN;
    rangeLow, rangeHigh: LONGINT; res: WORD;
    mode, tempRangeLow, tempRangeHigh : Strings.String;
  BEGIN
    IF (contextFile = NIL) OR (ranges = NIL) OR (language = NIL) THEN RETURN END;
    root := contextFile.GetRoot();
    tagName := root.GetName();
    languageAttribute := root.GetAttributeValue(LanguageAttribute);
    baseAttribute := root.GetAttributeValue(BaseAttribute);
    (* A rule file without these root attributes must not abort the loading of all other files
       with a NIL dereference; it is skipped instead *)
    IF (languageAttribute = NIL) OR (baseAttribute = NIL) THEN
      KernelLog.String("ContextualDependency: rule file lacks a required root attribute, analyzer not registered.");
      KernelLog.Ln;
      RETURN;
    END;
    IF (tagName^ = RootTag) & (languageAttribute^ = language^) THEN
      base16 := baseAttribute^ = "Hex";
      propertyFound := FALSE;
      charElements := root.GetContents();
      (* Look for the property tag *)
      WHILE ~propertyFound & charElements.HasMoreElements() DO
        charElement := charElements.GetNext();
        WITH charElement : XML.Element DO
          tagName := charElement.GetName();
          IF tagName^ = PropertiesTag THEN
            rangeFound := FALSE;
            mode := NIL;
            propertyElements := charElement.GetContents();
            (* Look through the properties *)
            WHILE propertyElements.HasMoreElements() DO
              propertyElement := propertyElements.GetNext();
              WITH propertyElement : XML.Element DO
                tagName := propertyElement.GetName();
                (* Store the range, that is affected by these rules *)
                IF tagName^ = RangeTag THEN
                  tempRangeLow := propertyElement.GetAttributeValue(LowAttribute);
                  tempRangeHigh := propertyElement.GetAttributeValue(HighAttribute);
                  IF (tempRangeLow # NIL) & (tempRangeHigh # NIL) THEN
                    IF base16 THEN
                      Strings.HexStrToInt(tempRangeLow^,rangeLow,res);
                      Strings.HexStrToInt(tempRangeHigh^,rangeHigh,res);
                    ELSE
                      Strings.StrToInt(tempRangeLow^,rangeLow);
                      Strings.StrToInt(tempRangeHigh^,rangeHigh);
                    END;
                    rangeFound := TRUE;
                  END;
                (* Get the range mode, that defines how big the considered contexts can be. *)
                ELSIF tagName^ = SizeTag THEN
                  mode := propertyElement.GetAttributeValue(ValueAttribute);
                END;
              END;
            END;
            (* create a new Ranged Context Analyzer and append it to the tree; without a range the
               bounds would be undefined, so such a file is not registered *)
            IF rangeFound THEN
              NEW(newAnalyzer,contextFile,rangeLow,rangeHigh,language,mode);
              ranges.AddRange(newAnalyzer);
            ELSE
              KernelLog.String("ContextualDependency: rule file defines no character range, analyzer not registered.");
              KernelLog.Ln;
            END;
            propertyFound := TRUE;
          END;
        END;
      END;
    END;
  END RegisterRangedAnalyzer;
  (* Exported entry point: analyzes a line of text for context dependency by forwarding the call
     unchanged to the module's Context Analyzer and returning the reader it produces. *)
  PROCEDURE AnalyzeLine*(line : Texts.TextReader; start, end : LONGINT) : Texts.TextReader;
  VAR
    analyzedLine : Texts.TextReader;
  BEGIN
    analyzedLine := contextAnalyzer.AnalyzeLine(line,start,end);
    RETURN analyzedLine;
  END AnalyzeLine;
  (* Opens and parses an XML rule file.
     filename       - name of the rule file
     useThisContext - set to TRUE iff the file was found and parsed without an error being reported
     context        - the parsed document on success, NIL otherwise
     Parse errors and a missing file are reported to the kernel log. *)
  PROCEDURE InitRangedAnalyzer(CONST filename : ARRAY OF CHAR; VAR useThisContext : BOOLEAN; VAR context : XML.Document);
  TYPE
    (* Receives the parser's error reports, logs them and remembers that an error occurred *)
    Trap = OBJECT
    VAR
      xmlError : BOOLEAN;
      filename: Files.FileName;

      PROCEDURE &InitTrap (CONST filename: ARRAY OF CHAR);
      BEGIN COPY (filename, SELF.filename); xmlError := FALSE;
      END InitTrap;

      PROCEDURE Handler(pos, line, row: LONGINT; CONST msg: ARRAY OF CHAR);
      BEGIN
        KernelLog.String("Error in ");
        KernelLog.String(filename);
        KernelLog.String(" at position ");
        KernelLog.String("pos= "); KernelLog.Int(pos, 0); KernelLog.String(" line= "); KernelLog.Int(line, 0); KernelLog.String(" row= "); KernelLog.Int(row, 0); KernelLog.Ln;
        xmlError := TRUE;
      END Handler;
    END Trap;

  VAR
    file: Files.File;
    scanner: XMLScanner.Scanner;
    parser: XMLParser.Parser;
    reader: Files.Reader;
    trap: Trap;
  BEGIN
    context := NIL;
    useThisContext := FALSE;
    file := Files.Old(filename);
    IF file # NIL THEN
      NEW(reader, file, 0);
      NEW(scanner, reader);
      NEW(parser, scanner);
      NEW(trap, filename);
      parser.reportError := trap.Handler;
      context := parser.Parse();
      (* Only a document that was parsed without errors may be used; a faulty one is discarded.
         (The test used to be inverted, which threw away every correctly parsed document.) *)
      IF trap.xmlError THEN
        context := NIL;
      ELSE
        useThisContext := TRUE;
      END;
    ELSE
      KernelLog.String("Error opening ");
      KernelLog.String(filename);
      KernelLog.String(". File not found.");
      KernelLog.Ln;
    END;
  END InitRangedAnalyzer;
  (* Reads the "Context" section of the configuration and, for every entry in it, loads the rule file
     named by the entry's "value" attribute and registers a Ranged Context Analyzer for the language
     named by its "name" attribute. Once all entries are processed the range tree is balanced.
     A missing "Context" section is reported to the kernel log. *)
  PROCEDURE LoadContextualDependencies;
  VAR
    section : XML.Element;
    entries : XMLObjects.Enumerator;
    entry : ANY;
    usable : BOOLEAN;
    ruleDocument : XML.Document;
    fileName, languageName : Strings.String;
  BEGIN
    section := Configuration.GetSection("Context");
    IF section = NIL THEN
      KernelLog.String("ContextDependecy: Could not load contextual dependecies (missing 'Context' section in configuration file).");
      KernelLog.Ln;
      RETURN;
    END;
    entries := section.GetContents();
    (* iterate through all available context properties *)
    WHILE entries.HasMoreElements() DO
      entry := entries.GetNext();
      WITH entry : XML.Element DO
        (* the entry names the rule file and the language it belongs to *)
        fileName := entry.GetAttributeValue("value");
        languageName := entry.GetAttributeValue("name");
        InitRangedAnalyzer(fileName^,usable,ruleDocument);
        (* register an analyzer only for rule files that could be loaded *)
        IF usable THEN
          IF RangeDebug THEN
            KernelLog.String(fileName^); KernelLog.String(" loaded."); KernelLog.Ln;
          END;
          RegisterRangedAnalyzer(languageName,ruleDocument);
        END;
      END;
    END;
    ranges.CompleteBalancing;
  END LoadContextualDependencies;
  (* Module initialization: create the Context Analyzer first (its initializer allocates the range
     tree), then load all context dependency rules named in the configuration *)
  BEGIN
    NEW(contextAnalyzer);
    LoadContextualDependencies;
  END ContextualDependency.