UnicodeBidirectionality.Mod 84 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751
  1. MODULE UnicodeBidirectionality; (** AUTHOR "gubsermi"; PURPOSE "Implementation of the Unicode Bidirectionality Algorithm"; *)
  2. IMPORT
  3. Codecs, Files, Streams, KernelLog, Texts, Commands, UnicodeProperties, Strings;
  CONST
    (* NOTE(review): the three DOS* values and the four character classes below are not
       referenced in the visible part of this module; the groupings are inferred from
       the names only and should be confirmed against their users. *)
    DOSNeutral = 0;
    DOSRightToLeft = 1;
    DOSLeftToRight = 2;

    NeutralType = 0;
    EuropeanNumber = 1;
    ArabicNumber = 2;
    LeftStrongType = 3;

    (* compile-time switches that guard the KernelLog trace output *)
    CharacterDebugging = FALSE;
    CacheDebugging = FALSE;

    (* exported identifiers numbered 0..5 - presumably selectors for the higher level
       protocols; TODO confirm against the HL* types *)
    WeakTypes1* = 0;
    WeakTypes2* = 1;
    EuropeanNumberAdj* = 2;
    ArabicNumberAdj* = 3;
    NeutralTypes* = 4;
    SameDirection* = 5;

    (* number of entries pre-allocated in the position array pool and the string pool *)
    initialCachePoolSize = 10000;
    (* default length of a pooled or lazily allocated array *)
    initialCacheElementSize = 1000;
  TYPE
    (* open heap arrays used by the caches and the memory manager *)
    IntegerArray = POINTER TO ARRAY OF LONGINT;
    CharArray = POINTER TO ARRAY OF Texts.Char32;
    TextReaderArray = POINTER TO ARRAY OF Texts.TextReader;
    BoolArray = POINTER TO ARRAY OF BOOLEAN;

    (* An integer array together with the number of entries in use. The next link
       chains unused position arrays inside the memory manager's pool. *)
    PosArray = POINTER TO RECORD
      array : IntegerArray;
      size : LONGINT;
      next : PosArray;
    END;

    PosArrays = POINTER TO ARRAY OF PosArray;
  (* Simple stack of LONGINT values, backed by a heap array that is enlarged on demand. *)
  IntegerStack = OBJECT
  VAR
    top : INTEGER; (* number of stored elements, i.e. index of the next free slot *)
    internalStack : IntegerArray; (* element storage *)
    internalStackSize : LONGINT; (* capacity of internalStack *)

    (* Creates an empty stack with room for size elements. *)
    PROCEDURE &Init*(size : LONGINT);
    BEGIN
      NEW(internalStack,size);
      top := 0;
      internalStackSize := size;
    END Init;

    (* Pushes i on top of the stack. A full stack is enlarged first. *)
    PROCEDURE Push(i : LONGINT);
    VAR
      tempStack : IntegerArray;
      j : LONGINT;
    BEGIN
      IF (top >= internalStackSize) THEN
        (* 1.5 times the original size is sufficient, since the old size should have
           been a best estimate. *)
        internalStackSize := ENTIER(top*1.5);
        (* For capacities 0 and 1 the factor does not add a slot (ENTIER(1.5) = 1),
           so make sure the new storage can hold at least one more element. *)
        IF internalStackSize <= top THEN
          internalStackSize := top + 1;
        END;
        (* create the larger storage and move the existing elements over *)
        NEW(tempStack,internalStackSize);
        FOR j := 0 TO top - 1 DO
          tempStack[j] := internalStack[j];
        END;
        internalStack := tempStack;
      END;
      internalStack[top] := i;
      INC(top);
    END Push;

    (* Removes and returns the top element. Returns -1 if the stack is empty, which
       cannot be told apart from a stored -1. *)
    PROCEDURE Pop() : LONGINT;
    BEGIN
      IF (top = 0) THEN
        RETURN -1;
      ELSE
        DEC(top);
        RETURN internalStack[top];
      END;
    END Pop;

    (* Returns the top element without removing it, or -1 if the stack is empty. *)
    PROCEDURE Top() : LONGINT;
    BEGIN
      IF (top = 0) THEN
        RETURN -1;
      ELSE
        RETURN internalStack[top-1];
      END;
    END Top;

    (* Removes all elements; the storage is kept. *)
    PROCEDURE Purge;
    BEGIN
      top := 0;
    END Purge;

    (* Returns the current number of elements on the stack. *)
    PROCEDURE Size() : INTEGER;
    BEGIN
      RETURN top;
    END Size;

  END IntegerStack;
  (* singly linked list of unused position arrays *)
  PosArrayPool = RECORD
    first, last : PosArray;
  END;

  (* pool slot: a pre-allocated string and a flag telling whether it is handed out *)
  StringElement = RECORD
    element : Strings.String;
    used : BOOLEAN;
  END;

  (* pool slot: a pre-allocated integer stack and a flag telling whether it is handed out *)
  IntegerStackElement = RECORD
    element : IntegerStack;
    used : BOOLEAN;
  END;

  (* pool slot: a pre-allocated UCS32 string and a flag telling whether it is handed out *)
  UStringElement = RECORD
    element : Texts.PUCS32String;
    used : BOOLEAN;
  END;
  (* The Array Memory Manager keeps several pools of pre-allocated arrays, strings and
     stacks to minimize reallocation of heap memory. The New*Array procedures reuse the
     array passed in if it is large enough; a replacement array does NOT receive the
     contents of the old one. *)
  ArrayMemoryManager = OBJECT
  VAR
    posArrayPool : PosArrayPool;
    stringPool : ARRAY initialCachePoolSize OF StringElement;
    integerStackPool : ARRAY 10 OF IntegerStackElement;
    uStringPool : ARRAY 10 OF UStringElement;

    (* Pre-allocates the contents of all pools. *)
    PROCEDURE &Init*;
    VAR
      i : LONGINT;
      newPosArray : PosArray;
    BEGIN
      (* fill the position array pool: a chain of initialCachePoolSize empty arrays *)
      NEW(posArrayPool.last);
      NEW(posArrayPool.last.array,initialCacheElementSize);
      posArrayPool.last.size := 0;
      posArrayPool.first := posArrayPool.last;
      FOR i := 1 TO initialCachePoolSize - 1 DO
        NEW(newPosArray);
        NEW(newPosArray.array,initialCacheElementSize);
        newPosArray.size := 0;
        posArrayPool.last.next := newPosArray;
        posArrayPool.last := newPosArray;
      END;
      (* fill the string pool *)
      FOR i := 0 TO initialCachePoolSize - 1 DO
        NEW(stringPool[i].element,16);
      END;
      (* fill the integer stack pool *)
      FOR i := 0 TO LEN(integerStackPool) - 1 DO
        NEW(integerStackPool[i].element,100);
      END;
      (* fill the one-character-string pool *)
      FOR i := 0 TO LEN(uStringPool) - 1 DO
        NEW(uStringPool[i].element,2);
      END;
    END Init;

    (* Allocates a new textreader array if trArray is NIL or shorter than size. *)
    PROCEDURE NewTextReaderArray(VAR trArray : TextReaderArray; size : LONGINT);
    BEGIN
      IF trArray = NIL THEN
        NEW(trArray,MAX(initialCacheElementSize,2*size));
      ELSIF LEN(trArray) < size THEN
        NEW(trArray,2*size);
      END;
    END NewTextReaderArray;

    (* Returns an empty integer stack with a capacity of at least size, taken from
       the pool if a slot is free and freshly allocated otherwise. *)
    PROCEDURE NewIntegerStack(VAR stack : IntegerStack; size : LONGINT);
    VAR
      i : LONGINT;
    BEGIN
      FOR i := 0 TO LEN(integerStackPool) - 1 DO
        IF ~integerStackPool[i].used THEN
          IF integerStackPool[i].element.internalStackSize < size THEN
            (* too small: replace the pooled stack by a larger one *)
            NEW(integerStackPool[i].element,size);
          ELSE
            (* A recycled stack may still hold the elements of its previous user;
               hand it out empty, like the freshly allocated ones. *)
            integerStackPool[i].element.Purge;
          END;
          stack := integerStackPool[i].element;
          integerStackPool[i].used := TRUE;
          RETURN;
        END;
      END;
      (* all pool slots are in use *)
      NEW(stack,size);
    END NewIntegerStack;

    (* Puts the integer stack back to the pool. Stacks that do not belong to the
       pool are ignored. *)
    PROCEDURE FreeIntegerStack(stack : IntegerStack);
    VAR
      i : LONGINT;
    BEGIN
      FOR i := 0 TO LEN(integerStackPool) - 1 DO
        IF stack = integerStackPool[i].element THEN
          integerStackPool[i].used := FALSE;
          RETURN;
        END;
      END;
    END FreeIntegerStack;

    (* Allocates a new boolean array if bArray is NIL or shorter than size. *)
    PROCEDURE NewBoolArray(VAR bArray : BoolArray; size : LONGINT);
    BEGIN
      IF bArray = NIL THEN
        NEW(bArray,MAX(initialCacheElementSize,2*size));
      ELSIF LEN(bArray) < size THEN
        NEW(bArray,2*size);
      END;
    END NewBoolArray;

    (* Allocates a new array for position arrays if pArrays is NIL or shorter than size. *)
    PROCEDURE NewPosArrays(VAR pArrays : PosArrays; size : LONGINT);
    BEGIN
      IF pArrays = NIL THEN
        NEW(pArrays,MAX(initialCacheElementSize,2*size));
      ELSIF LEN(pArrays) < size THEN
        NEW(pArrays,2*size);
      END;
    END NewPosArrays;

    (* Provides a position array with room for size entries and sets its size field.
       pArray itself is reused if it is large enough; otherwise an array is taken
       from the pool, or allocated if the pool is empty. *)
    PROCEDURE NewPosArray(VAR pArray : PosArray; size : LONGINT);
    VAR
      candidate, previous : PosArray;
    BEGIN
      (* if pArray is already a usable position array, use it *)
      IF (pArray # NIL) & (pArray.array # NIL) & (LEN(pArray.array) >= size) THEN
        pArray.size := size;
        RETURN;
      END;
      (* pool is empty *)
      IF posArrayPool.first = NIL THEN
        NEW(pArray);
        NEW(pArray.array,MAX(initialCacheElementSize,2*size));
        pArray.size := size;
        RETURN;
      END;
      (* search the pool for the first position array that is large enough *)
      previous := NIL;
      candidate := posArrayPool.first;
      WHILE (candidate # NIL) & (LEN(candidate.array) < size) DO
        previous := candidate;
        candidate := candidate.next;
      END;
      (* if no position array was found, resize the first one and use it *)
      IF candidate = NIL THEN
        previous := NIL;
        candidate := posArrayPool.first;
        NEW(candidate.array,MAX(initialCacheElementSize,2*size));
      END;
      (* unlink the chosen element, keeping both ends of the pool valid *)
      IF previous = NIL THEN
        posArrayPool.first := candidate.next;
      ELSE
        previous.next := candidate.next;
      END;
      IF posArrayPool.last = candidate THEN
        posArrayPool.last := previous;
      END;
      candidate.next := NIL;
      candidate.size := size;
      pArray := candidate;
    END NewPosArray;

    (* Allocates a new integer array if iArray is NIL or shorter than size. *)
    PROCEDURE NewIntegerArray(VAR iArray : IntegerArray; size : LONGINT);
    BEGIN
      IF iArray = NIL THEN
        NEW(iArray,MAX(initialCacheElementSize,2*size));
      ELSIF LEN(iArray) < size THEN
        NEW(iArray,2*size);
      END;
    END NewIntegerArray;

    (* Allocates a new string array if sArray is NIL or shorter than size. *)
    PROCEDURE NewStringArray(VAR sArray : Strings.StringArray; size : LONGINT);
    BEGIN
      IF sArray = NIL THEN
        NEW(sArray,MAX(initialCacheElementSize,2*size));
      ELSIF LEN(sArray) < size THEN
        NEW(sArray,2*size);
      END;
    END NewStringArray;

    (* Allocates a new character array if cArray is NIL or shorter than size. *)
    PROCEDURE NewCharArray(VAR cArray : CharArray; size : LONGINT);
    BEGIN
      IF cArray = NIL THEN
        NEW(cArray,MAX(initialCacheElementSize,2*size));
      ELSIF LEN(cArray) < size THEN
        NEW(cArray,2*size);
      END;
    END NewCharArray;

    (* Allocates a 16 character string for a character type if charType is NIL. *)
    PROCEDURE NewCharacterType(VAR charType : Strings.String);
    BEGIN
      IF charType = NIL THEN
        NEW(charType,16);
      END;
    END NewCharacterType;

    (* Gets a previously allocated string (16 characters) from the pool if a slot is
       free; otherwise allocates a 256 character string. *)
    PROCEDURE NewString(VAR string : Strings.String);
    VAR
      i : LONGINT;
    BEGIN
      FOR i := 0 TO initialCachePoolSize - 1 DO
        IF ~stringPool[i].used THEN
          string := stringPool[i].element;
          stringPool[i].used := TRUE;
          RETURN;
        END;
      END;
      NEW(string,256);
    END NewString;

    (* Puts the string back to the pool. Strings that do not belong to the pool are ignored. *)
    PROCEDURE FreeString(string : Strings.String);
    VAR
      i : LONGINT;
    BEGIN
      FOR i := 0 TO initialCachePoolSize - 1 DO
        IF string = stringPool[i].element THEN
          stringPool[i].used := FALSE;
          RETURN;
        END;
      END;
    END FreeString;

    (* Gets a previously allocated one-character-string from the pool if a slot is
       free; otherwise allocates a new one. *)
    PROCEDURE NewUString(VAR string : Texts.PUCS32String);
    VAR
      i : LONGINT;
    BEGIN
      FOR i := 0 TO LEN(uStringPool) - 1 DO
        IF ~uStringPool[i].used THEN
          string := uStringPool[i].element;
          uStringPool[i].used := TRUE;
          RETURN;
        END;
      END;
      NEW(string,2);
    END NewUString;

    (* Puts the one-character-string back to the pool. Strings that do not belong to
       the pool are ignored. *)
    PROCEDURE FreeUString(string : Texts.PUCS32String);
    VAR
      i : LONGINT;
    BEGIN
      FOR i := 0 TO LEN(uStringPool) - 1 DO
        IF string = uStringPool[i].element THEN
          uStringPool[i].used := FALSE;
          RETURN;
        END;
      END;
    END FreeUString;

  END ArrayMemoryManager;
  (* Describes the different versions of a line in the text. Since a line (or paragraph)
     can be split by word-wrapping, the corresponding properties need to be present for
     each possible start of a line. *)
  LineElement = OBJECT
  VAR
    next : LineElement;
    id : LONGINT;
    pos : LONGINT;
    lineLength : LONGINT;
    paragraphEmbeddingLevel : LONGINT;
    textReaders : TextReaderArray;
    dirty : BoolArray;
    posArrays, reversePosArrays : PosArrays;
    characterEmbeddingLevels : IntegerArray;
    originalCharacterTypes : Strings.StringArray;
    characterTypes : Strings.StringArray;
    characters : CharArray;

    (* Creates an unlinked element whose numeric fields all carry the marker -1. *)
    PROCEDURE &Init*;
    BEGIN
      id := -1;
      pos := -1;
      lineLength := -1;
      paragraphEmbeddingLevel := -1;
      next := NIL;
    END Init;

    (* Drops all computed data of the element. The identifying parts (id, pos) and
       the list link (next) are kept. *)
    PROCEDURE Clear;
    BEGIN
      (* scalar properties *)
      lineLength := 0;
      paragraphEmbeddingLevel := -1;
      (* per-character data *)
      characters := NIL;
      characterTypes := NIL;
      originalCharacterTypes := NIL;
      characterEmbeddingLevels := NIL;
      (* per-line-start data *)
      textReaders := NIL;
      dirty := NIL;
      posArrays := NIL;
      reversePosArrays := NIL;
    END Clear;

  END LineElement;
  (* The Line Cache supports the fast recovery of previously computed data belonging to
     a certain line in the text. Elements are identified by their start position and
     are kept in a singly linked list sorted by ascending start position. *)
  LineCache = OBJECT
  VAR
    first : LineElement;

    (* initializes an empty list *)
    PROCEDURE &Init*;
    BEGIN
      first := NIL;
    END Init;

    (* Inserts a line with a certain start position into the cache, keeping the list
       sorted. Nothing is inserted if a line with this start position already exists. *)
    PROCEDURE InsertLine(pos : LONGINT);
    VAR
      thisElement, lastElement, newElement : LineElement;
    BEGIN
      (* look for the first element that does not start before pos *)
      lastElement := NIL;
      thisElement := first;
      WHILE (thisElement # NIL) & (thisElement.pos < pos) DO
        lastElement := thisElement;
        thisElement := thisElement.next;
      END;
      (* unless the line is already in the list, link a new element in front of
         thisElement (which is NIL at the end of the list) *)
      IF (thisElement = NIL) OR (thisElement.pos # pos) THEN
        NEW(newElement);
        newElement.pos := pos;
        newElement.next := thisElement;
        IF lastElement = NIL THEN
          (* empty list, or the new line starts before the current first line *)
          first := newElement;
        ELSE
          lastElement.next := newElement;
        END;
      END;
      IF CacheDebugging THEN
        KernelLog.Ln; KernelLog.String("line inserted (");
        KernelLog.Int(pos,4); KernelLog.String(")"); KernelLog.Ln;
      END;
    END InsertLine;

    (* Gets the start position of the line following the line that contains pos, or
       the start position of that same line if it is the last one in the cache.
       Returns -1 if no cached line contains pos. *)
    PROCEDURE GetNextPos(pos : LONGINT) : LONGINT;
    VAR
      thisElement : LineElement;
    BEGIN
      (* find the current line; FindElement leaves thisElement untouched (= first)
         if every cached line starts after pos *)
      thisElement := first;
      FindElement(pos,thisElement);
      IF (thisElement = NIL) OR (thisElement.pos > pos) THEN
        (* no appropriate element has been found *)
        RETURN -1;
      ELSIF thisElement.next # NIL THEN
        RETURN thisElement.next.pos;
      ELSE
        RETURN thisElement.pos;
      END;
    END GetNextPos;

    (* removes a certain line - identified by its start position - from the cache *)
    PROCEDURE RemoveLine(pos : LONGINT);
    VAR
      thisElement, lastElement : LineElement;
    BEGIN
      (* get the element to be deleted *)
      lastElement := NIL;
      thisElement := first;
      WHILE (thisElement # NIL) & (thisElement.pos < pos) DO
        lastElement := thisElement;
        thisElement := thisElement.next;
      END;
      (* if the element has been found it can be removed *)
      IF (thisElement # NIL) & (thisElement.pos = pos) THEN
        IF lastElement = NIL THEN
          first := thisElement.next;
        ELSE
          lastElement.next := thisElement.next;
        END;
      END;
      IF CacheDebugging THEN
        KernelLog.Ln; KernelLog.String("line removed(");
        KernelLog.Int(pos,4); KernelLog.String(")"); KernelLog.Ln;
      END;
    END RemoveLine;

    (* Shifts the start positions of all lines that follow the line containing
       startPos by changeValue. Does nothing if the cache is empty. *)
    PROCEDURE ChangePos(startPos, changeValue : LONGINT);
    VAR
      thisElement : LineElement;
    BEGIN
      (* get the line the change happened in *)
      thisElement := first;
      FindElement(startPos,thisElement);
      IF thisElement = NIL THEN
        (* empty cache: nothing to shift *)
        RETURN;
      END;
      (* change all following elements *)
      thisElement := thisElement.next;
      WHILE thisElement # NIL DO
        INC(thisElement.pos,changeValue);
        thisElement := thisElement.next;
      END;
    END ChangePos;

    (* Finds the line a certain position belongs to, i.e. the last element whose
       start position is not greater than pos. thisElement is left unchanged if
       there is no such element. *)
    PROCEDURE FindElement(pos : LONGINT; VAR thisElement : LineElement);
    VAR
      nextElement : LineElement;
    BEGIN
      (* search the list until the line is found *)
      nextElement := first;
      WHILE (nextElement # NIL) & (nextElement.pos <= pos) DO
        thisElement := nextElement;
        nextElement := nextElement.next;
      END;
    END FindElement;

    (* Prints the important information of the cache. Easily extensible if desired. *)
    PROCEDURE PrintCache;
    VAR
      thisElement : LineElement;
      i : INTEGER;
    BEGIN
      thisElement := first;
      i := 0;
      KernelLog.Ln;
      KernelLog.String("pos pel last len"); KernelLog.Ln;
      KernelLog.String("===================="); KernelLog.Ln;
      WHILE thisElement # NIL DO
        KernelLog.Int(thisElement.pos,4); KernelLog.String(": ");
        KernelLog.Int(thisElement.paragraphEmbeddingLevel,2); KernelLog.String(" ");
        IF thisElement.characters # NIL THEN
          (* NOTE(review): this prints the last slot of the array, which is the last
             character of the line only if the array is exactly lineLength long -
             confirm how characters is allocated. *)
          KernelLog.Hex(thisElement.characters[LEN(thisElement.characters)-1],4);
        ELSE
          KernelLog.String("-:-:-");
        END;
        KernelLog.Int(thisElement.lineLength,4);
        KernelLog.Ln;
        thisElement := thisElement.next;
        INC(i);
      END;
      KernelLog.String("cache size: "); KernelLog.Int(i,3); KernelLog.String(" lines"); KernelLog.Ln;
      KernelLog.Ln;
    END PrintCache;

  END LineCache;
  (* The Segment Cache supports the fast recovery of previously computed data belonging
     to a certain segment in the text. Elements are identified by their id number and
     are kept in insertion order in a singly linked list (first .. last, last.next = NIL).
     The Segment Cache is not ordered by position. *)
  SegmentCache = OBJECT
  VAR
    first, last : LineElement;
    nextId : LONGINT;

    (* initializes an empty cache; ids are handed out starting at 0 *)
    PROCEDURE &Init*;
    BEGIN
      first := NIL;
      last := NIL;
      nextId := 0;
    END Init;

    (* Appends a segment with a certain start and end position (both inclusive) to
       the cache. The next free id is assigned to the element and returned, so that
       the result can be passed to FindElement, ChangePos and RemoveSegment. *)
    PROCEDURE InsertSegment(start, end : LONGINT) : LONGINT;
    VAR
      newElement : LineElement;
    BEGIN
      NEW(newElement);
      newElement.pos := start;
      newElement.lineLength := end - start + 1;
      newElement.id := nextId;
      INC(nextId);
      IF last = NIL THEN
        first := newElement;
      ELSE
        last.next := newElement;
      END;
      last := newElement;
      (* return the id of the inserted element, not the already incremented counter *)
      RETURN newElement.id;
    END InsertSegment;

    (* Removes a certain segment - identified by its id - from the cache. Does
       nothing if no segment carries this id. *)
    PROCEDURE RemoveSegment(id : LONGINT);
    VAR
      thisElement, lastElement : LineElement;
    BEGIN
      (* look for the element and remember its predecessor *)
      lastElement := NIL;
      thisElement := first;
      WHILE (thisElement # NIL) & (thisElement.id # id) DO
        lastElement := thisElement;
        thisElement := thisElement.next;
      END;
      IF thisElement = NIL THEN
        RETURN;
      END;
      (* unlink it, keeping both ends of the list valid *)
      IF lastElement = NIL THEN
        first := thisElement.next;
      ELSE
        lastElement.next := thisElement.next;
      END;
      IF thisElement = last THEN
        last := lastElement;
      END;
    END RemoveSegment;

    (* Changes the start position of a certain element by a certain value. Does
       nothing if no segment carries this id. *)
    PROCEDURE ChangePos(id, changeValue : LONGINT);
    VAR
      thisElement : LineElement;
    BEGIN
      FindElement(id,thisElement);
      IF thisElement # NIL THEN
        INC(thisElement.pos,changeValue);
      END;
    END ChangePos;

    (* Finds the segment identified by a certain id; thisElement is NIL if there
       is none. *)
    PROCEDURE FindElement(id : LONGINT; VAR thisElement : LineElement);
    BEGIN
      thisElement := first;
      WHILE (thisElement # NIL) & (thisElement.id # id) DO
        thisElement := thisElement.next;
      END;
    END FindElement;

    (* Prints the important information of the cache. Easily extensible if desired. *)
    PROCEDURE PrintCache;
    VAR
      thisElement : LineElement;
      i : INTEGER;
    BEGIN
      thisElement := first;
      i := 0;
      KernelLog.Ln;
      KernelLog.String("id pel last pos len"); KernelLog.Ln;
      KernelLog.String("===================="); KernelLog.Ln;
      WHILE thisElement # NIL DO
        KernelLog.Int(thisElement.id,4); KernelLog.String(": ");
        KernelLog.Int(thisElement.pos,4); KernelLog.String(" ");
        KernelLog.Int(thisElement.paragraphEmbeddingLevel,2); KernelLog.String(" ");
        IF thisElement.characters # NIL THEN
          (* NOTE(review): this prints the last slot of the array, which is the last
             character of the segment only if the array is exactly lineLength long -
             confirm how characters is allocated. *)
          KernelLog.Hex(thisElement.characters[LEN(thisElement.characters)-1],4);
        ELSE
          KernelLog.String("-:-:-");
        END;
        KernelLog.Int(thisElement.lineLength,4);
        KernelLog.Ln;
        thisElement := thisElement.next;
        INC(i);
      END;
      KernelLog.String("cache size: "); KernelLog.Int(i,3); KernelLog.String(" lines"); KernelLog.Ln;
      KernelLog.Ln;
    END PrintCache;

  END SegmentCache;
  648. (* This is the core of the Bidirectional formatting of a utf-text. *)
  649. BidiFormatter*=OBJECT
  650. VAR
  (* allocator through which the methods below obtain and release their strings, stacks and cache arrays *)
  651. amm : ArrayMemoryManager;
  (* reader over the text handed to Init; all character access of the formatter goes through it *)
  652. textReader : Texts.TextReader;
  (* source of the bidi character type of a character (see GetBidiCharacterType in ApplyExplicitLevels) *)
  653. unicodePropertyReader: UnicodeProperties.UnicodeTxtReader;
  (* created in Init; presumably used for character mirroring during reordering - not used in this part of the file *)
  654. mirrorPropertyReader: UnicodeProperties.BidiMirroringTxtReader;
  (* FALSE until ReformatText has completed once; ReorderLine and ReorderSegment return NIL while it is FALSE *)
  655. reformatted : BOOLEAN;
  (* per-line cache, (re)created by every ReformatText *)
  656. lineCache: LineCache;
  (* per-segment cache, only (re)created by ReformatText when HL4 is TRUE *)
  657. segmentCache : SegmentCache;
  (* text length seen at the last reformatting; ReformatTextFrom compares against it to detect fake change messages *)
  658. trueTextLength : LONGINT;
  (* optional hooks (NIL/FALSE = not used): *)
  (* HL1: if set, supplies the paragraph embedding level in ReformatClause *)
  659. HL1* : HigherLevelProtocol1;
  (* HL2: if set, decides the type of an "EN" character in ResolveWeakTypes *)
  660. HL2* : HigherLevelProtocol2;
  (* HL3: if set and returning TRUE, replaces the explicit-level step for a character in ApplyExplicitLevels *)
  661. HL3* : HigherLevelProtocol3;
  (* HL4: enables the segment operations (ReformatSegment, RemoveSegment, ChangeSegmentPos, ReorderSegment) *)
  662. HL4* : BOOLEAN;
  (* HL5: if set, is asked for artificial context in ResolveWeakTypes and ResolveNeutralTypes *)
  663. HL5* : HigherLevelProtocol5;
  (* HL6: not referenced in this part of the file - TODO document from its point of use *)
  664. HL6* : HigherLevelProtocol6;
  665. (* Initializes the Bidi Formatter for a given text. Additionally, two property readers are set up. *)
  PROCEDURE &Init*(text : Texts.Text);
  (* Constructor: binds the formatter to 'text' and creates the helper objects.
     Nothing is reformatted yet and the segment feature (HL4) starts switched off. *)
  BEGIN
    (* state flags *)
    reformatted := FALSE;
    HL4 := FALSE;
    (* memory manager and a forward-reading text reader on the given text *)
    NEW(amm);
    NEW(textReader,text);
    textReader.SetDirection(1);
    trueTextLength := text.GetLength();
    (* the two property readers *)
    NEW(unicodePropertyReader);
    NEW(mirrorPropertyReader);
  END Init;
  677. (* Implements the first steps of the bidirectionality algorithm. The text is analyzed and the character types are
  678. reapplied. After reformatting, the text is ready for word-wrapping and later for reordering. *)
  PROCEDURE ReformatText*;
  (* Reformats the complete text paragraph by paragraph and rebuilds the line cache (and, if HL4 is set,
     the segment cache). Texts that are not UTF formatted are left untouched. On completion 'reformatted'
     is set, which is the precondition checked by ReorderLine and ReorderSegment. *)
  VAR
    paragraphStart, paragraphEnd : LONGINT;
    finalChar : Texts.Char32;
  BEGIN
    (* only utf formatted texts are processed *)
    IF textReader.text.isUTF THEN
      (* remember the text length; it is needed later to detect fake 'changed' messages *)
      trueTextLength := textReader.text.GetLength();
      IF CharacterDebugging THEN KernelLog.String("reformatting text..."); KernelLog.Ln; END;

      (* fresh caches *)
      NEW(lineCache);
      IF HL4 THEN
        NEW(segmentCache);
      END;

      (* P1. one cache line per paragraph; stop at the end of the text or when no progress is made *)
      paragraphEnd := 0;
      LOOP
        paragraphStart := paragraphEnd;
        lineCache.InsertLine(paragraphStart);
        FindEndOfParagraph(paragraphStart,paragraphEnd);
        ReformatParagraph(paragraphStart);
        IF (paragraphEnd >= textReader.text.GetLength()) OR (paragraphStart = paragraphEnd) THEN
          EXIT;
        END;
      END;

      (* a trailing LF opens one more (empty) line *)
      textReader.SetDirection(1);
      textReader.SetPosition(textReader.text.GetLength()-1);
      textReader.ReadCh(finalChar);
      IF finalChar = UnicodeProperties.LF THEN
        lineCache.InsertLine(textReader.text.GetLength());
      END;

      (* optional diagnostics *)
      IF CacheDebugging THEN
        KernelLog.Ln;
        unicodePropertyReader.PrintCharTypeCache;
      END;
      IF CharacterDebugging THEN
        KernelLog.Ln; KernelLog.String("...reformatting done! Text length: ");
        KernelLog.Int(textReader.text.GetLength(),4); KernelLog.Ln;
      END;
      IF CacheDebugging THEN
        lineCache.PrintCache;
        IF segmentCache # NIL THEN
          segmentCache.PrintCache;
        END;
      END;

      reformatted := TRUE;
    END;
  END ReformatText;
  727. (* Applies the steps introduced in ReformatText to the few affected lines only. This is used for faster
  728. on-the-fly reformatting. When a character is inserted or deleted, only the current line and (in some
  729. cases) the line before need reformatting. *)
  PROCEDURE ReformatTextFrom*(pos, changed : LONGINT);
  (* Incrementally reformats the text after a change.
     pos     - text position at which the change happened
     changed - number of inserted (> 0) or deleted (< 0) characters; treated as 0 if the text length did
               not actually change
     Texts that are not UTF formatted are ignored. If no full reformatting has taken place yet there is
     no line cache to update, so the call falls back to ReformatText. *)
  VAR
    char : Texts.Char32;
  BEGIN
    (* do nothing if the text is not utf formatted *)
    IF ~textReader.text.isUTF THEN
      RETURN;
    END;

    (* the line cache only exists after a full pass; without it an incremental update is impossible *)
    IF ~reformatted OR (lineCache = NIL) THEN
      ReformatText;
      RETURN;
    END;

    (* check if there has really been a change (yes, there are some fake messages around!). If there
       really was a change, the start positions in the cache need to be shifted. *)
    IF trueTextLength = textReader.text.GetLength() THEN
      changed := 0;
    ELSE
      lineCache.ChangePos(pos, changed);
      trueTextLength := textReader.text.GetLength();
    END;

    IF CharacterDebugging THEN
      KernelLog.String("reformatting text at position "); KernelLog.Int(pos,4);
      KernelLog.String("..."); KernelLog.Ln;
    END;

    (* look at the character at the changed position *)
    textReader.SetDirection(1);
    textReader.SetPosition(pos);
    textReader.ReadCh(char);

    IF (changed > 0) & (char = UnicodeProperties.LF) THEN
      (* a line feed has been inserted: a new line is created in the cache and both lines are reformatted *)
      lineCache.InsertLine(pos+1);
      ReformatParagraph(pos);
      ReformatParagraph(pos+1);
    ELSE
      (* characters have been deleted: cache entries of lines that are no whole lines anymore are removed first *)
      IF changed < 0 THEN
        SweepCache;
      END;
      (* deletion and simple non-linefeed insertion both reformat the current line *)
      ReformatParagraph(pos);
    END;

    IF CharacterDebugging THEN
      KernelLog.Ln; KernelLog.String("...reformatting done! Text length: ");
      KernelLog.Int(textReader.text.GetLength(),4); KernelLog.Ln;
    END;

    IF CacheDebugging THEN
      lineCache.PrintCache;
      IF segmentCache # NIL THEN
        segmentCache.PrintCache;
      END;
    END;
  END ReformatTextFrom;
  779. (* performs the reformatting steps from the algorithm line-wise *)
  PROCEDURE ReformatParagraph(pos : LONGINT);
  (* Reformats the paragraph around text position 'pos': fetches its line cache entry, determines the
     paragraph borders and hands both to ReformatClause. *)
  VAR
    lineEntry : LineElement;
    firstPos, endPos : LONGINT;
  BEGIN
    (* the cache entry has been created beforehand *)
    lineCache.FindElement(pos,lineEntry);

    (* paragraph borders as found in the text *)
    FindStartOfParagraph(pos,firstPos);
    FindEndOfParagraph(pos,endPos);

    IF CharacterDebugging THEN
      KernelLog.Ln; KernelLog.Ln;
      KernelLog.String("paragraph (");
      KernelLog.Int(firstPos,3);
      KernelLog.String(",");
      KernelLog.Int(endPos-1,3);
      KernelLog.String(") found.");
    END;

    ReformatClause(firstPos,endPos,lineEntry);
  END ReformatParagraph;
  797. (* HL4. performs the reformatting steps from the algorithm for a single segment *)
  PROCEDURE ReformatSegment*(start, end : LONGINT) : LONGINT;
  (* HL4. Registers the segment (start, end) in the segment cache and reformats it.
     Returns the id assigned by the segment cache, or -1 if the text is not UTF formatted, HL4 is off or
     no segment cache exists. *)
  VAR
    segmentId : LONGINT;
    segmentEntry : LineElement;
  BEGIN
    IF textReader.text.isUTF & HL4 & (segmentCache # NIL) THEN
      (* create the cache entry and fetch it again by its id *)
      segmentId := segmentCache.InsertSegment(start,end);
      segmentCache.FindElement(segmentId,segmentEntry);
      (* ReformatClause is handed end+1 as its end position *)
      ReformatClause(start,end+1,segmentEntry);
    ELSE
      segmentId := -1;
    END;
    RETURN segmentId;
  END ReformatSegment;
  813. (* HL4. removes a certain segment from the cache *)
  PROCEDURE RemoveSegment*(id : LONGINT);
  (* HL4. Drops the segment 'id' from the segment cache. Does nothing if the text is not UTF formatted,
     HL4 is off or no segment cache exists. *)
  BEGIN
    IF textReader.text.isUTF & HL4 & (segmentCache # NIL) THEN
      segmentCache.RemoveSegment(id);
    END;
  END RemoveSegment;
  821. (* HL4. changes the start position of a certain segment *)
  PROCEDURE ChangeSegmentPos*(id, changeValue : LONGINT);
  (* HL4. Forwards a position change of 'changeValue' for segment 'id' to the segment cache. Does nothing
     if the text is not UTF formatted, HL4 is off or no segment cache exists. *)
  BEGIN
    IF textReader.text.isUTF & HL4 & (segmentCache # NIL) THEN
      segmentCache.ChangePos(id,changeValue);
    END;
  END ChangeSegmentPos;
  829. (* reformats a part of the text (either a single line or a predefined segment) *)
  PROCEDURE ReformatClause(start, end : LONGINT; VAR cacheElement : LineElement);
  (* Runs the reformatting steps on the text range (start, end) and stores the result in cacheElement.
     The paragraph embedding level comes from HL1 if that hook is set; otherwise it is derived from the
     first strong character (P2/P3). If no strong character is found, the cache entry is cleared and
     nothing else happens. Afterwards every element of cacheElement.dirty is set. *)
  VAR
    strongType : Strings.String;
    k : LONGINT;
  BEGIN
    amm.NewString(strongType);

    IF HL1 # NIL THEN
      (* the higher level protocol dictates the paragraph embedding level *)
      cacheElement.paragraphEmbeddingLevel := HL1(textReader,start,end);
    ELSE
      (* P2. determine the first strong character type *)
      FindFirstStrongCharacter(start,end,strongType);

      (* P3. without a strong character the paragraph counts as empty *)
      IF strongType = NIL THEN
        IF CharacterDebugging THEN KernelLog.String(" (Empty paragraph)"); END;
        cacheElement.Clear;
        amm.FreeString(strongType);
        RETURN;
      END;

      (* P3. "L" gives level 0, every other strong type level 1 *)
      IF strongType^ = "L" THEN
        cacheElement.paragraphEmbeddingLevel := 0;
      ELSE
        cacheElement.paragraphEmbeddingLevel := 1;
      END;
    END;

    amm.FreeString(strongType);

    IF CharacterDebugging THEN
      KernelLog.String(" Embedding Level: "); KernelLog.Int(cacheElement.paragraphEmbeddingLevel,2); KernelLog.Ln;
      PrintOriginalTypedParagraph(start,end);
      PrintCodedParagraph(start,end);
      KernelLog.Ln; KernelLog.String("Applying explicit levels...");
    END;

    (* explicit levels *)
    ApplyExplicitLevels(start,end,cacheElement);

    IF CharacterDebugging THEN
      PrintCurrentTypedParagraph(start,end);
      PrintLeveledParagraph(start,end);
      KernelLog.Ln; KernelLog.String("Resolving weak types...");
    END;

    (* weak types *)
    ResolveWeakTypes(cacheElement);

    IF CharacterDebugging THEN
      PrintCurrentTypedParagraph(start,end);
      KernelLog.Ln; KernelLog.String("Resolving neutral types...");
    END;

    (* neutral types *)
    ResolveNeutralTypes(cacheElement);

    IF CharacterDebugging THEN
      PrintCurrentTypedParagraph(start,end);
      KernelLog.Ln; KernelLog.String("Resolving implicit levels...");
    END;

    (* implicit levels *)
    ResolveImplicitLevels(cacheElement);

    IF CharacterDebugging THEN
      PrintLeveledParagraph(start,end);
    END;

    (* everything in this line has to be reordered again *)
    k := 0;
    WHILE k < LEN(cacheElement.dirty) DO
      cacheElement.dirty[k] := TRUE;
      INC(k);
    END;
  END ReformatClause;
  887. (* Scans the text and applies the explicit embedding level of each character *)
  PROCEDURE ApplyExplicitLevels(start, end : LONGINT; VAR cacheElement : LineElement);
  (* Reads the characters from position 'start' up to (excluding) 'end' and fills the cache entry:
     - characters, originalCharacterTypes : the character and its looked-up bidi type
     - characterTypes                     : the type after applying overrides; the explicit codes
                                            RLE, LRE, RLO, LRO and PDF are stored as "BN"
     - characterEmbeddingLevels           : the explicit embedding level of each character
     If HL3 is set and returns TRUE for a character, the type and level it delivers are stored instead
     of running X2 - X7 for that character.
     Finally lineLength is set and the dirty, textReaders, posArrays and reversePosArrays arrays are
     (re)created with one slot per character.
     The character index is a LONGINT: a 16 bit INTEGER would overflow on lines longer than 32767 characters. *)
  VAR
    embeddingLevels : IntegerStack;
    dummyEmbeddingLevel : LONGINT;
    overrideStati : IntegerStack;
    dummyOverrideStatus : LONGINT;
    char : Texts.Char32;
    charType : Strings.String;
    nextLevel : LONGINT;
    i : LONGINT;
    lineLength : LONGINT;
    surplusLevels : LONGINT;
    hlCharacterType : Strings.String;
    hlEmbeddingLevel : LONGINT;
  BEGIN
    char := 0H;
    nextLevel := 0;
    i := 0;
    (* counts embedding/override codes that could not raise the level; their PDFs must not pop anything *)
    surplusLevels := 0;

    (* create several arrays for the cache *)
    lineLength := end-start;
    amm.NewIntegerArray(cacheElement.characterEmbeddingLevels,lineLength);
    amm.NewStringArray(cacheElement.characterTypes,lineLength);
    amm.NewStringArray(cacheElement.originalCharacterTypes,lineLength);
    amm.NewCharArray(cacheElement.characters,lineLength);

    (* create temporary stacks for the algorithm *)
    amm.NewIntegerStack(embeddingLevels, 62);
    amm.NewIntegerStack(overrideStati,2*lineLength);

    (* X1. set the current level to the paragraph embedding level *)
    embeddingLevels.Push(cacheElement.paragraphEmbeddingLevel);
    overrideStati.Push(DOSNeutral);

    (* step through the whole line and apply an embedding level to each character. The comment-codes
       mark the according step in the official algorithm. *)
    textReader.SetDirection(1);
    textReader.SetPosition(start);
    amm.NewString(charType);
    amm.NewString(hlCharacterType);

    WHILE (textReader.GetPosition() < end) DO
      textReader.ReadCh(char);
      unicodePropertyReader.GetBidiCharacterType(char,charType);

      (* perform a normal step if HL3 is undefined for this position or perform the algorithm of the HL3 *)
      IF (HL3 = NIL) OR
        ~HL3(cacheElement.characters,cacheElement.characterEmbeddingLevels,cacheElement.originalCharacterTypes,cacheElement.characterTypes,
          char,charType,hlCharacterType,hlEmbeddingLevel) THEN

        (* X2 *)
        IF charType^ = "RLE" THEN
          GetNextOddEmbeddingLevel(embeddingLevels.Top(), nextLevel);
          IF (embeddingLevels.Top() # nextLevel) THEN
            embeddingLevels.Push(nextLevel);
            overrideStati.Push(DOSNeutral);
          ELSE
            INC(surplusLevels);
          END;
          cacheElement.characterTypes[i] := Strings.NewString("BN");
        (* X3 *)
        ELSIF charType^ = "LRE" THEN
          GetNextEvenEmbeddingLevel(embeddingLevels.Top(), nextLevel);
          IF (embeddingLevels.Top() # nextLevel) THEN
            embeddingLevels.Push(nextLevel);
            overrideStati.Push(DOSNeutral);
          ELSE
            INC(surplusLevels);
          END;
          cacheElement.characterTypes[i] := Strings.NewString("BN");
        (* X4 *)
        ELSIF charType^ = "RLO" THEN
          GetNextOddEmbeddingLevel(embeddingLevels.Top(), nextLevel);
          IF (embeddingLevels.Top() # nextLevel) THEN
            embeddingLevels.Push(nextLevel);
            overrideStati.Push(DOSRightToLeft);
          ELSE
            INC(surplusLevels);
          END;
          cacheElement.characterTypes[i] := Strings.NewString("BN");
        (* X5 *)
        ELSIF charType^ = "LRO" THEN
          GetNextEvenEmbeddingLevel(embeddingLevels.Top(), nextLevel);
          IF (embeddingLevels.Top() # nextLevel) THEN
            embeddingLevels.Push(nextLevel);
            overrideStati.Push(DOSLeftToRight);
          ELSE
            INC(surplusLevels);
          END;
          cacheElement.characterTypes[i] := Strings.NewString("BN");
        (* X7 *)
        ELSIF charType^ = "PDF" THEN
          IF surplusLevels > 0 THEN
            DEC(surplusLevels);
          ELSE
            (* pop both stacks, but never remove their bottom entry (pushed in X1) *)
            dummyEmbeddingLevel := embeddingLevels.Pop();
            IF (embeddingLevels.Size() < 1) THEN
              embeddingLevels.Push(dummyEmbeddingLevel);
            END;
            dummyOverrideStatus := overrideStati.Pop();
            IF (overrideStati.Size() < 1) THEN
              overrideStati.Push(dummyOverrideStatus);
            END;
          END;
          cacheElement.characterTypes[i] := Strings.NewString("BN");
        (* X6 *)
        ELSE
          (* check the previously gathered information and change the character types accordingly *)
          IF (overrideStati.Top() = DOSNeutral) THEN
            amm.NewCharacterType(cacheElement.characterTypes[i]);
            Strings.Copy(charType^,0,LEN(charType),cacheElement.characterTypes[i]^);
          ELSIF (overrideStati.Top() = DOSLeftToRight) THEN
            cacheElement.characterTypes[i] := Strings.NewString("L");
          ELSIF (overrideStati.Top() = DOSRightToLeft) THEN
            cacheElement.characterTypes[i] := Strings.NewString("R");
          END;
        END;

        cacheElement.characterEmbeddingLevels[i] := embeddingLevels.Top();
      ELSE
        (* HL3 handled this character: take over its type and level *)
        amm.NewCharacterType(cacheElement.characterTypes[i]);
        Strings.Copy(hlCharacterType^,0,LEN(hlCharacterType),cacheElement.characterTypes[i]^);
        cacheElement.characterEmbeddingLevels[i] := hlEmbeddingLevel;
      END;

      (* the looked-up type and the character itself are always remembered *)
      amm.NewCharacterType(cacheElement.originalCharacterTypes[i]);
      Strings.Copy(charType^,0,LEN(charType),cacheElement.originalCharacterTypes[i]^);
      cacheElement.characters[i] := char;
      INC(i);
    END;

    amm.FreeString(hlCharacterType);
    amm.FreeString(charType);
    amm.FreeIntegerStack(embeddingLevels);
    amm.FreeIntegerStack(overrideStati);

    (* Note that X8 need not be done explicitly since paragraphs are considered individually. X9 is ignored
       and therefore special rules apply further on. *)

    (* apply the gathered information to the cache entry *)
    cacheElement.lineLength := lineLength;
    amm.NewBoolArray(cacheElement.dirty,lineLength);
    amm.NewTextReaderArray(cacheElement.textReaders,lineLength);
    amm.NewPosArrays(cacheElement.posArrays,lineLength);
    amm.NewPosArrays(cacheElement.reversePosArrays,lineLength);
  END ApplyExplicitLevels;
  1023. (* Scans through the text and resolves all weak types *)
  1024. PROCEDURE ResolveWeakTypes(VAR cacheElement : LineElement);
  (* Resolves the weak character types stored in cacheElement.characterTypes, in place and in two passes
  over the line:
  - pass 1 (W1. - W3.) treats "NSM", "AL" and the "EN" to "AN" conversion,
  - pass 2 (W4. - W7.) treats "ES", "CS", "ET" and the "EN" to "L" conversion.
  HL2 and HL5 are consulted where set. Both passes start by reading characterEmbeddingLevels[0].
  NOTE(review): that access presumably requires a non-empty line - confirm that callers never pass
  lineLength = 0. *)
  1025. VAR
  1026. i,nextI : LONGINT;
  1027. pos, state, ENstate : LONGINT;
  1028. passedBNs : IntegerStack;
  1029. charType : Strings.String;
  1030. lastCharType : Strings.String;
  1031. lastStrongType : LONGINT;
  1032. newLevel : BOOLEAN;
  1033. dummyBool : BOOLEAN;
  1034. dummyInt : LONGINT;
  1035. BEGIN
  (* pass 1 starts without a remembered strong type; lastCharType is seeded from the level of the first
  character and the paragraph embedding level *)
  1036. lastStrongType := NeutralType;
  1037. newLevel := FALSE;
  1038. amm.NewString(lastCharType);
  1039. GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[0],cacheElement.paragraphEmbeddingLevel,lastCharType);
  1040. (* W1. - W3. Non-spacing marks, European numbers, pt. I and Arabic numbers *)
  1041. FOR i := 0 TO cacheElement.lineLength - 1 DO
  1042. (* HL5. provide artificial context if necessary *)
  1043. IF HL5 # NIL THEN
  1044. dummyBool := HL5(cacheElement,i,lastCharType,lastStrongType,dummyInt,WeakTypes1);
  1045. END;
  1046. (* W1. a non-spacing mark takes over the remembered type *)
  1047. IF cacheElement.characterTypes[i]^ = "NSM" THEN
  1048. Strings.Copy(lastCharType^,0,MIN(LEN(cacheElement.characterTypes[i]),LEN(lastCharType)),cacheElement.characterTypes[i]^);
  1049. END;
  1050. (* border of two runs: the next character has a different level. lastCharType is recomputed from both
  levels and newLevel keeps the "AL" and strong-type branches below from overwriting it. *)
  1051. IF (i < cacheElement.lineLength - 1) & (cacheElement.characterEmbeddingLevels[i] # cacheElement.characterEmbeddingLevels[i+1]) THEN
  1052. GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[i],cacheElement.characterEmbeddingLevels[i+1],lastCharType);
  1053. lastStrongType := NeutralType;
  1054. newLevel := TRUE;
  1055. END;
  1056. (* W3. "AL" becomes "R"; lastStrongType = ArabicNumber records the "AL" for the W2. test below *)
  1057. IF cacheElement.characterTypes[i]^ = "AL" THEN
  1058. IF ~newLevel THEN
  1059. lastCharType^ := "R";
  1060. lastStrongType := ArabicNumber;
  1061. END;
  1062. cacheElement.characterTypes[i]^ := "R";
  1063. (* remember strong type for W2 *)
  1064. ELSIF IsStrongCharacterType(cacheElement.characterTypes[i]) & ~newLevel THEN
  1065. Strings.Copy(cacheElement.characterTypes[i]^,0,MIN(LEN(cacheElement.characterTypes[i]),LEN(lastCharType)),lastCharType^);
  1066. lastStrongType := NeutralType;
  1067. (* W2. remember the new type for W1 *)
  1068. ELSIF cacheElement.characterTypes[i]^ = "EN" THEN
  1069. (* set AN/EN explicitly if HL2 is defined *)
  1070. IF HL2 = NIL THEN
  1071. IF lastStrongType = ArabicNumber THEN
  1072. cacheElement.characterTypes[i]^ := "AN";
  1073. lastCharType^ := "AN";
  1074. END;
  1075. ELSE
  1076. charType := HL2(cacheElement,i,lastStrongType);
  1077. Strings.Copy(charType^,0,MIN(LEN(cacheElement.characterTypes[i]),LEN(charType)),cacheElement.characterTypes[i]^);
  1078. Strings.Copy(charType^,0,MIN(LEN(lastCharType),LEN(charType)),lastCharType^);
  1079. END;
  1080. (* remember character type for W1, except for BN *)
  1081. ELSIF cacheElement.characterTypes[i]^ # "BN" THEN
  1082. Strings.Copy(cacheElement.characterTypes[i]^,0,MIN(LEN(cacheElement.characterTypes[i]),LEN(lastCharType)),lastCharType^);
  1083. END;
  1084. newLevel := FALSE;
  1085. END;
  1086. (* W4. - W7. Separators, Terminators and European Numbers, pt. II *)
  1087. (* start with a neutral state and a European Number state according to the first run's sor *)
  1088. state := NeutralType;
  1089. GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[0],cacheElement.paragraphEmbeddingLevel,lastCharType);
  1090. IF lastCharType^ = "L" THEN
  1091. ENstate := LeftStrongType;
  1092. ELSE
  1093. ENstate := NeutralType;
  1094. END;
  1095. (* the stack is used to gather Boundary Neutrals on the way that may be changed according to a following character *)
  1096. amm.NewIntegerStack(passedBNs,cacheElement.lineLength);
  1097. nextI := 0;
  1098. FOR i := 0 TO cacheElement.lineLength - 1 DO
  1099. (* HL5. provide artificial context if necessary *)
  1100. IF HL5 # NIL THEN
  1101. dummyBool := HL5(cacheElement,i,lastCharType,state,ENstate,WeakTypes2);
  1102. END;
  1103. (* W7. & remember state for W4 and W5 *)
  1104. IF cacheElement.characterTypes[i]^ = "EN" THEN
  1105. IF ENstate = LeftStrongType THEN
  1106. cacheElement.characterTypes[i]^ := "L";
  1107. END;
  1108. state := EuropeanNumber;
  1109. passedBNs.Purge;
  1110. (* remember state for W4 *)
  1111. ELSIF cacheElement.characterTypes[i]^ = "AN" THEN
  1112. state := ArabicNumber;
  1113. passedBNs.Purge;
  1114. (* push a BN onto the BN stack *)
  1115. ELSIF cacheElement.characterTypes[i]^ = "BN" THEN
  1116. passedBNs.Push(i);
  1117. (* W4. European Separator *)
  1118. ELSIF cacheElement.characterTypes[i]^ = "ES" THEN
  1119. (* if the last character was a European Number, apply some changes *)
  1120. IF state = EuropeanNumber THEN
  1121. (* if the next character (ignoring BNs) is a European Number, the character may be changed *)
  1122. IF IsEuropeanNumberAdjacent(i+1,FALSE,cacheElement,nextI,state,ENstate) THEN
  1123. (* W7 *)
  1124. IF ENstate = LeftStrongType THEN
  1125. cacheElement.characterTypes[i]^ := "L";
  1126. ELSE
  1127. cacheElement.characterTypes[i]^ := "EN";
  1128. END;
  1129. (* or turned to Other Neutral otherwise *)
  1130. ELSE
  1131. cacheElement.characterTypes[i]^ := "ON";
  1132. END;
  (* NOTE(review): this reassigns the FOR control variable. nextI is delivered by IsEuropeanNumberAdjacent -
  presumably the last position it has already dealt with; confirm against that procedure. The same
  pattern is used in the "CS" and "ET" branches below. *)
  1133. i := nextI;
  1134. (* or turn the character to Other Neutral otherwise *)
  1135. ELSE
  1136. cacheElement.characterTypes[i]^ := "ON";
  1137. END;
  1138. (* all BNs since the last 'real' character turn to Other Neutral *)
  (* every pop loop in this procedure ends on the first negative value returned by Pop - presumably its
  'stack is empty' result; verify against IntegerStack *)
  1139. LOOP
  1140. pos := passedBNs.Pop();
  1141. IF pos < 0 THEN
  1142. EXIT;
  1143. ELSE
  1144. cacheElement.characterTypes[pos]^ := "ON";
  1145. END;
  1146. END;
  1147. (* W4. Common Separator *)
  1148. ELSIF cacheElement.characterTypes[i]^ = "CS" THEN
  1149. (* if the last character was a European Number, apply some changes *)
  1150. IF state = EuropeanNumber THEN
  1151. (* if the next character (ignoring BNs) is a European Number, the character may be changed *)
  1152. IF IsEuropeanNumberAdjacent(i+1,FALSE,cacheElement,nextI,state,ENstate) THEN
  1153. (* W7 *)
  1154. IF ENstate = LeftStrongType THEN
  1155. cacheElement.characterTypes[i]^ := "L";
  1156. ELSE
  1157. cacheElement.characterTypes[i]^ := "EN";
  1158. END;
  1159. (* or turned to Other Neutral otherwise *)
  1160. ELSE
  1161. cacheElement.characterTypes[i]^ := "ON";
  1162. END;
  1163. i := nextI;
  1164. (* if the last character was an Arabic Number, apply some changes *)
  1165. ELSIF state = ArabicNumber THEN
  1166. (* if the next character (ignoring BNs) is an Arabic Number, the character may be changed to AN *)
  1167. IF IsArabicNumberAdjacent(i+1,cacheElement,nextI,state,ENstate) THEN
  1168. cacheElement.characterTypes[i]^ := "AN";
  1169. (* or to ON otherwise *)
  1170. ELSE
  1171. cacheElement.characterTypes[i]^ := "ON";
  1172. END;
  1173. i := nextI;
  1174. (* if the last character was neither a European nor an Arabic Number, the character becomes ON *)
  1175. ELSE (* state = NeutralType *)
  1176. cacheElement.characterTypes[i]^ := "ON";
  1177. END;
  1178. (* all BNs since the last 'real' character turn to Other Neutral *)
  1179. LOOP
  1180. pos := passedBNs.Pop();
  1181. IF pos < 0 THEN
  1182. EXIT;
  1183. ELSE
  1184. cacheElement.characterTypes[pos]^ := "ON";
  1185. END;
  1186. END;
  1187. (* W5. European Terminator *)
  1188. ELSIF cacheElement.characterTypes[i]^ = "ET" THEN
  1189. (* if the last character was a European Number, apply some changes *)
  1190. IF state = EuropeanNumber THEN
  1191. (* W7 *)
  1192. IF ENstate = LeftStrongType THEN
  1193. cacheElement.characterTypes[i]^ := "L";
  1194. (* all BNs since the last 'real' character turn to L *)
  1195. LOOP
  1196. pos := passedBNs.Pop();
  1197. IF pos < 0 THEN
  1198. EXIT;
  1199. ELSE
  1200. cacheElement.characterTypes[pos]^ := "L";
  1201. END;
  1202. END;
  1203. ELSE
  1204. cacheElement.characterTypes[i]^ := "EN";
  1205. (* all BNs since the last 'real' character turn to EN *)
  1206. LOOP
  1207. pos := passedBNs.Pop();
  1208. IF pos < 0 THEN
  1209. EXIT;
  1210. ELSE
  1211. cacheElement.characterTypes[pos]^ := "EN";
  1212. END;
  1213. END;
  1214. END;
  1215. (* look for the character following the sequence of ETs and BNs *)
  1216. ELSE
  1217. (* if it is a European Number, apply some changes *)
  1218. IF IsEuropeanNumberAdjacent(i+1,TRUE,cacheElement,nextI,state,ENstate) THEN
  1219. (* W7 *)
  1220. IF ENstate = LeftStrongType THEN
  1221. (* all BNs since the last 'real' character turn to L *)
  1222. cacheElement.characterTypes[i]^ := "L";
  1223. LOOP
  1224. pos := passedBNs.Pop();
  1225. IF pos < 0 THEN
  1226. EXIT;
  1227. ELSE
  1228. cacheElement.characterTypes[pos]^ := "L";
  1229. END;
  1230. END;
  1231. ELSE
  1232. cacheElement.characterTypes[i]^ := "EN";
  1233. (* all BNs since the last 'real' character turn to EN *)
  1234. LOOP
  1235. pos := passedBNs.Pop();
  1236. IF pos < 0 THEN
  1237. EXIT;
  1238. ELSE
  1239. cacheElement.characterTypes[pos]^ := "EN";
  1240. END;
  1241. END;
  1242. END;
  1243. (* or change them to Other Neutral otherwise *)
  1244. ELSE
  1245. cacheElement.characterTypes[i]^ := "ON";
  1246. (* all BNs since the last 'real' character turn to ON *)
  1247. LOOP
  1248. pos := passedBNs.Pop();
  1249. IF pos < 0 THEN
  1250. EXIT;
  1251. ELSE
  1252. cacheElement.characterTypes[pos]^ := "ON";
  1253. END;
  1254. END;
  1255. END;
  1256. i := nextI;
  1257. END;
  1258. (* otherwise inspect the character and change the states accordingly *)
  1259. ELSE
  1260. IF cacheElement.characterTypes[i]^ = "L" THEN
  1261. ENstate := LeftStrongType;
  1262. ELSIF cacheElement.characterTypes[i]^ = "R" THEN
  1263. ENstate := NeutralType;
  1264. END;
  1265. state := NeutralType;
  1266. passedBNs.Purge;
  1267. END;
  1268. (* border of two runs: ENstate is re-seeded from the levels on both sides of the border. Note that i may
  have been moved forward by one of the branches above. *)
  1269. IF (i < cacheElement.lineLength - 1) & (cacheElement.characterEmbeddingLevels[i] # cacheElement.characterEmbeddingLevels[i+1]) THEN
  1270. amm.NewString(charType);
  1271. GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[i],cacheElement.characterEmbeddingLevels[i+1],charType);
  1272. IF charType^ = "L" THEN
  1273. ENstate := LeftStrongType;
  1274. ELSE
  1275. ENstate := NeutralType;
  1276. END;
  1277. amm.FreeString(charType);
  1278. END;
  1279. END;
  (* hand the temporaries back to the memory manager *)
  1280. amm.FreeString(lastCharType);
  1281. amm.FreeIntegerStack(passedBNs);
  1282. END ResolveWeakTypes;
  1283. (* Scan through the text and resolve neutral types *)
  PROCEDURE ResolveNeutralTypes(VAR cacheElement : LineElement);
  (* Replaces every run of neutral character types in cacheElement.characterTypes (N1. & N2.).
     For the first character of a run a context type is determined (by HL5 if that hook is set and
     returns TRUE, by GetFinalDirectionalType otherwise). If HasSameDirectionalType confirms that type
     for the text behind the run, the whole run receives it; otherwise every character of the run gets
     the type GetBorderOfRunCharacterType derives from its own embedding level. *)
  VAR
    idx, lastIdx : LONGINT;
    contextType, hookType : Strings.String;
    unusedState : LONGINT;
    sameDirection : BOOLEAN;
  BEGIN
    amm.NewString(contextType);
    amm.NewString(hookType);

    lastIdx := cacheElement.lineLength - 1;
    idx := 0;
    WHILE idx <= lastIdx DO
      IF IsNeutralCharacterType(cacheElement.characterTypes[idx]) THEN
        (* context of the run: HL5 first, then the border towards the paragraph resp. the previous character *)
        IF (HL5 # NIL) & HL5(cacheElement,idx,hookType,unusedState,unusedState,NeutralTypes) THEN
          Strings.Copy(hookType^,0,LEN(hookType),contextType^);
        ELSIF idx = 0 THEN
          GetFinalDirectionalType(cacheElement.characterTypes[idx],cacheElement.characterEmbeddingLevels[idx],cacheElement.paragraphEmbeddingLevel,contextType);
        ELSE
          GetFinalDirectionalType(cacheElement.characterTypes[idx],cacheElement.characterEmbeddingLevels[idx],cacheElement.characterEmbeddingLevels[idx-1],contextType);
        END;

        (* decided once per run, before any character of the run is changed *)
        sameDirection := HasSameDirectionalType(contextType,idx+1,cacheElement);

        (* rewrite the run; idx ends on the first character behind it *)
        WHILE (idx < cacheElement.lineLength) & IsNeutralCharacterType(cacheElement.characterTypes[idx]) DO
          IF sameDirection THEN
            (* N1. & N2. same type on both sides: the run takes that type *)
            Strings.Copy(contextType^,0,LEN(contextType),cacheElement.characterTypes[idx]^);
          ELSE
            (* otherwise the type follows from the embedding level *)
            GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[idx],cacheElement.characterEmbeddingLevels[idx]-1,cacheElement.characterTypes[idx]);
          END;
          INC(idx);
        END;
      END;
      (* the character behind a run is not neutral, so stepping over it here loses nothing *)
      INC(idx);
    END;

    amm.FreeString(contextType);
    amm.FreeString(hookType);
  END ResolveNeutralTypes;
  1322. (* Scan through the text and resolve implicit levels *)
  PROCEDURE ResolveImplicitLevels(VAR cacheElement : LineElement);
  (* Raises the embedding level of each character according to its resolved type:
     I1. (even level) "R" is raised by 1, "AN" and "EN" by 2;
     I2. (odd level)  "L", "EN" and "AN" are raised by 1.
     All other combinations keep their level. *)
  VAR
    idx : LONGINT;
    resolvedType : Strings.String;
    isNumber : BOOLEAN;
  BEGIN
    idx := 0;
    WHILE idx < cacheElement.lineLength DO
      resolvedType := cacheElement.characterTypes[idx];
      isNumber := (resolvedType^ = "EN") OR (resolvedType^ = "AN");

      IF ODD(cacheElement.characterEmbeddingLevels[idx]) THEN
        (* I2. *)
        IF isNumber OR (resolvedType^ = "L") THEN
          INC(cacheElement.characterEmbeddingLevels[idx]);
        END;
      ELSIF resolvedType^ = "R" THEN
        (* I1. *)
        INC(cacheElement.characterEmbeddingLevels[idx]);
      ELSIF isNumber THEN
        (* I1. *)
        INC(cacheElement.characterEmbeddingLevels[idx],2);
      END;

      INC(idx);
    END;
  END ResolveImplicitLevels;
  1344. (* Reorder an already reformatted line. The line is not necessarily the same line that has been reformatted before
  1345. but a whole part of it. *)
  PROCEDURE ReorderLine*(start, length : LONGINT) : Texts.TextReader;
  (* Reorders (a part of) an already reformatted line.
     start  - text position used to look up the line in the line cache and passed on to ReorderClause
     length - number of characters to reorder
     Returns the result of ReorderClause, or NIL if the text has not been reformatted yet or is not
     UTF formatted. *)
  VAR
    lineEntry : LineElement;
    reordered : Texts.TextReader;
  BEGIN
    reordered := NIL;
    IF reformatted & textReader.text.isUTF THEN
      (* the cache entry is looked up by the start position *)
      lineCache.FindElement(start,lineEntry);
      reordered := ReorderClause(lineEntry,start,length);
    END;
    RETURN reordered;
  END ReorderLine;
  1359. (* Reorder an already reformatted segment. *)
  PROCEDURE ReorderSegment*(id : LONGINT) : Texts.TextReader;
  (* Reorders an already reformatted segment.
     id - identifier of the segment in the segment cache
     Returns the result of ReorderClause for the whole segment. Returns NIL if the text has not been
     reformatted yet, is not UTF formatted, HL4 is off, no segment cache exists, or the segment cache
     has no entry for 'id'. *)
  VAR
    cacheElement : LineElement;
  BEGIN
    IF ~reformatted OR ~textReader.text.isUTF OR ~HL4 OR (segmentCache = NIL) THEN
      RETURN NIL;
    END;

    (* get the cache entry by the id of the segment *)
    segmentCache.FindElement(id,cacheElement);

    (* FindElement reports an unknown id with NIL; there is nothing to reorder then *)
    IF cacheElement = NIL THEN
      RETURN NIL;
    END;

    (* reorder the segment *)
    RETURN ReorderClause(cacheElement,cacheElement.pos,cacheElement.lineLength);
  END ReorderSegment;
  1372. (* Reorders a part of the text (either a single line or a predefined segment) *)
  1373. PROCEDURE ReorderClause(VAR cacheElement : LineElement; start, length : LONGINT) : Texts.TextReader;
  1374. VAR
  1375. bidiTextReader : Texts.TextReader;
  1376. reorderedText : Texts.Text;
  1377. posArray, reversePosArray : PosArray;
  1378. i, newPos : LONGINT;
  1379. relStart: LONGINT;
  1380. maxLevel, minLevel : LONGINT;
  1381. oneCharString : Texts.PUCS32String;
  1382. lastChar : Strings.String;
  1383. BEGIN
  1384. relStart := start - cacheElement.pos;
  1385. (* if the paragraph is empty, nothing is to be done *)
  1386. IF cacheElement.paragraphEmbeddingLevel = -1 THEN
  1387. RETURN NIL;
  1388. END;
  1389. IF relStart >= cacheElement.lineLength THEN
  1390. RETURN NIL;
  1391. END;
  1392. (* if this line has already been reordered, use the cached, reordered line *)
  1393. IF ~cacheElement.dirty[relStart] THEN
  1394. RETURN cacheElement.textReaders[relStart];
  1395. END;
  1396. (* the length of the text cannot be longer than the actual line *)
  1397. length := MIN(length,cacheElement.lineLength-relStart);
  1398. IF CharacterDebugging THEN
  1399. KernelLog.Ln;
  1400. KernelLog.String("Reordering line ("); KernelLog.Int(start,4); KernelLog.String(",");
  1401. KernelLog.Int(start+length-1,4); KernelLog.String(")..."); KernelLog.Ln;
  1402. END;
  1403. IF CharacterDebugging THEN PrintLineEmbeddingLevels(cacheElement.characterEmbeddingLevels); END;
  1404. (* L1. Reset trailing whitespaces to the paragraph embedding level *)
  1405. i := length - 1;
  1406. LOOP
  1407. IF (i < relStart) OR ~IsNeutralCharacterType(cacheElement.originalCharacterTypes[i]) THEN
  1408. EXIT;
  1409. ELSE
  1410. cacheElement.characterEmbeddingLevels[i] := cacheElement.paragraphEmbeddingLevel;
  1411. DEC(i);
  1412. END;
  1413. END;
  1414. (* L2. Reordering *)
  1415. maxLevel := GetHighestLevel(relStart,length,cacheElement.characterEmbeddingLevels^);
  1416. minLevel := GetLowestOddLevel(relStart,length,cacheElement.characterEmbeddingLevels^);
  1417. lastChar := cacheElement.originalCharacterTypes[relStart+length-1];
  1418. (* fill the position array with its 'normal' positions *)
  1419. IF lastChar^ = "B" THEN
  1420. amm.NewPosArray(cacheElement.posArrays[relStart],length);
  1421. posArray := cacheElement.posArrays[relStart];
  1422. FillPositionArray(relStart,posArray.array^);
  1423. (* iterate through all levels from the highest to the lowest, odd level *)
  1424. FOR i := maxLevel TO minLevel BY - 1 DO
  1425. ReorderSubstrings(relStart,i,maxLevel,cacheElement.characterEmbeddingLevels^,posArray,lastChar);
  1426. END;
  1427. (* fill the reverse position array *)
  1428. amm.NewPosArray(cacheElement.reversePosArrays[relStart],length);
  1429. reversePosArray := cacheElement.reversePosArrays[relStart];
  1430. ELSE
  1431. amm.NewPosArray(cacheElement.posArrays[relStart],length+1);
  1432. posArray := cacheElement.posArrays[relStart];
  1433. FillPositionArray(relStart,posArray.array^);
  1434. (* iterate through all levels from the highest to the lowest, odd level *)
  1435. FOR i := maxLevel TO minLevel BY - 1 DO
  1436. ReorderSubstrings(relStart,i,maxLevel,cacheElement.characterEmbeddingLevels^,posArray,Strings.NewString("B"));
  1437. END;
  1438. (* fill the reverse position array *)
  1439. amm.NewPosArray(cacheElement.reversePosArrays[relStart],length+1);
  1440. reversePosArray := cacheElement.reversePosArrays[relStart];
  1441. END;
  1442. FOR i := 0 TO posArray.size - 1 DO
  1443. reversePosArray.array[posArray.array[i] - relStart] := i;
  1444. END;
  1445. IF CharacterDebugging THEN PrintLineEmbeddingLevels(cacheElement.characterEmbeddingLevels); END;
  1446. (* L4. Mirroring *)
  1447. NEW(reorderedText);
  1448. amm.NewUString(oneCharString);
  1449. oneCharString[1] := 0H;
  1450. (* read the characters from the cache and write them to a special text *)
  1451. reorderedText.AcquireWrite;
  1452. FOR i := 0 TO length - 1 DO
  1453. newPos := posArray.array[i];
  1454. oneCharString[0] := cacheElement.characters[newPos];
  1455. (* use the mirrored character if necessary *)
  1456. MirrorCharacter(oneCharString[0],cacheElement.characterEmbeddingLevels[newPos]);
  1457. reorderedText.InsertUCS32(i,oneCharString^);
  1458. END;
  1459. reorderedText.ReleaseWrite;
  1460. (* store the new text in a special text reader *)
  1461. NEW(bidiTextReader,reorderedText);
  1462. IF CharacterDebugging THEN
  1463. KernelLog.Ln; KernelLog.String("Printing line reordered positions::"); KernelLog.Ln;
  1464. FOR i := 0 TO posArray.size - 1 DO
  1465. KernelLog.Int(posArray.array[i],3); KernelLog.String(" ");
  1466. END;
  1467. KernelLog.Ln;
  1468. reorderedText.AcquireRead;
  1469. PrintCodedLine(bidiTextReader,0,length);
  1470. reorderedText.ReleaseRead;
  1471. KernelLog.Ln; KernelLog.String("...reordering done! Text length: "); KernelLog.Int(length,4); KernelLog.Ln;
  1472. END;
  1473. (* mark this line not-dirty and put the gathered information to the cache *)
  1474. cacheElement.dirty[relStart] := FALSE;
  1475. cacheElement.textReaders[relStart] := bidiTextReader;
  1476. amm.FreeUString(oneCharString);
  1477. (* return the special text renderer with the reordered text *)
  1478. RETURN bidiTextReader;
  1479. END ReorderClause;
  (* Returns the position in the reordered text given its internal position and the start
     position of its line.
     pos       - internal (logical) position; negative values are treated as 0
     lineStart - internal position at which the displayed line starts
     Returns pos unchanged whenever the needed reordering information is not available: text not
     reformatted or not UTF, no cache entry, pos behind the cached entry, line start or pos
     outside the stored reverse position array. *)
  PROCEDURE GetDisplayPosition*(pos, lineStart : LONGINT) : LONGINT;
  VAR
    relPos, relLineStart : LONGINT;
    cacheElement : LineElement;
    posArray : PosArray;
  BEGIN
    (* if the text has not been reformatted yet or isn't even formatted in UTF, do nothing *)
    IF ~reformatted OR ~textReader.text.isUTF THEN
      RETURN pos;
    END;

    (* avoid circling at line 0 *)
    IF pos < 0 THEN
      pos := 0
    END;

    (* get the cache entry and return the normal position if anything that's needed is not stored properly *)
    lineCache.FindElement(pos,cacheElement);
    IF (cacheElement = NIL) OR (cacheElement.reversePosArrays = NIL) OR (cacheElement.pos + cacheElement.lineLength <= pos) THEN
      RETURN pos;
    END;

    (* compute the relative position of the line start and the relative position inside the line *)
    relLineStart := lineStart - cacheElement.pos;
    relPos := pos - lineStart;

    (* a line start outside the cached entry cannot have a reverse position array *)
    IF (relLineStart < 0) OR (relLineStart >= cacheElement.lineLength) THEN
      RETURN pos;
    END;

    (* get the appropriate reverse position array and return the bidi position if available;
       positions outside the array are returned unchanged instead of indexing out of bounds *)
    posArray := cacheElement.reversePosArrays[relLineStart];
    IF (posArray = NIL) OR (relPos < 0) OR (relPos >= posArray.size) THEN
      RETURN pos;
    END;
    RETURN posArray.array[relPos] + lineStart;
  END GetDisplayPosition;
  (* Maps a display position back to the internal position.
     pos       - position as displayed
     lineStart - start position of the line that contains pos
     Yields pos itself when the text is not reformatted, is not UTF, or when the cache holds no
     position array for the line. A position past the end of the stored position array is mapped
     through the array's last entry. *)
  PROCEDURE GetInternalPosition*(pos, lineStart : LONGINT) : LONGINT;
  VAR
    offsetInLine, lineOffset, index : LONGINT;
    cacheElement : LineElement;
    posArray : PosArray;
  BEGIN
    (* unformatted or non-UTF texts are not reordered at all *)
    IF ~reformatted OR ~textReader.text.isUTF THEN
      RETURN pos;
    END;

    (* look up the cache entry; without it or without position arrays the position is kept *)
    lineCache.FindElement(pos,cacheElement);
    IF (cacheElement = NIL) OR (cacheElement.posArrays = NIL) THEN
      RETURN pos;
    END;

    (* offset of the line inside the cache entry and offset of the position inside the line *)
    lineOffset := lineStart - cacheElement.pos;
    offsetInLine := pos - lineStart;

    posArray := cacheElement.posArrays[lineOffset];
    IF posArray = NIL THEN
      RETURN pos;
    END;

    (* clip to the last stored entry; entries are relative to the start of the cache entry *)
    index := MIN(offsetInLine,posArray.size-1);
    RETURN posArray.array[index] + cacheElement.pos;
  END GetInternalPosition;
  (* Returns the character's implicit (resolved) embedding level.
     pos - internal position of the character
     Returns 0 if the text is not reformatted, not UTF, or no cache entry exists for pos.
     Returns the paragraph embedding level (at least 0) if the entry stores no character levels
     or is empty. For a position behind the cached entry the level of the entry's last character
     is returned. *)
  PROCEDURE GetImplicitLevel*(pos : LONGINT) : LONGINT;
  VAR
    relPos : LONGINT;
    cacheElement : LineElement;
  BEGIN
    (* if the text has not been reformatted yet or isn't even formatted in UTF, do nothing *)
    IF ~reformatted OR ~textReader.text.isUTF THEN
      RETURN 0;
    END;

    lineCache.FindElement(pos,cacheElement);

    (* no cache entry: there is no paragraph level to fall back to, so report level 0 *)
    IF cacheElement = NIL THEN
      RETURN 0;
    END;

    (* no character levels (or no characters) stored: fall back to the paragraph level *)
    IF (cacheElement.characterEmbeddingLevels = NIL) OR (cacheElement.lineLength <= 0) THEN
      RETURN MAX(0,cacheElement.paragraphEmbeddingLevel);
    END;

    (* positions behind the entry take the level of its last character *)
    IF cacheElement.pos + cacheElement.lineLength <= pos THEN
      RETURN cacheElement.characterEmbeddingLevels[cacheElement.lineLength-1];
    END;

    (* compute the relative position inside the entry and return the implicit level *)
    relPos := pos - cacheElement.pos;
    RETURN cacheElement.characterEmbeddingLevels[relPos];
  END GetImplicitLevel;
  (* Yields the embedding level of the paragraph that contains the character at pos.
     The result is 0 when the text is not reformatted, is not UTF, has no cache entry for pos,
     or when the cached paragraph level is -1. *)
  PROCEDURE GetParagraphEmbeddingLevel*(pos : LONGINT) : LONGINT;
  VAR
    cacheElement : LineElement;
  BEGIN
    (* only reformatted UTF texts carry usable cache information *)
    IF reformatted & textReader.text.isUTF THEN
      lineCache.FindElement(pos,cacheElement);
      IF (cacheElement # NIL) & (cacheElement.paragraphEmbeddingLevel # -1) THEN
        RETURN cacheElement.paragraphEmbeddingLevel;
      END;
    END;

    (* default level for everything that could not be resolved *)
    RETURN 0;
  END GetParagraphEmbeddingLevel;
  (* Returns the cached text reader of a line if available.
     start   - internal start position of the line
     isFirst - set to TRUE/FALSE only when a reader is returned: TRUE if the line starts at the
               beginning of its cache entry; left untouched otherwise
     Returns NIL if the text is not reformatted or not UTF, if no cache entry exists, if the
     paragraph is empty, if start lies outside the entry, or if the line is still dirty. *)
  PROCEDURE ReadyTextReader*(start : LONGINT; VAR isFirst : BOOLEAN) : Texts.TextReader;
  VAR
    cacheElement : LineElement;
    relStart : LONGINT;
  BEGIN
    (* if the text has not been reformatted yet or isn't even formatted in UTF, do nothing *)
    IF (~reformatted) OR ~textReader.text.isUTF THEN
      RETURN NIL;
    END;

    (* get the cache entry; without one there cannot be a cached reader *)
    lineCache.FindElement(start,cacheElement);
    IF cacheElement = NIL THEN
      RETURN NIL;
    END;

    (* relative start position of the line inside the paragraph *)
    relStart := start - cacheElement.pos;

    (* if the paragraph is empty, nothing is to be done *)
    IF cacheElement.paragraphEmbeddingLevel = -1 THEN
      RETURN NIL;
    END;

    (* don't access the cache line if it's not the correct one *)
    IF (relStart < 0) OR (relStart >= cacheElement.lineLength) THEN
      RETURN NIL;
    END;

    (* if the line is not dirty, check if it is the first possible line in the paragraph and return the text reader *)
    IF ~cacheElement.dirty[relStart] THEN
      isFirst := (relStart = 0);
      RETURN cacheElement.textReaders[relStart];
    END;

    RETURN NIL;
  END ReadyTextReader;
  (* Stores a text reader in the cache for the line starting at 'start'. This is only for
     contextually analyzed text readers!
     start      - internal start position of the line
     textReader - reader to store; a NIL reader is ignored
     Nothing is stored if the text is not reformatted, the reader's text is not UTF, no cache
     entry exists for start, or start lies outside that entry.
     NOTE(review): the parameter is named like the text reader the other procedures consult, so
     the isUTF test below inspects the text of the reader passed in - confirm this is intended. *)
  PROCEDURE SetReadyTextReader*(start : LONGINT; textReader : Texts.TextReader);
  VAR
    cacheElement : LineElement;
    relStart : LONGINT;
  BEGIN
    (* a missing reader cannot be inspected or cached *)
    IF textReader = NIL THEN
      RETURN;
    END;

    (* if the text has not been reformatted yet or isn't even formatted in UTF, do nothing *)
    IF (~reformatted) OR ~textReader.text.isUTF THEN
      RETURN;
    END;

    (* get the cache entry; without one there is no slot to store the reader in *)
    lineCache.FindElement(start,cacheElement);
    IF cacheElement = NIL THEN
      RETURN;
    END;

    (* relative start position of the line inside the paragraph *)
    relStart := start - cacheElement.pos;

    (* don't access the cache line if it's not the correct one *)
    IF (relStart < 0) OR (relStart >= cacheElement.lineLength) THEN
      RETURN;
    END;

    cacheElement.textReaders[relStart] := textReader;
  END SetReadyTextReader;
  (* Checks if the given position is the last one of its cached line.
     pos - internal position to test
     Returns TRUE if pos equals the last position covered by the cache entry found for pos;
     FALSE otherwise, including the case that no cache entry exists. *)
  PROCEDURE IsLastCharacterInLine*(pos : LONGINT) : BOOLEAN;
  VAR
    cacheElement : LineElement;
  BEGIN
    lineCache.FindElement(pos,cacheElement);

    (* a position without a cache entry cannot be identified as the end of a line *)
    IF cacheElement = NIL THEN
      RETURN FALSE;
    END;
    RETURN pos = cacheElement.pos + cacheElement.lineLength - 1;
  END IsLastCharacterInLine;
  (* Determines where the paragraph containing pos ends by scanning the text forwards. The cache
     is not consulted because this procedure may run before any reformatting took place.
     pos - position at which the scan starts
     end - receives the reader position after the scan stopped, i.e. after a character of bidi
           type "B" or the end-of-text character was read, or the end of the text was reached *)
  PROCEDURE FindEndOfParagraph(pos : LONGINT; VAR end : LONGINT);
  VAR
    ch : Texts.Char32;
    bidiType : Strings.String;
    total : LONGINT;
  BEGIN
    (* set up the scan state *)
    ch := 0H;
    total := textReader.text.GetLength();
    amm.NewString(bidiType);
    textReader.SetDirection(1);
    textReader.SetPosition(pos);

    (* always read at least one character, then stop at the first terminating condition *)
    LOOP
      textReader.ReadCh(ch);
      unicodePropertyReader.GetBidiCharacterType(ch,bidiType);
      IF bidiType^ = "B" THEN EXIT END;
      IF ch = UnicodeProperties.EOT THEN EXIT END;
      IF textReader.GetPosition() >= total THEN EXIT END;
    END;

    end := textReader.GetPosition();
    amm.FreeString(bidiType);
  END FindEndOfParagraph;
  (* Finds the start of a paragraph. Since this procedure can be called before any reformatting,
     the cache can't be used.
     pos   - position inside the paragraph
     start - receives 0 if pos is at the beginning of the text or if no character of bidi type
             "B" precedes pos; otherwise the reader position after the backward scan plus 2
             (presumably the position right behind the separator - the backward reader is assumed
             to step one position per read; TODO confirm against Texts.TextReader) *)
  PROCEDURE FindStartOfParagraph(pos : LONGINT; VAR start : LONGINT);
  VAR
    char : Texts.Char32;
    charType : Strings.String;
  BEGIN
    (* Nothing precedes position 0, so the paragraph starts there. Without this guard the reader
       would be positioned at -1 and a separator at the very start of the text could yield a
       start position behind pos. *)
    IF pos <= 0 THEN
      start := 0;
      RETURN;
    END;

    (* initialize the text reader *)
    textReader.SetPosition(pos-1);
    textReader.SetDirection(-1);
    amm.NewString(charType);

    (* scan through the text until a paragraph separator or the start of the text is reached *)
    REPEAT
      textReader.ReadCh(char);
      unicodePropertyReader.GetBidiCharacterType(char,charType);
    UNTIL (charType^ = "B") OR (textReader.GetPosition() <= 0);

    (* prepare the correct start position for each case *)
    IF charType^ # "B" THEN
      start := 0;
    ELSE
      start := textReader.GetPosition() + 2;
    END;

    amm.FreeString(charType);
  END FindStartOfParagraph;
  (* Replaces a character by its mirrored counterpart where the rules permit it.
     originalChar   - character to inspect; overwritten with the replacement if one applies
     embeddingLevel - embedding level of the character
     Characters on an even level are never touched. On an odd level a character reported as
     mirrored by the property reader is replaced via the mirror property reader; any other
     character is passed through HL6 if HL6 is installed. *)
  PROCEDURE MirrorCharacter(VAR originalChar : Texts.Char32; embeddingLevel : LONGINT);
  BEGIN
    (* mirroring only applies to odd levels *)
    IF ~ODD(embeddingLevel) THEN
      RETURN;
    END;

    IF unicodePropertyReader.IsMirroredChar(originalChar) THEN
      originalChar := mirrorPropertyReader.GetMirroredChar(originalChar);
    ELSIF HL6 # NIL THEN
      (* higher-level protocol hook *)
      originalChar := HL6(originalChar);
    END;
  END MirrorCharacter;
  (* Reverses, inside the position array, every maximal run of characters whose level is at least
     fromLevel.
     globalStart  - index in 'levels' that corresponds to entry 0 of the position array
     fromLevel    - runs consist of consecutive characters with a level >= fromLevel
     toLevel      - not used by this procedure
     levels       - embedding levels, indexed like the cache entry
     posArray     - position array whose entries are permuted in place
     lastCharType - if it denotes "B", the last entry of the position array is excluded from the scan
     A run is only reversed if the index following it still lies inside the position array. *)
  PROCEDURE ReorderSubstrings(globalStart, fromLevel, toLevel : LONGINT; CONST levels : ARRAY OF LONGINT; VAR posArray : PosArray; lastCharType : Strings.String);
  VAR
    idx, runStart, lastIdx : LONGINT;
  BEGIN
    (* a single entry cannot be reordered *)
    IF posArray.size = 1 THEN
      RETURN;
    END;

    (* index of the last character that takes part in the scan *)
    IF lastCharType^ = "B" THEN
      lastIdx := globalStart + posArray.size - 2;
    ELSE
      lastIdx := globalStart + posArray.size - 1;
    END;

    idx := globalStart;
    WHILE idx <= lastIdx DO
      IF levels[idx] >= fromLevel THEN
        (* a run starts here; advance to the first index that no longer belongs to it *)
        runStart := idx;
        REPEAT
          INC(idx);
        UNTIL (idx > lastIdx) OR (levels[idx] < fromLevel);

        (* reverse the run; the second argument is the offset of its last element *)
        IF (idx - globalStart) < posArray.size THEN
          SwapSubstring(runStart-globalStart,idx-runStart-1,posArray.array^);
        END;
      END;
      (* idx now points at a character below fromLevel (or behind the scan range): skip it *)
      INC(idx);
    END;
  END ReorderSubstrings;
  (* Reverses the entries posArray[start] .. posArray[start+end] in place.
     start - index of the first entry of the range
     end   - offset of the last entry relative to start (number of entries minus one)
     Ranges with fewer than two entries are left unchanged. *)
  PROCEDURE SwapSubstring(start, end: LONGINT; VAR posArray : ARRAY OF LONGINT);
  VAR
    lo, hi, held : LONGINT;
  BEGIN
    lo := start;
    hi := start + end;
    (* exchange the outermost pair and move inwards until the indices meet *)
    WHILE lo < hi DO
      held := posArray[lo];
      posArray[lo] := posArray[hi];
      posArray[hi] := held;
      INC(lo);
      DEC(hi);
    END;
  END SwapSubstring;
  (* Exchanges the two entries of the position array found at the indices pos1 and pos2. *)
  PROCEDURE SwapPositions(pos1, pos2 : LONGINT; VAR posArray : ARRAY OF LONGINT);
  VAR
    first, second : LONGINT;
  BEGIN
    (* read both entries before writing either of them back *)
    first := posArray[pos1];
    second := posArray[pos2];
    posArray[pos1] := second;
    posArray[pos2] := first;
  END SwapPositions;
  (* Initialises a position array with consecutive positions: entry k receives start + k for
     every index k of the array. *)
  PROCEDURE FillPositionArray(start : LONGINT; VAR posArray : ARRAY OF LONGINT);
  VAR
    k, value : LONGINT;
  BEGIN
    k := 0;
    value := start;
    WHILE k < LEN(posArray) DO
      posArray[k] := value;
      INC(k);
      INC(value);
    END;
  END FillPositionArray;
  (* Determines the largest level among levels[start] .. levels[start+length-1].
     Indices beyond the end of the array are ignored. The result is never below 0, which is also
     the result for an empty range. *)
  PROCEDURE GetHighestLevel(start, length : LONGINT; CONST levels : ARRAY OF LONGINT) : LONGINT;
  VAR
    k, last, highest : LONGINT;
  BEGIN
    highest := 0;
    (* clip the range to the array *)
    last := MIN(start + length - 1,LEN(levels) - 1);
    k := start;
    WHILE k <= last DO
      IF levels[k] > highest THEN
        highest := levels[k];
      END;
      INC(k);
    END;
    RETURN highest;
  END GetHighestLevel;
  (* Determines the smallest odd level among levels[start] .. levels[start+length-1].
     Indices beyond the end of the array are ignored. If the range holds no odd level below 61,
     the result is 61. *)
  PROCEDURE GetLowestOddLevel(start, length : LONGINT; CONST levels : ARRAY OF LONGINT) : LONGINT;
  VAR
    k, last, lowest : LONGINT;
  BEGIN
    lowest := 61;
    (* clip the range to the array *)
    last := MIN(start + length - 1,LEN(levels) - 1);
    k := start;
    WHILE k <= last DO
      IF (levels[k] < lowest) & ODD(levels[k]) THEN
        lowest := levels[k];
      END;
      INC(k);
    END;
    RETURN lowest;
  END GetLowestOddLevel;
  (* Yields in nextLevel the smallest even level above thisLevel. Levels above 59 are passed
     through unchanged, so a level of at most 59 never produces a result above 60. *)
  PROCEDURE GetNextEvenEmbeddingLevel(thisLevel : LONGINT; VAR nextLevel : LONGINT);
  BEGIN
    IF thisLevel > 59 THEN
      (* no higher even level is available *)
      nextLevel := thisLevel;
    ELSIF ODD(thisLevel) THEN
      nextLevel := thisLevel + 1;
    ELSE
      nextLevel := thisLevel + 2;
    END;
  END GetNextEvenEmbeddingLevel;
  (* Yields in nextLevel the smallest odd level above thisLevel. Levels above 60 are passed
     through unchanged, so a level of at most 60 never produces a result above 61. *)
  PROCEDURE GetNextOddEmbeddingLevel(thisLevel : LONGINT; VAR nextLevel : LONGINT);
  BEGIN
    IF thisLevel > 60 THEN
      (* no higher odd level is available *)
      nextLevel := thisLevel;
    ELSIF ODD(thisLevel) THEN
      nextLevel := thisLevel + 2;
    ELSE
      nextLevel := thisLevel + 1;
    END;
  END GetNextOddEmbeddingLevel;
  (* Writes the character type that belongs to the higher of the two levels into result:
     "R" if that level is odd, "L" if it is even. result must already refer to a string. *)
  PROCEDURE GetBorderOfRunCharacterType(thisLevel, otherLevel : LONGINT; VAR result : Strings.String);
  VAR
    higher : LONGINT;
  BEGIN
    (* pick the higher level first, its parity decides the direction *)
    higher := thisLevel;
    IF otherLevel > higher THEN
      higher := otherLevel;
    END;

    IF ODD(higher) THEN
      result^ := "R";
    ELSE
      result^ := "L";
    END;
  END GetBorderOfRunCharacterType;
  (* Tells whether the sequence of separators/terminators that continues at pos is followed by a
     European Number, rewriting the character types it passes on the way.
     pos          - position (relative to the cache entry) to inspect
     terminators  - TRUE if the scanned sequence consists of terminators/BNs, FALSE for separators
     cacheElement - cache entry whose characterTypes are read and modified
     max          - receives the position at which the scan ended
     state        - receives the state implied by the character that ended the scan
     ENstate      - strong-type state used for W7; set to LeftStrongType when the scan ends on "L"
     HL5, if installed, is consulted first and can answer "EN" for pos directly. *)
  PROCEDURE IsEuropeanNumberAdjacent(pos : LONGINT; terminators : BOOLEAN; VAR cacheElement : LineElement; VAR max : LONGINT; VAR state : LONGINT; VAR ENstate : LONGINT) : BOOLEAN;
  VAR
    savedENstate : LONGINT;
    hlType : Strings.String;
  BEGIN
    amm.NewString(hlType);

    (* keep the strong-type state as it was on entry *)
    savedENstate := ENstate;

    (* HL5 - consider artificial context *)
    IF (HL5 # NIL) & HL5(cacheElement,pos,hlType,state,ENstate,EuropeanNumberAdj) THEN
      IF hlType^ = "EN" THEN
        max := pos;
        amm.FreeString(hlType);
        RETURN TRUE;
      END;
    END;
    amm.FreeString(hlType);

    (* positions outside the entry never hold a number *)
    IF (pos < 0) OR (pos >= cacheElement.lineLength) THEN
      RETURN FALSE;
    END;

    (* a European Number has been found *)
    IF cacheElement.characterTypes[pos]^ = "EN" THEN
      (* W7 *)
      IF ENstate = LeftStrongType THEN
        cacheElement.characterTypes[pos]^ := "L";
      END;
      max := pos;
      state := EuropeanNumber;
      RETURN TRUE;
    END;

    (* terminator sequence continued by another terminator or a BN: the outcome depends on what follows *)
    IF terminators & ((cacheElement.characterTypes[pos]^ = "ET") OR (cacheElement.characterTypes[pos]^ = "BN")) THEN
      IF IsEuropeanNumberAdjacent(pos + 1,terminators,cacheElement,max,state,ENstate) THEN
        (* W7 - according to the state remembered on entry *)
        IF savedENstate = LeftStrongType THEN
          cacheElement.characterTypes[pos]^ := "L";
        ELSE
          cacheElement.characterTypes[pos]^ := "EN";
        END;
        RETURN TRUE;
      END;
      (* no number follows: the character becomes Other Neutral *)
      cacheElement.characterTypes[pos]^ := "ON";
      RETURN FALSE;
    END;

    (* separator sequence continued by a BN: turn the BN into Other Neutral and look further *)
    IF ~terminators & (cacheElement.characterTypes[pos]^ = "BN") THEN
      cacheElement.characterTypes[pos]^ := "ON";
      RETURN IsEuropeanNumberAdjacent(pos + 1,terminators,cacheElement,max,state,ENstate);
    END;

    (* any other character ends the scan unsuccessfully; report position and state *)
    max := pos;
    IF cacheElement.characterTypes[pos]^ = "AN" THEN
      state := ArabicNumber;
    ELSE
      state := NeutralType;
      IF cacheElement.characterTypes[pos]^ = "L" THEN
        ENstate := LeftStrongType;
      END;
    END;
    RETURN FALSE;
  END IsEuropeanNumberAdjacent;
  (* Tells whether the character at pos - after skipping Boundary Neutrals - is an Arabic Number.
     pos          - position (relative to the cache entry) to inspect
     cacheElement - cache entry whose characterTypes are read and modified (skipped BNs become ON)
     max          - receives the position at which the scan ended
     state        - receives the state implied by the character that ended the scan
     ENstate      - strong-type state used for W7; updated when the scan ends on "L" or "R"
     HL5, if installed, is consulted first and can answer "AN" for pos directly. *)
  PROCEDURE IsArabicNumberAdjacent(pos : LONGINT; VAR cacheElement : LineElement; VAR max : LONGINT; VAR state : LONGINT; VAR ENstate : LONGINT) : BOOLEAN;
  VAR
    hlType : Strings.String;
  BEGIN
    amm.NewString(hlType);

    (* HL5 - consider artificial context *)
    IF (HL5 # NIL) & HL5(cacheElement,pos,hlType,state,ENstate,ArabicNumberAdj) THEN
      IF hlType^ = "AN" THEN
        max := pos;
        amm.FreeString(hlType);
        RETURN TRUE;
      END;
    END;
    amm.FreeString(hlType);

    (* positions outside the entry never hold a number *)
    IF (pos < 0) OR (pos >= cacheElement.lineLength) THEN
      RETURN FALSE;
    END;

    (* an Arabic Number has been found *)
    IF cacheElement.characterTypes[pos]^ = "AN" THEN
      state := ArabicNumber;
      max := pos;
      RETURN TRUE;
    END;

    (* a Boundary Neutral becomes Other Neutral and the scan continues behind it *)
    IF cacheElement.characterTypes[pos]^ = "BN" THEN
      cacheElement.characterTypes[pos]^ := "ON";
      RETURN IsArabicNumberAdjacent(pos + 1,cacheElement,max,state,ENstate);
    END;

    (* any other character ends the scan unsuccessfully; report position and state *)
    IF cacheElement.characterTypes[pos]^ = "EN" THEN
      (* W7 *)
      IF ENstate = LeftStrongType THEN
        cacheElement.characterTypes[pos]^ := "L";
      END;
      state := EuropeanNumber;
    ELSE
      IF cacheElement.characterTypes[pos]^ = "L" THEN
        ENstate := LeftStrongType;
      ELSIF cacheElement.characterTypes[pos]^ = "R" THEN
        ENstate := NeutralType;
      END;
      state := NeutralType;
    END;
    max := pos;
    RETURN FALSE;
  END IsArabicNumberAdjacent;
  (* Scans the text forwards from 'start' for the first character with a strong bidi type; the
     cache is not used.
     start, end - the scan begins at start and stops once the reader position reaches end
     charType   - receives the type of the strong character found, or NIL if the scan ended
                  without finding one *)
  PROCEDURE FindFirstStrongCharacter(start, end : LONGINT; VAR charType : Strings.String);
  VAR
    ch : Texts.Char32;
    found : BOOLEAN;
  BEGIN
    (* set up the reader and the scan state *)
    ch := 0H;
    found := FALSE;
    textReader.SetPosition(start);
    textReader.SetDirection(1);

    (* at least one character is examined, even if start is not below end *)
    LOOP
      textReader.ReadCh(ch);
      found := IsStrongCharacter(ch,charType);
      IF found THEN EXIT END;
      IF textReader.GetPosition() >= end THEN EXIT END;
    END;

    (* signal "nothing found" by clearing the result *)
    IF ~found THEN
      charType := NIL;
    END;
  END FindFirstStrongCharacter;
  (* Looks up the bidi character type of 'char', stores it in charType and reports whether that
     type is a strong one. *)
  PROCEDURE IsStrongCharacter(CONST char : Texts.Char32; VAR charType : Strings.String) : BOOLEAN;
  VAR
    strong : BOOLEAN;
  BEGIN
    unicodePropertyReader.GetBidiCharacterType(char,charType);
    strong := IsStrongCharacterType(charType);
    RETURN strong;
  END IsStrongCharacter;
  (* Reports whether the given character type is one of the strong types "L", "AL" or "R". *)
  PROCEDURE IsStrongCharacterType(CONST charType : Strings.String) : BOOLEAN;
  BEGIN
    IF charType^ = "L" THEN RETURN TRUE END;
    IF charType^ = "AL" THEN RETURN TRUE END;
    RETURN charType^ = "R";
  END IsStrongCharacterType;
  (* Reports whether the given character type is one of the types treated as neutral here:
     "B", "S", "WS", "ON" or "BN". *)
  PROCEDURE IsNeutralCharacterType(CONST charType : Strings.String) : BOOLEAN;
  BEGIN
    IF charType^ = "B" THEN RETURN TRUE END;
    IF charType^ = "S" THEN RETURN TRUE END;
    IF charType^ = "WS" THEN RETURN TRUE END;
    IF charType^ = "ON" THEN RETURN TRUE END;
    RETURN charType^ = "BN";
  END IsNeutralCharacterType;
  (* Writes a directional type ("L" or "R") into result.
     charType             - type of the character under consideration
     thisLevel/otherLevel - levels on both sides of the position
     If the two levels differ, the position lies at the border of a run and the type is derived
     from the higher level. Otherwise the result is "L" for a character of type "L" and "R" for
     every other type. result must already refer to a string. *)
  PROCEDURE GetFinalDirectionalType(charType : Strings.String; thisLevel, otherLevel : LONGINT; VAR result : Strings.String);
  BEGIN
    IF thisLevel = otherLevel THEN
      (* inside a run: only "L" stays left-to-right *)
      IF charType^ = "L" THEN
        result^ := "L";
      ELSE
        result^ := "R";
      END;
    ELSE
      (* border of a run *)
      GetBorderOfRunCharacterType(thisLevel,otherLevel,result);
    END;
  END GetFinalDirectionalType;
  (* Compares a directional type with the one of the next non-Neutral character at or after
     thisPos.
     charType     - directional type to compare against
     thisPos      - position (relative to the cache entry) at which the search for the successor
                    starts; expected to be at least 1 because thisPos-1 is inspected
     cacheElement - cache entry providing character types and embedding levels
     Returns TRUE if the successor's directional type has the same contents as charType. The
     successor's type is, in this order: the answer of HL5 if installed and responsible, the
     border type against the paragraph level at the end of the entry, the border type between
     two runs if the level changes at thisPos, the result for thisPos+1 if the character at
     thisPos is Neutral, or the final directional type of the character at thisPos. *)
  PROCEDURE HasSameDirectionalType(charType : Strings.String; thisPos : LONGINT; cacheElement : LineElement) : BOOLEAN;
  VAR
    hlThisCharacterType : Strings.String;
    dummyInt : LONGINT;
    dummyCharType : Strings.String;
    result : BOOLEAN;
  BEGIN
    amm.NewString(hlThisCharacterType);

    (* HL5 - consider artificial context *)
    IF (HL5 # NIL) & HL5(cacheElement,thisPos,hlThisCharacterType,dummyInt,dummyInt,SameDirection) THEN
      (* compare before the string is handed back; it must not be read after FreeString *)
      result := charType^ = hlThisCharacterType^;
      amm.FreeString(hlThisCharacterType);
      RETURN result;
    END;

    amm.FreeString(hlThisCharacterType);
    amm.NewString(dummyCharType);

    (* All comparisons below compare string CONTENTS (charType^ = dummyCharType^). Comparing the
       pointers would always be FALSE, since dummyCharType was obtained just above. *)

    (* compare with the type of the paragraph embedding level at the end of the paragraph *)
    IF (thisPos = cacheElement.lineLength) THEN
      GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[thisPos-1],cacheElement.paragraphEmbeddingLevel,dummyCharType);
      result := charType^ = dummyCharType^;
      amm.FreeString(dummyCharType);
      RETURN result;
    (* compare with the type of the sor at the beginning of a run *)
    ELSIF (cacheElement.characterEmbeddingLevels[thisPos-1] # cacheElement.characterEmbeddingLevels[thisPos]) THEN
      GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[thisPos-1],cacheElement.characterEmbeddingLevels[thisPos],dummyCharType);
      result := charType^ = dummyCharType^;
      amm.FreeString(dummyCharType);
      RETURN result;
    (* compare with the next character if this one is Neutral *)
    ELSIF (IsNeutralCharacterType(cacheElement.characterTypes[thisPos])) THEN
      amm.FreeString(dummyCharType);
      RETURN HasSameDirectionalType(charType,thisPos+1,cacheElement);
    (* compare with this character *)
    ELSE
      (* The character at thisPos is the non-Neutral successor found by the check above, so its
         own type is evaluated. NOTE(review): this used to read characterTypes[thisPos-1];
         confirm against the callers that thisPos is the intended index. *)
      GetFinalDirectionalType(cacheElement.characterTypes[thisPos],cacheElement.characterEmbeddingLevels[thisPos-1],cacheElement.characterEmbeddingLevels[thisPos],dummyCharType);
      result := charType^ = dummyCharType^;
      amm.FreeString(dummyCharType);
      RETURN result;
    END;
  END HasSameDirectionalType;
  (* Walks through the line cache and removes every entry whose preceding character is not a
     paragraph separator (bidi type "B"), i.e. entries that do not start a paragraph.
     The entry at position 0 is never examined. The walk ends when GetNextPos returns the
     position it was given or a negative position. *)
  PROCEDURE SweepCache;
  VAR
    candidate, confirmed : LONGINT;
    ch : Texts.Char32;
    bidiType : Strings.String;
  BEGIN
    (* the first line cannot be invalid under these terms, so start behind it *)
    confirmed := 0;
    amm.NewString(bidiType);

    candidate := lineCache.GetNextPos(confirmed);
    WHILE (candidate # confirmed) & (candidate >= 0) DO
      (* inspect the character directly in front of the candidate line *)
      textReader.SetPosition(candidate-1);
      textReader.ReadCh(ch);
      unicodePropertyReader.GetBidiCharacterType(ch,bidiType);

      IF bidiType^ = "B" THEN
        (* valid line: continue the walk from here *)
        confirmed := candidate;
      ELSE
        (* the previous line does not end with a paragraph separator: drop this entry *)
        lineCache.RemoveLine(candidate);
      END;

      candidate := lineCache.GetNextPos(confirmed);
    END;

    amm.FreeString(bidiType);
  END SweepCache;
  (* Prints the original character types of the whole text to the kernel log.
     PrintOriginalTypedParagraph reads while the reader position is below its 'end' argument, so
     the text length itself is passed; passing length-1 would omit the last character. *)
  PROCEDURE PrintOriginalTypedText*;
  BEGIN
    PrintOriginalTypedParagraph(0,textReader.text.GetLength());
  END PrintOriginalTypedText;
  (* Writes the original bidi character types of the characters between start and end to the
     kernel log, separated by blanks. Reading stops as soon as the reader position is no longer
     below end. Nothing is printed if the text reader has no text. *)
  PROCEDURE PrintOriginalTypedParagraph(start,end : LONGINT);
  VAR
    ch : Texts.Char32;
    bidiType : Strings.String;
  BEGIN
    IF textReader.text = NIL THEN
      RETURN;
    END;

    amm.NewString(bidiType);

    KernelLog.Ln;
    KernelLog.String("Printing original character types:"); KernelLog.Ln;

    textReader.SetDirection(1);
    textReader.SetPosition(start);
    LOOP
      IF textReader.GetPosition() >= end THEN EXIT END;
      textReader.ReadCh(ch);
      unicodePropertyReader.GetBidiCharacterType(ch,bidiType);
      KernelLog.String(bidiType^); KernelLog.String(" ");
    END;
    KernelLog.Ln;

    amm.FreeString(bidiType);
  END PrintOriginalTypedParagraph;
  (* Logs the character types after reformatting by delegating to PrintCurrentTypedParagraph
     with the range from position 0 to the text length minus one. *)
  PROCEDURE PrintCurrentTypedText*;
  VAR
    lastPos : LONGINT;
  BEGIN
    lastPos := textReader.text.GetLength() - 1;
    PrintCurrentTypedParagraph(0,lastPos);
  END PrintCurrentTypedText;
  (* Prints a certain paragraph's character types after reformatting to the kernel log.
     start - position used to look up the paragraph's cache entry
     end   - not used; all characters of the cache entry found for start are printed
     If no cache entry exists for start, only the heading is printed. *)
  PROCEDURE PrintCurrentTypedParagraph(start, end : LONGINT);
  VAR
    i : LONGINT;
    cacheElement : LineElement;
  BEGIN
    lineCache.FindElement(start,cacheElement);

    KernelLog.Ln;
    KernelLog.String("Printing current character types:"); KernelLog.Ln;

    (* a missing cache entry must not be dereferenced *)
    IF cacheElement # NIL THEN
      FOR i := 0 TO cacheElement.lineLength - 1 DO
        KernelLog.String(cacheElement.characterTypes[i]^); KernelLog.String(" ");
      END;
    END;
    KernelLog.Ln;
  END PrintCurrentTypedParagraph;
  (* Prints the character codes of the whole text to the kernel log.
     PrintCodedParagraph reads while the reader position is below its 'end' argument, so the
     text length itself is passed; passing length-1 would omit the last character. *)
  PROCEDURE PrintCodedText*;
  BEGIN
    PrintCodedParagraph(0,textReader.text.GetLength());
  END PrintCodedText;
  (* Writes the character codes of the characters between start and end to the kernel log in
     hexadecimal form, separated by blanks. Reading stops as soon as the reader position is no
     longer below end. Nothing is printed if the text reader has no text. *)
  PROCEDURE PrintCodedParagraph(start, end : LONGINT);
  VAR
    ch : Texts.Char32;
  BEGIN
    IF textReader.text = NIL THEN
      RETURN;
    END;

    KernelLog.Ln;
    KernelLog.String("Printing character codes:"); KernelLog.Ln;

    textReader.SetDirection(1);
    textReader.SetPosition(start);
    LOOP
      IF textReader.GetPosition() >= end THEN EXIT END;
      textReader.ReadCh(ch);
      KernelLog.Hex(ch,9); KernelLog.String(" ");
    END;
    KernelLog.Ln;
  END PrintCodedParagraph;
  (* Dumps the code of every character delivered by the given reader to KernelLog as
     hexadecimal, beginning at position start and continuing while the reader position is
     below end. The textReader parameter hides the formatter's own reader of the same name.
     Produces no output when the given reader has no text attached. *)
  PROCEDURE PrintCodedLine(textReader : Texts.TextReader; start, end : LONGINT);
  VAR
    code : Texts.Char32;
  BEGIN
    IF textReader.text # NIL THEN
      (* the reader is positioned first, the heading is logged afterwards *)
      textReader.SetDirection(1);
      textReader.SetPosition(start);
      KernelLog.Ln;
      KernelLog.String("Printing line character codes:");
      KernelLog.Ln;
      WHILE end > textReader.GetPosition() DO
        textReader.ReadCh(code);
        KernelLog.Hex(code,9);
        KernelLog.String(" ");
      END;
      KernelLog.Ln;
    END;
  END PrintCodedLine;
  (* Print the whole text's character embedding levels.
     Does nothing when the reader has no text attached. *)
  PROCEDURE PrintLeveledText*;
  BEGIN
    (* textReader.text.GetLength() below would trap on a NIL text, so bail out first *)
    IF textReader.text = NIL THEN
      RETURN;
    END;
    PrintLeveledParagraph(0,textReader.text.GetLength()-1);
  END PrintLeveledText;
  (* Print a certain paragraph's character embedding levels.
     start - position handed to lineCache.FindElement to look up the cached line
     end   - not used by this procedure
     Nothing is printed when the lookup yields no cache element. *)
  PROCEDURE PrintLeveledParagraph(start, end : LONGINT);
  VAR
    i : LONGINT;
    cacheElement : LineElement;
  BEGIN
    lineCache.FindElement(start,cacheElement);
    (* NOTE(review): assumes FindElement leaves cacheElement NIL when nothing is cached
       for start - confirm; without this check the accesses below would trap *)
    IF cacheElement = NIL THEN
      RETURN;
    END;
    KernelLog.Ln;
    KernelLog.String("Printing resolved levels:"); KernelLog.Ln;
    FOR i := 0 TO cacheElement.lineLength - 1 DO
      KernelLog.Int(cacheElement.characterEmbeddingLevels[i],2); KernelLog.String(" ");
    END;
    KernelLog.Ln;
  END PrintLeveledParagraph;
  (* Writes every entry of levels to KernelLog, each with field width 2 and followed by
     a blank, framed by a heading line and a closing line break. *)
  PROCEDURE PrintLineEmbeddingLevels(CONST levels : IntegerArray);
  VAR
    idx, count : LONGINT;
  BEGIN
    KernelLog.Ln;
    KernelLog.String("Printing line levels:");
    KernelLog.Ln;
    (* the bound is taken once up front, just as a FOR loop would evaluate it *)
    count := LEN(levels);
    idx := 0;
    WHILE idx < count DO
      KernelLog.Int(levels[idx],2);
      KernelLog.String(" ");
      INC(idx);
    END;
    KernelLog.Ln;
  END PrintLineEmbeddingLevels;
  2172. END BidiFormatter;
  (* The following procedure type declarations are used to define Higher Level Protocols. According to the Bidi-Algorithm
  they have predefined tasks. If they are assigned by a client object, they are used automatically and the original
  algorithm is ignored if desired. It is important that the implementor of these functions knows exactly what he or she
  is doing because the behaviour of the algorithm is undefined if these functions do not do what they're supposed to.
  For a better understanding the definitions of the original algorithms are quoted here. HL4 is not activated by setting
  the corresponding function but by setting the boolean value to true. *)
  (* HL1. Override P3, and set the paragraph level explicitly.
  The function has read access to the whole text and receives the start and end position of the clause of interest.
  It returns the paragraph embedding level for this clause.
    textReader - reader through which the text can be read
    start, end - start and end position of the clause
    result     - paragraph embedding level to be used for the clause *)
  HigherLevelProtocol1* = PROCEDURE(textReader : Texts.TextReader; start, end : LONGINT) : LONGINT;
  (* HL2. Override W2, and set EN or AN explicitly.
  The function gets access to the whole cache line, the current position and the last strong type. It computes the
  character type for the character at the given position.
    cacheLine      - the cached line being processed
    pos            - current position
    lastStrongType - the last strong type encountered
    result         - character type for the character at pos *)
  HigherLevelProtocol2* = PROCEDURE(cacheLine : LineElement; pos, lastStrongType : LONGINT) : Strings.String;
  (* HL3. Emulate directional overrides or embedding codes.
  The function gets access to the characters, the previously computed embedding levels, and the original and newly
  computed character types of the whole clause, as well as the current character and its character type. It redefines
  the character type and the embedding level, which will be assigned to the cache afterwards. If it returns false,
  the algorithm is performed normally for this character.
    characters           - characters of the clause
    embeddingLevels      - previously computed embedding levels
    originalTypes, types - original and newly computed character types
    char, charType       - current character and its character type
    hlCharacterType      - (out) redefined character type
    hlEmbeddingLevel     - (out) redefined embedding level
    result               - FALSE to have the algorithm proceed normally for this character *)
  HigherLevelProtocol3* = PROCEDURE(characters : CharArray; embeddingLevels : IntegerArray; originalTypes, types : Strings.StringArray;
    char : Texts.Char32; charType : Strings.String; VAR hlCharacterType : Strings.String;
    VAR hlEmbeddingLevel : LONGINT) : BOOLEAN;
  (* HL5. Provide artificial context.
  The function gets access to the whole cache line, the current position, the character type of the current character
  and the position in the algorithm where the function is called. It defines the current state and ENstate on which
  the algorithm will continue to operate. If it returns false, the algorithm is performed normally for this character.
    cacheElement      - the cached line being processed
    pos               - current position
    thisCharacterType - (in/out) character type of the current character
    state, ENstate    - (in/out) states the algorithm continues with
    placeOfVenue      - identifies the place in the algorithm from which the function is called
    result            - FALSE to have the algorithm proceed normally for this character *)
  HigherLevelProtocol5* = PROCEDURE(cacheElement : LineElement; pos : LONGINT; VAR thisCharacterType : Strings.String;
    VAR state, ENstate : LONGINT; placeOfVenue : LONGINT) : BOOLEAN;
  (* HL6. Additional mirroring.
  The function gets the character that needs to be mirrored and returns the target, mirrored character.
    originalChar - character to be mirrored
    result       - the mirrored character *)
  HigherLevelProtocol6* = PROCEDURE(originalChar : Texts.Char32) : Texts.Char32;
  VAR
    (* Switch consulted by GetDisplayCharacter: while it is FALSE that procedure returns
       without touching its argument. The module body initialises it to FALSE. *)
    showUnicodeControlCharacters* : BOOLEAN;
  (* Replaces a non-printable bidi control character by a hard-coded printable stand-in.
     The substitution only takes place while showUnicodeControlCharacters is TRUE; any
     character that is not in the table is left unchanged. The table can easily be
     changed and extended. *)
  PROCEDURE GetDisplayCharacter*(VAR char : Texts.Char32);
  BEGIN
    IF showUnicodeControlCharacters THEN
      CASE char OF
        200EH: char := 21BEH; (* LRM *)
        | 200FH: char := 21BFH; (* RLM *)
        | 202AH: char := 2308H; (* LRE *)
        | 202BH: char := 2309H; (* RLE *)
        | 202DH: char := 250DH; (* LRO *)
        | 202EH: char := 2511H; (* RLO *)
        | 202CH: char := 252FH; (* PDF *)
      ELSE
        (* not a listed control character: keep it as it is *)
      END;
    END;
  END GetDisplayCharacter;
  (* Command: takes a file name from the command arguments, decodes that file as UTF-8 text,
     reformats the text with a freshly created BidiFormatter and reorders one hard-coded line.
     Progress and error messages are written to KernelLog.
       context - command context; only context.arg is read *)
  PROCEDURE TestReformatting*(context : Commands.Context);
  VAR
    filename, fullname: ARRAY 256 OF CHAR;
    msg : ARRAY 512 OF CHAR;
    file : Files.File;
    decoder : Codecs.TextDecoder;
    in : Streams.Reader;
    decoderRes : WORD;
    text : Texts.Text;
    error : BOOLEAN;
    bidiFormatter : BidiFormatter;
    textReader : Texts.TextReader;
  BEGIN
    error := FALSE;
    text := NIL;
    context.arg.SkipWhitespace; context.arg.String(filename);
    COPY(filename, fullname);

    (* Check whether file exists and get its canonical name *)
    file := Files.Old(filename);
    IF (file # NIL) THEN
      file.GetName(fullname);
    ELSE
      file := Files.New(filename); (* to get path *)
      IF (file # NIL) THEN
        file.GetName(fullname);
        file := NIL; (* the file does not exist; fall through to the "not found" branch *)
      END;
    END;

    IF (file # NIL) THEN
      decoder := Codecs.GetTextDecoder("UTF-8");
      IF (decoder # NIL) THEN
        in := Codecs.OpenInputStream(fullname);
        IF in # NIL THEN
          decoder.Open(in, decoderRes);
          IF decoderRes = 0 THEN
            text := decoder.GetText();
          END;
          (* A failed decode used to leave text = NIL with error = FALSE, so that
             text.AcquireRead below trapped. Report the failure instead. *)
          IF text = NIL THEN
            msg := "Can't decode file "; Strings.Append(msg, fullname);
            KernelLog.String(msg);
            error := TRUE;
          END;
        ELSE
          msg := "Can't open input stream on file "; Strings.Append(msg, fullname);
          KernelLog.String(msg);
          error := TRUE;
        END;
      ELSE
        (* the message names the same format that was requested from Codecs above *)
        msg := "No decoder for file "; Strings.Append(msg, fullname);
        Strings.Append(msg, " (Format: "); Strings.Append(msg, "UTF-8"); Strings.Append(msg, ")");
        KernelLog.String(msg);
        error := TRUE;
      END;
    ELSE
      msg := "file '"; Strings.Append(msg, fullname); Strings.Append(msg,"' not found.");
      KernelLog.String(msg);
      error := TRUE;
    END;

    IF ~error THEN
      KernelLog.String("file successfully read."); KernelLog.Ln;
      text.AcquireRead;
      NEW(bidiFormatter,text);
      bidiFormatter.ReformatText;
      (* NOTE(review): the arguments 452 and 48 are hard-coded, presumably chosen for the
         BidiTestData.txt sample - confirm. The returned reader is not used any further. *)
      textReader := bidiFormatter.ReorderLine(452,48);
      text.ReleaseRead;
    END;
  END TestReformatting;
  2295. BEGIN
  2296. showUnicodeControlCharacters := FALSE;
  2297. END UnicodeBidirectionality.
  2298. System.Free UnicodeProperties ~
  2299. System.Free UnicodeBidirectionality ~
  2300. UnicodeBidirectionality.TestReformatting "BidiTestData.txt"~