StringPool.Mod 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. MODULE StringPool; (** prk **) (** AUTHOR "prk"; PURPOSE "StringPool"; *)
  2. IMPORT
  3. SYSTEM,
  4. KernelLog; (*debug only*)
  5. (**
  6. StringPool stores strings of any length. Equal strings have the same index.
  7. String with index 0 is guaranteed to be the empty string.
  8. *)
  9. CONST
  10. (* Module Configuration *)
  11. StringPoolSize0 = 1024*256; (* initial string pool size *)
  12. HashTableSize0 = 1024; (* initial hash table size *)
  13. TYPE
  14. (* Helper Structures *)
  15. Index* = LONGINT;
  16. StringPool = POINTER TO ARRAY OF CHAR;
  17. VAR
  18. pool: StringPool;
  19. poolLen: LONGINT;
  20. poolIndex: POINTER TO ARRAY OF Index;
  21. poolIndexSize: LONGINT; (* LEN(poolIndex)-1 *)
  22. ALastGet,
  23. AStrings, AGetString, ACompareString, ACompareString0, AStringCmpHit, ASearchHits, ASearchMisses: SIZE;
  24. AInsertHashRetries: ARRAY 10 OF SIZE;
  25. ASearchHashRetries: ARRAY 10 OF SIZE;
  26. (** ----------------- String Pool functions ------------------ *)
  27. (* Hash - Return an Hash value in [0, poolIndexSize[ *)
  28. PROCEDURE Hash(CONST str: ARRAY OF CHAR): LONGINT;
  29. VAR i, h: LONGINT; ch: CHAR;
  30. BEGIN
  31. i := 0; ch := str[0]; h := 0;
  32. WHILE ch # 0X DO
  33. h :=SYSTEM.VAL(LONGINT, SYSTEM.VAL(SET, ROT(h, 7)) / SYSTEM.VAL(SET, LONG(ORD(ch))));
  34. INC(i); ch := str[i]
  35. END;
  36. h := h MOD poolIndexSize;
  37. RETURN h
  38. END Hash;
  39. (* GrowPool - increase string pool size *)
  40. PROCEDURE GrowPool;
  41. VAR new: StringPool;
  42. BEGIN
  43. NEW(new, 2*LEN(pool));
  44. SYSTEM.MOVE(ADDRESSOF(pool[0]), ADDRESSOF(new[0]), LEN(pool));
  45. pool := new
  46. END GrowPool;
  47. (* GrowHashTable - Increase Hash table size and recompute all entries *)
  48. PROCEDURE GrowHashTable;
  49. VAR i, t, h: LONGINT; idx, idx0: Index; ch: CHAR;
  50. BEGIN
  51. t := (poolIndexSize+1)*2;
  52. NEW(poolIndex, t);
  53. FOR i := 0 TO t-1 DO poolIndex[i] := -1 END;
  54. FOR i := 0 TO LEN(AInsertHashRetries)-1 DO AInsertHashRetries[i] := 0 END;
  55. poolIndexSize := t-1;
  56. (* re-fill the hash-table *)
  57. idx := 0;
  58. WHILE idx < poolLen DO
  59. (*hash*)
  60. idx0 := idx; h := 0;
  61. ch := pool[idx];
  62. WHILE ch # 0X DO
  63. h :=SYSTEM.VAL(LONGINT, SYSTEM.VAL(SET, ROT(h, 7)) / SYSTEM.VAL(SET, LONG(ORD(ch))));
  64. INC(idx); ch := pool[idx]
  65. END;
  66. h := h MOD poolIndexSize;
  67. INC(idx); (*skip 0X*)
  68. i := 0;
  69. WHILE poolIndex[h] # -1 DO
  70. INC(i);
  71. INC(h);
  72. IF h >= poolIndexSize THEN DEC(h, poolIndexSize) END
  73. END;
  74. IF i >= LEN(AInsertHashRetries) THEN i := LEN(AInsertHashRetries)-1 END;
  75. INC(AInsertHashRetries[i]);
  76. poolIndex[h] := idx0
  77. END
  78. END GrowHashTable;
  79. (** GetString - Get a string from the string pool *)
  80. PROCEDURE GetString*(index: Index; VAR str: ARRAY OF CHAR);
  81. VAR i: LONGINT; ch: CHAR;
  82. BEGIN
  83. ALastGet := index;
  84. INC(AGetString);
  85. i := 0;
  86. REPEAT
  87. ch := pool[index+i]; str[i] := ch; INC(i)
  88. UNTIL ch = 0X
  89. END GetString;
  90. (* AddToPool - Add a string to the pool *)
  91. PROCEDURE AddToPool(VAR index: Index; CONST str: ARRAY OF CHAR);
  92. VAR i: LONGINT; ch: CHAR;
  93. BEGIN
  94. INC(AStrings);
  95. IF LEN(str) > LEN(pool) - poolLen THEN GrowPool END;
  96. i := 0; index := poolLen;
  97. REPEAT
  98. ch := str[i]; pool[poolLen+i] := ch; INC(i)
  99. UNTIL ch = 0X;
  100. INC(poolLen, i);
  101. END AddToPool;
  102. (** GetIndex - Retrieve a string from the pool, add if not present *)
  103. PROCEDURE GetIndex*(CONST str: ARRAY OF CHAR; VAR index: Index);
  104. VAR i, h: LONGINT; idx: Index;
  105. BEGIN {EXCLUSIVE}
  106. IF AStrings > poolIndexSize DIV 4 THEN GrowHashTable END;
  107. h := Hash(str);
  108. idx := poolIndex[h];
  109. i := 0;
  110. LOOP
  111. IF (idx = -1) THEN (* miss *)
  112. INC(ASearchMisses);
  113. IF i >= 10 THEN i := 9 END;
  114. INC(AInsertHashRetries[i]);
  115. AddToPool(index, str);
  116. poolIndex[h] := index;
  117. EXIT
  118. ELSIF (CompareString0(idx, str) = 0) THEN
  119. INC(ASearchHits);
  120. IF i >= LEN(ASearchHashRetries) THEN i := LEN(ASearchHashRetries)-1 END;
  121. INC(ASearchHashRetries[i]);
  122. index := idx;
  123. EXIT
  124. END;
  125. INC(i);
  126. ASSERT(i < poolIndexSize);
  127. INC(h);
  128. IF h >= poolIndexSize THEN DEC(h, poolIndexSize) END;
  129. idx := poolIndex[h]
  130. END;
  131. END GetIndex;
  132. PROCEDURE GetIndex1*(CONST str: ARRAY OF CHAR): Index;
  133. VAR idx: Index;
  134. BEGIN
  135. GetIndex(str, idx); RETURN idx
  136. END GetIndex1;
  137. (** Compare two strings
  138. CompareString = 0 <==> Str(index1) = Str(index2)
  139. CompareString < 0 <==> Str(index1) < Str(index2)
  140. CompareString > 0 <==> Str(index1) > Str(index2)
  141. *)
  142. PROCEDURE CompareString*(index1, index2: Index): WORD;
  143. VAR ch: CHAR;
  144. BEGIN
  145. INC(ACompareString);
  146. IF index1 = index2 THEN
  147. INC(AStringCmpHit); RETURN 0
  148. END;
  149. ch := pool[index1];
  150. WHILE (ch # 0X) & (ch = pool[index2]) DO
  151. INC(index1); INC(index2);
  152. ch := pool[index1]
  153. END;
  154. RETURN ORD(ch) - ORD(pool[index2])
  155. END CompareString;
  156. PROCEDURE CompareString0*(index: Index; CONST str: ARRAY OF CHAR): WORD;
  157. (* using VAR str makes the _whole_ compiler 10% faster!!! *)
  158. VAR ch1, ch2: CHAR; i: LONGINT;
  159. BEGIN
  160. INC(ACompareString0); i := 0;
  161. REPEAT
  162. ch1 := pool[index+i];
  163. ch2 := str[i];
  164. INC(i)
  165. UNTIL (ch1 = 0X) OR (ch1 # ch2);
  166. RETURN ORD(ch1) - ORD(ch2)
  167. END CompareString0;
  168. (*
  169. optimized version (no index checks)
  170. PROCEDURE CompareString0*(index: Index; VAR str: ARRAY OF CHAR): WORD;
  171. (* using VAR str makes the _whole_ compiler 10% faster!!! *)
  172. VAR ch1, ch2: CHAR; adr1, adr2: ADDRESS; i: LONGINT;
  173. BEGIN
  174. INC(ACompareString0);
  175. adr1 := ADDRESSOF(pool[index]);
  176. adr2 := ADDRESSOF(str[0]);
  177. REPEAT
  178. SYSTEM.GET(adr1+i, ch1);
  179. SYSTEM.GET(adr2+i, ch2);
  180. INC(i)
  181. UNTIL (ch1 = 0X) OR (ch1 # ch2);
  182. RETURN ORD(ch1) - ORD(ch2)
  183. END CompareString0;
  184. *)
  185. PROCEDURE DumpPool*;
  186. VAR i: LONGINT; ch: CHAR;
  187. BEGIN
  188. KernelLog.String("StringPool.Dump:");
  189. KernelLog.String("size = "); KernelLog.Int(poolLen,1);
  190. KernelLog.Ln;
  191. KernelLog.Int(0, 4); KernelLog.String(": ");
  192. i := 0;
  193. WHILE i < poolLen DO
  194. ch := pool[i]; INC(i);
  195. IF ch = 0X THEN
  196. KernelLog.Ln; KernelLog.Int(i, 4); KernelLog.String(": ");
  197. ELSE
  198. KernelLog.Char(ch)
  199. END
  200. END;
  201. END DumpPool;
  202. PROCEDURE Init;
  203. VAR i: LONGINT; str: ARRAY 2 OF CHAR;
  204. BEGIN
  205. NEW(pool, StringPoolSize0);
  206. NEW(poolIndex, HashTableSize0);
  207. poolIndexSize := HashTableSize0-1;
  208. FOR i := 0 TO poolIndexSize DO poolIndex[i] := -1 END;
  209. str := "";
  210. AddToPool(i, str);
  211. END Init;
  212. BEGIN
  213. Init;
  214. END StringPool.
  215. (*
  216. ToDo:
  217. * store string len in the pool, use it when retrieving (SYS.MOVE). In this case entries should be aligned
  218. Log:
  219. 15.03.02 prk ALastGet added; DumpPool improved
  220. 08.02.02 prk use Aos instead of Oberon modules
  221. 27.06.01 prk first version
  222. *)