BorrowsWheeler.Mod 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. MODULE BorrowsWheeler; (** AUTHOR GF; PURPOSE "Borrows Wheeler Transformation"; *)
  2. CONST
  3. BlockSize* = 8*1024;
  4. TYPE
  5. MTF = OBJECT (* move to front *)
  6. TYPE
  7. Node = POINTER TO RECORD
  8. byte: CHAR; next: Node
  9. END;
  10. VAR
  11. alpha: Node;
  12. PROCEDURE Initialize;
  13. VAR n: Node; i: LONGINT;
  14. BEGIN
  15. alpha := NIL;
  16. FOR i := 0 TO 255 DO
  17. NEW( n ); n.next :=alpha; n.byte := CHR( 255 - i ); alpha := n
  18. END
  19. END Initialize;
  20. PROCEDURE Encode( VAR buf: ARRAY OF CHAR; len: LONGINT );
  21. VAR l, m: Node; i, k: LONGINT; ch: CHAR;
  22. BEGIN
  23. Initialize;
  24. FOR i := 0 TO len - 1 DO
  25. ch := buf[i];
  26. IF alpha.byte = ch THEN k := 0
  27. ELSE
  28. l := alpha; m := alpha.next; k := 1;
  29. WHILE m.byte # ch DO
  30. INC( k ); l := m; m := m.next
  31. END;
  32. l.next := m.next; m.next := alpha; alpha := m
  33. END;
  34. buf[i] := CHR( k )
  35. END
  36. END Encode;
  37. PROCEDURE Decode( VAR buf: ARRAY OF CHAR; len: LONGINT );
  38. VAR l, m: Node; i, c: LONGINT; ch: CHAR;
  39. BEGIN
  40. Initialize;
  41. FOR i := 0 TO len - 1 DO
  42. ch := buf[i];
  43. IF ch # 0X THEN
  44. c := ORD( ch ); l := alpha;
  45. WHILE c > 1 DO l := l.next; DEC( c ) END;
  46. m := l.next; l.next := m.next; m.next := alpha;
  47. alpha := m
  48. END;
  49. buf[i] := alpha.byte;
  50. END
  51. END Decode;
  52. END MTF;
  53. TYPE
  54. Encoder* = OBJECT
  55. TYPE
  56. Index = LONGINT;
  57. VAR
  58. mtf: MTF; length: LONGINT;
  59. sbuf: ARRAY 2*BlockSize OF CHAR;
  60. rotation: ARRAY BlockSize OF Index;
  61. PROCEDURE &New*;
  62. BEGIN
  63. NEW( mtf );
  64. END New;
  65. PROCEDURE Less( a, b: Index ): BOOLEAN;
  66. VAR i1, i2: Index; n, diff: LONGINT;
  67. BEGIN
  68. n := 0; i1 := rotation[a]; i2 := rotation[b];
  69. REPEAT
  70. diff := ORD( sbuf[i1]) - ORD( sbuf[i2] );
  71. INC( i1 ); INC( i2 ); INC( n );
  72. UNTIL (diff # 0) OR (n = length);
  73. RETURN diff < 0
  74. END Less;
  75. PROCEDURE Swap( a, b: Index );
  76. VAR tmp: Index;
  77. BEGIN
  78. tmp := rotation[a]; rotation[a] := rotation[b]; rotation[b] := tmp
  79. END Swap;
  80. PROCEDURE InsertSort( lo, hi: Index );
  81. VAR x, i, l, h, m, ip, tmp: Index;
  82. BEGIN
  83. x := lo + 1;
  84. WHILE x <= hi DO
  85. IF Less( x, x - 1 ) THEN
  86. (* find insert position ip *)
  87. ip := x - 1; l := lo; h := ip - 1;
  88. WHILE l <= h DO
  89. m := (l + h) DIV 2;
  90. IF Less( x, m ) THEN ip := m; h := m - 1 ELSE l := m + 1 END
  91. END;
  92. (* insert rotation[x] at position ip*)
  93. tmp := rotation[x]; i := x;
  94. REPEAT rotation[i] := rotation[i - 1]; DEC( i ) UNTIL i = ip;
  95. rotation[ip] := tmp;
  96. END;
  97. INC( x )
  98. END
  99. END InsertSort;
  100. PROCEDURE SortR( lo, hi: LONGINT );
  101. VAR i, j, m, n: LONGINT;
  102. BEGIN
  103. IF lo < hi THEN
  104. i := lo; j := hi; m := (lo + hi) DIV 2; n := hi - lo + 1;
  105. IF n = 2 THEN
  106. IF Less( hi, lo ) THEN
  107. Swap( lo, hi ) END;
  108. ELSIF n = 3 THEN
  109. IF Less( m, lo ) THEN Swap( lo, m ) END;
  110. IF Less( hi, m ) THEN
  111. Swap( m, hi );
  112. IF Less( m, lo ) THEN Swap( lo, m ) END
  113. END
  114. ELSIF n < 16 THEN
  115. InsertSort( lo, hi )
  116. ELSE
  117. (* QuickSort *)
  118. REPEAT
  119. WHILE Less( i, m ) DO INC( i ) END;
  120. WHILE Less( m, j ) DO DEC( j ) END;
  121. IF i <= j THEN
  122. IF m = i THEN m := j ELSIF m = j THEN m := i END;
  123. Swap( i, j ); INC( i ); DEC( j )
  124. END
  125. UNTIL i > j;
  126. SortR( lo, j ); SortR( i, hi )
  127. END
  128. END
  129. END SortR;
  130. PROCEDURE EncodeBlock*( VAR buf: ARRAY OF CHAR; len: LONGINT ): LONGINT;
  131. VAR i, index: LONGINT;
  132. BEGIN
  133. ASSERT( len <= BlockSize ); length := len;
  134. FOR i := 0 TO length - 1 DO sbuf[i] := buf[i]; sbuf[i+length] := buf[i] END;
  135. FOR i := 0 TO length - 1 DO rotation[i] := INTEGER( i ) END;
  136. SortR( 0, length - 1 );
  137. (* find index of the original row *)
  138. index := 0; WHILE rotation[index] # 0 DO INC( index ) END;
  139. (* replace buf by column L *)
  140. FOR i := 0 TO length -1 DO buf[i] := sbuf[rotation[i] + length - 1] END;
  141. mtf.Encode( buf, length );
  142. RETURN index
  143. END EncodeBlock;
  144. END Encoder;
  145. TYPE
  146. Decoder* = OBJECT
  147. TYPE
  148. Index = LONGINT;
  149. VAR
  150. length, index: LONGINT;
  151. mtf: MTF;
  152. f: ARRAY BlockSize OF CHAR;
  153. PROCEDURE &New*;
  154. BEGIN
  155. NEW( mtf );
  156. END New;
  157. PROCEDURE Swap( a, b: Index );
  158. VAR tmp: CHAR;
  159. BEGIN
  160. tmp := f[a]; f[a] := f[b]; f[b] := tmp
  161. END Swap;
  162. PROCEDURE InsertSort( lo, hi: Index );
  163. VAR x, i, l, h, m, ip: Index; tmp: CHAR;
  164. BEGIN
  165. x := lo + 1;
  166. WHILE x <= hi DO
  167. IF f[x] < f[x - 1] THEN
  168. (* find insert position ip *)
  169. ip := x - 1; l := lo; h := ip - 1;
  170. WHILE l <= h DO
  171. m := (l + h) DIV 2;
  172. IF f[x] < f[m] THEN ip := m; h := m - 1 ELSE l := m + 1 END
  173. END;
  174. (* insert f[x] at position ip*)
  175. tmp := f[x]; i := x;
  176. REPEAT f[i] := f[i - 1]; DEC( i ) UNTIL i = ip;
  177. f[ip] := tmp;
  178. END;
  179. INC( x )
  180. END
  181. END InsertSort;
  182. PROCEDURE SortF( lo, hi: Index );
  183. VAR i, j, m: Index; n: LONGINT;
  184. BEGIN
  185. IF lo < hi THEN
  186. i := lo; j := hi; m := (lo + hi) DIV 2; n := hi - lo + 1;
  187. IF n = 2 THEN
  188. IF f[hi] < f[lo] THEN Swap( lo, hi ) END;
  189. ELSIF n = 3 THEN
  190. IF f[m] < f[lo] THEN Swap( lo, m ) END;
  191. IF f[hi] < f[m] THEN
  192. Swap( m, hi );
  193. IF f[m] < f[lo] THEN Swap( lo, m ) END
  194. END
  195. ELSIF n < 16 THEN
  196. InsertSort( lo, hi )
  197. ELSE
  198. (* QuickSort *)
  199. REPEAT
  200. WHILE f[i] < f[m] DO INC( i ) END;
  201. WHILE f[m] < f[j] DO DEC( j ) END;
  202. IF i <= j THEN
  203. IF m = i THEN m := j ELSIF m = j THEN m := i END;
  204. Swap( i, j ); INC( i ); DEC( j )
  205. END
  206. UNTIL i > j;
  207. SortF( lo, j ); SortF( i, hi )
  208. END
  209. END
  210. END SortF;
  211. PROCEDURE DecodeBlock*( VAR buf: ARRAY OF CHAR; len, ind: LONGINT );
  212. VAR
  213. i, j, n: LONGINT; ch: CHAR;
  214. l: POINTER TO ARRAY OF CHAR;
  215. lc, fc: POINTER TO ARRAY OF LONGINT;
  216. xn: ARRAY 256 OF LONGINT;
  217. BEGIN
  218. ASSERT( len <= BlockSize ); length := len; index := ind;
  219. mtf.Decode( buf, len );
  220. NEW( l, length ); NEW( lc, length ); NEW( fc, length );
  221. FOR i := 0 TO 255 DO xn[i] := 0 END;
  222. FOR i := 0 TO length - 1 DO
  223. l[i] := buf[i]; f[i] := buf[i];
  224. j := ORD( l[i] ); lc[i] := xn[j]; INC( xn[j] )
  225. END;
  226. SortF( 0, length - 1 );
  227. FOR i := 0 TO 255 DO xn[i] := 0 END;
  228. FOR i := 0 TO length - 1 DO
  229. j := ORD( f[i] ); fc[i] := xn[j]; INC( xn[j] )
  230. END;
  231. FOR i := 0 TO length - 1 DO
  232. ch := f[index]; n := fc[index]; buf[i] := ch; index := 0;
  233. WHILE (l[index] # ch) OR (lc[index] # n) DO INC( index ) END
  234. END;
  235. END DecodeBlock;
  236. END Decoder;
  237. END BorrowsWheeler.