BorrowsWheeler.Mod 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. MODULE BorrowsWheeler; (** AUTHOR GF; PURPOSE "Borrows Wheeler Transformation"; *)
  2. CONST
  3. BlockSize* = 8*1024;
  4. TYPE
  5. MTF = OBJECT (* move to front *)
  6. TYPE
  7. Node = POINTER TO RECORD
  8. byte: CHAR; next: Node
  9. END;
  10. VAR
  11. alpha: Node;
  12. PROCEDURE Initialize;
  13. VAR n: Node; i: LONGINT;
  14. BEGIN
  15. alpha := NIL;
  16. FOR i := 0 TO 255 DO
  17. NEW( n ); n.next :=alpha; n.byte := CHR( 255 - i ); alpha := n
  18. END
  19. END Initialize;
  20. PROCEDURE Encode( VAR buf: ARRAY OF CHAR; len: LONGINT );
  21. VAR l, m: Node; i, k: LONGINT; ch: CHAR;
  22. BEGIN
  23. Initialize;
  24. FOR i := 0 TO len - 1 DO
  25. ch := buf[i];
  26. IF alpha.byte = ch THEN k := 0
  27. ELSE
  28. l := alpha; m := alpha.next; k := 1;
  29. WHILE m.byte # ch DO
  30. INC( k ); l := m; m := m.next
  31. END;
  32. l.next := m.next; m.next := alpha; alpha := m
  33. END;
  34. buf[i] := CHR( k )
  35. END
  36. END Encode;
  37. PROCEDURE Decode( VAR buf: ARRAY OF CHAR; len: LONGINT );
  38. VAR l, m: Node; i, c: LONGINT; ch: CHAR;
  39. BEGIN
  40. Initialize;
  41. FOR i := 0 TO len - 1 DO
  42. ch := buf[i];
  43. IF ch # 0X THEN
  44. c := ORD( ch ); l := alpha;
  45. WHILE c > 1 DO l := l.next; DEC( c ) END;
  46. m := l.next; l.next := m.next; m.next := alpha;
  47. alpha := m
  48. END;
  49. buf[i] := alpha.byte;
  50. END
  51. END Decode;
  52. END MTF;
  53. TYPE
  54. Encoder* = OBJECT
  55. TYPE
  56. Index = LONGINT;
  57. VAR
  58. mtf: MTF; length: LONGINT;
  59. sbuf: ARRAY 2*BlockSize OF CHAR;
  60. rotation: ARRAY BlockSize OF Index;
  61. PROCEDURE &New*;
  62. BEGIN
  63. NEW( mtf );
  64. END New;
  65. PROCEDURE Less( a, b: Index ): BOOLEAN;
  66. VAR i1, i2: Index; n, diff: LONGINT;
  67. BEGIN
  68. n := 0; i1 := rotation[a]; i2 := rotation[b];
  69. REPEAT
  70. diff := ORD( sbuf[i1]) - ORD( sbuf[i2] );
  71. INC( i1 ); INC( i2 ); INC( n );
  72. UNTIL (diff # 0) OR (n = length);
  73. RETURN diff < 0
  74. END Less;
  75. PROCEDURE Swap( a, b: Index );
  76. VAR tmp: Index;
  77. BEGIN
  78. tmp := rotation[a]; rotation[a] := rotation[b]; rotation[b] := tmp
  79. END Swap;
  80. PROCEDURE InsertSort( lo, hi: Index );
  81. VAR x, i, l, h, m, ip, tmp: Index;
  82. BEGIN
  83. x := lo + 1;
  84. WHILE x <= hi DO
  85. IF Less( x, x - 1 ) THEN
  86. (* find insert position ip *)
  87. ip := x - 1; l := lo; h := ip - 1;
  88. WHILE l <= h DO
  89. m := (l + h) DIV 2;
  90. IF Less( x, m ) THEN ip := m; h := m - 1 ELSE l := m + 1 END
  91. END;
  92. (* insert rotation[x] at position ip*)
  93. tmp := rotation[x]; i := x;
  94. REPEAT rotation[i] := rotation[i - 1]; DEC( i ) UNTIL i = ip;
  95. rotation[ip] := tmp;
  96. END;
  97. INC( x )
  98. END
  99. END InsertSort;
  100. PROCEDURE SortR( lo, hi: LONGINT );
  101. VAR i, j, m, n: LONGINT;
  102. BEGIN
  103. IF lo < hi THEN
  104. n := hi - lo + 1;
  105. IF n = 2 THEN
  106. IF Less( hi, lo ) THEN Swap( lo, hi ) END
  107. ELSIF n < 16 THEN
  108. InsertSort( lo, hi ) (* less expensive string compares! *)
  109. ELSE
  110. (* QuickSort *)
  111. i := lo; j := hi; m := (lo + hi) DIV 2;
  112. REPEAT
  113. WHILE Less( i, m ) DO INC( i ) END;
  114. WHILE Less( m, j ) DO DEC( j ) END;
  115. IF i <= j THEN
  116. IF m = i THEN m := j ELSIF m = j THEN m := i END;
  117. Swap( i, j ); INC( i ); DEC( j )
  118. END
  119. UNTIL i > j;
  120. SortR( lo, j ); SortR( i, hi )
  121. END
  122. END
  123. END SortR;
  124. PROCEDURE EncodeBlock*( VAR buf: ARRAY OF CHAR; len: LONGINT ): LONGINT;
  125. VAR i, index: LONGINT;
  126. BEGIN
  127. ASSERT( len <= BlockSize ); length := len;
  128. FOR i := 0 TO length - 1 DO sbuf[i] := buf[i]; sbuf[i+length] := buf[i] END;
  129. FOR i := 0 TO length - 1 DO rotation[i] := INTEGER( i ) END;
  130. SortR( 0, length - 1 );
  131. (* find index of the original row *)
  132. index := 0; WHILE rotation[index] # 0 DO INC( index ) END;
  133. (* replace buf by column L *)
  134. FOR i := 0 TO length -1 DO buf[i] := sbuf[rotation[i] + length - 1] END;
  135. mtf.Encode( buf, length );
  136. RETURN index
  137. END EncodeBlock;
  138. END Encoder;
  139. TYPE
  140. Decoder* = OBJECT
  141. TYPE
  142. Index = LONGINT;
  143. VAR
  144. mtf: MTF;
  145. f, l: ARRAY BlockSize OF CHAR;
  146. lc, fc: ARRAY BlockSize OF INTEGER;
  147. PROCEDURE &New*;
  148. BEGIN
  149. NEW( mtf );
  150. END New;
  151. PROCEDURE -Swap( a, b: Index );
  152. VAR tmp: CHAR;
  153. BEGIN
  154. tmp := f[a]; f[a] := f[b]; f[b] := tmp
  155. END Swap;
  156. PROCEDURE SortF( lo, hi: Index );
  157. VAR i, j, m: Index;
  158. BEGIN
  159. IF lo < hi THEN
  160. IF (hi - lo) = 1 THEN
  161. IF f[hi] < f[lo] THEN Swap( lo, hi ) END;
  162. ELSE
  163. (* QuickSort *)
  164. i := lo; j := hi; m := (lo + hi) DIV 2;
  165. REPEAT
  166. WHILE f[i] < f[m] DO INC( i ) END;
  167. WHILE f[m] < f[j] DO DEC( j ) END;
  168. IF i <= j THEN
  169. IF m = i THEN m := j ELSIF m = j THEN m := i END;
  170. Swap( i, j ); INC( i ); DEC( j )
  171. END
  172. UNTIL i > j;
  173. SortF( lo, j ); SortF( i, hi )
  174. END
  175. END
  176. END SortF;
  177. PROCEDURE DecodeBlock*( VAR buf: ARRAY OF CHAR; len, index: LONGINT );
  178. VAR
  179. i, j, n: LONGINT; ch: CHAR;
  180. xn: ARRAY 256 OF INTEGER;
  181. BEGIN
  182. ASSERT( len <= BlockSize );
  183. mtf.Decode( buf, len );
  184. FOR i := 0 TO 255 DO xn[i] := 0 END;
  185. FOR i := 0 TO len - 1 DO
  186. l[i] := buf[i]; f[i] := buf[i];
  187. j := ORD( l[i] ); lc[i] := xn[j]; INC( xn[j] )
  188. END;
  189. SortF( 0, len - 1 );
  190. FOR i := 0 TO 255 DO xn[i] := 0 END;
  191. FOR i := 0 TO len - 1 DO
  192. j := ORD( f[i] ); fc[i] := xn[j]; INC( xn[j] )
  193. END;
  194. FOR i := 0 TO len - 1 DO
  195. ch := f[index]; n := fc[index]; buf[i] := ch; index := 0;
  196. WHILE (l[index] # ch) OR (lc[index] # n) DO INC( index ) END
  197. END;
  198. END DecodeBlock;
  199. END Decoder;
  200. END BorrowsWheeler.