瀏覽代碼

code cleanups

git-svn-id: https://svn-dept.inf.ethz.ch/svn/lecturers/a2/trunk@8705 8c9fc860-2736-0410-a75d-ab315db34111
infsvn.guenter 6 年之前
父節點
當前提交
7795057246
共有 5 個文件被更改,包括 375 次插入389 次删除
  1. 160 152
      source/AdaptiveHuffman.Mod
  2. 17 54
      source/BorrowsWheeler.Mod
  3. 154 137
      source/Huffman.Mod
  4. 21 22
      source/OZip.Mod
  5. 23 24
      source/OZip2.Mod

+ 160 - 152
source/AdaptiveHuffman.Mod

@@ -1,117 +1,148 @@
 MODULE AdaptiveHuffman;	(** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
 MODULE AdaptiveHuffman;	(** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
 
 
-(* adaptive Huffman compression, Vitter's algorithm *)
+(* adaptive Huffman compression, Vitter's FGK algorithm *)
 
 
 IMPORT Streams;
 IMPORT Streams;
 
 
 CONST
 CONST
-	AlphabetSize* = 256;	(* byte *)
+	AlphabetSize = 256;		(* byte *)
 	BlockSize* = 8*1024;
 	BlockSize* = 8*1024;
-	ScaleSize = 2*1024;
+	ScaleLimit = 4*1024;
 	
 	
 	Encode = 0;  Decode = 1;
 	Encode = 0;  Decode = 1;
 
 
+TYPE	
+	BitReader = OBJECT
+		VAR
+			in: Streams.Reader;
+			curByte, curBit: LONGINT;
+			
+			PROCEDURE &New( r: Streams.Reader );
+			BEGIN
+				in := r;  curBit := -1;  curByte := 0
+			END New;
+			
+			PROCEDURE Initialize;
+			BEGIN
+				curBit := -1;  curByte := 0
+			END Initialize;
+			
+			PROCEDURE Bit( ): LONGINT;
+			VAR
+				bit: LONGINT;  ch: CHAR;
+			BEGIN
+				IF curBit < 0 THEN
+					in.Char( ch );  curByte := ORD( ch );  curBit := 7
+				END;
+				bit := ASH( curByte, -curBit ) MOD 2;
+				DEC( curBit );
+				RETURN bit
+			END Bit;
+	
+	END BitReader;
+	
+
 TYPE
 TYPE
-	Index = INTEGER;		(* 16-bit integer to keep the table small *)
-	Symbol = LONGINT;
+	BitWriter = OBJECT
+		VAR
+			out: Streams.Writer;
+			curByte, curBit: LONGINT;
+			
+			PROCEDURE &New( w: Streams.Writer );
+			BEGIN
+				out := w;  curBit := 0;  curByte := 0
+			END New;
+			
+			PROCEDURE Bit( bit: LONGINT );
+			BEGIN
+				curByte := 2*curByte + bit;
+				INC( curBit );
+				IF curBit > 7 THEN
+					out.Char( CHR( curByte ) );  curByte := 0;  curBit := 0
+				END
+			END Bit;
+		
+			PROCEDURE Finish;	(* flush last few bits *)
+			BEGIN
+				WHILE curBit # 0 DO  Bit( 0 )  END;
+				out.Update
+			END Finish;
+				
+	END BitWriter;
+	
 	
 	
-	Node = RECORD
-		weight: LONGINT;		(* node weight	*)
-		up: Index;				(* next node up the tree	*)
-		down: Index;			(* pair of down nodes	*)
-		pattern: Symbol		(* node pattern	*)
-	END;
 
 
 
 
+TYPE
 	HuffmanCoder = OBJECT		
 	HuffmanCoder = OBJECT		
-	VAR
-		mode: SHORTINT;		(* encode, decode *)
-		in: Streams.Reader;	(* input from archive *)
-		out: Streams.Writer;	(* output to archive *)
-		curByte, curBit: LONGINT;
-		
-		esc: Index;		(* the current tree height	*)
-		root: Index;         (* the root of the tree	*)
-		map: ARRAY AlphabetSize OF Index;			(* mapping for symbols to nodes	*)
-		table: ARRAY 2*AlphabetSize + 4 OF Node;	(* the coding table	*)
+		TYPE
+			Index = INTEGER;		(* 16-bit integer to keep the table small *)
+			Pattern = INTEGER;
+			Node = RECORD
+				weight: INTEGER;		(* node weight	*)
+				pattern: Pattern;		(* node pattern (if weight is even, leaf node)	*)
+				up: Index;				(* next node up the tree	*)
+				down: Index			(* pair of down nodes (if weight is odd, internal node)	*)
+			END;
+		VAR
+			mode: SHORTINT;	(* Encode, Decode *)
+			in: BitReader;		(* input from archive *)
+			out: BitWriter;	(* output to archive *)
+			
+			esc: Index;		(* the current escape node	*)
+			root: Index;         (* the root of the tree	*)
+			map: ARRAY AlphabetSize OF Index;			(* mapping of patterns to nodes	*)
+			table: ARRAY 2*AlphabetSize + 2 OF Node;	(* the Huffman tree	*)
 		
 		
 		
 		
 		
 		
-		PROCEDURE &New( m: SHORTINT; input: Streams.Reader;  output: Streams.Writer );
+		PROCEDURE &New( m: SHORTINT;  input: Streams.Reader;  output: Streams.Writer );
 		BEGIN
 		BEGIN
 			ASSERT( m IN {Encode, Decode} );
 			ASSERT( m IN {Encode, Decode} );
 			mode := m;
 			mode := m;
-			IF mode = Encode THEN  out := output  ELSE  in := input  END;
+			IF mode = Encode THEN  NEW( out, output )  ELSE  NEW( in, input )  END;
 		END New;
 		END New;
 		
 		
 		PROCEDURE Initialize;
 		PROCEDURE Initialize;
 		VAR  i: Index;
 		VAR  i: Index;
 		BEGIN
 		BEGIN
-			root := 2*AlphabetSize + 4 - 1;  curByte := 0;
-			IF mode = Encode THEN  curByte := 0;  curBit := 0  ELSE  curBit := -1  END;
+			root := 2*AlphabetSize + 2 - 1; 
 			FOR i := 0 TO root DO
 			FOR i := 0 TO root DO
 				table[i].up := 0;  table[i].down := 0;  table[i].pattern := 0;  table[i].weight := 0
 				table[i].up := 0;  table[i].down := 0;  table[i].pattern := 0;  table[i].weight := 0
 			END;
 			END;
 			FOR i := 0 TO AlphabetSize - 1 DO  map[i]  := 0  END;
 			FOR i := 0 TO AlphabetSize - 1 DO  map[i]  := 0  END;
 			esc := root;
 			esc := root;
+			IF mode = Decode THEN  in.Initialize  END
 		END Initialize;		
 		END Initialize;		
 		
 		
-		
-		PROCEDURE Finish;	(* flush last few bits *)
+		PROCEDURE Finish;	
 		BEGIN
 		BEGIN
 			ASSERT( mode = Encode );
 			ASSERT( mode = Encode );
-			WHILE curBit # 0 DO  PutBit( 0 )  END;
-			out.Update
-		END Finish;
-		
-		
-		
-		PROCEDURE GetBit( ): LONGINT;
-		VAR
-			bit: LONGINT;  ch: CHAR;
-		BEGIN
-			IF curBit < 0 THEN
-				in.Char( ch );  curByte := ORD( ch );  curBit := 7
-			END;
-			bit := ASH( curByte, -curBit ) MOD 2;
-			DEC( curBit );
-			RETURN bit
-		END GetBit;
-		
-		PROCEDURE PutBit( bit: LONGINT );
-		BEGIN
-			curByte := 2*curByte + bit;
-			INC( curBit );
-			IF curBit > 7 THEN
-				out.Char( CHR( curByte ) );  curByte := 0;  curBit := 0
-			END
-		END PutBit;
-		
+			out.Finish  (* flush last few bits *)
+		END Finish;	
+
 		
 		
-		PROCEDURE GetPattern(  ): Symbol;
-		VAR  sym, i, bit: LONGINT;
+		PROCEDURE GetPattern(  ): Pattern;
+		VAR  i: INTEGER;  patt: Pattern;
 		BEGIN
 		BEGIN
-			sym := 0;  bit := 1;
-			FOR i := 1 TO 8 DO
-				IF GetBit() = 1 THEN  sym := sym + bit  END;
-				bit := bit * 2;
+			patt := 0; 
+			FOR i := 0 TO 7 DO
+				IF in.Bit() = 1 THEN  patt := patt + INTEGER(ASH( 1, i ))  END;
 			END;
 			END;
-			RETURN sym
+			RETURN patt
 		END GetPattern;
 		END GetPattern;
 		
 		
-		
-		(*  send the bits for an escaped symbol	*)
-		PROCEDURE PutPattern( sym: Symbol );
+		PROCEDURE PutPattern( patt: Pattern );
 		VAR i: LONGINT;
 		VAR i: LONGINT;
 		BEGIN
 		BEGIN
-			FOR i := 1 TO 8 DO
-				PutBit( sym MOD 2 );  sym := sym DIV 2;
+			FOR i := 0 TO 7 DO
+				out.Bit( patt MOD 2 );  patt := patt DIV 2;
 			END
 			END
 		END PutPattern;
 		END PutPattern;
 
 
 		
 		
-		(*	split escape node to incorporate new symbol	*)
-		PROCEDURE AddSymbol( sym: Symbol ): Index;
+		(* split escape node to incorporate a new pattern *)
+		PROCEDURE AddPattern( patt: Pattern ): Index;
 		VAR  pair, node: Index;
 		VAR  pair, node: Index;
 		BEGIN
 		BEGIN
 			ASSERT( esc > 1 );
 			ASSERT( esc > 1 );
@@ -120,55 +151,54 @@ TYPE
 			table[pair].down := node;  table[pair].weight := 1;  
 			table[pair].down := node;  table[pair].weight := 1;  
 			
 			
 			table[node].up := pair;
 			table[node].up := pair;
-			table[node].down := 0;  table[node].weight := 0;  table[node].pattern := sym; 
+			table[node].down := 0;  table[node].weight := 0;  table[node].pattern := patt; 
 			
 			
 			table[esc].up := pair;
 			table[esc].up := pair;
 			table[esc].down := 0;  table[esc].weight := 0;  
 			table[esc].down := 0;  table[esc].weight := 0;  
 			
 			
-			map[sym] := node;
+			map[patt] := node;
 			
 			
 			RETURN node;
 			RETURN node;
-		END AddSymbol;		
+		END AddPattern;		
 		
 		
 
 
-		(*  swap leaf to group leader position return symbol's new node	*)
+		(* swap leaf to group leader position, return pattern's new node *)
 		PROCEDURE GroupLeader( node: Index ): Index;
 		PROCEDURE GroupLeader( node: Index ): Index;
 		VAR
 		VAR
 			leader: Index;
 			leader: Index;
 			weight: LONGINT;
 			weight: LONGINT;
-			sym, prev:  Symbol;
+			patt, prev:  Pattern;
 		BEGIN
 		BEGIN
 			weight := table[node].weight;  leader := node;
 			weight := table[node].weight;  leader := node;
 			WHILE (leader < root) & (weight = table[leader + 1].weight) DO  INC( leader )  END;
 			WHILE (leader < root) & (weight = table[leader + 1].weight) DO  INC( leader )  END;
 			IF leader # node THEN  
 			IF leader # node THEN  
-				(*	swap the leaf nodes	*)
-				sym := table[node].pattern;
+				(* swap the leaf nodes *)
+				patt := table[node].pattern;
 				prev := table[leader].pattern;
 				prev := table[leader].pattern;
-				table[leader].pattern := sym;
+				table[leader].pattern := patt;
 				table[node].pattern := prev;
 				table[node].pattern := prev;
-				map[sym] := leader;
+				map[patt] := leader;
 				map[prev] := node;
 				map[prev] := node;
 			END;
 			END;
 			RETURN leader
 			RETURN leader
 		END GroupLeader;		
 		END GroupLeader;		
 				
 				
 
 
-		(*	slide internal node up over all leaves of equal weight; 
+		(*	slide internal node up over all leaves of equal weight
 			or exchange leaf with next smaller weight internal node.
 			or exchange leaf with next smaller weight internal node.
 			return node's new position	*)
 			return node's new position	*)
 		PROCEDURE SlideNode( node: Index ): Index;
 		PROCEDURE SlideNode( node: Index ): Index;
 		VAR  next: Index;  swap: Node;
 		VAR  next: Index;  swap: Node;
 		BEGIN
 		BEGIN
 			swap := table[node];  next := node + 1;
 			swap := table[node];  next := node + 1;
-			(* if we're sliding an internal node, find the highest possible leaf to exchange with	*)
+			(* if we're sliding an internal node, find the highest possible leaf to exchange with *)
 			IF ODD( swap.weight ) THEN
 			IF ODD( swap.weight ) THEN
 				WHILE swap.weight > table[next+1].weight DO  INC( next )  END
 				WHILE swap.weight > table[next+1].weight DO  INC( next )  END
 			END;
 			END;
-			(*	swap the two nodes	*)
+			(* swap the two nodes *)
 			table[node] := table[next];  table[next] := swap;
 			table[node] := table[next];  table[next] := swap;
 			table[next].up := table[node].up;  table[node].up := swap.up;
 			table[next].up := table[node].up;  table[node].up := swap.up;
-
-			(*	repair the symbol map and tree structure	*)
+			(* repair the pattern map and tree structure *)
 			IF ODD( swap.weight ) THEN
 			IF ODD( swap.weight ) THEN
 				table[swap.down].up := next;
 				table[swap.down].up := next;
 				table[swap.down - 1].up := next;
 				table[swap.down - 1].up := next;
@@ -182,8 +212,8 @@ TYPE
 		END SlideNode;		
 		END SlideNode;		
 
 
 
 
-		(*	increment symbol weight and re balance the tree.	*)
-		PROCEDURE IncWeight( node: Index );
+		(* increment pattern weight and re balance the tree. *)
+		PROCEDURE IncrementWeight( node: Index );
 		VAR  up: Index;
 		VAR  up: Index;
 		BEGIN
 		BEGIN
 			(*	obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
 			(*	obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
@@ -194,7 +224,7 @@ TYPE
 				node := GroupLeader( node )
 				node := GroupLeader( node )
 			END;
 			END;
 			(*	increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
 			(*	increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
-				internal nodes work upwards from their initial positions; while symbol nodes slide over first,
+				internal nodes work upwards from their initial positions; while pattern nodes slide over first,
 				then work up from their final positions.	*)
 				then work up from their final positions.	*)
 			INC( table[node].weight, 2 );  up := table[node].up;
 			INC( table[node].weight, 2 );  up := table[node].up;
 			WHILE up # 0 DO
 			WHILE up # 0 DO
@@ -206,16 +236,16 @@ TYPE
 				END;
 				END;
 				INC( table[node].weight, 2 );  up := table[node].up
 				INC( table[node].weight, 2 );  up := table[node].up
 			END;
 			END;
-		END IncWeight;
+		END IncrementWeight;
 		
 		
 			
 			
 		(*  scale all weights and rebalance the tree,
 		(*  scale all weights and rebalance the tree,
-		    zero weight nodes are removed from the tree by sliding them out the rank list	*)
+		    zero weight nodes are removed from the tree	*)
 		PROCEDURE Scale;
 		PROCEDURE Scale;
-		VAR  node, prev: Index;  weight: LONGINT;
+		VAR  node, prev: Index;  weight: INTEGER;
 		
 		
-			PROCEDURE Weight( idx: Index ): LONGINT;
-			VAR w: LONGINT;
+			PROCEDURE Weight( idx: Index ): INTEGER;
+			VAR w: INTEGER;
 			BEGIN
 			BEGIN
 				w := table[idx].weight;
 				w := table[idx].weight;
 				IF ODD( w ) THEN  RETURN w - 1  ELSE  RETURN w  END
 				IF ODD( w ) THEN  RETURN w - 1  ELSE  RETURN w  END
@@ -223,94 +253,80 @@ TYPE
 			
 			
 		BEGIN
 		BEGIN
 			node := esc + 1;
 			node := esc + 1;
-			(*	work up the tree from the escape node scaling weights by the value of bits	*)
+			(* work up the tree from the escape node scaling weights *)
 			WHILE node <= root DO
 			WHILE node <= root DO
-				(*	recompute the weight of internal nodes; slide down and out any unused ones	*)
 				weight := table[node].weight;
 				weight := table[node].weight;
 				IF ODD( weight ) THEN
 				IF ODD( weight ) THEN
+					(* recompute the weight of internal nodes *)
 					weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
 					weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
 				ELSE
 				ELSE
-					(*	remove zero weight leaves by incrementing esc and removing them from the symbol map 	*)
+					(* remove zero weight leaves and remove them from the pattern map *)
 					weight := weight DIV 2;
 					weight := weight DIV 2;
 					IF ODD( weight ) THEN  DEC( weight )  END;
 					IF ODD( weight ) THEN  DEC( weight )  END;
 					IF weight = 0 THEN
 					IF weight = 0 THEN
 						map[table[node].pattern] := 0;  INC( esc, 2 );
 						map[table[node].pattern] := 0;  INC( esc, 2 );
 					END
 					END
 				END;
 				END;
-				(*	slide the scaled node back down over any previous nodes with larger weights	*)
+				(* slide the scaled node back down over any previous nodes with larger weights *)
 				table[node].weight := weight;
 				table[node].weight := weight;
 				prev := node - 1;
 				prev := node - 1;
 				WHILE weight < table[prev].weight DO  IGNORE SlideNode( prev );  DEC( prev )  END;
 				WHILE weight < table[prev].weight DO  IGNORE SlideNode( prev );  DEC( prev )  END;
 				INC( node )
 				INC( node )
 			END;
 			END;
-			(*	prepare a new escape node	*)
+			(* prepare a new escape node *)
 			table[esc].down := 0;  table[esc].weight := 0;
 			table[esc].down := 0;  table[esc].weight := 0;
 		END Scale;
 		END Scale;
 		
 		
+		
+		
 	
 	
-		(*	encode the next symbol	*)
 		PROCEDURE EncodeByte( ch: CHAR );
 		PROCEDURE EncodeByte( ch: CHAR );
 		VAR
 		VAR
-			code, n: LONGINT;  
-			up, idx, node: Index;  sym: Symbol;
+			code, bits: LONGINT;  
+			cur, node: Index;  patt: Pattern;
 		BEGIN
 		BEGIN
-			ASSERT( mode = Encode );
-			(*	for a new symbol, direct the receiver to the escape node	*)
-			sym := ORD( ch );
-			node := map[sym];  idx := node; 
-			IF idx = 0 THEN  idx := esc  END;
-			ASSERT( idx # 0 );
-			(*	accumulate the code bits by working up the tree from the node to the root	*)
-			code := 1;  n := 0;  up := table[idx].up;
-			WHILE up # 0 DO
-				code := code * 2;  INC( n );
-				IF ODD( idx ) THEN  INC( code )  END;
-				idx := up;  up := table[idx].up	
+			patt := ORD( ch );  node := map[patt];  
+			
+			(* accumulate the code bits by working up from the node to the root	*)
+			cur := node;  code := 0;  bits := 0;
+			IF cur = 0 THEN  cur := esc  END;
+			WHILE table[cur].up # 0 DO
+				IF ODD( cur ) THEN  code := code*2 + 1  ELSE  code := code*2  END;
+				INC( bits );  cur := table[cur].up	
 			END;
 			END;
-			(*	send the code, root selector bit first	*)
-			WHILE n > 0 DO
-				PutBit( code MOD 2 );  code := code DIV 2;
-				DEC( n )
+			(* send the code *)
+			WHILE bits > 0 DO
+				out.Bit( code MOD 2 );  code := code DIV 2;
+				DEC( bits )
 			END;
 			END;
-			(*	send pattern and incorporate it into the tree	*)
 			IF node = 0 THEN
 			IF node = 0 THEN
-				PutPattern( sym );
-				node := AddSymbol( sym );
+				(* send the new pattern and incorporate it into the tree *)
+				PutPattern( patt );  node := AddPattern( patt );
 			END;
 			END;
-			(*	adjust and re-balance the tree	*)
-			IncWeight( node );
+			IncrementWeight( node );
 		END EncodeByte;
 		END EncodeByte;
 
 
 
 
-		(*	decode the next symbol	*)
-		PROCEDURE DecodeByte( ): CHAR;
+		PROCEDURE ExtractByte( ): CHAR;
 		VAR
 		VAR
 			node, down: Index; 
 			node, down: Index; 
-			sym: Symbol;
+			patt: Pattern;
 		BEGIN
 		BEGIN
-			ASSERT( mode = Decode );
-			(*	work down the tree from the root until reaching either a leaf or the escape node.  
-			A one bit means go left, a zero means go right.	*)
+			(* work down the tree from the root until reaching either a leaf or the escape node *)
 			node := root;  down := table[node].down;
 			node := root;  down := table[node].down;
 			WHILE down # 0 DO
 			WHILE down # 0 DO
-				IF GetBit( ) = 1 THEN  
-					node := down-1
-				ELSE
-					node := down;
-				END;
+				IF in.Bit( ) = 1 THEN  node := down - 1  ELSE  node := down  END;
 				down := table[node].down;
 				down := table[node].down;
 			END;
 			END;
-			(*	sent to the escape node	*)
 			IF node = esc THEN
 			IF node = esc THEN
-				sym := GetPattern( );  
-				node := AddSymbol( sym );
+				(* add the new pattern to the tree *)
+				patt := GetPattern( );  node := AddPattern( patt )
 			ELSE
 			ELSE
-				sym := table[node].pattern
+				patt := table[node].pattern
 			END;
 			END;
-			(*	adjust and re-balance the tree	*)
-			IncWeight( node );
-			RETURN CHR( sym );
-		END DecodeByte;
+			IncrementWeight( node );
+			RETURN CHR( patt );
+		END ExtractByte;
 		
 		
 	END HuffmanCoder;
 	END HuffmanCoder;
 
 
@@ -320,23 +336,20 @@ TYPE
 	Encoder* =OBJECT
 	Encoder* =OBJECT
 		VAR
 		VAR
 			huff: HuffmanCoder;
 			huff: HuffmanCoder;
-			out: Streams.Writer;
 			
 			
 			PROCEDURE & New*( archive: Streams.Writer );
 			PROCEDURE & New*( archive: Streams.Writer );
 			BEGIN
 			BEGIN
 				NEW( huff, Encode, NIL, archive );
 				NEW( huff, Encode, NIL, archive );
-				out := archive
 			END New;
 			END New;
 			
 			
 			PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
 			PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
 			VAR  i: LONGINT;
 			VAR  i: LONGINT;
 			BEGIN
 			BEGIN
-				out.RawNum( len );
 				huff.Initialize;	
 				huff.Initialize;	
 				i := 0;
 				i := 0;
 				WHILE i < len DO
 				WHILE i < len DO
 					huff.EncodeByte( source[i] );  INC( i );
 					huff.EncodeByte( source[i] );  INC( i );
-					IF (i MOD ScaleSize = 0) & ((len-i) >= ScaleSize) THEN  huff.Scale  END
+					IF (i MOD ScaleLimit = 0) & ((len-i) >= ScaleLimit) THEN  huff.Scale  END
 				END;
 				END;
 				huff.Finish
 				huff.Finish
 			END CompressBlock;
 			END CompressBlock;
@@ -349,26 +362,21 @@ TYPE
 	Decoder* =OBJECT
 	Decoder* =OBJECT
 		VAR
 		VAR
 			huff: HuffmanCoder;
 			huff: HuffmanCoder;
-			input: Streams.Reader;
 			
 			
 			PROCEDURE & New*( archive: Streams.Reader );
 			PROCEDURE & New*( archive: Streams.Reader );
 			BEGIN
 			BEGIN
 				NEW( huff, Decode, archive, NIL );
 				NEW( huff, Decode, archive, NIL );
-				input := archive
 			END New;
 			END New;
 			
 			
-			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
-			VAR  i, blockSize: LONGINT; 
+			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR; len: LONGINT );
+			VAR  i: LONGINT; 
 			BEGIN
 			BEGIN
-				input.RawNum( blockSize );
-				IF blockSize = 0 THEN (* end of stream *) RETURN 0  END;
 				huff.Initialize;  
 				huff.Initialize;  
 				i := 0;
 				i := 0;
-				WHILE i < blockSize DO
-					buf[i] := huff.DecodeByte( );  INC( i );
-					IF (i MOD ScaleSize = 0) & ((blockSize-i) >= ScaleSize) THEN  huff.Scale  END
+				WHILE i < len DO
+					buf[i] := huff.ExtractByte( );  INC( i );
+					IF (i MOD ScaleLimit = 0) & ((len - i) >= ScaleLimit) THEN  huff.Scale  END
 				END;
 				END;
-				RETURN blockSize
 			END ExtractBlock;
 			END ExtractBlock;
 
 
 	END Decoder;
 	END Decoder;

+ 17 - 54
source/BorrowsWheeler.Mod

@@ -115,20 +115,14 @@ TYPE
 			VAR i, j, m, n: LONGINT;
 			VAR i, j, m, n: LONGINT;
 			BEGIN
 			BEGIN
 				IF lo < hi THEN
 				IF lo < hi THEN
-					i := lo;  j := hi;  m := (lo + hi) DIV 2;  n := hi - lo + 1;
+					n := hi - lo + 1;
 					IF n = 2 THEN 
 					IF n = 2 THEN 
-					IF Less( hi, lo ) THEN  
-						Swap( lo, hi )  END;
-					ELSIF n = 3 THEN 
-						IF Less( m, lo ) THEN  Swap( lo, m )  END;
-						IF Less( hi, m ) THEN  
-							Swap( m, hi );
-							IF Less( m, lo ) THEN  Swap( lo, m )  END	
-						END
+						IF Less( hi, lo ) THEN  Swap( lo, hi )  END
 					ELSIF n < 16 THEN  
 					ELSIF n < 16 THEN  
-						InsertSort( lo, hi )
+						InsertSort( lo, hi )  (* less expensive string compares! *)
 					ELSE
 					ELSE
 						(* QuickSort *)
 						(* QuickSort *)
+						i := lo;  j := hi;  m := (lo + hi) DIV 2;
 						REPEAT
 						REPEAT
 							WHILE Less( i, m ) DO  INC( i )  END;  
 							WHILE Less( i, m ) DO  INC( i )  END;  
 							WHILE Less( m, j ) DO  DEC( j )  END;
 							WHILE Less( m, j ) DO  DEC( j )  END;
@@ -165,9 +159,9 @@ TYPE
 		TYPE
 		TYPE
 			Index = LONGINT;
 			Index = LONGINT;
 		VAR
 		VAR
-			length, index: LONGINT;
 			mtf: MTF;
 			mtf: MTF;
-			f: ARRAY BlockSize OF CHAR; 
+			f, l: ARRAY BlockSize OF CHAR; 
+			lc, fc: ARRAY BlockSize OF INTEGER;
 			
 			
 			PROCEDURE &New*;
 			PROCEDURE &New*;
 			BEGIN
 			BEGIN
@@ -175,50 +169,22 @@ TYPE
 			END New;
 			END New;
 			
 			
 			
 			
-			PROCEDURE Swap( a, b: Index );
+			PROCEDURE -Swap( a, b: Index );
 			VAR  tmp: CHAR;
 			VAR  tmp: CHAR;
 			BEGIN
 			BEGIN
 				tmp := f[a];  f[a] := f[b];  f[b] := tmp
 				tmp := f[a];  f[a] := f[b];  f[b] := tmp
 			END Swap;
 			END Swap;
 			
 			
-			PROCEDURE InsertSort( lo, hi: Index );
-			VAR x, i, l, h, m, ip: Index;  tmp: CHAR; 
-			BEGIN
-				x := lo + 1; 
-				WHILE x <= hi DO
-					IF f[x] < f[x - 1]  THEN
-						(* find insert position ip *)
-						ip := x - 1;  l := lo;  h := ip - 1;
-						WHILE l <= h DO
-							m := (l + h) DIV 2; 
-							IF f[x] < f[m] THEN  ip := m;  h := m - 1  ELSE  l := m + 1  END
-						END;
-						(* insert f[x] at position ip*)
-						tmp := f[x];  i := x;
-						REPEAT  f[i] := f[i - 1];  DEC( i )  UNTIL i = ip;
-						f[ip] := tmp;
-					END;
-					INC( x )
-				END
-			END InsertSort;
 			
 			
 			PROCEDURE SortF( lo, hi: Index );
 			PROCEDURE SortF( lo, hi: Index );
-			VAR i, j, m: Index;  n: LONGINT;
+			VAR i, j, m: Index;  
 			BEGIN
 			BEGIN
 				IF lo < hi THEN
 				IF lo < hi THEN
-					i := lo;  j := hi;  m := (lo + hi) DIV 2;  n := hi - lo + 1;
-					IF n = 2 THEN 
+					IF (hi - lo) = 1 THEN 
 						IF f[hi] < f[lo] THEN  Swap( lo, hi )  END;
 						IF f[hi] < f[lo] THEN  Swap( lo, hi )  END;
-					ELSIF n = 3 THEN 
-						IF f[m] < f[lo] THEN  Swap( lo, m )  END;
-						IF f[hi] < f[m] THEN  
-							Swap( m, hi );
-							IF f[m] < f[lo] THEN  Swap( lo, m )  END	
-						END
-					ELSIF n < 16 THEN  
-						InsertSort( lo, hi )
 					ELSE
 					ELSE
 						(* QuickSort *)
 						(* QuickSort *)
+						i := lo;  j := hi;  m := (lo + hi) DIV 2; 
 						REPEAT
 						REPEAT
 							WHILE f[i] < f[m] DO  INC( i )  END;  
 							WHILE f[i] < f[m] DO  INC( i )  END;  
 							WHILE f[m] < f[j] DO  DEC( j )  END;
 							WHILE f[m] < f[j] DO  DEC( j )  END;
@@ -233,27 +199,24 @@ TYPE
 			END SortF;
 			END SortF;
 			
 			
 			
 			
-			PROCEDURE DecodeBlock*( VAR buf: ARRAY OF CHAR; len, ind: LONGINT );
+			PROCEDURE DecodeBlock*( VAR buf: ARRAY OF CHAR; len, index: LONGINT );
 			VAR 
 			VAR 
 				i, j, n: LONGINT;  ch: CHAR;
 				i, j, n: LONGINT;  ch: CHAR;
-				l: POINTER TO ARRAY OF CHAR;
-				lc, fc: POINTER TO ARRAY OF LONGINT;
-				xn: ARRAY 256 OF LONGINT;  
+				xn: ARRAY 256 OF INTEGER;  
 			BEGIN
 			BEGIN
-				ASSERT( len <= BlockSize );  length := len;  index := ind;
+				ASSERT( len <= BlockSize );
 				mtf.Decode( buf, len );
 				mtf.Decode( buf, len );
-				NEW( l, length );  NEW( lc, length );  NEW( fc, length );
 				FOR i := 0 TO 255 DO  xn[i] := 0  END;
 				FOR i := 0 TO 255 DO  xn[i] := 0  END;
-				FOR i := 0 TO length - 1 DO 
+				FOR i := 0 TO len - 1 DO 
 					l[i] := buf[i];  f[i] := buf[i];
 					l[i] := buf[i];  f[i] := buf[i];
 					j := ORD( l[i] );  lc[i] := xn[j];  INC( xn[j] )
 					j := ORD( l[i] );  lc[i] := xn[j];  INC( xn[j] )
 				END;
 				END;
-				SortF( 0, length - 1 );
+				SortF( 0, len - 1 );
 				FOR i := 0 TO 255 DO  xn[i] := 0  END;
 				FOR i := 0 TO 255 DO  xn[i] := 0  END;
-				FOR i := 0 TO length - 1 DO 
+				FOR i := 0 TO len - 1 DO 
 					j := ORD( f[i] );  fc[i] := xn[j];  INC( xn[j] )
 					j := ORD( f[i] );  fc[i] := xn[j];  INC( xn[j] )
 				END;
 				END;
-				FOR i := 0 TO length - 1 DO
+				FOR i := 0 TO len - 1 DO
 					ch := f[index];  n := fc[index];  buf[i] := ch;  index := 0;
 					ch := f[index];  n := fc[index];  buf[i] := ch;  index := 0;
 					WHILE (l[index] # ch) OR (lc[index] # n) DO  INC( index )  END
 					WHILE (l[index] # ch) OR (lc[index] # n) DO  INC( index )  END
 				END;
 				END;

+ 154 - 137
source/Huffman.Mod

@@ -1,223 +1,237 @@
 MODULE Huffman;  (** AUTHOR GF;  PURPOSE "Huffman compression";  *)
 MODULE Huffman;  (** AUTHOR GF;  PURPOSE "Huffman compression";  *)
 
 
 IMPORT Streams;
 IMPORT Streams;
-
-CONST
-	BlockSize = 8*1024;
 	
 	
 TYPE 
 TYPE 
+	BitReader = OBJECT
+		VAR
+			in: Streams.Reader;
+			curByte, curBit: LONGINT;
+			
+			PROCEDURE &New( r: Streams.Reader );
+			BEGIN
+				in := r;  curBit := -1;  curByte := 0
+			END New;
+			
+			PROCEDURE Initialize;
+			BEGIN
+				curBit := -1;  curByte := 0
+			END Initialize;
+			
+			PROCEDURE Bit( ): LONGINT;
+			VAR
+				bit: LONGINT;  ch: CHAR;
+			BEGIN
+				IF curBit < 0 THEN
+					in.Char( ch );  curByte := ORD( ch );  curBit := 7
+				END;
+				bit := ASH( curByte, -curBit ) MOD 2;
+				DEC( curBit );
+				RETURN bit
+			END Bit;
+	
+	END BitReader;
 	
 	
-	Pattern = RECORD  pattern, freq: LONGINT  END;
 	
 	
-	PatternCounts = ARRAY 256 OF Pattern;
-	PatternFrequencies = POINTER TO ARRAY OF Pattern;		(* ordered by frequency *)
+TYPE
+	BitWriter = OBJECT
+		VAR
+			out: Streams.Writer;
+			curByte, curBit: LONGINT;
+			
+			PROCEDURE &New( w: Streams.Writer );
+			BEGIN
+				out := w;  curBit := 0;  curByte := 0
+			END New;
+			
+			PROCEDURE Bit( bit: LONGINT );
+			BEGIN
+				curByte := 2*curByte + bit;
+				INC( curBit );
+				IF curBit > 7 THEN
+					out.Char( CHR( curByte ) );  curByte := 0;  curBit := 0
+				END
+			END Bit;
+		
+			PROCEDURE Finish;	(* flush last few bits *)
+			BEGIN
+				WHILE curBit # 0 DO  Bit( 0 )  END;
+				out.Update
+			END Finish;
+				
+	END BitWriter;
+	
+	
+TYPE	
+	PatternCounts = ARRAY 256 OF LONGINT;		(* scaled to 0 .. 101H *)
 	
 	
+	Pattern = RECORD  patt, weight: LONGINT  END;
+	PatternWeights = POINTER TO ARRAY OF Pattern;		(* weights of patterns contained in block *)
 	
 	
+	HuffCode = RECORD  bits, val: LONGINT  END;
 	
 	
+TYPE	
 	Node = OBJECT 
 	Node = OBJECT 
 		VAR 
 		VAR 
-			freq: LONGINT;
+			weight: LONGINT;
+			pattern: LONGINT;		(* node pattern if node is leaf *)
 			left, right: Node;		(* both NIL in case of leaf *)
 			left, right: Node;		(* both NIL in case of leaf *)
-			pattern: LONGINT;						
-		
-		PROCEDURE & Init( patt, f: LONGINT );
-		BEGIN
-			pattern := patt;  freq := f;  left := NIL;  right := NIL
-		END Init;
-		
-		PROCEDURE AddChildren( l, r: Node );
-		BEGIN
-			left := l;  right := r;  freq := l.freq + r.freq
-		END AddChildren;
+									
+			PROCEDURE &Init( patt, w: LONGINT );
+			BEGIN
+				pattern := patt;  weight := w;  left := NIL;  right := NIL
+			END Init;
+			
+			PROCEDURE AddChildren( l, r: Node );
+			BEGIN
+				left := l;  right := r;  weight := l.weight + r.weight
+			END AddChildren;
 			
 			
 	END Node;
 	END Node;
 	
 	
 	
 	
-	
-	Encoder* = OBJECT
-		TYPE
-			HCode = RECORD  len, val: LONGINT  END;
+TYPE
+	Encoder* = OBJECT			
 		VAR
 		VAR
 			w: Streams.Writer;
 			w: Streams.Writer;
-			codeTable: ARRAY 256 OF HCode;
-			buffer: ARRAY 2*BlockSize OF CHAR;
-			byte, curBit, bpos: LONGINT;
+			out: BitWriter;
+			codeTable: ARRAY 256 OF HuffCode;
+		
 			
 			
 			
 			
 			PROCEDURE &New*( output: Streams.Writer );
 			PROCEDURE &New*( output: Streams.Writer );
 			BEGIN
 			BEGIN
 				w := output;
 				w := output;
+				NEW( out, w )
 			END New;
 			END New;
 			
 			
-			PROCEDURE Initialize( CONST source: ARRAY OF CHAR; len: LONGINT );
-			VAR pf: PatternFrequencies;
-			BEGIN
-				w.RawNum( len );
-				pf := CountPatterns( source, len );
-				WriteFrequencies( pf );
-				BuildCodeTable( BuildTree( pf ) );				
-				byte := 0;  bpos := 0;  curBit := 7;
-			END Initialize;
-			
 			
 			
-			PROCEDURE WriteFrequencies( pf: PatternFrequencies );
+			PROCEDURE WriteFrequencies( pw: PatternWeights );
 			VAR i, n: LONGINT;
 			VAR i, n: LONGINT;
-				a: ARRAY 256 OF LONGINT;
+				pc: PatternCounts;
 			BEGIN
 			BEGIN
-				n := LEN( pf^ );
+				n := LEN( pw^ );
 				IF n < 128 THEN
 				IF n < 128 THEN
 					w.Char( CHR( n ) );
 					w.Char( CHR( n ) );
-					FOR i := 0 TO n - 1 DO  w.RawNum( pf[i].freq );  w.Char( CHR( pf[i].pattern ) )  END
+					FOR i := 0 TO n - 1 DO  w.RawNum( pw[i].weight );  w.Char( CHR( pw[i].patt ) )  END
 				ELSE
 				ELSE
 					w.Char( 0X );
 					w.Char( 0X );
-					FOR i := 0 TO 255 DO  a[i] := 0  END;
-					FOR i := 0 TO n -1 DO  a[pf[i].pattern] := pf[i].freq  END;
-					FOR i := 0 TO 255 DO  w.RawNum( a[i] )  END
-				END
+					FOR i := 0 TO 255 DO  pc[i] := 0  END;
+					FOR i := 0 TO n -1 DO  pc[pw[i].patt] := pw[i].weight  END;
+					FOR i := 0 TO 255 DO  w.RawNum( pc[i] )  END
+				END;
 			END WriteFrequencies;
 			END WriteFrequencies;
 			
 			
 			
 			
-			PROCEDURE CountPatterns( CONST source: ARRAY OF CHAR; len: LONGINT ): PatternFrequencies;
+			PROCEDURE CountPatterns( CONST source: ARRAY OF CHAR; len: LONGINT ): PatternWeights;
 			VAR 
 			VAR 
-				i: LONGINT;  a: PatternCounts;
+				i: LONGINT;  pc: PatternCounts;
 			BEGIN
 			BEGIN
-				FOR i := 0 TO 255 DO  a[i].pattern := i;  a[i].freq := 0  END;
-				FOR i := 0 TO len - 1 DO  INC( a[ORD( source[i] )].freq )  END;
+				FOR i := 0 TO 255 DO  pc[i] := 0  END;
+				FOR i := 0 TO len - 1 DO  INC( pc[ORD( source[i] )] )  END;
 				FOR i := 0 TO 255 DO  
 				FOR i := 0 TO 255 DO  
-					IF a[i].freq > 0 THEN (* scale => [1..101H] *)
-						a[i].freq := 100H * a[i].freq DIV len + 1;
+					IF pc[i] > 0 THEN (* scale => [1..101H] *)
+						pc[i] := 100H * pc[i] DIV len + 1;
 					END;
 					END;
 				END;
 				END;
-				RETURN SortPatterns( a )
+				RETURN ContainedPatterns( pc )
 			END CountPatterns;
 			END CountPatterns;
 			
 			
 			
 			
-			PROCEDURE BuildCodeTable( tree: Node );
+			PROCEDURE BuildCodeTable( pw: PatternWeights );
 			VAR 
 			VAR 
-				initval: HCode; i: LONGINT;
+				initval: HuffCode;
+				tree: Node;
 			
 			
-				PROCEDURE Traverse( node: Node;  code: HCode );
+				PROCEDURE Traverse( node: Node;  code: HuffCode );
 				BEGIN
 				BEGIN
 					IF node.left = NIL THEN  (* leaf *)
 					IF node.left = NIL THEN  (* leaf *)
 						codeTable[node.pattern] := code;
 						codeTable[node.pattern] := code;
 					ELSE
 					ELSE
-						INC( code.len );  
+						INC( code.bits );  
 						code.val := 2*code.val;  Traverse( node.right, code );	(* ..xx0 *)
 						code.val := 2*code.val;  Traverse( node.right, code );	(* ..xx0 *)
 						code.val := code.val + 1;  Traverse( node.left, code );	(* ..xx1 *)
 						code.val := code.val + 1;  Traverse( node.left, code );	(* ..xx1 *)
 					END;
 					END;
 				END Traverse;
 				END Traverse;
 			
 			
 			BEGIN
 			BEGIN
-				FOR i := 0 TO 255 DO  codeTable[i].len := 0;  codeTable[i].val := 0  END;
-				initval.len := 0;  initval.val := 0;
+				tree := BuildTree( pw );
+				initval.bits := 0;  initval.val := 0;
 				Traverse( tree, initval );
 				Traverse( tree, initval );
 			END BuildCodeTable;	
 			END BuildCodeTable;	
 			
 			
 			
 			
-			PROCEDURE AppendBit( bit: LONGINT );
-			BEGIN
-				IF bit # 0 THEN  byte := byte + ASH( 1, curBit )  END;
-				DEC( curBit );
-				IF curBit < 0 THEN
-					buffer[bpos] := CHR( byte );  INC( bpos );
-					byte := 0; curBit := 7
-				END
-			END AppendBit;
-			
-			
-			PROCEDURE Append( code: HCode );
+			PROCEDURE PutCode( code: HuffCode );
 			VAR len, val: LONGINT;
 			VAR len, val: LONGINT;
 			BEGIN
 			BEGIN
-				len := code.len;  val := code.val;
+				len := code.bits;  val := code.val;
 				WHILE len > 0 DO
 				WHILE len > 0 DO
-					DEC( len );  AppendBit( ASH( val, -len ) MOD 2 )
+					DEC( len );  out.Bit( ASH( val, -len ) MOD 2 )
 				END
 				END
-			END Append;
+			END PutCode;
 			
 			
 			
 			
 			PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
 			PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
-			VAR i: LONGINT;
+			VAR 
+				i: LONGINT;
+				pw: PatternWeights;
 			BEGIN
 			BEGIN
-				Initialize( source, len );
-				FOR i := 0 TO len - 1 DO  Append( codeTable[ORD( source[i] )] )  END;
-				
-				IF curBit # 7 THEN  
-					buffer[bpos] := CHR( byte );  INC( bpos ); 
-				END;
-				
-				FOR i := 0 TO bpos - 1 DO  w.Char( buffer[i] )  END;
-				w.Update
+				pw := CountPatterns( source, len );
+				WriteFrequencies( pw );
+				BuildCodeTable( pw );
+				FOR i := 0 TO len - 1 DO  PutCode( codeTable[ORD( source[i] )] )  END;
+				out.Finish;
 			END CompressBlock;
 			END CompressBlock;
 				
 				
 	END Encoder;
 	END Encoder;
 	
 	
 	
 	
-	
+TYPE
 	Decoder* = OBJECT
 	Decoder* = OBJECT
 		VAR
 		VAR
-			blockSize: LONGINT;
 			r: Streams.Reader;
 			r: Streams.Reader;
+			in: BitReader;
 			tree: Node;
 			tree: Node;
-			byte, curBit: LONGINT;
 		
 		
 			PROCEDURE &New*( input: Streams.Reader );
 			PROCEDURE &New*( input: Streams.Reader );
 			BEGIN
 			BEGIN
 				r := input;  
 				r := input;  
+				NEW( in, r )
 			END New;
 			END New;
 			
 			
-			PROCEDURE Initialize;
-			VAR  pf: PatternFrequencies;
-			BEGIN
-				pf := ReadFrequencies( r );
-				tree := BuildTree( pf );
-				curBit := -1
-			END Initialize;
-			
 			
 			
-			PROCEDURE ReadFrequencies( r: Streams.Reader ): PatternFrequencies;
-			VAR i, n: LONGINT;  c: CHAR;
-				pf: PatternFrequencies;
-				a: PatternCounts;
+			PROCEDURE ReadFrequencies( r: Streams.Reader ): PatternWeights;
+			VAR 
+				i, n: LONGINT;  c: CHAR;
+				pw: PatternWeights;
+				pc: PatternCounts;
 			BEGIN
 			BEGIN
 				r.Char( c );  n := ORD( c );
 				r.Char( c );  n := ORD( c );
 				IF n > 0 THEN
 				IF n > 0 THEN
-					NEW( pf, n );
-					FOR i := 0 TO n - 1 DO  r.RawNum( pf[i].freq );  r.Char( c );  pf[i].pattern := ORD( c )  END
+					NEW( pw, n );
+					FOR i := 0 TO n - 1 DO  r.RawNum( pw[i].weight );  r.Char( c );  pw[i].patt := ORD( c )  END
 				ELSE
 				ELSE
-					FOR i := 0 TO 255 DO  a[i].pattern := i;  r.RawNum( a[i].freq )  END;
-					pf := SortPatterns( a )
+					FOR i := 0 TO 255 DO  r.RawNum( pc[i] )  END;
+					pw := ContainedPatterns( pc )
 				END;
 				END;
-				RETURN pf
+				RETURN pw
 			END ReadFrequencies;
 			END ReadFrequencies;
 		
 		
-		
-			PROCEDURE GetBit( ): LONGINT;
-			VAR bit: LONGINT;  c: CHAR;
-			BEGIN
-				IF curBit < 0 THEN
-					r.Char( c );  byte := ORD( c );  curBit := 7
-				END;
-				bit := ASH( byte, -curBit ) MOD 2;  DEC( curBit );
-				RETURN bit
-			END GetBit;	
-			
 			
 			
-			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
+			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR; len: LONGINT );
 			VAR 
 			VAR 
-				i, len: LONGINT;  n: Node;
+				i: LONGINT;  n: Node;
 			BEGIN
 			BEGIN
-				r.RawNum( blockSize ); 
-				IF blockSize = 0 THEN  (* end of stream *) RETURN 0  END;
-				Initialize;  i := 0;  len := 0;
+				tree := BuildTree( ReadFrequencies( r ) );
+				in.Initialize;
+				i := 0; 
 				REPEAT
 				REPEAT
 					n := tree; 
 					n := tree; 
 					REPEAT
 					REPEAT
-						IF GetBit() # 0 THEN  n := n.left  ELSE  n := n.right  END;
-						INC( i )
+						IF in.Bit() = 1 THEN  n := n.left  ELSE  n := n.right  END;
 					UNTIL n.left = NIL;	(* leaf *)
 					UNTIL n.left = NIL;	(* leaf *)
-					buf[len] := CHR( n.pattern );  INC( len )
-				UNTIL len >= blockSize;
-				RETURN blockSize
+					buf[i] := CHR( n.pattern );  INC( i )
+				UNTIL i >= len;
 			END ExtractBlock;
 			END ExtractBlock;
 				
 				
 	END Decoder;
 	END Decoder;
@@ -226,10 +240,12 @@ TYPE
 	
 	
 	
 	
 	
 	
-	(* sort patterns by frequency, omit unused patterns *)
-	PROCEDURE SortPatterns( VAR a: PatternCounts ): PatternFrequencies;
+	(* sort patterns by weight, omit unused patterns *)
+	PROCEDURE ContainedPatterns( VAR pc: PatternCounts ): PatternWeights;
 	VAR 
 	VAR 
-		i, n, start: LONGINT;  pf: PatternFrequencies;
+		i, n, start: LONGINT;  
+		pw: PatternWeights;
+		a: ARRAY 256 OF Pattern;
 		
 		
 		PROCEDURE Sort( low, high: LONGINT );  
 		PROCEDURE Sort( low, high: LONGINT );  
 		VAR 
 		VAR 
@@ -238,8 +254,8 @@ TYPE
 			IF low < high THEN
 			IF low < high THEN
 				i := low;  j := high;  m := (i + j) DIV 2;
 				i := low;  j := high;  m := (i + j) DIV 2;
 				REPEAT
 				REPEAT
-					WHILE a[i].freq < a[m].freq DO  INC( i )  END;
-					WHILE a[j].freq > a[m].freq DO  DEC( j )  END;
+					WHILE a[i].weight < a[m].weight DO  INC( i )  END;
+					WHILE a[j].weight > a[m].weight DO  DEC( j )  END;
 					IF i <= j THEN
 					IF i <= j THEN
 						IF i = m THEN  m := j  ELSIF j = m THEN  m := i  END;
 						IF i = m THEN  m := j  ELSIF j = m THEN  m := i  END;
 						tmp := a[i];  a[i] := a[j];  a[j] := tmp;
 						tmp := a[i];  a[i] := a[j];  a[j] := tmp;
@@ -251,24 +267,25 @@ TYPE
 		END Sort;
 		END Sort;
 		
 		
 	BEGIN
 	BEGIN
-		Sort( 0, 255 );	(* sort patterns by frequency *)
+		FOR i := 0 TO 255 DO  a[i].patt := i;  a[i].weight := pc[i]  END;
+		Sort( 0, 255 );	(* sort patterns by weight *)
 		i := 0;
 		i := 0;
-		WHILE a[i].freq = 0 DO  INC( i )  END; 	(* skip unused patterns *)
+		WHILE a[i].weight = 0 DO  INC( i )  END; 	(* skip unused patterns *)
 		n := 256 - i;  start := i;
 		n := 256 - i;  start := i;
-		NEW( pf, n );
-		FOR i := 0 TO n - 1 DO  pf[i] := a[start + i]  END;
-		RETURN pf
-	END SortPatterns;
+		NEW( pw, n );
+		FOR i := 0 TO n - 1 DO  pw[i] := a[start + i]  END;
+		RETURN pw
+	END ContainedPatterns;
 
 
 	
 	
-	PROCEDURE BuildTree( pf: PatternFrequencies ): Node;
+	PROCEDURE BuildTree( pw: PatternWeights ): Node;
 	VAR 
 	VAR 
 		i, start, top: LONGINT;  node, n2: Node;
 		i, start, top: LONGINT;  node, n2: Node;
 		a: POINTER TO ARRAY OF Node;
 		a: POINTER TO ARRAY OF Node;
 		patt: LONGINT;
 		patt: LONGINT;
 	BEGIN
 	BEGIN
-		NEW( a, LEN( pf^ ) );  top := LEN( pf^ ) - 1;
-		FOR i := 0 TO top DO  NEW( a[i], pf[i].pattern, pf[i].freq )  END;
+		NEW( a, LEN( pw^ ) );  top := LEN( pw^ ) - 1;
+		FOR i := 0 TO top DO  NEW( a[i], pw[i].patt, pw[i].weight )  END;
 		IF top = 0 THEN  
 		IF top = 0 THEN  
 			(* the whole, probably last small block contains only a single pattern *)
 			(* the whole, probably last small block contains only a single pattern *)
 			patt := (a[0].pattern + 1) MOD 256;	(* some different pattern *)
 			patt := (a[0].pattern + 1) MOD 256;	(* some different pattern *)
@@ -278,7 +295,7 @@ TYPE
 			WHILE start < top DO  
 			WHILE start < top DO  
 				NEW( node, 0, 0 );  node.AddChildren( a[start], a[start+1] ); 
 				NEW( node, 0, 0 );  node.AddChildren( a[start], a[start+1] ); 
 				i := start + 1;  
 				i := start + 1;  
-				WHILE (i < top) & (a[i+1].freq < node.freq) DO  a[i] := a[i+1];  INC( i )  END;
+				WHILE (i < top) & (a[i+1].weight < node.weight) DO  a[i] := a[i+1];  INC( i )  END;
 				a[i] := node;  
 				a[i] := node;  
 				INC( start );
 				INC( start );
 			END
 			END

+ 21 - 22
source/OZip.Mod

@@ -1,10 +1,10 @@
 MODULE OZip; (** AUTHOR GF; PURPOSE "files and streams compression tool"; *)
 MODULE OZip; (** AUTHOR GF; PURPOSE "files and streams compression tool"; *)
 
 
-IMPORT Streams, Commands, Files, Strings, BWT :=  BorrowsWheeler, Huffman;
+IMPORT Streams, Commands, Files, Strings, BW := BorrowsWheeler, Huffman;
 
 
 (*
 (*
 	Format	=	ComprTag { Block }
 	Format	=	ComprTag { Block }
-	Block		=	bwindex size Freqs  { CHAR }
+	Block		=	size  bwindex Freqs  { CHAR }
 	Freqs		= 	num { freq } 			(num = 0,  (256 * freq))
 	Freqs		= 	num { freq } 			(num = 0,  (256 * freq))
 				|	num { freq patt }		(num = 1..127)
 				|	num { freq patt }		(num = 1..127)
 	num		=	CHAR			
 	num		=	CHAR			
@@ -15,7 +15,7 @@ IMPORT Streams, Commands, Files, Strings, BWT :=  BorrowsWheeler, Huffman;
 *)
 *)
 
 
 CONST
 CONST
-	BlockSize = 8*1024;
+	BlockSize* = 8*1024;
 	ComprTag = LONGINT(0FEFD1F2FH);
 	ComprTag = LONGINT(0FEFD1F2FH);
 	Suffix = ".oz";
 	Suffix = ".oz";
 		
 		
@@ -23,20 +23,19 @@ CONST
 	PROCEDURE Compress*( r: Streams.Reader;  w: Streams.Writer );
 	PROCEDURE Compress*( r: Streams.Reader;  w: Streams.Writer );
 	VAR 
 	VAR 
 		huff: Huffman.Encoder; 
 		huff: Huffman.Encoder; 
-		bw: BWT.Encoder;  bwindex: LONGINT; 
+		bw: BW.Encoder;  
 		buffer: ARRAY BlockSize OF CHAR;  len: LONGINT;
 		buffer: ARRAY BlockSize OF CHAR;  len: LONGINT;
 	BEGIN 
 	BEGIN 
 		NEW( huff, w );  NEW( bw );
 		NEW( huff, w );  NEW( bw );
 		w.RawLInt( ComprTag );
 		w.RawLInt( ComprTag );
 		LOOP
 		LOOP
 			r.Bytes( buffer, 0, BlockSize, len );
 			r.Bytes( buffer, 0, BlockSize, len );
-			IF len < 1 THEN  EXIT  END;
-			bwindex := bw.EncodeBlock( buffer, len );
-			w.RawNum( bwindex );
+			IF len = 0 THEN  EXIT  END;
+			w.RawNum( len );
+			w.RawNum( bw.EncodeBlock( buffer, len ) );
 			huff.CompressBlock( buffer, len );
 			huff.CompressBlock( buffer, len );
 		END;
 		END;
-		w.RawNum( 0 );	(* bw index *)
-		w.RawNum( 0 );	(* size of block, (end of stream) *)
+		w.RawNum( 0 );	(* mark end of stream *)
 		w.Update
 		w.Update
 	END Compress;
 	END Compress;
 	
 	
@@ -45,22 +44,20 @@ CONST
 	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer ): BOOLEAN;
 	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer ): BOOLEAN;
 	VAR 
 	VAR 
 		huff: Huffman.Decoder;
 		huff: Huffman.Decoder;
-		tag, len, i, bwIndex: LONGINT; 
-		bw: BWT.Decoder;
+		tag, len, bwIndex: LONGINT; 
+		bw: BW.Decoder;
 		buffer: ARRAY BlockSize OF CHAR;
 		buffer: ARRAY BlockSize OF CHAR;
 	BEGIN 
 	BEGIN 
 		r.RawLInt( tag );
 		r.RawLInt( tag );
 		IF tag = ComprTag  THEN
 		IF tag = ComprTag  THEN
 			NEW( huff, r );  NEW( bw );
 			NEW( huff, r );  NEW( bw );
 			LOOP
 			LOOP
+				r.RawNum( len );
+				IF len = 0 THEN  EXIT  END;
 				r.RawNum( bwIndex );
 				r.RawNum( bwIndex );
-				len := huff.ExtractBlock( buffer );	
-				IF len > 0 THEN
-					bw.DecodeBlock( buffer, len, bwIndex );
-					FOR i := 0 TO len - 1 DO  w.Char( buffer[i] )  END;
-				ELSE
-					EXIT
-				END
+				huff.ExtractBlock( buffer, len );
+				bw.DecodeBlock( buffer, len, bwIndex );
+				w.Bytes( buffer, 0, len )
 			END;
 			END;
 			w.Update;
 			w.Update;
 			RETURN TRUE
 			RETURN TRUE
@@ -74,9 +71,11 @@ CONST
 	
 	
 	PROCEDURE NewFile( CONST name: ARRAY OF CHAR ): Files.File;
 	PROCEDURE NewFile( CONST name: ARRAY OF CHAR ): Files.File;
 	VAR
 	VAR
-		name2: ARRAY 128 OF CHAR;  res: LONGINT;
+		tname, name2: ARRAY 128 OF CHAR;  res: LONGINT;
 	BEGIN
 	BEGIN
-		IF Files.Old( name ) # NIL THEN
+		tname := "./";
+		Strings.Append( tname, name );
+		IF Files.Old( tname ) # NIL THEN
 			COPY( name, name2);  Strings.Append( name2, ".Bak" );
 			COPY( name, name2);  Strings.Append( name2, ".Bak" );
 			Files.Rename( name, name2, res );
 			Files.Rename( name, name2, res );
 		END;
 		END;
@@ -152,11 +151,11 @@ END OZip.
 
 
 
 
 	OZip.CompressFile   TLS.Mod ~
 	OZip.CompressFile   TLS.Mod ~
-	OZip.CompressFile   OZip.GofU ~
+	OZip.CompressFile   OZip.GofUu ~
 	OZip.CompressFile   guide.pdf ~
 	OZip.CompressFile   guide.pdf ~
 		
 		
 	OZip.UncompressFile   TLS.Mod.oz TTT.Mod ~
 	OZip.UncompressFile   TLS.Mod.oz TTT.Mod ~
-	OZip.UncompressFile   OZip.GofU.oz  TTT.GofU ~
+	OZip.UncompressFile   OZip.GofUu.oz  TTT.GofUu ~
 	OZip.UncompressFile   guide.pdf.oz  TTT.pdf ~
 	OZip.UncompressFile   guide.pdf.oz  TTT.pdf ~
 	
 	
 	System.Free  OZip Huffman  BorrowsWheeler ~
 	System.Free  OZip Huffman  BorrowsWheeler ~

+ 23 - 24
source/OZip2.Mod

@@ -1,17 +1,17 @@
 MODULE OZip2; (** AUTHOR GF; PURPOSE "files and streams compression tool"; *)
 MODULE OZip2; (** AUTHOR GF; PURPOSE "files and streams compression tool"; *)
 
 
 IMPORT Streams, Commands, Files, Strings, 
 IMPORT Streams, Commands, Files, Strings, 
-	BWT :=  BorrowsWheeler, AH := AdaptiveHuffman;
+	BW :=  BorrowsWheeler, AH := AdaptiveHuffman;
 
 
 (*
 (*
 	Format	=	ComprTag { Block }
 	Format	=	ComprTag { Block }
-	Block		=	bwindex  size { CHAR }
+	Block		=	size  bwindex  { CHAR }
 	bwindex	=	RawNum
 	bwindex	=	RawNum
 	size		=	RawNum
 	size		=	RawNum
 *)
 *)
 
 
 CONST
 CONST
-	BlockSize = 8*1024;
+	BlockSize* = 8*1024;
 	ComprTag = LONGINT(0FEFD1F18H);
 	ComprTag = LONGINT(0FEFD1F18H);
 	Suffix = ".oz2";
 	Suffix = ".oz2";
 		
 		
@@ -19,44 +19,41 @@ CONST
 	PROCEDURE Compress*( r: Streams.Reader;  w: Streams.Writer );
 	PROCEDURE Compress*( r: Streams.Reader;  w: Streams.Writer );
 	VAR 
 	VAR 
 		huff: AH.Encoder; 
 		huff: AH.Encoder; 
-		bw: BWT.Encoder;  index: LONGINT; 
+		bw: BW.Encoder; 
 		buffer: ARRAY BlockSize OF CHAR;  len: LONGINT;
 		buffer: ARRAY BlockSize OF CHAR;  len: LONGINT;
 	BEGIN 
 	BEGIN 
 		NEW( huff, w );  NEW( bw );
 		NEW( huff, w );  NEW( bw );
 		w.RawLInt( ComprTag );
 		w.RawLInt( ComprTag );
 		LOOP
 		LOOP
 			r.Bytes( buffer, 0, BlockSize, len );
 			r.Bytes( buffer, 0, BlockSize, len );
-			IF len < 1 THEN  EXIT  END;
-			index := bw.EncodeBlock( buffer, len );
-			w.RawNum( index );
+			IF len = 0 THEN  EXIT  END;
+			w.RawNum( len );
+			w.RawNum( bw.EncodeBlock( buffer, len ) );
 			huff.CompressBlock( buffer, len );
 			huff.CompressBlock( buffer, len );
 		END;
 		END;
-		w.RawNum( 0 );	(* bw index*)
-		w.RawNum( 0 );	(* lenght of block, (end of stream) *)
+		w.RawNum( 0 );	(* mark end of stream *)
 		w.Update
 		w.Update
 	END Compress;
 	END Compress;
 	
 	
 	
 	
-	(* returns false if input is not an OZip compressed stream *)	
+	(* returns false if input is not an OZip2 compressed stream *)	
 	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer ): BOOLEAN;
 	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer ): BOOLEAN;
 	VAR 
 	VAR 
 		huff: AH.Decoder;
 		huff: AH.Decoder;
-		bw: BWT.Decoder;
-		tag, len, i, bwIndex: LONGINT;
+		bw: BW.Decoder;
+		tag, len, bwIndex: LONGINT;
 		buffer: ARRAY BlockSize OF CHAR;
 		buffer: ARRAY BlockSize OF CHAR;
 	BEGIN 
 	BEGIN 
 		r.RawLInt( tag );
 		r.RawLInt( tag );
 		IF tag = ComprTag  THEN
 		IF tag = ComprTag  THEN
 			NEW( huff, r );  NEW( bw );
 			NEW( huff, r );  NEW( bw );
 			LOOP
 			LOOP
+				r.RawNum( len );
+				IF len = 0 THEN  EXIT  END;
 				r.RawNum( bwIndex );
 				r.RawNum( bwIndex );
-				len := huff.ExtractBlock( buffer );	
-				IF len > 0 THEN  
-					bw.DecodeBlock( buffer, len, bwIndex );
-					FOR i := 0 TO len - 1 DO  w.Char( buffer[i] )  END;
-				ELSE
-					EXIT
-				END
+				huff.ExtractBlock( buffer, len );	
+				bw.DecodeBlock( buffer, len, bwIndex );
+				w.Bytes( buffer, 0, len )
 			END;
 			END;
 			w.Update;
 			w.Update;
 			RETURN TRUE
 			RETURN TRUE
@@ -70,9 +67,11 @@ CONST
 	
 	
 	PROCEDURE NewFile( CONST name: ARRAY OF CHAR ): Files.File;
 	PROCEDURE NewFile( CONST name: ARRAY OF CHAR ): Files.File;
 	VAR
 	VAR
-		name2: ARRAY 128 OF CHAR;  res: LONGINT;
+		tname, name2: ARRAY 128 OF CHAR;  res: LONGINT;
 	BEGIN
 	BEGIN
-		IF Files.Old( name ) # NIL THEN
+		tname := "./";
+		Strings.Append( tname, name );
+		IF Files.Old( tname ) # NIL THEN
 			COPY( name, name2);  Strings.Append( name2, ".Bak" );
 			COPY( name, name2);  Strings.Append( name2, ".Bak" );
 			Files.Rename( name, name2, res );
 			Files.Rename( name, name2, res );
 		END;
 		END;
@@ -147,11 +146,11 @@ END OZip2.
 
 
 
 
 	OZip2.CompressFile   TLS.Mod ~
 	OZip2.CompressFile   TLS.Mod ~
-	OZip2.CompressFile   OZip2.GofU ~
+	OZip2.CompressFile   OZip2.GofUu ~
 	OZip2.CompressFile   guide.pdf ~
 	OZip2.CompressFile   guide.pdf ~
 		
 		
-	OZip2.UncompressFile   TLS.Modc.oz2  TTT.Mod ~
-	OZip2.UncompressFile   OZip2.GofU.oz2  TTT.GofU ~
+	OZip2.UncompressFile   TLS.Mod.oz2  TTT.Mod ~
+	OZip2.UncompressFile   OZip2.GofUu.oz2  TTT.GofUu ~
 	OZip2.UncompressFile   guide.pdf.oz2  TTT.pdf ~
 	OZip2.UncompressFile   guide.pdf.oz2  TTT.pdf ~
 	
 	
 	System.Free  OZip2 AdaptiveHuffman  BorrowsWheeler ~
 	System.Free  OZip2 AdaptiveHuffman  BorrowsWheeler ~