2
0
Эх сурвалжийг харах

added adaptive Huffman coding

git-svn-id: https://svn.inf.ethz.ch/svn/lecturers/a2/trunk@8678 8c9fc860-2736-0410-a75d-ab315db34111
eth.guenter 6 жил өмнө
parent
commit
9f83a885e1

+ 379 - 0
source/AdaptiveHuffman.Mod

@@ -0,0 +1,379 @@
+MODULE AdaptiveHuffman;	(** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
+
+(* adaptive Huffman compression, Vitter's algorithm *)
+
+IMPORT Streams;
+
+CONST
+	AlphabetSize* = 256;	(* byte *)
+	BlockSize* = 8*1024;
+	ScaleSize = 2*1024;
+	
+	Encode = 0;  Decode = 1;
+
+TYPE
+	Index = INTEGER;		(* 16-bit integer to keep the table small *)
+	Symbol = LONGINT;
+	
+	Node = RECORD
+		weight: LONGINT;		(* node weight	*)
+		up: Index;				(* next node up the tree	*)
+		down: Index;			(* pair of down nodes	*)
+		pattern: Symbol		(* node pattern	*)
+	END;
+
+
+	HuffmanCoder = OBJECT		
+	VAR
+		mode: SHORTINT;		(* encode, decode *)
+		in: Streams.Reader;	(* input from archive *)
+		out: Streams.Writer;	(* output to archive *)
+		curByte, curBit: LONGINT;
+		
+		esc: Index;		(* the current tree height	*)
+		root: Index;         (* the root of the tree	*)
+		map: ARRAY AlphabetSize OF Index;			(* mapping for symbols to nodes	*)
+		table: ARRAY 2*AlphabetSize + 4 OF Node;	(* the coding table	*)
+		
+		
+		
+		PROCEDURE &New( m: SHORTINT; input: Streams.Reader;  output: Streams.Writer );
+		BEGIN
+			ASSERT( m IN {Encode, Decode} );
+			mode := m;
+			IF mode = Encode THEN  out := output  ELSE  in := input  END;
+		END New;
+		
+		PROCEDURE Initialize;
+		VAR  i: Index;
+		BEGIN
+			root := 2*AlphabetSize + 4 - 1;  curByte := 0;
+			IF mode = Encode THEN  curByte := 0;  curBit := 0  ELSE  curBit := -1  END;
+			FOR i := 0 TO root DO
+				table[i].up := 0;  table[i].down := 0;  table[i].pattern := 0;  table[i].weight := 0
+			END;
+			FOR i := 0 TO AlphabetSize - 1 DO  map[i]  := 0  END;
+			esc := root;
+		END Initialize;		
+		
+		
+		PROCEDURE Finish;	(* flush last few bits *)
+		BEGIN
+			ASSERT( mode = Encode );
+			WHILE curBit # 0 DO  PutBit( 0 )  END;
+			out.Update
+		END Finish;
+		
+		
+		
+		PROCEDURE GetBit( ): LONGINT;
+		VAR
+			bit: LONGINT;  ch: CHAR;
+		BEGIN
+			IF curBit < 0 THEN
+				in.Char( ch );  curByte := ORD( ch );  curBit := 7
+			END;
+			bit := ASH( curByte, -curBit ) MOD 2;
+			DEC( curBit );
+			RETURN bit
+		END GetBit;
+		
+		PROCEDURE PutBit( bit: LONGINT );
+		BEGIN
+			curByte := 2*curByte + bit;
+			INC( curBit );
+			IF curBit > 7 THEN
+				out.Char( CHR( curByte ) );  curByte := 0;  curBit := 0
+			END
+		END PutBit;
+		
+		
+		PROCEDURE GetPattern(  ): Symbol;
+		VAR  sym, i, bit: LONGINT;
+		BEGIN
+			sym := 0;  bit := 1;
+			FOR i := 1 TO 8 DO
+				IF GetBit() = 1 THEN  sym := sym + bit  END;
+				bit := bit * 2;
+			END;
+			RETURN sym
+		END GetPattern;
+		
+		
+		(*  send the bits for an escaped symbol	*)
+		PROCEDURE PutPattern( sym: Symbol );
+		VAR i: LONGINT;
+		BEGIN
+			FOR i := 1 TO 8 DO
+				PutBit( sym MOD 2 );  sym := sym DIV 2;
+			END
+		END PutPattern;
+
+		
+		(*	split escape node to incorporate new symbol	*)
+		PROCEDURE AddSymbol( sym: Symbol ): Index;
+		VAR  pair, node: Index;
+		BEGIN
+			ASSERT( esc > 1 );
+			pair := esc;  node := esc - 1;  esc := esc - 2 ;
+			
+			table[pair].down := node;  table[pair].weight := 1;  
+			
+			table[node].up := pair;
+			table[node].down := 0;  table[node].weight := 0;  table[node].pattern := sym; 
+			
+			table[esc].up := pair;
+			table[esc].down := 0;  table[esc].weight := 0;  
+			
+			map[sym] := node;
+			
+			RETURN node;
+		END AddSymbol;		
+		
+
+		(*  swap leaf to group leader position return symbol's new node	*)
+		PROCEDURE GroupLeader( node: Index ): Index;
+		VAR
+			leader: Index;
+			weight: LONGINT;
+			sym, prev:  Symbol;
+		BEGIN
+			weight := table[node].weight;  leader := node;
+			WHILE (leader < root) & (weight = table[leader + 1].weight) DO  INC( leader )  END;
+			IF leader # node THEN  
+				(*	swap the leaf nodes	*)
+				sym := table[node].pattern;
+				prev := table[leader].pattern;
+				table[leader].pattern := sym;
+				table[node].pattern := prev;
+				map[sym] := leader;
+				map[prev] := node;
+			END;
+			RETURN leader
+		END GroupLeader;		
+				
+
+		(*	slide internal node up over all leaves of equal weight; 
+			or exchange leaf with next smaller weight internal node.
+			return node's new position	*)
+		PROCEDURE SlideNode( node: Index ): Index;
+		VAR  next: Index;  swap: Node;
+		BEGIN
+			swap := table[node];  next := node + 1;
+			(* if we're sliding an internal node, find the highest possible leaf to exchange with	*)
+			IF ODD( swap.weight ) THEN
+				WHILE swap.weight > table[next+1].weight DO  INC( next )  END
+			END;
+			(*	swap the two nodes	*)
+			table[node] := table[next];  table[next] := swap;
+			table[next].up := table[node].up;  table[node].up := swap.up;
+
+			(*	repair the symbol map and tree structure	*)
+			IF ODD( swap.weight ) THEN
+				table[swap.down].up := next;
+				table[swap.down - 1].up := next;
+				map[table[node].pattern] := node;
+			ELSE
+				table[table[node].down - 1].up := node;
+				table[table[node].down].up := node;
+				map[swap.pattern] := next;
+			END;
+			RETURN next;
+		END SlideNode;		
+
+
+		(*	increment symbol weight and re balance the tree.	*)
+		PROCEDURE IncWeight( node: Index );
+		VAR  up: Index;
+		BEGIN
+			(*	obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
+				otherwise, promote leaf to group leader position in the tree	*)
+			IF table[node].up = node + 1 THEN
+				INC( table[node].weight, 2 );  INC( node );
+			ELSE
+				node := GroupLeader( node )
+			END;
+			(*	increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
+				internal nodes work upwards from their initial positions; while symbol nodes slide over first,
+				then work up from their final positions.	*)
+			INC( table[node].weight, 2 );  up := table[node].up;
+			WHILE up # 0 DO
+				WHILE table[node].weight > table[node + 1].weight DO  node := SlideNode( node)  END;
+				IF ODD( table[node].weight) THEN
+					node := up
+				ELSE
+					node := table[node].up
+				END;
+				INC( table[node].weight, 2 );  up := table[node].up
+			END;
+		END IncWeight;
+		
+			
+		(*  scale all weights and rebalance the tree,
+		    zero weight nodes are removed from the tree by sliding them out the rank list	*)
+		PROCEDURE Scale;
+		VAR  node, prev: Index;  weight: LONGINT;
+		
+			PROCEDURE Weight( idx: Index ): LONGINT;
+			VAR w: LONGINT;
+			BEGIN
+				w := table[idx].weight;
+				IF ODD( w ) THEN  RETURN w - 1  ELSE  RETURN w  END
+			END Weight;
+			
+		BEGIN
+			node := esc + 1;
+			(*	work up the tree from the escape node scaling weights by the value of bits	*)
+			WHILE node <= root DO
+				(*	recompute the weight of internal nodes; slide down and out any unused ones	*)
+				weight := table[node].weight;
+				IF ODD( weight ) THEN
+					weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
+				ELSE
+					(*	remove zero weight leaves by incrementing esc and removing them from the symbol map 	*)
+					weight := weight DIV 2;
+					IF ODD( weight ) THEN  DEC( weight )  END;
+					IF weight = 0 THEN
+						map[table[node].pattern] := 0;  INC( esc, 2 );
+					END
+				END;
+				(*	slide the scaled node back down over any previous nodes with larger weights	*)
+				table[node].weight := weight;
+				prev := node - 1;
+				WHILE weight < table[prev].weight DO  IGNORE SlideNode( prev );  DEC( prev )  END;
+				INC( node )
+			END;
+			(*	prepare a new escape node	*)
+			table[esc].down := 0;  table[esc].weight := 0;
+		END Scale;
+		
+	
+		(*	encode the next symbol	*)
+		PROCEDURE EncodeByte( ch: CHAR );
+		VAR
+			code, n: LONGINT;  
+			up, idx, node: Index;  sym: Symbol;
+		BEGIN
+			ASSERT( mode = Encode );
+			(*	for a new symbol, direct the receiver to the escape node	*)
+			sym := ORD( ch );
+			node := map[sym];  idx := node; 
+			IF idx = 0 THEN  idx := esc  END;
+			ASSERT( idx # 0 );
+			(*	accumulate the code bits by working up the tree from the node to the root	*)
+			code := 1;  n := 0;  up := table[idx].up;
+			WHILE up # 0 DO
+				code := code * 2;  INC( n );
+				IF ODD( idx ) THEN  INC( code )  END;
+				idx := up;  up := table[idx].up	
+			END;
+			(*	send the code, root selector bit first	*)
+			WHILE n > 0 DO
+				PutBit( code MOD 2 );  code := code DIV 2;
+				DEC( n )
+			END;
+			(*	send pattern and incorporate it into the tree	*)
+			IF node = 0 THEN
+				PutPattern( sym );
+				node := AddSymbol( sym );
+			END;
+			(*	adjust and re-balance the tree	*)
+			IncWeight( node );
+		END EncodeByte;
+
+
+		(*	decode the next symbol	*)
+		PROCEDURE DecodeByte( ): CHAR;
+		VAR
+			node, down: Index; 
+			sym: Symbol;
+		BEGIN
+			ASSERT( mode = Decode );
+			(*	work down the tree from the root until reaching either a leaf or the escape node.  
+			A one bit means go left, a zero means go right.	*)
+			node := root;  down := table[node].down;
+			WHILE down # 0 DO
+				IF GetBit( ) = 1 THEN  
+					node := down-1
+				ELSE
+					node := down;
+				END;
+				down := table[node].down;
+			END;
+			(*	sent to the escape node	*)
+			IF node = esc THEN
+				sym := GetPattern( );  
+				node := AddSymbol( sym );
+			ELSE
+				sym := table[node].pattern
+			END;
+			(*	adjust and re-balance the tree	*)
+			IncWeight( node );
+			RETURN CHR( sym );
+		END DecodeByte;
+		
+	END HuffmanCoder;
+
+
+
+TYPE
+	Encoder* =OBJECT
+		VAR
+			huff: HuffmanCoder;
+			out: Streams.Writer;
+			
+			PROCEDURE & New*( archive: Streams.Writer );
+			BEGIN
+				NEW( huff, Encode, NIL, archive );
+				out := archive
+			END New;
+			
+			PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
+			VAR  i: LONGINT;
+			BEGIN
+				out.RawNum( len );
+				huff.Initialize;	
+				i := 0;
+				WHILE i < len DO
+					huff.EncodeByte( source[i] );  INC( i );
+					IF (i MOD ScaleSize = 0) & ((len-i) >= ScaleSize) THEN  huff.Scale  END
+				END;
+				huff.Finish
+			END CompressBlock;
+
+	END Encoder;
+	
+	
+	
+TYPE
+	Decoder* =OBJECT
+		VAR
+			huff: HuffmanCoder;
+			input: Streams.Reader;
+			
+			PROCEDURE & New*( archive: Streams.Reader );
+			BEGIN
+				NEW( huff, Decode, archive, NIL );
+				input := archive
+			END New;
+			
+			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
+			VAR  i, blockSize: LONGINT; 
+			BEGIN
+				input.RawNum( blockSize );
+				IF blockSize = 0 THEN (* end of stream *) RETURN 0  END;
+				huff.Initialize;  
+				i := 0;
+				WHILE i < blockSize DO
+					buf[i] := huff.DecodeByte( );  INC( i );
+					IF (i MOD ScaleSize = 0) & ((blockSize-i) >= ScaleSize) THEN  huff.Scale  END
+				END;
+				RETURN blockSize
+			END ExtractBlock;
+
+	END Decoder;
+	
+	
+END AdaptiveHuffman.
+
+

+ 11 - 13
source/Huffman.Mod

@@ -52,6 +52,7 @@ TYPE
 			PROCEDURE Initialize( CONST source: ARRAY OF CHAR; len: LONGINT );
 			VAR pf: PatternFrequencies;
 			BEGIN
+				w.RawNum( len );
 				pf := CountPatterns( source, len );
 				WriteFrequencies( pf );
 				BuildCodeTable( BuildTree( pf ) );				
@@ -66,9 +67,7 @@ TYPE
 				n := LEN( pf^ );
 				IF n < 128 THEN
 					w.Char( CHR( n ) );
-					FOR i := 0 TO n - 1 DO  
-						w.RawNum( pf[i].freq );  w.Char( CHR( pf[i].pattern ) )  
-					END
+					FOR i := 0 TO n - 1 DO  w.RawNum( pf[i].freq );  w.Char( CHR( pf[i].pattern ) )  END
 				ELSE
 					w.Char( 0X );
 					FOR i := 0 TO 255 DO  a[i] := 0  END;
@@ -87,7 +86,7 @@ TYPE
 				FOR i := 0 TO 255 DO  
 					IF a[i].freq > 0 THEN (* scale => [1..101H] *)
 						a[i].freq := 100H * a[i].freq DIV len + 1;
-					END
+					END;
 				END;
 				RETURN SortPatterns( a )
 			END CountPatterns;
@@ -137,18 +136,15 @@ TYPE
 			
 			
 			PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
-			VAR i, codesize: LONGINT;
+			VAR i: LONGINT;
 			BEGIN
 				Initialize( source, len );
 				FOR i := 0 TO len - 1 DO  Append( codeTable[ORD( source[i] )] )  END;
 				
-				codesize := 8*bpos;
 				IF curBit # 7 THEN  
-					INC( codesize, (7 - curBit) );
 					buffer[bpos] := CHR( byte );  INC( bpos ); 
 				END;
 				
-				w.RawNum( codesize );
 				FOR i := 0 TO bpos - 1 DO  w.Char( buffer[i] )  END;
 				w.Update
 			END CompressBlock;
@@ -159,7 +155,7 @@ TYPE
 	
 	Decoder* = OBJECT
 		VAR
-			codesize: LONGINT;	(* bits! *)
+			blockSize: LONGINT;
 			r: Streams.Reader;
 			tree: Node;
 			byte, curBit: LONGINT;
@@ -174,7 +170,6 @@ TYPE
 			BEGIN
 				pf := ReadFrequencies( r );
 				tree := BuildTree( pf );
-				r.RawNum( codesize );
 				curBit := -1
 			END Initialize;
 			
@@ -207,10 +202,12 @@ TYPE
 			END GetBit;	
 			
 			
-			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR; VAR len: LONGINT );
+			PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
 			VAR 
-				i: LONGINT;  n: Node;
+				i, len: LONGINT;  n: Node;
 			BEGIN
+				r.RawNum( blockSize ); 
+				IF blockSize = 0 THEN  (* end of stream *) RETURN 0  END;
 				Initialize;  i := 0;  len := 0;
 				REPEAT
 					n := tree; 
@@ -219,7 +216,8 @@ TYPE
 						INC( i )
 					UNTIL n.left = NIL;	(* leaf *)
 					buf[len] := CHR( n.pattern );  INC( len )
-				UNTIL i >= codesize;
+				UNTIL len >= blockSize;
+				RETURN blockSize
 			END ExtractBlock;
 				
 	END Decoder;

+ 47 - 42
source/OZip.Mod

@@ -1,18 +1,17 @@
 MODULE OZip; (** AUTHOR GF; PURPOSE "files and streams compression tool"; *)
 
-IMPORT Streams, Commands, Files, Strings, BW :=  BorrowsWheeler, Huffman, Log := KernelLog;
+IMPORT Streams, Commands, Files, Strings, BWT :=  BorrowsWheeler, Huffman;
 
 (*
 	Format	=	ComprTag { Block }
-	Block		=	Index  Freqs  Code 
+	Block		=	bwindex size Freqs  { CHAR }
 	Freqs		= 	num { freq } 			(num = 0,  (256 * freq))
 				|	num { freq patt }		(num = 1..127)
-	Code		=	size { CHAR }		(size in bits ! )
-	num	=	CHAR			
-	patt	=	CHAR
-	freq	=	RawNum
-	Index	=	RawNum
-	size	=	RawNum
+	num		=	CHAR			
+	patt		=	CHAR
+	freq		=	RawNum
+	bwindex	=	RawNum
+	size		=	RawNum
 *)
 
 CONST
@@ -24,7 +23,7 @@ CONST
 	PROCEDURE Compress*( r: Streams.Reader;  w: Streams.Writer );
 	VAR 
 		huff: Huffman.Encoder; 
-		bw: BW.Encoder;  index: LONGINT; 
+		bw: BWT.Encoder;  bwindex: LONGINT; 
 		buffer: ARRAY BlockSize OF CHAR;  len: LONGINT;
 	BEGIN 
 		NEW( huff, w );  NEW( bw );
@@ -32,37 +31,42 @@ CONST
 		LOOP
 			r.Bytes( buffer, 0, BlockSize, len );
 			IF len < 1 THEN  EXIT  END;
-			index := bw.EncodeBlock( buffer, len );
-			w.RawNum( index );
+			bwindex := bw.EncodeBlock( buffer, len );
+			w.RawNum( bwindex );
 			huff.CompressBlock( buffer, len );
-			IF r IS Files.Reader THEN  Log.Char( '.' )  END
-		END
+		END;
+		w.RawNum( 0 );	(* bw index *)
+		w.RawNum( 0 );	(* size of block, (end of stream) *)
+		w.Update
 	END Compress;
 	
 	
-		
-	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer;  VAR msg: ARRAY OF CHAR ): BOOLEAN;
+	(* returns false if input is not an OZip compressed stream *)
+	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer ): BOOLEAN;
 	VAR 
 		huff: Huffman.Decoder;
 		tag, len, i, bwIndex: LONGINT; 
-		bw: BW.Decoder;
+		bw: BWT.Decoder;
 		buffer: ARRAY BlockSize OF CHAR;
 	BEGIN 
 		r.RawLInt( tag );
-		IF tag # ComprTag  THEN
-			msg := "OZip.Uncompress: bad input (compressed stream expected)"; 
+		IF tag = ComprTag  THEN
+			NEW( huff, r );  NEW( bw );
+			LOOP
+				r.RawNum( bwIndex );
+				len := huff.ExtractBlock( buffer );	
+				IF len > 0 THEN
+					bw.DecodeBlock( buffer, len, bwIndex );
+					FOR i := 0 TO len - 1 DO  w.Char( buffer[i] )  END;
+				ELSE
+					EXIT
+				END
+			END;
+			w.Update;
+			RETURN TRUE
+		ELSE
 			RETURN FALSE
-		END;
-		NEW( huff, r );  NEW( bw );
-		WHILE r.Available( ) > 0 DO	
-			r.RawNum( bwIndex );
-			huff.ExtractBlock( buffer, len );	
-			bw.DecodeBlock( buffer, len, bwIndex );
-			FOR i := 0 TO len - 1 DO  w.Char( buffer[i] )  END;
-			IF w IS Files.Writer THEN  Log.Char( '.' )  END
-		END;
-		w.Update;
-		RETURN TRUE
+		END
 	END Uncompress;
 
 	
@@ -89,18 +93,18 @@ CONST
 		name1, name2: ARRAY 128 OF CHAR;
 	BEGIN
 		IF c.arg.GetString( name1 ) THEN
+			c.out.String( "OZip.CompressFile " ); c.out.String( name1 );  c.out.Update;
 			IF ~c.arg.GetString( name2 ) THEN
 				name2 := name1;  Strings.Append( name2, Suffix )
 			END;
 			f1 := Files.Old( name1 );
 			IF f1 # NIL THEN
-				Log.String( "OZip.Compress " ); Log.String( name1 ); Log.Char( ' ' );
 				Files.OpenReader( r, f1, 0 ); 
 				f2 := NewFile( name2 );  Files.OpenWriter( w, f2, 0 );
-				Compress( r, w );  w.Update;  Files.Register( f2 );
-				Log.String( " => " );  Log.String( name2 );  Log.Ln; 
+				Compress( r, w );  Files.Register( f2 );
+				c.out.String( " => " );  c.out.String( name2 );  c.out.Ln; 
 			ELSE
-				c.error.String( "could not open file  " );  c.error.String( name1 );  c.error.Ln
+				c.error.String( "   ### file not found" );  c.error.Ln
 			END
 		ELSE
 			c.error.String( "usage: OZip.CompressFile infile [outfile] ~ " );  c.error.Ln;
@@ -109,14 +113,16 @@ CONST
 	END CompressFile;
 	
 	
+	
 	(** OZip.UncompressFile  infile [outfile] ~ *)
 	PROCEDURE UncompressFile*( c: Commands.Context );
 	VAR
 		f1, f2: Files.File;
 		r: Files.Reader;  w: Files.Writer;
-		name1, name2, msg: ARRAY 128 OF CHAR;
+		name1, name2: ARRAY 128 OF CHAR;
 	BEGIN
 		IF c.arg.GetString( name1 ) THEN
+			c.out.String( "OZip.UncompressFile " );  c.out.String( name1 );  c.out.Update;
 			IF ~c.arg.GetString( name2 ) THEN
 				name2 := name1;
 				IF Strings.EndsWith( Suffix, name2 ) THEN  name2[Strings.Length( name2 ) - 3] := 0X
@@ -127,15 +133,14 @@ CONST
 			IF f1 # NIL THEN
 				Files.OpenReader( r, f1, 0 );	 
 				f2 := NewFile( name2 );  Files.OpenWriter( w, f2, 0 );
-				Log.String( "OZip.Uncompress " );  Log.String( name1 );  Log.Char( ' ' );
-				IF Uncompress( r, w, msg ) THEN
-					w.Update;  Files.Register( f2 );
-					Log.String( " => " );  Log.String( name2 );  Log.Ln; 
+				IF Uncompress( r, w ) THEN
+					Files.Register( f2 );
+					c.out.String( " => " );  c.out.String( name2 );  c.out.Ln;  c.out.Update
 				ELSE
-					c.error.String( msg );  c.error.Ln
+					c.error.String( "  ### wrong input (OZip compressed data expected)" );  c.error.Ln
 				END
 			ELSE
-				c.error.String( "could not open file  " );  c.error.String( name1 );  c.error.Ln
+				c.error.String( "  ### file not found" );  c.error.Ln
 			END
 		ELSE
 			c.error.String( "usage: OZip.UncompressFile infile [outfile] ~ " );  c.error.Ln;
@@ -146,11 +151,11 @@ CONST
 END OZip.
 
 
-	OZip.CompressFile   OZip.Mod ~
+	OZip.CompressFile   TLS.Mod ~
 	OZip.CompressFile   OZip.GofU ~
 	OZip.CompressFile   guide.pdf ~
 		
-	OZip.UncompressFile   OZip.Mod.oz  TTT.Mod ~
+	OZip.UncompressFile   TLS.Mod.oz TTT.Mod ~
 	OZip.UncompressFile   OZip.GofU.oz  TTT.GofU ~
 	OZip.UncompressFile   guide.pdf.oz  TTT.pdf ~
 	

+ 157 - 0
source/OZip2.Mod

@@ -0,0 +1,157 @@
+MODULE OZip2; (** AUTHOR GF; PURPOSE "files and streams compression tool"; *)
+
+IMPORT Streams, Commands, Files, Strings, 
+	BWT :=  BorrowsWheeler, AH := AdaptiveHuffman;
+
+(*
+	Format	=	ComprTag { Block }
+	Block		=	bwindex  size { CHAR }
+	bwindex	=	RawNum
+	size		=	RawNum
+*)
+
+CONST
+	BlockSize = 8*1024;
+	ComprTag = LONGINT(0FEFD1F18H);
+	Suffix = ".oz2";
+		
+
+	PROCEDURE Compress*( r: Streams.Reader;  w: Streams.Writer );
+	VAR 
+		huff: AH.Encoder; 
+		bw: BWT.Encoder;  index: LONGINT; 
+		buffer: ARRAY BlockSize OF CHAR;  len: LONGINT;
+	BEGIN 
+		NEW( huff, w );  NEW( bw );
+		w.RawLInt( ComprTag );
+		LOOP
+			r.Bytes( buffer, 0, BlockSize, len );
+			IF len < 1 THEN  EXIT  END;
+			index := bw.EncodeBlock( buffer, len );
+			w.RawNum( index );
+			huff.CompressBlock( buffer, len );
+		END;
+		w.RawNum( 0 );	(* bw index*)
+		w.RawNum( 0 );	(* lenght of block, (end of stream) *)
+		w.Update
+	END Compress;
+	
+	
+	(* returns false if input is not an OZip compressed stream *)	
+	PROCEDURE Uncompress*( r: Streams.Reader;  w: Streams.Writer ): BOOLEAN;
+	VAR 
+		huff: AH.Decoder;
+		bw: BWT.Decoder;
+		tag, len, i, bwIndex: LONGINT;
+		buffer: ARRAY BlockSize OF CHAR;
+	BEGIN 
+		r.RawLInt( tag );
+		IF tag = ComprTag  THEN
+			NEW( huff, r );  NEW( bw );
+			LOOP
+				r.RawNum( bwIndex );
+				len := huff.ExtractBlock( buffer );	
+				IF len > 0 THEN  
+					bw.DecodeBlock( buffer, len, bwIndex );
+					FOR i := 0 TO len - 1 DO  w.Char( buffer[i] )  END;
+				ELSE
+					EXIT
+				END
+			END;
+			w.Update;
+			RETURN TRUE
+		ELSE
+			RETURN FALSE
+		END
+	END Uncompress;
+
+	
+
+	
+	PROCEDURE NewFile( CONST name: ARRAY OF CHAR ): Files.File;
+	VAR
+		name2: ARRAY 128 OF CHAR;  res: LONGINT;
+	BEGIN
+		IF Files.Old( name ) # NIL THEN
+			COPY( name, name2);  Strings.Append( name2, ".Bak" );
+			Files.Rename( name, name2, res );
+		END;
+		RETURN Files.New( name )
+	END NewFile;
+	
+	
+	
+	(** OZip2.CompressFile  infile [outfile] ~ *)
+	PROCEDURE CompressFile*( c: Commands.Context );
+	VAR
+		f1, f2: Files.File;
+		r: Files.Reader;  w: Files.Writer;
+		name1, name2: ARRAY 128 OF CHAR;
+	BEGIN
+		IF c.arg.GetString( name1 ) THEN
+			c.out.String( "OZip2.CompressFile " ); c.out.String( name1 );  c.out.Update;
+			IF ~c.arg.GetString( name2 ) THEN
+				name2 := name1;  Strings.Append( name2, Suffix )
+			END;
+			f1 := Files.Old( name1 );
+			IF f1 # NIL THEN
+				Files.OpenReader( r, f1, 0 ); 
+				f2 := NewFile( name2 );  Files.OpenWriter( w, f2, 0 );
+				Compress( r, w );  Files.Register( f2 );
+				c.out.String( " => " );  c.out.String( name2 );  c.out.Ln;  c.out.Update
+			ELSE
+				c.error.String( "  ### file not found" );  c.error.Ln
+			END
+		ELSE
+			c.error.String( "usage: OZip2.CompressFile infile [outfile] ~ " );  c.error.Ln;
+		END;
+		c.error.Update
+	END CompressFile;
+	
+	
+	(** OZip2.UncompressFile  infile [outfile] ~ *)
+	PROCEDURE UncompressFile*( c: Commands.Context );
+	VAR
+		f1, f2: Files.File;
+		r: Files.Reader;  w: Files.Writer;
+		name1, name2: ARRAY 128 OF CHAR;
+	BEGIN
+		IF c.arg.GetString( name1 ) THEN
+			c.out.String( "OZip2.UncompressFile " );  c.out.String( name1 );  c.out.Update;
+			IF ~c.arg.GetString( name2 ) THEN
+				name2 := name1;
+				IF Strings.EndsWith( Suffix, name2 ) THEN  name2[Strings.Length( name2 ) - 4] := 0X
+				ELSE  Strings.Append( name2, ".uncomp" )
+				END
+			END;
+			f1 := Files.Old( name1 );
+			IF f1 # NIL THEN
+				Files.OpenReader( r, f1, 0 );	 
+				f2 := NewFile( name2 );  Files.OpenWriter( w, f2, 0 );
+				IF Uncompress( r, w ) THEN
+					Files.Register( f2 );
+					c.out.String( " => " );  c.out.String( name2 );  c.out.Ln;  c.out.Update
+				ELSE
+					c.error.String( "  ### wrong input (OZip2 compressed data expected)" );  c.error.Ln
+				END
+			ELSE
+				c.error.String( "   ### file not found" );  c.error.Ln
+			END
+		ELSE
+			c.error.String( "usage: OZip2.UncompressFile infile [outfile] ~ " );  c.error.Ln;
+		END;
+		c.error.Update
+	END UncompressFile;
+	
+END OZip2.
+
+
+	OZip2.CompressFile   TLS.Mod ~
+	OZip2.CompressFile   OZip2.GofU ~
+	OZip2.CompressFile   guide.pdf ~
+		
+	OZip2.UncompressFile   TLS.Modc.oz2  TTT.Mod ~
+	OZip2.UncompressFile   OZip2.GofU.oz2  TTT.GofU ~
+	OZip2.UncompressFile   guide.pdf.oz2  TTT.pdf ~
+	
+	System.Free  OZip2 AdaptiveHuffman  BorrowsWheeler ~

+ 2 - 1
source/Release.Tool

@@ -1523,7 +1523,8 @@ PACKAGE Contributions ARCHIVE "Contributions.zip" SOURCE "ContributionsSrc.zip"
 	PrettyPrint.Mod PrettyPrintHighlighter.XML
 
 	# streams and files compression
-	Huffman.Mod BorrowsWheeler.Mod OZip.Mod
+	BorrowsWheeler.Mod Huffman.Mod OZip.Mod
+	AdaptiveHuffman.Mod OZip2.Mod
 
 	NbrInt8.Mod NbrInt16.Mod NbrInt32.Mod NbrInt64.Mod NbrInt.Mod NbrRat.Mod
 	NbrRe32.Mod NbrRe64.Mod NbrRe.Mod NbrCplx.Mod NbrStrings.Mod