Browse Source

Added support for HTTPS and additional features in WebHTTPClient and WebHTTPTools.

git-svn-id: https://svn.inf.ethz.ch/svn/lecturers/a2/trunk@7048 8c9fc860-2736-0410-a75d-ab315db34111
eth.tmartiel 8 years ago
parent
commit
c8a3bbd737
2 changed files with 175 additions and 4 deletions
  1. 68 2
      source/WebHTTPClient.Mod
  2. 107 2
      source/WebHTTPTools.Mod

+ 68 - 2
source/WebHTTPClient.Mod

@@ -3,12 +3,69 @@
 MODULE WebHTTPClient; (** AUTHOR "TF"; PURPOSE "HTTP client"; *)
 
 IMPORT
-	TFLog, Streams, IP, DNS, TCP, WebHTTP, Modules;
+	TFLog, Streams, Strings, IP, DNS, TCP, TLS, WebHTTP, Modules;
 
 VAR log : TFLog.Log;
 
 CONST Ok* = 0;
 
+TYPE
+	(** Reader that delivers the body of an HTTP response read from another reader.
+		If the response header announces the "chunked" transfer encoding, the chunk size lines are
+		consumed internally and only chunk data is delivered. Otherwise h.contentlength bytes are
+		delivered, or - if h.contentlength is negative - everything until the source reader fails. *)
+	ContentReader * = OBJECT (Streams.Reader)
+	VAR
+		in: Streams.Reader; (* source positioned at the start of the response body *)
+		encoding: ARRAY 64 OF CHAR; (* copy of h.transferencoding *)
+		chunked: BOOLEAN; (* TRUE iff encoding matches "chunked" *)
+		length: LONGINT; (* chunked: bytes left in the current chunk; otherwise: bytes left in the body, negative if unknown *)
+
+		(** in: reader positioned directly behind the response header; h: the parsed response header. *)
+		PROCEDURE & InitContentReader * (in: Streams.Reader; CONST h: WebHTTP.ResponseHeader);
+		BEGIN
+			InitReader(Receive, 1024);
+			SELF.in := in;
+			COPY(h.transferencoding, encoding);
+			chunked := (encoding # "") & Strings.Match("chunked", encoding);
+			IF chunked THEN
+				ReadChunkSize()
+			ELSE
+				length := h.contentlength;
+			END;
+		END InitContentReader;
+
+		(* Parse a chunk size line (hexadecimal number) from the source into length. *)
+		PROCEDURE ReadChunkSize;
+		VAR
+			token: ARRAY 64 OF CHAR;
+			r: LONGINT;
+		BEGIN
+			in.SkipSpaces(); in.Token(token); Strings.HexStrToInt(token, length, r); in.SkipLn()
+		END ReadChunkSize;
+
+		(** Receiver procedure of this reader: stores up to size body bytes in buf starting at ofs.
+			len returns the number of bytes stored; res is Streams.EOF if fewer than min bytes
+			could be delivered and Streams.Ok otherwise. *)
+		PROCEDURE Receive * (VAR buf: ARRAY OF CHAR;  ofs, size, min: LONGINT;  VAR len, res: LONGINT);
+		VAR
+			n: LONGINT;
+			ch: CHAR;
+		BEGIN
+			n := 0;
+			IF chunked THEN
+				(* length carries the remainder of the current chunk across calls, so a chunk that is
+					larger than the caller's buffer is continued by the next call instead of being dropped *)
+				WHILE (n < size) & (length > 0) & (in.res = Streams.Ok) DO
+					in.Char(ch);
+					IF in.res = Streams.Ok THEN
+						buf[ofs + n] := ch; INC(n); DEC(length);
+						IF length = 0 THEN
+							(* chunk exhausted: skip the line end behind the data, then read the next size line;
+								a size of 0 marks the last chunk *)
+							in.SkipLn; ReadChunkSize()
+						END
+					END
+				END
+			ELSE
+				(* a negative length means the size is unknown: read until the source fails *)
+				WHILE (n < size) & ((length < 0) OR (n < length)) & (in.res = Streams.Ok) DO
+					in.Char(ch);
+					(* only store characters that were really read; a failing in.Char does not deliver data *)
+					IF in.res = Streams.Ok THEN buf[ofs + n] := ch; INC(n) END
+				END;
+				IF length >= 0 THEN DEC(length, n) END
+			END;
+			len := n;
+			IF len < min THEN
+				res := Streams.EOF
+			ELSE
+				res := Streams.Ok
+			END
+		END Receive;
+	END ContentReader;
+
 PROCEDURE Head*(CONST url : ARRAY OF CHAR; VAR con : TCP.Connection; VAR header: WebHTTP.ResponseHeader; VAR res : LONGINT);
 VAR
 	host : ARRAY 128 OF CHAR;
@@ -49,12 +106,21 @@ VAR
 	fadr : IP.Adr;
 	w : Streams.Writer;
 	x : WebHTTP.AdditionalField;
+	tls: TLS.Connection;
 BEGIN
 	IF WebHTTP.SplitHTTPAdr(url, host, path, port) THEN
 		IF path = "" THEN path := "/" END;
 		DNS.HostByName(host, fadr, res);
 		IF res = DNS.Ok THEN
-			IF  con = NIL THEN NEW(con); con.Open(TCP.NilPort, fadr, port, res); END;
+			IF  con = NIL THEN
+				IF port = WebHTTP.HTTPPort THEN
+					NEW(con);
+				ELSE
+					NEW(tls);
+					con := tls;
+				END;
+				con.Open(TCP.NilPort, fadr, port, res);
+			END;
 			IF res = TCP.Ok THEN
 				Streams.OpenWriter(w, con.Send);
 				Streams.OpenReader(out, con.Receive);

+ 107 - 2
source/WebHTTPTools.Mod

@@ -3,7 +3,7 @@
 MODULE WebHTTPTools; (** AUTHOR "TF"; PURPOSE "HTTP download tool"; *)
 
 IMPORT
-	Commands, Files, TCP, WebHTTP, WebHTTPClient, Streams, TFLog, Modules;
+	Commands, Files, IP, TCP, WebHTTP, WebHTTPClient, Streams, TFLog, Modules;
 
 VAR log : TFLog.Log;
 
@@ -64,7 +64,7 @@ BEGIN
 			in.SkipSpaces(); in.Token(token); cs := HexStrToIntDef(token, 0); in.SkipLn();
 			WHILE cs # 0 DO
 				FOR i := 0 TO cs - 1 DO in.Char(ch); fw.Char( ch) END;
-				 in.SkipLn;in.SkipSpaces; in.Token(token); cs := HexStrToIntDef(token, 0); in.SkipLn;
+				in.SkipLn;in.SkipSpaces; in.Token(token); cs := HexStrToIntDef(token, 0); in.SkipLn;
 			END;
 			IF fw.res = Streams.Ok THEN log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - OK"); log.Exit
 			ELSE log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - failed"); log.Exit
@@ -104,6 +104,105 @@ BEGIN
 	END;
 END Head;
 
+(** Command: WebHTTPTools.GetAll baseUrl baseDir { fileName } ~
+	Downloads baseUrl/fileName into the file baseDir/fileName for every file name given.
+	Each file is fetched over its own connection; the outcome of every download is written to the log. *)
+PROCEDURE GetAll*(context : Commands.Context);
+VAR
+	baseUrl, baseDir, fileName, url, name, token: ARRAY 256 OF CHAR;
+	file: Files.File;
+	fw: Files.Writer;
+	rh: WebHTTP.RequestHeader;
+	h: WebHTTP.ResponseHeader;
+	con: TCP.Connection;
+	in: Streams.Reader;
+	res, i, cs: LONGINT;
+	ch : CHAR;
+BEGIN
+	IF ~context.arg.GetString(baseUrl) THEN
+		context.error.String("Expected base URL");
+		context.error.Ln;
+		RETURN;
+	END;
+	IF ~context.arg.GetString(baseDir) THEN
+		context.error.String("Expected base directory");
+		context.error.Ln;
+		RETURN;
+	END;
+
+	WHILE context.arg.GetString(fileName) DO
+		Files.JoinPath(baseUrl, fileName, url);
+		Files.JoinPath(baseDir, fileName, name);
+		log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" to "); log.String(name); log.Exit;
+		(* start every request from a clean header and without a connection, so that Get opens a new one *)
+		rh.fadr := IP.NilAdr;
+		rh.fport := 0;
+		rh.method := 0;
+		rh.maj := 0; rh.min := 0;
+		rh.uri := '';
+		rh.host := '';
+		rh.referer := '';
+		rh.useragent := "WebHTTPTool/0.1";
+		rh.accept := '';
+		rh.transferencoding := '';
+		rh.additionalFields := NIL;
+		res := WebHTTPClient.Ok;
+		con := NIL;
+		in := NIL;
+		WebHTTPClient.Get(url, rh, con, h, in, res);
+		IF res = WebHTTPClient.Ok THEN
+			file := Files.New(name);
+			IF file = NIL THEN
+				(* the target file could not be created: report it instead of opening a writer on NIL *)
+				log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - cannot create "); log.String(name); log.Exit
+			ELSE
+				Files.OpenWriter(fw, file, 0);
+				IF (h.transferencoding # "") & MatchPrefixI("chunked", h.transferencoding) THEN
+					(* chunked body: hexadecimal size line, data, line end; a size of 0 ends the body *)
+					in.SkipSpaces(); in.Token(token); cs := HexStrToIntDef(token, 0); in.SkipLn();
+					WHILE cs # 0 DO
+						FOR i := 0 TO cs - 1 DO in.Char(ch); fw.Char( ch) END;
+						in.SkipLn;in.SkipSpaces; in.Token(token); cs := HexStrToIntDef(token, 0); in.SkipLn;
+					END;
+					IF fw.res = Streams.Ok THEN log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - OK"); log.Exit
+					ELSE log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - failed"); log.Exit
+					END
+				ELSE
+					IF h.contentlength >= 0 THEN
+						FOR i := 0 TO h.contentlength - 1 DO in.Char(ch);  fw.Char(ch) END;
+						IF fw.res = Streams.Ok THEN log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - OK"); log.Exit
+						ELSE log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - failed"); log.Exit
+						END
+					ELSE
+						(* unknown length: copy until the stream ends. The character is read ahead so that
+							the one returned by the failing read is not appended to the file. *)
+						in.Char(ch);
+						WHILE in.res = Streams.Ok DO fw.Char(ch); in.Char(ch) END;
+						log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - OK"); log.Exit
+					END
+				END;
+				fw.Update;
+				Files.Register(file)
+			END;
+			con.Close
+		ELSE
+			log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - "); log.Int(h.statuscode, 5);
+			log.String(" ("); log.Int(res, 0); log.String(") ");
+			log.String(h.reasonphrase); log.Exit
+		END
+	END;
+END GetAll;
+
+(** Command: WebHTTPTools.Read url ~
+	Fetches url and copies the response body to the command's output stream,
+	using WebHTTPClient.ContentReader to read the body. *)
+PROCEDURE Read*(context : Commands.Context);
+VAR
+	url: ARRAY 512 OF CHAR;
+	reader: WebHTTPClient.ContentReader;
+	rh: WebHTTP.RequestHeader;
+	h: WebHTTP.ResponseHeader;
+	in: Streams.Reader;
+	con: TCP.Connection;
+	res: LONGINT;
+	ch: CHAR;
+BEGIN
+	IF ~context.arg.GetString(url) THEN
+		context.error.String("Expected URL");
+		context.error.Ln;
+		RETURN
+	END;
+
+	log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.Exit;
+	rh.useragent := "WebHTTPTool/0.1";
+	(* no existing connection is passed in, so Get has to open one *)
+	con := NIL;
+	in := NIL;
+	WebHTTPClient.Get(url, rh, con, h, in, res);
+	IF res = WebHTTPClient.Ok THEN
+		NEW(reader, in, h);
+		(* read ahead by one character so that the character returned by the failing Get is not printed *)
+		ch := reader.Get();
+		WHILE reader.res = Streams.Ok DO
+			context.out.Char(ch);
+			ch := reader.Get()
+		END;
+		con.Close
+	ELSE
+		log.Enter; log.TimeStamp; log.String("GET "); log.String(url); log.String(" - "); log.Int(h.statuscode, 5);
+		log.String(" ("); log.Int(res, 0); log.String(") ");
+		log.String(h.reasonphrase); log.Exit
+	END
+END Read;
+
 PROCEDURE Cleanup;
 BEGIN
 	log.Close
@@ -121,6 +220,10 @@ WebHTTPTools.Head http://212.254.73.92/~
 WebHTTPTools.Head http://www.microsoft.com~
 WebHTTPTools.Head http://slashdot.org~ (* whats wrong with slashdot ? telnet worked... *)
 
+WebHTTPTools.Get https://www.mediapart.fr/ test.html ~
+WebHTTPTools.Get http://files.rcsb.org/download/4hhb.cif.gz test.gz ~
+WebHTTPTools.Get https://highdim.com/ test.html ~
+WebHTTPTools.Get https://www.startpage.com/ test.html ~
 WebHTTP.Mod
 
 SystemTools.Free WebHTTPTools
@@ -129,3 +232,5 @@ WebHTTPClient WebWormWatch WebHTTPServer WebHTTP~
 WebHTTPServer.Start~
 WebWormWatch.Install~
 
+WebHTTPTools.GetAll http://files.rcsb.org/download/ WORK: 4hhb.cif 4hhb.cif.gz 4hhb.cif 4hhb.cif.gz ~
+WebHTTPTools.Read http://www.highdim.com ~