Răsfoiți Sursa

Use L2BlockM as the minimum increment when distributing the rows (dimension M) of the matrix among multiple processes. In the original code the increment was zero when nrProcesses > M, which led to an "index out of range" trap.

git-svn-id: https://svn.inf.ethz.ch/svn/lecturers/a2/trunk@8651 8c9fc860-2736-0410-a75d-ab315db34111
eth.morozova 6 ani în urmă
părinte
comite
c239e59c18

+ 3 - 3
source/AMD64.FoxArrayBaseOptimized.Mod

@@ -27,7 +27,7 @@ CONST
 	debug = FALSE;  parallel = TRUE;  SSE = TRUE;
 
 	MaxCachePoolSize = 0 (* disabled *)  (*  646*1024*1024  *)  (* enabled *) ;
-	maxProcesses = 32;
+	maxProcesses = 48;
 
 	cMatMulDynamic* = -1;  cMatMulScalarProduct* = 0;
 	cMatMulNaive* = 1;  cMatMulTransposed* = 2;
@@ -4738,7 +4738,7 @@ VAR
 		END;
 
 		IF parallel & (M > L2BlockM) THEN
-			inc := Align( M DIV nrProcesses, L2BlockM );  M1 := 0;
+			inc := Align( MAX(M DIV nrProcesses,L2BlockM), L2BlockM );  M1 := 0;
 			i := 0;
 			WHILE (M1 < M) DO
 				M2 := M1 + inc;
@@ -4796,7 +4796,7 @@ VAR
 			END;
 		END;
 		IF parallel & (M > L2BlockM) THEN
-			inc := Align( M DIV nrProcesses, L2BlockM );  M1 := 0;
+			inc := Align( MAX(M DIV nrProcesses,L2BlockM), L2BlockM );  M1 := 0;
 			i := 0;
 			WHILE (M1 < M) DO
 				M2 := M1 + inc;

+ 3 - 3
source/I386.FoxArrayBaseOptimized.Mod

@@ -27,7 +27,7 @@ CONST
 	debug = FALSE;  parallel = TRUE;  SSE = TRUE;
 
 	MaxCachePoolSize = 0 (* disabled *)  (*  646*1024*1024  *)  (* enabled *) ;
-	maxProcesses = 32;
+	maxProcesses = 48;
 
 	cMatMulDynamic* = -1;  cMatMulScalarProduct* = 0;
 	cMatMulNaive* = 1;  cMatMulTransposed* = 2;
@@ -4740,7 +4740,7 @@ VAR
 		END;
 
 		IF parallel & (M > L2BlockM) THEN
-			inc := Align( M DIV nrProcesses, L2BlockM );  M1 := 0;
+			inc := Align( MAX(M DIV nrProcesses,L2BlockM), L2BlockM );  M1 := 0;
 			i := 0;
 			WHILE (M1 < M) DO
 				M2 := M1 + inc;
@@ -4798,7 +4798,7 @@ VAR
 			END;
 		END;
 		IF parallel & (M > L2BlockM) THEN
-			inc := Align( M DIV nrProcesses, L2BlockM );  M1 := 0;
+			inc := Align( MAX(M DIV nrProcesses,L2BlockM), L2BlockM );  M1 := 0;
 			i := 0;
 			WHILE (M1 < M) DO
 				M2 := M1 + inc;