ARM.WMRasterScale.Mod 47 KB


  1. MODULE WMRasterScale; (** AUTHOR "TF"; PURPOSE "Support scaling of images"; *)
  2. (** AUTHOR "MZ"; PURPOSE "Speedup rasterops with SSE2"; *)
  3. IMPORT
  4. SYSTEM, Raster, Rect := WMRectangles;
  CONST
    (** Copy modes *)
    ModeCopy* = 0;
    ModeSrcOverDst* = 1;

    (** Scale modes *)
    ScaleBox* = 0;
    ScaleBilinear* = 1;

  TYPE
    (* Short local aliases for the imported rectangle and image types. *)
    Rectangle = Rect.Rectangle;
    Image = Raster.Image;

    (* Scaler operating on Raster images: fills the integer rectangle dr of dst from src.
       sx, sy, sdx, sdy are the source start position and per-pixel step. *)
    ScalerProc = PROCEDURE (src, dst : Image; VAR dr : Rectangle; sx, sy, sdx, sdy : LONGINT);

    (* Scaler operating on raw pixel memory: srcadr/dstadr are base addresses, srcbpr/dstbpr the
       bytes per row, dl/dt/dr/db the destination rectangle, sw/sh the source size in pixels. *)
    XScalerProc = PROCEDURE (srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  (* Nearest-neighbour scaler, copy mode.
     Fills the integer rectangle dr of dst (l/t inclusive, r/b exclusive) with pixels of src.
     sx, sy   : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy : source step per destination pixel/row, 16.16 fixed point
     No clamping is applied here; the source coordinates are handed to Raster.Get as they are. *)
  PROCEDURE Q0GenericCopy(src, dst : Image; VAR dr : Rectangle; sx, sy, sdx, sdy : LONGINT);
  VAR
    dstX, dstY, srcX, srcY, srcRow : LONGINT;
    pixel : Raster.Pixel;
    readMode, writeMode : Raster.Mode;
  BEGIN
    Raster.InitMode(readMode, Raster.srcCopy);
    Raster.InitMode(writeMode, Raster.srcCopy);
    srcY := sy;
    dstY := dr.t;
    WHILE dstY < dr.b DO
      srcRow := srcY DIV 65536; (* integer part of the source row, constant for the whole line *)
      srcX := sx;
      dstX := dr.l;
      WHILE dstX < dr.r DO
        Raster.Get(src, srcX DIV 65536, srcRow, pixel, readMode);
        Raster.Put(dst, dstX, dstY, pixel, writeMode);
        INC(srcX, sdx);
        INC(dstX)
      END;
      INC(srcY, sdy);
      INC(dstY)
    END
  END Q0GenericCopy;
  (* Nearest-neighbour scaler, source-over-destination mode.
     Same traversal as the copy variant, but pixels are written with Raster.srcOverDst.
     sx, sy   : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy : source step per destination pixel/row, 16.16 fixed point
     dr       : destination rectangle, l/t inclusive, r/b exclusive *)
  PROCEDURE Q0GenericSrcOverDst(src, dst : Image; VAR dr : Rectangle; sx, sy, sdx, sdy : LONGINT);
  VAR
    dstX, dstY, srcX, srcY, srcRow : LONGINT;
    pixel : Raster.Pixel;
    readMode, blendMode : Raster.Mode;
  BEGIN
    Raster.InitMode(readMode, Raster.srcCopy);
    Raster.InitMode(blendMode, Raster.srcOverDst);
    srcY := sy;
    dstY := dr.t;
    WHILE dstY < dr.b DO
      srcRow := srcY DIV 65536; (* integer part of the source row, constant for the whole line *)
      srcX := sx;
      dstX := dr.l;
      WHILE dstX < dr.r DO
        Raster.Get(src, srcX DIV 65536, srcRow, pixel, readMode);
        Raster.Put(dst, dstX, dstY, pixel, blendMode);
        INC(srcX, sdx);
        INC(dstX)
      END;
      INC(srcY, sdy);
      INC(dstY)
    END
  END Q0GenericSrcOverDst;
  (* Bilinear scaler, copy mode.
     Fills the integer rectangle dr of dst (l/t inclusive, r/b exclusive) from src.
     sx, sy   : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy : source step per destination pixel/row, 16.16 fixed point
     Both start coordinates are shifted by half a pixel (8000H). The four neighbouring
     source pixels are clamped to the image with Bounds and blended channel by channel. *)
  PROCEDURE Q1GenericCopy(src, dst : Image; VAR dr : Rectangle; sx, sy, sdx, sdy : LONGINT);
  CONST
    One = 65536; (* 1.0 in 16.16 fixed point *)
  VAR
    dstX, dstY, srcX, srcY : LONGINT;
    colLeft, colRight, rowTop, rowBottom : LONGINT;
    wLeft, wRight, wTop, wBottom : LONGINT;
    upper, lower : LONGINT;
    topLeft, topRight, bottomLeft, bottomRight, result : Raster.Pixel;
    readMode, writeMode : Raster.Mode;
  BEGIN
    Raster.InitMode(readMode, Raster.srcCopy);
    Raster.InitMode(writeMode, Raster.srcCopy);
    srcY := sy - 8000H;
    sx := sx - 8000H;
    dstY := dr.t;
    WHILE dstY < dr.b DO
      rowTop := Bounds(srcY DIV One, 0, src.height - 1);
      rowBottom := Bounds(srcY DIV One + 1, 0, src.height - 1);
      (* vertical weights depend on the row only *)
      wBottom := srcY MOD One;
      wTop := One - wBottom;
      srcX := sx;
      dstX := dr.l;
      WHILE dstX < dr.r DO
        colLeft := Bounds(srcX DIV One, 0, src.width - 1);
        colRight := Bounds(srcX DIV One + 1, 0, src.width - 1);
        Raster.Get(src, colLeft, rowTop, topLeft, readMode);
        Raster.Get(src, colRight, rowTop, topRight, readMode);
        Raster.Get(src, colLeft, rowBottom, bottomLeft, readMode);
        Raster.Get(src, colRight, rowBottom, bottomRight, readMode);
        wRight := srcX MOD One;
        wLeft := One - wRight;

        (* per channel: blend horizontally on both rows, then vertically *)
        upper := (ORD(topLeft[Raster.b]) * wLeft + ORD(topRight[Raster.b]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.b]) * wLeft + ORD(bottomRight[Raster.b]) * wRight) DIV One;
        result[Raster.b] := CHR((upper * wTop + lower * wBottom) DIV One);

        upper := (ORD(topLeft[Raster.g]) * wLeft + ORD(topRight[Raster.g]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.g]) * wLeft + ORD(bottomRight[Raster.g]) * wRight) DIV One;
        result[Raster.g] := CHR((upper * wTop + lower * wBottom) DIV One);

        upper := (ORD(topLeft[Raster.r]) * wLeft + ORD(topRight[Raster.r]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.r]) * wLeft + ORD(bottomRight[Raster.r]) * wRight) DIV One;
        result[Raster.r] := CHR((upper * wTop + lower * wBottom) DIV One);

        upper := (ORD(topLeft[Raster.a]) * wLeft + ORD(topRight[Raster.a]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.a]) * wLeft + ORD(bottomRight[Raster.a]) * wRight) DIV One;
        result[Raster.a] := CHR((upper * wTop + lower * wBottom) DIV One);

        Raster.Put(dst, dstX, dstY, result, writeMode);
        INC(srcX, sdx);
        INC(dstX)
      END;
      INC(srcY, sdy);
      INC(dstY)
    END
  END Q1GenericCopy;
  (* Bilinear scaler, source-over-destination mode.
     Same sampling as the bilinear copy variant, but the interpolated pixel is written with
     Raster.srcOverDst.
     sx, sy   : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy : source step per destination pixel/row, 16.16 fixed point
     dr       : destination rectangle, l/t inclusive, r/b exclusive *)
  PROCEDURE Q1GenericSrcOverDst(src, dst : Image; VAR dr : Rectangle; sx, sy, sdx, sdy : LONGINT);
  CONST
    One = 65536; (* 1.0 in 16.16 fixed point *)
  VAR
    dstX, dstY, srcX, srcY : LONGINT;
    colLeft, colRight, rowTop, rowBottom : LONGINT;
    wLeft, wRight, wTop, wBottom : LONGINT;
    upper, lower : LONGINT;
    topLeft, topRight, bottomLeft, bottomRight, result : Raster.Pixel;
    readMode, blendMode : Raster.Mode;
  BEGIN
    Raster.InitMode(readMode, Raster.srcCopy);
    Raster.InitMode(blendMode, Raster.srcOverDst);
    srcY := sy - 8000H;
    sx := sx - 8000H;
    dstY := dr.t;
    WHILE dstY < dr.b DO
      rowTop := Bounds(srcY DIV One, 0, src.height - 1);
      rowBottom := Bounds(srcY DIV One + 1, 0, src.height - 1);
      (* vertical weights depend on the row only *)
      wBottom := srcY MOD One;
      wTop := One - wBottom;
      srcX := sx;
      dstX := dr.l;
      WHILE dstX < dr.r DO
        colLeft := Bounds(srcX DIV One, 0, src.width - 1);
        colRight := Bounds(srcX DIV One + 1, 0, src.width - 1);
        Raster.Get(src, colLeft, rowTop, topLeft, readMode);
        Raster.Get(src, colRight, rowTop, topRight, readMode);
        Raster.Get(src, colLeft, rowBottom, bottomLeft, readMode);
        Raster.Get(src, colRight, rowBottom, bottomRight, readMode);
        wRight := srcX MOD One;
        wLeft := One - wRight;

        (* per channel: blend horizontally on both rows, then vertically *)
        upper := (ORD(topLeft[Raster.b]) * wLeft + ORD(topRight[Raster.b]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.b]) * wLeft + ORD(bottomRight[Raster.b]) * wRight) DIV One;
        result[Raster.b] := CHR((upper * wTop + lower * wBottom) DIV One);

        upper := (ORD(topLeft[Raster.g]) * wLeft + ORD(topRight[Raster.g]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.g]) * wLeft + ORD(bottomRight[Raster.g]) * wRight) DIV One;
        result[Raster.g] := CHR((upper * wTop + lower * wBottom) DIV One);

        upper := (ORD(topLeft[Raster.r]) * wLeft + ORD(topRight[Raster.r]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.r]) * wLeft + ORD(bottomRight[Raster.r]) * wRight) DIV One;
        result[Raster.r] := CHR((upper * wTop + lower * wBottom) DIV One);

        upper := (ORD(topLeft[Raster.a]) * wLeft + ORD(topRight[Raster.a]) * wRight) DIV One;
        lower := (ORD(bottomLeft[Raster.a]) * wLeft + ORD(bottomRight[Raster.a]) * wRight) DIV One;
        result[Raster.a] := CHR((upper * wTop + lower * wBottom) DIV One);

        Raster.Put(dst, dstX, dstY, result, blendMode);
        INC(srcX, sdx);
        INC(dstX)
      END;
      INC(srcY, sdy);
      INC(dstY)
    END
  END Q1GenericSrcOverDst;
  (* Nearest-neighbour scaler on raw 16-bit pixel memory (2 bytes per pixel), copy mode.
     srcadr, dstadr : base addresses of the source and destination pixel data
     srcbpr, dstbpr : bytes per row of source and destination
     dl, dt, dr, db : destination rectangle in pixels, dl/dt inclusive, dr/db exclusive
     sx, sy         : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy       : source step per destination pixel/row, 16.16 fixed point
     sw, sh         : not used here; present so the signature matches XScalerProc
     No clamping is done: the caller must make sure all sampled coordinates lie inside the source.
     The memory cursors are declared as ADDRESS (not LONGINT), like in the other raw-memory
     scalers of this module, so that addresses are never narrowed. *)
  PROCEDURE XQ0BGR565BGR565(srcadr,dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR
    x, y, col : LONGINT;
    fx, fy : LONGINT;
    yadr, adr, sa : ADDRESS;
  BEGIN
    fy := sy;
    yadr := dstadr + dl * 2 + dt * dstbpr; (* first destination pixel of the first row *)
    FOR y := dt TO db - 1 DO
      fx := sx;
      adr := yadr;
      sa := srcadr + (fy DIV 65536) * srcbpr; (* start of the sampled source row *)
      FOR x := dl TO dr - 1 DO
        col := SYSTEM.GET16(sa + (fx DIV 65536) * 2);
        INC(fx, sdx);
        SYSTEM.PUT16(adr, col);
        INC(adr, 2)
      END;
      INC(fy, sdy);
      INC(yadr, dstbpr)
    END
  END XQ0BGR565BGR565;
  160. (*
  161. (* this asm version is 2.3 times faster than the portable version. (P3/600/Dell precision 420 (dual)) *)
  162. PROCEDURE XQ0BGR565BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  163. VAR yadr : LONGINT;
  164. CODE {SYSTEM.i386}
  165. MOV EDX, [EBP+dstadr]
  166. MOV EBX, [EBP+dl]
  167. SHL EBX, 1
  168. ADD EDX, EBX
  169. MOV EBX, [EBP+dt]
  170. IMUL EBX, [EBP+dstbpr]
  171. ADD EDX, EBX ; edx = dstadr + 2 * dl + dt * dstbpr
  172. MOV [EBP+yadr], EDX
  173. ; init first EDI
  174. MOV EDI, EDX
  175. MOV ECX, [EBP+dt]
  176. SUB [EBP+db], ECX ; counter in db
  177. MOV EDX, [EBP+sdx] ; keep EDX
  178. ; init first ESI
  179. MOV ESI, [EBP+srcadr] ; calc new source adr
  180. MOV EAX, [EBP+sy]
  181. SHR EAX, 16 ; integer part of sy
  182. IMUL EAX, [EBP+srcbpr] ; sy * srcbpr
  183. ADD ESI, EAX ; first source adr in ESI
  184. outerloop:
  185. MOV EBX, [EBP+sx]
  186. MOV ECX, [EBP+dr] ; FOR x := dl TO dr - 1 DO
  187. SUB ECX, [EBP+dl]
  188. innerloop:
  189. MOV EAX, EBX
  190. SHR EAX, 16
  191. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  192. ADD EBX, EDX ; INC fx, sdx
  193. MOV [EDI], AX ; set the pixel
  194. ADD EDI, 2 ; inc adr
  195. LOOP innerloop
  196. ; free : EAX, EBX, ECX
  197. MOV EAX, [EBP+sy] ; sy := sy + sdy
  198. ADD EAX, [EBP+sdy]
  199. MOV [EBP+sy], EAX ; keep sy in EAX
  200. MOV ESI, [EBP+srcadr] ; calc new source adr
  201. SHR EAX, 16 ; integer part of sy
  202. IMUL EAX, [EBP+srcbpr] ; sy * srcbpr
  203. ADD ESI, EAX ; new source adr in ESI
  204. ; new dst address
  205. MOV ECX, [EBP+dstbpr]
  206. MOV EAX, [EBP+yadr]
  207. ADD EAX, ECX
  208. MOV EDI, EAX
  209. MOV [EBP+yadr], EAX
  210. DEC DWORD [EBP+db]
  211. JNLE outerloop
  212. END XQ0BGR565BGR565;
  213. *)
  214. (*PROCEDURE SSE2Q0BGR565BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT (*; VAR mysrc, mydest, myres: ARRAY OF LONGINT*));
  215. VAR yadr : LONGINT;
  216. CODE {SYSTEM.i386, SYSTEM.MMX, SYSTEM.SSE, SYSTEM.SSE2}
  217. PUSHFD
  218. PUSH EBX
  219. ; CLI
  220. MOV EDX, [EBP+dstadr]
  221. MOV EBX, [EBP+dl]
  222. SHL EBX, 1
  223. ADD EDX, EBX
  224. MOV EBX, [EBP+dt]
  225. IMUL EBX, [EBP+dstbpr]
  226. ADD EDX, EBX ; edx = dstadr + 2 * dl + dt * dstbpr
  227. MOV [EBP+yadr], EDX
  228. ; init first EDI
  229. MOV EDI, EDX
  230. MOV ECX, [EBP+dt]
  231. SUB [EBP+db], ECX ; counter in db
  232. JLE endyloop
  233. MOV EDX, [EBP+sdx] ; keep EDX
  234. ; init first ESI
  235. MOV ESI, [EBP+srcadr] ; calc new source adr
  236. MOV EAX, [EBP+sy]
  237. SHR EAX, 16 ; integer part of sy
  238. IMUL EAX, [EBP+srcbpr] ; sy * srcbpr
  239. ADD ESI, EAX ; first source adr in ESI
  240. outerloop:
  241. MOV EBX, [EBP+sx]
  242. MOV ECX, [EBP+dr] ; FOR x := dl TO dr - 1 DO
  243. SUB ECX, [EBP+dl]
  244. JLE endyloop
  245. innerloop:
  246. CMP ECX, 8
  247. JLE singlepixel
  248. PXOR XMM0, XMM0
  249. ; 8pixels at the time
  250. MOV EAX, EBX
  251. SHR EAX, 16
  252. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  253. PINSRW XMM0, EAX,0
  254. ADD EBX, EDX ; INC fx, sdx
  255. MOV EAX, EBX
  256. SHR EAX, 16
  257. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  258. PINSRW XMM0, EAX,1
  259. ADD EBX, EDX ; INC fx, sdx
  260. MOV EAX, EBX
  261. SHR EAX, 16
  262. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  263. PINSRW XMM0, EAX,2
  264. ADD EBX, EDX ; INC fx, sdx
  265. MOV EAX, EBX
  266. SHR EAX, 16
  267. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  268. PINSRW XMM0, EAX,3
  269. ADD EBX, EDX ; INC fx, sdx
  270. MOV EAX, EBX
  271. SHR EAX, 16
  272. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  273. PINSRW XMM0, EAX,4
  274. ADD EBX, EDX ; INC fx, sdx
  275. MOV EAX, EBX
  276. SHR EAX, 16
  277. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  278. PINSRW XMM0, EAX,5
  279. ADD EBX, EDX ; INC fx, sdx
  280. MOV EAX, EBX
  281. SHR EAX, 16
  282. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  283. PINSRW XMM0, EAX,6
  284. ADD EBX, EDX ; INC fx, sdx
  285. MOV EAX, EBX
  286. SHR EAX, 16
  287. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  288. PINSRW XMM0, EAX,7
  289. ADD EBX, EDX ; INC fx, sdx
  290. MOVDQU [EDI], XMM0 ; MOV [EDI], AX ; set the pixels
  291. ADD EDI, 16 ; inc adr
  292. SUB ECX, 8
  293. CMP ECX, 0
  294. JE outside2
  295. ; LOOP innerloop
  296. JMP innerloop
  297. singlepixel:
  298. MOV EAX, EBX
  299. SHR EAX, 16
  300. MOV AX, WORD [ESI + EAX * 2] ; read the pixel
  301. ADD EBX, EDX ; INC fx, sdx
  302. MOV [EDI], AX ; set the pixel
  303. ADD EDI, 2 ; inc adr
  304. SUB ECX, 1
  305. CMP ECX, 0
  306. JE outside2
  307. ; LOOP innerloop
  308. JMP innerloop
  309. outside2:
  310. ; free : EAX, EBX, ECX
  311. MOV EAX, [EBP+sy] ; sy := sy + sdy
  312. ADD EAX, [EBP+sdy]
  313. MOV [EBP+sy], EAX ; keep sy in EAX
  314. MOV ESI, [EBP+srcadr] ; calc new source adr
  315. SHR EAX, 16 ; integer part of sy
  316. IMUL EAX, [EBP+srcbpr] ; sy * srcbpr
  317. ADD ESI, EAX ; new source adr in ESI
  318. ; new dst address
  319. MOV ECX, [EBP+dstbpr]
  320. MOV EAX, [EBP+yadr]
  321. ADD EAX, ECX
  322. MOV EDI, EAX
  323. MOV [EBP+yadr], EAX
  324. DEC DWORD [EBP+db]
  325. JNLE outerloop
  326. endyloop:
  327. EMMS ; declare FPU registers free
  328. POP EBX
  329. POPFD
  330. END SSE2Q0BGR565BGR565;
  331. *)
  (* Bilinear scaler on raw 16-bit 5-6-5 pixel memory, copy mode.
     srcadr, dstadr : base addresses of the source and destination pixel data
     srcbpr, dstbpr : bytes per row of source and destination
     dl, dt, dr, db : destination rectangle in pixels, dl/dt inclusive, dr/db exclusive
     sx, sy         : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy       : source step per destination pixel/row, 16.16 fixed point
     sw, sh         : source size in pixels, used to clamp the sampled coordinates
     Each 16-bit pixel is unpacked as bits 0..4, 5..10 and 11..15 (named b, g, r in the
     original code), scaled to an 8-bit range, interpolated, and packed again. *)
  PROCEDURE Q1BGR565BGR565(srcadr,dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  CONST
    One = 65536; (* 1.0 in 16.16 fixed point *)
  VAR
    col, row, srcX, srcY : LONGINT;
    wLeft, wRight, wTop, wBottom : LONGINT;
    offLeft, offRight : LONGINT;
    p00, p01, p10, p11 : LONGINT;
    upper, lower, blue, green, red : LONGINT;
    dstRow, dstPix, srcTop, srcBottom : ADDRESS;
  BEGIN
    dstRow := dstadr + dl * 2 + dt * dstbpr;
    srcY := sy - 8000H;
    sx := sx - 8000H;
    row := dt;
    WHILE row < db DO
      srcTop := srcadr + Bounds(srcY DIV One, 0, sh - 1) * srcbpr;
      srcBottom := srcadr + Bounds(srcY DIV One + 1, 0, sh - 1) * srcbpr;
      (* vertical weights depend on the row only *)
      wBottom := srcY MOD One;
      wTop := One - wBottom;
      srcX := sx;
      dstPix := dstRow;
      col := dl;
      WHILE col < dr DO
        offLeft := Bounds(srcX DIV One, 0, sw - 1) * 2;
        offRight := Bounds(srcX DIV One + 1, 0, sw - 1) * 2;
        p00 := SYSTEM.GET16(srcTop + offLeft);
        p01 := SYSTEM.GET16(srcTop + offRight);
        p10 := SYSTEM.GET16(srcBottom + offLeft);
        p11 := SYSTEM.GET16(srcBottom + offRight);
        wRight := srcX MOD One;
        wLeft := One - wRight;

        (* bits 0..4 *)
        upper := ((p00 MOD 32) * 8 * wLeft + (p01 MOD 32) * 8 * wRight) DIV One;
        lower := ((p10 MOD 32) * 8 * wLeft + (p11 MOD 32) * 8 * wRight) DIV One;
        blue := (upper * wTop + lower * wBottom) DIV One;

        (* bits 5..10 *)
        upper := ((p00 DIV 32 MOD 64) * 4 * wLeft + (p01 DIV 32 MOD 64) * 4 * wRight) DIV One;
        lower := ((p10 DIV 32 MOD 64) * 4 * wLeft + (p11 DIV 32 MOD 64) * 4 * wRight) DIV One;
        green := (upper * wTop + lower * wBottom) DIV One;

        (* bits 11..15 *)
        upper := ((p00 DIV 2048 MOD 32) * 8 * wLeft + (p01 DIV 2048 MOD 32) * 8 * wRight) DIV One;
        lower := ((p10 DIV 2048 MOD 32) * 8 * wLeft + (p11 DIV 2048 MOD 32) * 8 * wRight) DIV One;
        red := (upper * wTop + lower * wBottom) DIV One;

        SYSTEM.PUT16(dstPix, ASH(blue, -3) + ASH(ASH(green, -2), 5) + ASH(ASH(red, -3), 11));
        INC(srcX, sdx);
        INC(dstPix, 2);
        INC(col)
      END;
      INC(srcY, sdy);
      INC(dstRow, dstbpr);
      INC(row)
    END
  END Q1BGR565BGR565;
  (* Placeholder for the SSE2-accelerated bilinear 5-6-5 scaler.
     This port has no SSE2 implementation; the procedure used to be an empty stub, so a caller
     selecting it got no output at all. It now forwards to the portable Q1BGR565BGR565, which
     takes exactly the same parameters, so the result is correct regardless of which variant
     is chosen. *)
  PROCEDURE SSE2Q1BGR565BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  BEGIN
    Q1BGR565BGR565(srcadr, dstadr, srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh)
  END SSE2Q1BGR565BGR565;
  (* Bilinear scaler from raw 32-bit pixels (4 bytes per pixel; bytes 0..3 are named b, g, r, a
     in the original code) onto raw 16-bit 5-6-5 pixels, blending source over destination.
     srcadr, dstadr : base addresses of the source and destination pixel data
     srcbpr, dstbpr : bytes per row of source and destination
     dl, dt, dr, db : destination rectangle in pixels, dl/dt inclusive, dr/db exclusive
     sx, sy         : source position of the first destination pixel, 16.16 fixed point
     sdx, sdy       : source step per destination pixel/row, 16.16 fixed point
     sw, sh         : source size in pixels, used to clamp the sampled coordinates
     Behaviour per destination pixel:
     - interpolated alpha = 0   : destination is left untouched
     - interpolated alpha = 255 : destination is overwritten
     - otherwise                : result = src + (256 - alpha) * dst / 256; the source colour is
       added unscaled, i.e. it is treated as already multiplied by alpha
     The blended channels are saturated at 255. The former limit of 256 let a value of 256
     through, which does not fit into the 5/6-bit fields and spilled into the neighbouring
     channel of the packed pixel. *)
  PROCEDURE Q1BGRA8888BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR
    x, y, xfleft, xfright, yftop, yfbottom : LONGINT;
    yadr, yadd0, yadd1 : ADDRESS;
    col, col0, col1, col2, col3 : LONGINT;
    b0, g0, r0, a0, b1, g1, r1, a1, cb, cg, cr, ca, dstb, dstg, dstr : LONGINT;
    fx, fy, xadd0, xadd1 : LONGINT;
  BEGIN
    yadr := dstadr + dl * 2 + dt * dstbpr;
    fy := sy - 8000H; sx := sx - 8000H;
    FOR y := dt TO db - 1 DO
      fx := sx;
      dstadr := yadr; (* dstadr is reused as the write cursor of the current row *)
      yadd0 := srcadr + Bounds(fy DIV 65536, 0, sh - 1) * srcbpr;
      yadd1 := srcadr + Bounds(fy DIV 65536 + 1, 0, sh - 1) * srcbpr;
      (* vertical weights depend on the row only *)
      yfbottom := fy MOD 65536;
      yftop := 65536 - yfbottom;
      FOR x := dl TO dr - 1 DO
        xadd0 := Bounds(fx DIV 65536, 0, sw - 1) * 4;
        xadd1 := Bounds(fx DIV 65536 + 1, 0, sw - 1) * 4;
        col0 := SYSTEM.GET32(yadd0 + xadd0);
        col1 := SYSTEM.GET32(yadd0 + xadd1);
        col2 := SYSTEM.GET32(yadd1 + xadd0);
        col3 := SYSTEM.GET32(yadd1 + xadd1);
        xfright := fx MOD 65536;
        xfleft := 65536 - xfright;

        (* alpha first: a fully transparent result needs no further work *)
        a0 := ((col0 DIV 1000000H MOD 100H) * xfleft + (col1 DIV 1000000H MOD 100H) * xfright) DIV 65536;
        a1 := ((col2 DIV 1000000H MOD 100H) * xfleft + (col3 DIV 1000000H MOD 100H) * xfright) DIV 65536;
        ca := (a0 * yftop + a1 * yfbottom) DIV 65536;
        IF ca # 0 THEN
          b0 := ((col0 MOD 100H) * xfleft + (col1 MOD 100H) * xfright) DIV 65536;
          g0 := ((col0 DIV 100H MOD 100H) * xfleft + (col1 DIV 100H MOD 100H) * xfright) DIV 65536;
          r0 := ((col0 DIV 10000H MOD 100H) * xfleft + (col1 DIV 10000H MOD 100H) * xfright) DIV 65536;
          b1 := ((col2 MOD 100H) * xfleft + (col3 MOD 100H) * xfright) DIV 65536;
          g1 := ((col2 DIV 100H MOD 100H) * xfleft + (col3 DIV 100H MOD 100H) * xfright) DIV 65536;
          r1 := ((col2 DIV 10000H MOD 100H) * xfleft + (col3 DIV 10000H MOD 100H) * xfright) DIV 65536;
          cb := (b0 * yftop + b1 * yfbottom) DIV 65536;
          cg := (g0 * yftop + g1 * yfbottom) DIV 65536;
          cr := (r0 * yftop + r1 * yfbottom) DIV 65536;
          IF ca # 255 THEN
            (* the destination colour is only needed when actually blending *)
            col := SYSTEM.GET16(dstadr);
            dstb := (col MOD 32) * 8; dstg := (col DIV 32 MOD 64) * 4; dstr := (col DIV 2048 MOD 32) * 8;
            (* saturate at 255 so that the packed 5/6/5 fields cannot overflow *)
            cb := (cb * 256 + (256 - ca) * dstb) DIV 256; IF cb > 255 THEN cb := 255 END;
            cg := (cg * 256 + (256 - ca) * dstg) DIV 256; IF cg > 255 THEN cg := 255 END;
            cr := (cr * 256 + (256 - ca) * dstr) DIV 256; IF cr > 255 THEN cr := 255 END
          END;
          SYSTEM.PUT16(dstadr, ASH(cb, -3) + ASH(ASH(cg, -2), 5) + ASH(ASH(cr, -3), 11))
        END;
        INC(fx, sdx);
        INC(dstadr, 2)
      END;
      INC(fy, sdy);
      INC(yadr, dstbpr)
    END
  END Q1BGRA8888BGR565;
  430. (*
  431. PROCEDURE SSE2Q1BGRA8888BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh:LONGINT);
  432. VAR x, y, z,xfleft, xfright, yftop, yfbottom : LONGINT; yadr, adr, col, col0, col1, col2, col3 : LONGINT;
  433. b0, g0, r0, a0, a01,b1, g1, r1, a1, cb, cg, cr,cb2, cg2, cr2, ca, ca2,dstb, dstg, dstr,res : LONGINT;
  434. fx, fy, yadd1, yadd2, xadd1, xadd2: LONGINT;
  435. CODE {SYSTEM.i386, SYSTEM.MMX, SYSTEM.SSE, SYSTEM.SSE2}
  436. PUSHFD
  437. PUSH EBX
  438. ; CLI
  439. PXOR MMX3,MMX3
  440. PXOR MMX4,MMX4
  441. PXOR MMX5, MMX5
  442. PXOR MMX6, MMX6
  443. PXOR XMM1, XMM1
  444. PXOR XMM3, XMM3
  445. PXOR XMM4, XMM4
  446. PXOR XMM6, XMM6
  447. PXOR XMM7, XMM7
  448. MOV EDX, [EBP+dstadr]
  449. MOV EBX, [EBP+dl]
  450. SHL EBX, 1
  451. ADD EDX, EBX
  452. MOV EBX, [EBP+dt]
  453. IMUL EBX, [EBP+dstbpr]
  454. ADD EDX, EBX
  455. MOV [EBP+yadr], EDX
  456. MOV EDX, [EBP+sy]
  457. SUB EDX, 8000H ;edx = sy-8000H
  458. MOV [EBP+fy], EDX
  459. ; sx := sx - 8000H;
  460. MOV EDX, [EBP+sx]
  461. SUB EDX, 8000H ;sx = sx-8000H
  462. MOV [EBP+sx] , EDX
  463. MOV ECX, [EBP+db]
  464. SUB ECX, [EBP+dt] ; counter in y
  465. JLE endyloop ;exit
  466. MOV [EBP+y], ECX
  467. outerloop:
  468. MOV EDX, [EBP+yadr]
  469. MOV EDI, EDX ; adr in EDI
  470. MOV [EBP+adr], EDX
  471. MOV EDX, [EBP+sx] ; keep EDX
  472. MOV [EBP+fx], EDX
  473. MOV EAX, [EBP+fy]
  474. MOVD XMM3, EAX ; prepare for top, bottom
  475. SAR EAX, 16
  476. CMP EAX, 0
  477. JE zero
  478. JL negativ
  479. MOV EBX, [EBP+sh]
  480. SUB EBX, 1
  481. CMP EAX, EBX
  482. JGE bigger
  483. ok:
  484. MOV EBX, EAX
  485. ADD EBX, 1
  486. JMP different
  487. zero:
  488. MOV EAX, 0
  489. MOV EBX, 1
  490. JMP different
  491. negativ:
  492. MOV EAX, 0
  493. MOV EBX, 0
  494. JMP samepixel
  495. bigger:
  496. MOV EAX, EBX
  497. JMP samepixel
  498. different:
  499. MOV ECX, [EBP+srcbpr]
  500. MUL EAX, ECX
  501. MOV EBX, EAX
  502. ADD EBX, ECX
  503. MOV ECX, [EBP+srcadr]
  504. ADD EAX, ECX
  505. ADD EBX, ECX
  506. JMP endyadd
  507. samepixel:
  508. MOV ECX, [EBP+srcbpr]
  509. MUL EAX, ECX
  510. MOV ECX, [EBP+srcadr]
  511. ADD EAX, ECX
  512. MOV EBX, EAX
  513. endyadd:
  514. MOV [EBP+yadd1], EAX
  515. MOV [EBP+yadd2], EBX
  516. ; yfbottom := (fy MOD 65536);
  517. ; yftop := (65536 - fy MOD 65536);
  518. MOVD ECX, XMM3
  519. AND ECX, 0FFFFH
  520. MOV [EBP+yfbottom],ECX
  521. PINSRW XMM3, ECX, 1
  522. NEG ECX
  523. ADD ECX, 65535
  524. MOV [EBP+yftop],ECX
  525. PINSRW XMM3, ECX, 0
  526. PSRLW XMM3, 1
  527. MOV ECX, [EBP+dr]
  528. SUB ECX, [EBP+dl] ; counter in x
  529. JLE endyloop ;exit
  530. MOV [EBP+x], ECX
  531. innerloop:
  532. MOV ECX, [EBP+x]
  533. ; if x < 8 then do one pixel at the time
  534. CMP ECX, 8
  535. JL singlepixel
  536. ; else
  537. ; take 8 at the time
  538. MOV EBX, EDI
  539. AND EBX, 0FH
  540. CMP EBX, 0
  541. JNE singlepixel
  542. alleightpixels:
  543. MOV EAX, 0000000FFH
  544. MOVD MMX3, EAX
  545. ; dest red -> MMX4
  546. MOV EAX, 0F800F800H
  547. MOVD MMX4, EAX
  548. ; dest green -> MMX5
  549. MOV EAX, 07E007E0H
  550. MOVD MMX5, EAX
  551. ; dest blue -> MMX6 ; moved as MMX6 is used in singlepixel
  552. ; MOV EAX, 001F001FH
  553. ; MOVD MMX6, EAX
  554. MOV ECX, [EBP+yfbottom]
  555. PINSRW XMM3, ECX, 1
  556. MOV ECX, [EBP+yftop]
  557. PINSRW XMM3, ECX, 0
  558. PSRLW XMM3,1
  559. PXOR XMM5, XMM5
  560. PXOR XMM2,XMM2
  561. MOV DWORD [EBP+z], 4
  562. loop03:
  563. ; shift everything left
  564. MOV ECX, [EBP+fx]
  565. PSLLDQ XMM5, 4
  566. PINSRW XMM7, ECX,0 ; prepare for l,r
  567. SAR ECX, 16
  568. CMP ECX, 0
  569. JE zerox03
  570. JL negativx03
  571. MOV EDX, [EBP+sw]
  572. SUB EDX, 1
  573. CMP ECX, EDX
  574. JGE biggerx03
  575. okx03:
  576. MOV EDX, ECX
  577. ADD EDX, 1
  578. JMP endbound203
  579. zerox03:
  580. MOV ECX, 0
  581. MOV EDX, 1
  582. JMP endbound203
  583. negativx03:
  584. MOV ECX, 0
  585. MOV EDX, 0
  586. JMP endbound203
  587. biggerx03:
  588. MOV ECX, EDX
  589. endbound203:
  590. SHL ECX, 2 ; xadd1
  591. SHL EDX, 2 ; xadd2
  592. MOV EAX, [EBP+yadd1]
  593. MOV EBX, [EBP+yadd2]
  594. MOVD XMM2, [EBX+EDX]
  595. PSLLDQ XMM2,4
  596. MOVD XMM1, [EBX+ECX]
  597. POR XMM2,XMM1
  598. PSLLDQ XMM2,4
  599. MOVD XMM1, [EAX+EDX]
  600. POR XMM2,XMM1
  601. PSLLDQ XMM2,4
  602. MOVD XMM1, [EAX+ECX]
  603. POR XMM2,XMM1
  604. PEXTRW EAX,XMM7,0
  605. AND EAX, 0FFFFH
  606. PINSRW XMM7, EAX,1
  607. PINSRW XMM7, EAX, 3 ;xfright
  608. NEG AX
  609. ADD EAX, 65535
  610. PINSRW XMM7, EAX, 0
  611. PINSRW XMM7, EAX, 2 ;xfleft
  612. PSRLW XMM7, 1
  613. MOVDQU XMM0, XMM2
  614. PSRLD XMM0, 24
  615. PXOR XMM1, XMM1
  616. MOV ECX, 0FFH ; ECX locked for ca
  617. PINSRW XMM1, ECX,0
  618. PINSRW XMM1, ECX,2
  619. PINSRW XMM1, ECX,4
  620. PINSRW XMM1, ECX,6
  621. PCMPEQW XMM1, XMM0
  622. PMOVMSKB EAX, XMM1
  623. CMP EAX, 0FFFFH
  624. JE endofalpha03
  625. PSHUFLW XMM0, XMM0,58H
  626. PSHUFHW XMM0, XMM0,58H
  627. PSHUFD XMM0,XMM0,58H
  628. PMADDWD XMM0,XMM7
  629. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  630. PSHUFLW XMM0, XMM0, 58H
  631. PMADDWD XMM0, XMM3
  632. PSRLD XMM0,15 ; XMM3 already shifted by 1
  633. PEXTRW ECX, XMM0, 0
  634. endofalpha03:
  635. ; alpha done
  636. CMP ECX,0
  637. JE alphazero03
  638. SHL ECX, 24
  639. ; calculate red
  640. MOVDQU XMM0, XMM2
  641. PSLLD XMM0, 8
  642. PSRLD XMM0, 24
  643. PSHUFLW XMM0, XMM0,58H
  644. PSHUFHW XMM0, XMM0,58H
  645. PSHUFD XMM0,XMM0,58H
  646. PMADDWD XMM0,XMM7
  647. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  648. PSHUFLW XMM0, XMM0, 58H
  649. PMADDWD XMM0, XMM3
  650. PSRLD XMM0,15 ; XMM3 already shifted by 1
  651. PEXTRW EBX, XMM0,0
  652. SHL EBX,16
  653. OR ECX,EBX
  654. ; red done
  655. ; calculate green
  656. MOVDQU XMM0, XMM2
  657. PSLLD XMM0, 16
  658. PSRLD XMM0, 24
  659. PSHUFLW XMM0, XMM0,58H
  660. PSHUFHW XMM0, XMM0,58H
  661. PSHUFD XMM0,XMM0,58H
  662. PMADDWD XMM0,XMM7
  663. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  664. PSHUFLW XMM0, XMM0, 58H
  665. PMADDWD XMM0, XMM3
  666. PSRLD XMM0,15 ; XMM3 already shifted by 1
  667. PEXTRW EBX, XMM0,0
  668. SHL EBX,8
  669. OR ECX,EBX
  670. ; green done
  671. ; calculate blue
  672. MOVDQU XMM0, XMM2
  673. PSLLD XMM0,24
  674. PSRLD XMM0, 24
  675. PSHUFLW XMM0, XMM0,58H
  676. PSHUFHW XMM0, XMM0,58H
  677. PSHUFD XMM0, XMM0,58H
  678. PMADDWD XMM0,XMM7
  679. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  680. PSHUFLW XMM0, XMM0, 58H
  681. PMADDWD XMM0, XMM3
  682. PSRLD XMM0,15 ; XMM3 already shifted by 1
  683. PEXTRW EBX, XMM0,0
  684. OR ECX,EBX
  685. ; blue done
  686. ; put color in correct position
  687. MOVD XMM4,ECX
  688. POR XMM5, XMM4 ; results in XMM5
  689. ; prepared source
  690. alphazero03: ; set mask is done later
  691. MOV ECX,[EBP+fx]
  692. ADD ECX, [EBP+sdx]
  693. MOV [EBP+fx],ECX
  694. SUB DWORD [EBP+z], 1
  695. JNZ loop03
  696. endofloop03:
  697. MOV DWORD [EBP+z], 4
  698. loop47:
  699. ; shift everything left
  700. PSLLDQ XMM6, 4
  701. PINSRW XMM7, ECX,0 ; prepare for l,r
  702. SAR ECX, 16
  703. CMP ECX, 0
  704. JE zerox47
  705. JL negativx47
  706. MOV EDX, [EBP+sw]
  707. SUB EDX, 1
  708. CMP ECX, EDX
  709. JGE biggerx47
  710. okx47:
  711. MOV EDX, ECX
  712. ADD EDX, 1
  713. JMP endbound247
  714. zerox47:
  715. MOV ECX, 0
  716. MOV EDX, 1
  717. JMP endbound247
  718. negativx47:
  719. MOV ECX, 0
  720. MOV EDX, 0
  721. JMP endbound247
  722. biggerx47:
  723. MOV ECX, EDX
  724. endbound247:
  725. SHL ECX, 2 ; xadd1
  726. SHL EDX, 2 ; xadd2
  727. MOV EAX, [EBP+yadd1]
  728. MOV EBX, [EBP+yadd2]
  729. MOVD XMM2, [EBX+EDX]
  730. PSLLDQ XMM2,4
  731. MOVD XMM1, [EBX+ECX]
  732. POR XMM2,XMM1
  733. PSLLDQ XMM2,4
  734. MOVD XMM1, [EAX+EDX]
  735. POR XMM2,XMM1
  736. PSLLDQ XMM2,4
  737. MOVD XMM1, [EAX+ECX]
  738. POR XMM2,XMM1
  739. PEXTRW EAX,XMM7,0
  740. AND EAX, 0FFFFH
  741. PINSRW XMM7, EAX,1
  742. PINSRW XMM7, EAX, 3 ;xfright
  743. NEG EAX
  744. ADD EAX, 65535
  745. PINSRW XMM7, EAX, 0
  746. PINSRW XMM7, EAX, 2 ;xfleft
  747. PSRLW XMM7, 1
  748. MOVDQU XMM0, XMM2
  749. PSRLD XMM0, 24
  750. PXOR XMM1, XMM1
  751. MOV ECX, 0FFH ; ECX locked for ca
  752. PINSRW XMM1, ECX,0
  753. PINSRW XMM1, ECX,2
  754. PINSRW XMM1, ECX,4
  755. PINSRW XMM1, ECX,6
  756. PCMPEQW XMM1, XMM0
  757. PMOVMSKB EAX, XMM1
  758. CMP EAX, 0FFFFH
  759. JE endofalpha47
  760. PSHUFLW XMM0, XMM0,58H
  761. PSHUFHW XMM0, XMM0,58H
  762. PSHUFD XMM0,XMM0,58H
  763. PMADDWD XMM0,XMM7
  764. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  765. PSHUFLW XMM0, XMM0, 58H
  766. PMADDWD XMM0, XMM3
  767. PSRLD XMM0,15 ; XMM3 already shifted by 1
  768. PEXTRW ECX, XMM0, 0
  769. endofalpha47:
  770. ; alpha done
  771. CMP ECX,0
  772. JE alphazero47
  773. SHL ECX, 24
  774. ; calculate red
  775. MOVDQU XMM0, XMM2
  776. PSLLD XMM0, 8
  777. PSRLD XMM0, 24
  778. PSHUFLW XMM0, XMM0,58H
  779. PSHUFHW XMM0, XMM0,58H
  780. PSHUFD XMM0,XMM0,58H
  781. PMADDWD XMM0,XMM7
  782. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  783. PSHUFLW XMM0, XMM0, 58H
  784. PMADDWD XMM0, XMM3
  785. PSRLD XMM0,15 ; XMM3 already shifted by 1
  786. PEXTRW EBX, XMM0,0
  787. SHL EBX,16
  788. OR ECX,EBX
  789. ; red done
  790. ; calculate green
  791. MOVDQU XMM0, XMM2
  792. PSLLD XMM0, 16
  793. PSRLD XMM0, 24
  794. PSHUFLW XMM0, XMM0,58H
  795. PSHUFHW XMM0, XMM0,58H
  796. PSHUFD XMM0,XMM0,58H
  797. PMADDWD XMM0,XMM7
  798. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  799. PSHUFLW XMM0, XMM0, 58H
  800. PMADDWD XMM0, XMM3
  801. PSRLD XMM0,15 ; XMM3 already shifted by 1
  802. PEXTRW EBX, XMM0,0
  803. SHL EBX,8
  804. OR ECX,EBX
  805. ; green done
  806. ; calculate blue
  807. MOVDQU XMM0, XMM2
  808. PSLLD XMM0,24
  809. PSRLD XMM0, 24
  810. PSHUFLW XMM0, XMM0,58H
  811. PSHUFHW XMM0, XMM0,58H
  812. PSHUFD XMM0,XMM0,58H
  813. PMADDWD XMM0,XMM7
  814. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  815. PSHUFLW XMM0, XMM0, 58H
  816. PMADDWD XMM0, XMM3
  817. PSRLD XMM0,15 ; XMM3 already shifted by 1
  818. PEXTRW EBX, XMM0,0
  819. OR ECX,EBX
  820. ; blue done
  821. ; put color in correct position
  822. MOVD XMM4,ECX
  823. POR XMM6, XMM4 ; results in XMM6
  824. ; prepared source
  825. alphazero47: ; set mask is done later
  826. MOV ECX,[EBP+fx]
  827. ADD ECX, [EBP+sdx]
  828. MOV [EBP+fx],ECX
  829. SUB DWORD [EBP+z], 1
  830. JNZ loop47
  831. endofloop47:
  832. ; all sources calculated, but in reversed order
  833. PSHUFD XMM2,XMM5, 1AH
  834. PSHUFD XMM1,XMM6, 1AH
  835. ; now sources ready for further calculation with destination
  836. ; get alphas
  837. MOVQ2DQ XMM4, MMX3
  838. MOVDQU XMM6, XMM2
  839. PSHUFD XMM4, XMM4, 0
  840. MOVDQU XMM5, XMM1
  841. PSLLD XMM4, 24
  842. PAND XMM6, XMM4 ; alpha 5-8 in XMM6
  843. PAND XMM5, XMM4 ; alpha 1-4 in XMM5
  844. PSRLD XMM5, 24
  845. PSHUFHW XMM5, XMM5, 85H
  846. PSRLD XMM6, 24
  847. ; put both alphas into 1 register
  848. PSHUFHW XMM6, XMM6, 85H
  849. PSHUFLW XMM5, XMM5, 85H
  850. PSHUFLW XMM6, XMM6, 58H
  851. PSHUFD XMM5, XMM5, 0D0H ; 0102030400000000
  852. PSHUFD XMM6, XMM6, 5CH ; 0000000005060708
  853. PXOR XMM0,XMM0
  854. POR XMM5, XMM6 ; XMM5 = alphas 0102030405060708
  855. PCMPEQD XMM0, XMM5
  856. PMOVMSKB EAX, XMM0
  857. CMP EAX, 0FFFFH ; all alphas = zero; TEST not possible, because only 8 bits compared
  858. JE endloop
  859. ; mask out alpha = zero
  860. ; fd := 255-ORD(src[a]); fd = XMM4
  861. ; MOV XMM4, 00FF00FF00FF00FF00FF00FF00FF00FFH
  862. PXOR XMM4, XMM4
  863. MOV EAX, 00FFH
  864. PINSRW XMM4, EAX ,0
  865. PSHUFLW XMM4, XMM4, 0
  866. PSHUFD XMM4, XMM4, 0
  867. PSUBW XMM4, XMM5
  868. MOV EAX,1H
  869. PINSRW XMM3, EAX ,0
  870. PSHUFLW XMM3, XMM3, 0
  871. PSHUFD XMM3, XMM3, 0
  872. PADDUSW XMM4, XMM3
  873. ; new red
  874. ; calculate red 2
  875. ; get source
  876. ; sred14 = src14 && (srcMask <<16)
  877. ; srcMask << 16
  878. MOVQ2DQ XMM3, MMX3
  879. PSHUFD XMM3, XMM3, 0
  880. MOVDQU XMM5, XMM1
  881. MOVDQU XMM6, XMM2
  882. PSLLD XMM3, 16
  883. ; sred14 = src14 && (srcMask << 24)
  884. ; src14 must be copied because it mustn't be changed
  885. PAND XMM5, XMM3 ; sred14
  886. PSRLD XMM5, 16
  887. ; sred14s = shuffled sred14
  888. PSHUFHW XMM5, XMM5,85H
  889. PAND XMM6, XMM3 ; sred58
  890. PSRLD XMM6, 16
  891. PSHUFLW XMM5, XMM5,85H
  892. PSHUFHW XMM6, XMM6,85H
  893. PSHUFD XMM5, XMM5,0D0H ; sred14s
  894. PSHUFLW XMM6, XMM6,58H
  895. PSHUFD XMM6, XMM6,5CH ; sred58s
  896. POR XMM5, XMM6 ; sred18
  897. ; sred18255 = sred18 * 256- sred18
  898. MOVDQU XMM7, XMM5
  899. PSLLW XMM5, 8
  900. PSUBUSW XMM5, XMM7 ; sred18255
  901. ; src is now ready
  902. ; destination
  903. ; dest18 must be copied because it mustn't be changed
  904. ; Load data into memory
  905. MOV EDI, [EBP+adr]
  906. MOVDQU XMM3, [EDI] ;dest 1-8
  907. MOVQ2DQ XMM6, MMX4
  908. PSHUFD XMM6, XMM6, 0
  909. MOVDQU XMM7, XMM3
  910. PAND XMM7, XMM6 ; dred18
  911. PSRLW XMM7, 8
  912. ; dred18alpha = dred18 * negalpha
  913. PMULLW XMM7, XMM4 ; dred18alpha
  914. ; dest is prepared
  915. ; combining dest and src
  916. ; dred18big = sred18255 + dred18alpha
  917. PADDUSW XMM7, XMM5 ; dred18big
  918. ; dred18f = dred18big && destMaskred128 because >> 11 and << 11 is && mask
  919. PAND XMM7, XMM6 ; dred18f
  920. ; dest18nr0 = dest18 && (~destMaskred128)
  921. PANDN XMM6, XMM3 ; dest18nr0
  922. ; dest18nrf = dest18nr0 || dred18f
  923. POR XMM6, XMM7
  924. MOVDQU XMM3, XMM6
  925. ; red is calculated
  926. ; calculate green:
  927. ; get source
  928. ; sgreen14 = src14 && (srcMask <<8)
  929. ; srcMask << 8
  930. MOVQ2DQ XMM7, MMX3
  931. PSHUFD XMM7, XMM7, 0
  932. MOVDQU XMM5, XMM1
  933. PSLLD XMM7, 8
  934. PAND XMM5, XMM7 ; sgreen14
  935. PSRLD XMM5, 8
  936. ; sgreen14s = shuffled sgreen14
  937. PSHUFHW XMM5, XMM5,85H
  938. MOVDQU XMM6, XMM2
  939. PSHUFLW XMM5, XMM5,85H
  940. PAND XMM6, XMM7 ; sgreen58
  941. PSRLD XMM6, 8
  942. PSHUFD XMM5, XMM5,0D0H ; sgreen14s
  943. ; sgreen58 = src58&& (srcMask << 8)
  944. ; src58 must be copied because it mustn't be changed
  945. ; sgreen58s = shuffled sgreen58
  946. PSHUFHW XMM6, XMM6,85H
  947. PSHUFLW XMM6, XMM6,58H
  948. PSHUFD XMM6, XMM6,5CH ; sgreen58s
  949. ; sgreen18 = sgreen14s || sgreen58s
  950. POR XMM5, XMM6 ; sgreen18
  951. ; sgreen18255 = sgreen18 * 256- sgreen18
  952. MOVDQU XMM7, XMM5
  953. MOVQ2DQ XMM6, MMX5
  954. PSLLW XMM5, 8
  955. PSUBUSW XMM5, XMM7 ; sgreen18255
  956. PSHUFD XMM6, XMM6, 0
  957. MOVDQU XMM7, XMM3
  958. PAND XMM7, XMM6 ; dgreen18
  959. PSRLW XMM7,3
  960. ; dgreen18alpha = dgreen18 * negalpha
  961. PMULLW XMM7, XMM4 ; dgreen18alpha
  962. ; dest is prepared
  963. ; combining dest and src
  964. ; dgreen18big = sgreen18255 + dgreen18alpha
  965. PADDUSW XMM7, XMM5 ; dgreen18big
  966. PANDN XMM6, XMM3 ; dest18ng0
  967. ; dgreen18f = (dgreen18big >> 11) <<5
  968. PSRLW XMM7, 10 ; dgreen18f
  969. PSLLW XMM7, 5
  970. ; dest18ng0 = dest18 && (~destMaskgreen128)
  971. ; dest18ngf = dest18ng0 || dred18f
  972. POR XMM6, XMM7
  973. MOVDQU XMM3, XMM6
  974. ; green is calculated
  975. ; calculate blue
  976. MOV EAX, 001F001FH
  977. MOVD MMX6, EAX
  978. ; get source
  979. ; sblue14 = src14 && (srcMask)
  980. ; srcMask
  981. MOVQ2DQ XMM7, MMX3
  982. MOVDQU XMM5, XMM1
  983. PSHUFD XMM7, XMM7, 0
  984. MOVDQU XMM6, XMM2
  985. ; sblue14 = src14 && (srcMask)
  986. ; src14 must be copied because it mustn't be changed
  987. PAND XMM5, XMM7 ; sblue14
  988. ; sblue14s = shuffled sblue14
  989. PSHUFHW XMM5, XMM5,85H
  990. PAND XMM6, XMM7 ; sblue58
  991. PSHUFHW XMM6, XMM6,85H
  992. PSHUFLW XMM5, XMM5,85H
  993. PSHUFLW XMM6, XMM6,58H
  994. PSHUFD XMM5, XMM5,0D0H ; sblue14s
  995. PSHUFD XMM6, XMM6,5CH ; sblue58s
  996. POR XMM5, XMM6 ; sblue18
  997. ; sblue18255 = sblue18 * 256- sblue18
  998. MOVDQU XMM7, XMM5
  999. PSLLW XMM5, 8
  1000. PSUBUSW XMM5, XMM7 ; sblue18255
  1001. MOVQ2DQ XMM6, MMX6
  1002. PSHUFD XMM6, XMM6, 0
  1003. MOVDQU XMM7, XMM3
  1004. PAND XMM7, XMM6 ; dblue18
  1005. PSLLW XMM7, 3
  1006. PMULLW XMM7, XMM4 ; dblue18alpha
  1007. ; dest is prepared
  1008. ; combining dest and src
  1009. ; dblue18big = sblue18255 + dblue18alpha
  1010. PADDUSW XMM7, XMM5 ; dblue18big
  1011. ; dblue18f = (dblue18big >> 11)
  1012. PANDN XMM6, XMM3 ; dest18nr0
  1013. PSRLW XMM7, 11 ; dblue18f
  1014. ; dest18nr0 = dest18 && (~destMaskblue128)
  1015. ; dest18nbf = dest18nb0 || dblue18f
  1016. POR XMM6, XMM7
  1017. MOVDQU XMM3, XMM6
  1018. ; blue is calculated
  1019. ; now dest is calculated, store it
  1020. ; get 0 stuff
  1021. MOVDQU XMM5, [EDI]
  1022. PAND XMM5,XMM0
  1023. PANDN XMM0, XMM3
  1024. POR XMM0, XMM5
  1025. MOVDQU [EDI],XMM0
  1026. endloop:
  1027. ;fx already inc ; by sdx
  1028. ADD EDI, 16
  1029. MOV [EBP+adr],EDI
  1030. SUB DWORD [EBP+x], 8
  1031. JNZ innerloop ; x>=0
  1032. JZ endxloop
  1033. singlepixel: ; original code from MMXBGRA8888Over565, adjusted to fit this procedure
  1034. MOV EDI, [EBP+adr]
  1035. MOV EAX, 0000000FFH
  1036. MOVD MMX3, EAX
  1037. ; dest red -> MMX4
  1038. MOV EAX, 0F800F800H
  1039. MOVD MMX4, EAX
  1040. ; dest green -> MMX5
  1041. MOV EAX, 07E007E0H
  1042. MOVD MMX5, EAX
  1043. ; dest blue -> MMX6 ; moved as MMX6 is used in singlepixel
  1044. ; MOV EAX, 001F001FH
  1045. ; MOVD MMX6, EAX
  1046. MOV ECX, [EBP+yfbottom]
  1047. PINSRW XMM3, ECX, 1
  1048. MOV ECX, [EBP+yftop]
  1049. PINSRW XMM3, ECX, 0
  1050. PSRLW XMM3,1
  1051. MOV ECX, [EBP+fx]
  1052. PINSRW XMM7, ECX,0 ; prepare for l,r
  1053. SAR ECX, 16
  1054. CMP ECX, 0
  1055. JE zerox
  1056. JL negativx
  1057. MOV EDX, [EBP+sw]
  1058. SUB EDX, 1
  1059. CMP ECX, EDX
  1060. JGE biggerx
  1061. okx:
  1062. MOV EDX, ECX
  1063. ADD EDX, 1
  1064. JMP endbound2
  1065. zerox:
  1066. MOV ECX, 0
  1067. MOV EDX, 1
  1068. JMP endbound2
  1069. negativx:
  1070. MOV ECX, 0
  1071. MOV EDX, 0
  1072. JMP endbound2
  1073. biggerx:
  1074. MOV ECX, EDX
  1075. endbound2:
  1076. SHL ECX, 2 ; xadd1
  1077. SHL EDX, 2 ; xadd2
  1078. MOV EAX, [EBP+yadd1]
  1079. MOV EBX, [EBP+yadd2]
  1080. MOVD XMM2, [EBX+EDX]
  1081. PSLLDQ XMM2,4
  1082. MOVD XMM1, [EBX+ECX]
  1083. POR XMM2,XMM1
  1084. PSLLDQ XMM2,4
  1085. MOVD XMM1, [EAX+EDX]
  1086. POR XMM2,XMM1
  1087. PSLLDQ XMM2,4
  1088. MOVD XMM1, [EAX+ECX]
  1089. POR XMM2,XMM1
  1090. PEXTRW EAX,XMM7,0
  1091. AND EAX, 0FFFFH
  1092. PINSRW XMM7, EAX,1
  1093. PINSRW XMM7, EAX, 3 ;xfright
  1094. NEG EAX
  1095. ADD EAX, 65535
  1096. PINSRW XMM7, EAX, 0
  1097. PINSRW XMM7, EAX, 2 ;xfleft
  1098. PSRLW XMM7, 1
  1099. MOVDQU XMM0, XMM2
  1100. PSRLD XMM0, 24
  1101. PXOR XMM1, XMM1
  1102. MOV ECX, 0FFH ; ECX locked for ca
  1103. PINSRW XMM1, ECX,0
  1104. PINSRW XMM1, ECX,2
  1105. PINSRW XMM1, ECX,4
  1106. PINSRW XMM1, ECX,6
  1107. PCMPEQW XMM1, XMM0
  1108. PMOVMSKB EAX, XMM1
  1109. CMP EAX, 0FFFFH
  1110. JE endofalpha
  1111. PSHUFLW XMM0, XMM0,58H
  1112. PSHUFHW XMM0, XMM0,58H
  1113. PSHUFD XMM0,XMM0,58H
  1114. PMADDWD XMM0,XMM7
  1115. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  1116. PSHUFLW XMM0, XMM0, 58H
  1117. PMADDWD XMM0, XMM3
  1118. PSRLD XMM0,15 ; XMM3 already shifted by 1
  1119. PEXTRW ECX, XMM0, 0
  1120. endofalpha:
  1121. ; alpha done
  1122. CMP ECX,0
  1123. JE alphazero
  1124. ; calculate red
  1125. MOVDQU XMM0, XMM2
  1126. PSLLD XMM0, 8
  1127. PSRLD XMM0, 24
  1128. PSHUFLW XMM0, XMM0,58H
  1129. PSHUFHW XMM0, XMM0,58H
  1130. PSHUFD XMM0,XMM0,58H
  1131. PMADDWD XMM0,XMM7
  1132. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  1133. PSHUFLW XMM0, XMM0, 58H
  1134. PMADDWD XMM0, XMM3
  1135. PSRLD XMM0,15 ; XMM3 already shifted by 1
  1136. PEXTRW EBX, XMM0,0
  1137. PINSRW XMM4, EBX, 4
  1138. ; red done
  1139. ; calculate green
  1140. MOVDQU XMM0, XMM2
  1141. PSLLD XMM0, 16
  1142. PSRLD XMM0, 24
  1143. PSHUFLW XMM0, XMM0,58H
  1144. PSHUFHW XMM0, XMM0,58H
  1145. PSHUFD XMM0,XMM0,58H
  1146. PMADDWD XMM0,XMM7
  1147. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  1148. PSHUFLW XMM0, XMM0, 58H
  1149. PMADDWD XMM0, XMM3
  1150. PSRLD XMM0,15 ; XMM3 already shifted by 1
  1151. PEXTRW EBX, XMM0,0
  1152. PINSRW XMM4, EBX, 2
  1153. ; green done
  1154. ; calculate blue
  1155. MOVDQU XMM0, XMM2
  1156. PSLLD XMM0,24
  1157. PSRLD XMM0, 24
  1158. PSHUFLW XMM0, XMM0,58H
  1159. PSHUFHW XMM0, XMM0,58H
  1160. PSHUFD XMM0,XMM0,58H
  1161. PMADDWD XMM0,XMM7
  1162. PSRLD XMM0, 15 ; XMM7 already shifted by 1
  1163. PSHUFLW XMM0, XMM0, 58H
  1164. PMADDWD XMM0, XMM3
  1165. PSRLD XMM0,15 ; XMM3 already shifted by 1
  1166. PEXTRW EBX, XMM0,0
  1167. PINSRW XMM4, EBX, 0
  1168. ; blue done
  1169. ; prepared source
  1170. CMP ECX, 0FFH ; ECX released
  1171. JE alpha255
  1172. NEG ECX
  1173. ADD ECX, 0FFH
  1174. PINSRW XMM1, ECX, 1 ; 255-ca
  1175. PINSRW XMM1, ECX, 3 ; 255-ca
  1176. PINSRW XMM1, ECX, 5 ; 255-ca
  1177. MOV EAX, 0FFH
  1178. PINSRW XMM1, EAX, 0 ; 255
  1179. PINSRW XMM1, EAX, 2 ; 255
  1180. PINSRW XMM1, EAX, 4 ; 255
  1181. ;prepare destination
  1182. MOV EBX, [EBP+adr]
  1183. MOV EBX, [EBX]
  1184. MOV EAX, EBX
  1185. AND EAX, 01FH
  1186. SHL EAX,3
  1187. PINSRW XMM4, EAX, 1 ; dstb
  1188. MOV EAX, EBX
  1189. AND EAX, 07E0H
  1190. SHR EAX, 3
  1191. PINSRW XMM4, EAX, 3 ; dstg
  1192. AND EBX, 0F800H
  1193. SHR EBX,8
  1194. PINSRW XMM4, EBX, 5 ; dstr
  1195. PMADDWD XMM4, XMM1
  1196. PSRLD XMM4, 8
  1197. PXOR XMM1,XMM1
  1198. PACKUSWB XMM4,XMM1
  1199. ; put results into their words
  1200. PEXTRW EAX, XMM4, 2 ; end red
  1201. PINSRW XMM4, EAX, 4
  1202. PEXTRW EAX, XMM4, 1 ; end green
  1203. PINSRW XMM4, EAX, 2
  1204. alpha255:
  1205. ; red in XMM4,4; green in XMM4, 2; blue in XMM4,0
  1206. ;SYSTEM.PUT16(adr, ASH(cb, -3) + ASH(ASH(cg, -2), 5) + ASH(ASH(cr, -3), 11))
  1207. PEXTRW EAX, XMM4, 0 ; end blue
  1208. SHR EAX,3
  1209. AND EAX, 001FH
  1210. PEXTRW EBX, XMM4, 2 ; end green
  1211. SHL EBX,3
  1212. AND EBX, 07E0H
  1213. OR EAX, EBX
  1214. PEXTRW EBX, XMM4, 4 ; end red
  1215. SHL EBX,8
  1216. AND EBX, 0F800H
  1217. OR EAX, EBX
  1218. MOV EDI,[EBP+adr]
  1219. MOV [EDI], AX
  1220. alphazero: ; alpha = 0, no writeback
  1221. MOV ECX,[EBP+fx]
  1222. ADD ECX, [EBP+sdx]
  1223. MOV [EBP+fx],ECX
  1224. MOV EDI,[EBP+adr]
  1225. ADD EDI, 2 ; inc adr
  1226. MOV [EBP+adr],EDI
  1227. SUB DWORD [EBP+x], 1
  1228. JNZ innerloop
  1229. endxloop:
  1230. MOV EAX,[EBP+fy] ; fy := fy + sdy
  1231. ADD EAX, [EBP+sdy]
  1232. MOV [EBP+fy], EAX
  1233. MOV EAX,[EBP+yadr]
  1234. ADD EAX, [EBP+dstbpr]
  1235. ;MOV EDI, EAX
  1236. MOV [EBP+yadr], EAX
  1237. SUB DWORD [EBP+y], 1
  1238. JNZ outerloop
  1239. endyloop:
  1240. EMMS ; declare FPU registers free
  1241. POP EBX
  1242. POPFD
  1243. END SSE2Q1BGRA8888BGR565;
  1244. *)
  (** Nearest-neighbour scaling of a BGRA8888 source blended over a BGR565 destination.
      srcadr, dstadr : base addresses of the source / destination pixel buffers.
      srcbpr, dstbpr : bytes per row of source / destination.
      dl, dt, dr, db : destination rectangle in pixels (dr and db are exclusive).
      sx, sy         : start position in the source, 16.16 fixed point.
      sdx, sdy       : source step per destination pixel, 16.16 fixed point.
      sw, sh         : not used by this procedure; present so that it matches XScalerProc.
      Source pixels whose alpha is 0 leave the destination untouched; alpha 255 overwrites it.
      No bounds checking is done on the computed source addresses. *)
  PROCEDURE Q0BGRA8888BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR x, y : LONGINT; yadr, adr, yadd: ADDRESS; col, col0 : LONGINT;
    cb, cg, cr, ca, dstb, dstg, dstr: LONGINT;
    fx, fy : LONGINT;
  BEGIN
    fy := sy;
    yadr := dstadr + dl * 2 + dt * dstbpr;
    FOR y := dt TO db - 1 DO
      fx := sx;
      adr := yadr;
      yadd := srcadr + (fy DIV 65536) * srcbpr;
      FOR x := dl TO dr - 1 DO
        col0 := SYSTEM.GET32(yadd + (fx DIV 65536) * 4);
        ca := col0 DIV 1000000H MOD 100H;
        IF ca # 0 THEN
          cb := col0 MOD 100H;
          cg := col0 DIV 100H MOD 100H;
          cr := col0 DIV 10000H MOD 100H;
          IF ca # 255 THEN
            (* the destination is only needed for partially transparent pixels: expand 5/6/5 bits to 8 bits *)
            col := SYSTEM.GET16(adr);
            dstb := (col MOD 32) * 8; dstg := (col DIV 32 MOD 64) * 4; dstr := (col DIV 2048 MOD 32) * 8;
            (* the source colour is added unscaled, the destination is weighted by (256 - ca);
               clamp to 255: a value of 256 would carry into the neighbouring 565 field when packed *)
            cb := (cb * 256 + (256 - ca) * dstb) DIV 256; IF cb > 255 THEN cb := 255 END;
            cg := (cg * 256 + (256 - ca) * dstg) DIV 256; IF cg > 255 THEN cg := 255 END;
            cr := (cr * 256 + (256 - ca) * dstr) DIV 256; IF cr > 255 THEN cr := 255 END
          END;
          (* pack 8-bit channels into 5 (blue), 6 (green), 5 (red) bits *)
          SYSTEM.PUT16(adr, ASH(cb, -3) + ASH(ASH(cg, -2), 5) + ASH(ASH(cr, -3), 11))
        END;
        INC(fx, sdx);
        INC(adr, 2)
      END;
      INC(fy, sdy);
      INC(yadr, dstbpr)
    END
  END Q0BGRA8888BGR565;
  (** Nearest-neighbour scaling of a BGRA8888 source blended over a BGRA8888 destination.
      srcadr, dstadr : base addresses of the source / destination pixel buffers.
      srcbpr, dstbpr : bytes per row of source / destination.
      dl, dt, dr, db : destination rectangle in pixels (dr and db are exclusive).
      sx, sy         : start position in the source, 16.16 fixed point.
      sdx, sdy       : source step per destination pixel, 16.16 fixed point.
      sw, sh         : not used by this procedure; present so that it matches XScalerProc.
      Source pixels whose alpha is 0 leave the destination untouched; alpha 255 overwrites it.
      No bounds checking is done on the computed source addresses. *)
  PROCEDURE Q0BGRA8888BGRA8888(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR x, y : LONGINT; yadr, adr, yadd: ADDRESS; col, col0 : LONGINT;
    cb, cg, cr, ca, dstb, dstg, dstr, dsta : LONGINT;
    fx, fy : LONGINT;
  BEGIN
    fy := sy;
    yadr := dstadr + dl * 4 + dt * dstbpr;
    FOR y := dt TO db - 1 DO
      fx := sx;
      adr := yadr;
      yadd := srcadr + (fy DIV 65536) * srcbpr;
      FOR x := dl TO dr - 1 DO
        col0 := SYSTEM.GET32(yadd + (fx DIV 65536) * 4);
        ca := col0 DIV 1000000H MOD 100H;
        IF ca # 0 THEN
          cb := col0 MOD 100H;
          cg := col0 DIV 100H MOD 100H;
          cr := col0 DIV 10000H MOD 100H;
          IF ca # 255 THEN
            (* the destination is only needed for partially transparent pixels *)
            col := SYSTEM.GET32(adr);
            dstb := col MOD 100H;
            dstg := col DIV 100H MOD 100H;
            dstr := col DIV 10000H MOD 100H;
            dsta := col DIV 1000000H MOD 100H;
            (* the source colour is added unscaled, the destination is weighted by (256 - ca);
               clamp to 255: a value of 256 would carry into the next byte of the packed pixel *)
            cb := (cb * 256 + (256 - ca) * dstb) DIV 256; IF cb > 255 THEN cb := 255 END;
            cg := (cg * 256 + (256 - ca) * dstg) DIV 256; IF cg > 255 THEN cg := 255 END;
            cr := (cr * 256 + (256 - ca) * dstr) DIV 256; IF cr > 255 THEN cr := 255 END;
            ca := (ca * 256 + (256 - ca) * dsta) DIV 256; IF ca > 255 THEN ca := 255 END
          END;
          SYSTEM.PUT32(adr, cb + LSH(cg, 8) + LSH(cr, 16) + LSH(ca, 24))
        END;
        INC(fx, sdx);
        INC(adr, 4)
      END;
      INC(fy, sdy);
      INC(yadr, dstbpr)
    END
  END Q0BGRA8888BGRA8888;
  (** Nearest-neighbour scaling of a BGRA8888 source into a BGRA8888 destination without blending:
      every destination pixel of the rectangle receives the unmodified 32-bit source pixel.
      srcadr, dstadr : base addresses of the source / destination pixel buffers.
      srcbpr, dstbpr : bytes per row of source / destination.
      dl, dt, dr, db : destination rectangle in pixels (dr and db are exclusive).
      sx, sy         : start position in the source, 16.16 fixed point.
      sdx, sdy       : source step per destination pixel, 16.16 fixed point.
      sw, sh         : not used by this procedure; present so that it matches XScalerProc. *)
  PROCEDURE Q0BGRA8888BGRA8888Copy(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR rowsLeft, colsLeft, pixel, srcX, srcY : LONGINT;
    dstRow, dstPos, srcRow : ADDRESS;
  BEGIN
    srcY := sy;
    dstRow := dstadr + dt * dstbpr + dl * 4;
    rowsLeft := db - dt;
    WHILE rowsLeft > 0 DO
      (* integer part of the 16.16 row coordinate selects the source row *)
      srcRow := srcadr + (srcY DIV 65536) * srcbpr;
      srcX := sx;
      dstPos := dstRow;
      colsLeft := dr - dl;
      WHILE colsLeft > 0 DO
        pixel := SYSTEM.GET32(srcRow + (srcX DIV 65536) * 4);
        SYSTEM.PUT32(dstPos, pixel);
        INC(srcX, sdx);
        INC(dstPos, 4);
        DEC(colsLeft)
      END;
      INC(srcY, sdy);
      INC(dstRow, dstbpr);
      DEC(rowsLeft)
    END
  END Q0BGRA8888BGRA8888Copy;
  (** Bilinear scaling of a BGRA8888 source blended over a BGRA8888 destination.
      srcadr, dstadr : base addresses of the source / destination pixel buffers.
      srcbpr, dstbpr : bytes per row of source / destination.
      dl, dt, dr, db : destination rectangle in pixels (dr and db are exclusive).
      sx, sy         : start position in the source, 16.16 fixed point.
      sdx, sdy       : source step per destination pixel, 16.16 fixed point.
      sw, sh         : source width / height in pixels; sample coordinates are clamped to [0, sw - 1] x [0, sh - 1].
      Pixels whose interpolated alpha is 0 leave the destination untouched; alpha 255 overwrites it. *)
  PROCEDURE Q1BGRA8888BGRA8888(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR x, y, xfleft, xfright, yftop, yfbottom : LONGINT; yadr, adr, yadd0, yadd1: ADDRESS;
    col, col0, col1, col2, col3 : LONGINT;
    b0, g0, r0, a0, b1, g1, r1, a1, cb, cg, cr, ca, dstb, dstg, dstr, dsta : LONGINT;
    fx, fy, xadd0, xadd1: LONGINT;
  BEGIN
    yadr := dstadr + dl * 4 + dt * dstbpr;
    (* shift both coordinates by half a unit (8000H in 16.16); presumably so that the
       interpolation weights are measured between pixel centres *)
    fy := sy - 8000H; sx := sx - 8000H;
    FOR y := dt TO db - 1 DO
      fx := sx;
      adr := yadr;
      (* the two source rows to interpolate between, clamped to the image *)
      yadd0 := srcadr + Bounds(fy DIV 65536, 0, sh - 1) * srcbpr;
      yadd1 := srcadr + Bounds(fy DIV 65536 + 1, 0, sh - 1) * srcbpr;
      (* vertical weights depend on fy only, so they are computed once per row *)
      yfbottom := fy MOD 65536;
      yftop := 65536 - yfbottom;
      FOR x := dl TO dr - 1 DO
        (* the two source columns to interpolate between, clamped to the image *)
        xadd0 := Bounds(fx DIV 65536, 0, sw - 1) * 4;
        xadd1 := Bounds(fx DIV 65536 + 1, 0, sw - 1) * 4;
        col0 := SYSTEM.GET32(yadd0 + xadd0);
        col1 := SYSTEM.GET32(yadd0 + xadd1);
        col2 := SYSTEM.GET32(yadd1 + xadd0);
        col3 := SYSTEM.GET32(yadd1 + xadd1);
        xfright := fx MOD 65536;
        xfleft := 65536 - xfright;
        (* alpha first: the colour channels are only needed if the pixel is visible *)
        a0 := ((col0 DIV 1000000H MOD 100H) * xfleft + (col1 DIV 1000000H MOD 100H) * xfright) DIV 65536;
        a1 := ((col2 DIV 1000000H MOD 100H) * xfleft + (col3 DIV 1000000H MOD 100H) * xfright) DIV 65536;
        ca := (a0 * yftop + a1 * yfbottom) DIV 65536;
        IF ca # 0 THEN
          b0 := ((col0 MOD 100H) * xfleft + (col1 MOD 100H) * xfright) DIV 65536;
          g0 := ((col0 DIV 100H MOD 100H) * xfleft + (col1 DIV 100H MOD 100H) * xfright) DIV 65536;
          r0 := ((col0 DIV 10000H MOD 100H) * xfleft + (col1 DIV 10000H MOD 100H) * xfright) DIV 65536;
          b1 := ((col2 MOD 100H) * xfleft + (col3 MOD 100H) * xfright) DIV 65536;
          g1 := ((col2 DIV 100H MOD 100H) * xfleft + (col3 DIV 100H MOD 100H) * xfright) DIV 65536;
          r1 := ((col2 DIV 10000H MOD 100H) * xfleft + (col3 DIV 10000H MOD 100H) * xfright) DIV 65536;
          cb := (b0 * yftop + b1 * yfbottom) DIV 65536;
          cg := (g0 * yftop + g1 * yfbottom) DIV 65536;
          cr := (r0 * yftop + r1 * yfbottom) DIV 65536;
          IF ca # 255 THEN
            (* the destination is only needed for partially transparent pixels *)
            col := SYSTEM.GET32(adr);
            dstb := col MOD 100H;
            dstg := col DIV 100H MOD 100H;
            dstr := col DIV 10000H MOD 100H;
            dsta := col DIV 1000000H MOD 100H;
            (* the source colour is added unscaled, the destination is weighted by (256 - ca);
               clamp to 255: a value of 256 would carry into the next byte of the packed pixel *)
            cb := (cb * 256 + (256 - ca) * dstb) DIV 256; IF cb > 255 THEN cb := 255 END;
            cg := (cg * 256 + (256 - ca) * dstg) DIV 256; IF cg > 255 THEN cg := 255 END;
            cr := (cr * 256 + (256 - ca) * dstr) DIV 256; IF cr > 255 THEN cr := 255 END;
            ca := (ca * 256 + (256 - ca) * dsta) DIV 256; IF ca > 255 THEN ca := 255 END
          END;
          SYSTEM.PUT32(adr, cb + LSH(cg, 8) + LSH(cr, 16) + LSH(ca, 24))
        END;
        INC(fx, sdx);
        INC(adr, 4)
      END;
      INC(fy, sdy);
      INC(yadr, dstbpr)
    END
  END Q1BGRA8888BGRA8888;
  (** Bilinear scaling of a BGRA8888 source into a BGRA8888 destination without blending.
      srcadr, dstadr : base addresses of the source / destination pixel buffers.
      srcbpr, dstbpr : bytes per row of source / destination.
      dl, dt, dr, db : destination rectangle in pixels (dr and db are exclusive).
      sx, sy         : start position in the source, 16.16 fixed point.
      sdx, sdy       : source step per destination pixel, 16.16 fixed point.
      sw, sh         : source width / height in pixels; sample coordinates are clamped to [0, sw - 1] x [0, sh - 1].
      Every pixel of the destination rectangle is written, including those whose interpolated
      alpha is 0, in line with Q0BGRA8888BGRA8888Copy, which also writes unconditionally. *)
  PROCEDURE Q1BGRA8888BGRA8888Copy(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  VAR x, y, xfleft, xfright, yftop, yfbottom : LONGINT; yadr, adr, yadd0, yadd1: ADDRESS;
    col0, col1, col2, col3 : LONGINT;
    b0, g0, r0, a0, b1, g1, r1, a1, cb, cg, cr, ca : LONGINT;
    fx, fy, xadd0, xadd1: LONGINT;
  BEGIN
    yadr := dstadr + dl * 4 + dt * dstbpr;
    (* shift both coordinates by half a unit (8000H in 16.16); presumably so that the
       interpolation weights are measured between pixel centres *)
    fy := sy - 8000H; sx := sx - 8000H;
    FOR y := dt TO db - 1 DO
      fx := sx;
      adr := yadr;
      (* the two source rows to interpolate between, clamped to the image *)
      yadd0 := srcadr + Bounds(fy DIV 65536, 0, sh - 1) * srcbpr;
      yadd1 := srcadr + Bounds(fy DIV 65536 + 1, 0, sh - 1) * srcbpr;
      (* vertical weights depend on fy only, so they are computed once per row *)
      yfbottom := fy MOD 65536;
      yftop := 65536 - yfbottom;
      FOR x := dl TO dr - 1 DO
        (* the two source columns to interpolate between, clamped to the image *)
        xadd0 := Bounds(fx DIV 65536, 0, sw - 1) * 4;
        xadd1 := Bounds(fx DIV 65536 + 1, 0, sw - 1) * 4;
        col0 := SYSTEM.GET32(yadd0 + xadd0);
        col1 := SYSTEM.GET32(yadd0 + xadd1);
        col2 := SYSTEM.GET32(yadd1 + xadd0);
        col3 := SYSTEM.GET32(yadd1 + xadd1);
        xfright := fx MOD 65536;
        xfleft := 65536 - xfright;
        (* horizontal interpolation on the upper (0) and lower (1) row *)
        b0 := ((col0 MOD 100H) * xfleft + (col1 MOD 100H) * xfright) DIV 65536;
        g0 := ((col0 DIV 100H MOD 100H) * xfleft + (col1 DIV 100H MOD 100H) * xfright) DIV 65536;
        r0 := ((col0 DIV 10000H MOD 100H) * xfleft + (col1 DIV 10000H MOD 100H) * xfright) DIV 65536;
        a0 := ((col0 DIV 1000000H MOD 100H) * xfleft + (col1 DIV 1000000H MOD 100H) * xfright) DIV 65536;
        b1 := ((col2 MOD 100H) * xfleft + (col3 MOD 100H) * xfright) DIV 65536;
        g1 := ((col2 DIV 100H MOD 100H) * xfleft + (col3 DIV 100H MOD 100H) * xfright) DIV 65536;
        r1 := ((col2 DIV 10000H MOD 100H) * xfleft + (col3 DIV 10000H MOD 100H) * xfright) DIV 65536;
        a1 := ((col2 DIV 1000000H MOD 100H) * xfleft + (col3 DIV 1000000H MOD 100H) * xfright) DIV 65536;
        (* vertical interpolation between the two rows *)
        cb := (b0 * yftop + b1 * yfbottom) DIV 65536;
        cg := (g0 * yftop + g1 * yfbottom) DIV 65536;
        cr := (r0 * yftop + r1 * yfbottom) DIV 65536;
        ca := (a0 * yftop + a1 * yfbottom) DIV 65536;
        (* copy mode: always replace the destination, also for fully transparent results *)
        SYSTEM.PUT32(adr, cb + LSH(cg, 8) + LSH(cr, 16) + LSH(ca, 24));
        INC(fx, sdx);
        INC(adr, 4)
      END;
      INC(fy, sdy);
      INC(yadr, dstbpr)
    END
  END Q1BGRA8888BGRA8888Copy;
  (** Entry point with the XScalerProc signature, named after the SSE2 variant of the
      BGRA8888-over-BGR565 box scaler. This file contains no assembly body for it, so instead
      of silently drawing nothing it forwards to the portable implementation Q0BGRA8888BGR565;
      all parameters are passed through unchanged. *)
  PROCEDURE SSE2Q0BGRA8888BGR565(srcadr, dstadr: ADDRESS; srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh : LONGINT);
  BEGIN
    Q0BGRA8888BGR565(srcadr, dstadr, srcbpr, dstbpr, dl, dt, dr, db, sx, sy, sdx, sdy, sw, sh)
  END SSE2Q0BGRA8888BGR565;
  (** Draw the part sr of image src into the rectangle dr of image dst, scaling as needed
      and restricting all output to clip.
      src, sr   : source image and source rectangle; sr must lie inside src (asserted).
      dst, dr   : destination image and destination rectangle (right/bottom exclusive).
      clip      : clipping rectangle in destination coordinates; must lie inside dst (asserted).
      copyMode  : ModeCopy or ModeSrcOverDst.
      scaleMode : ScaleBox or ScaleBilinear.
      If source and destination sizes are equal, the call is forwarded to Raster.Copy.
      Otherwise a format-specific scaler is chosen for the bgr565 / bgra8888 combinations
      handled below; every other combination uses the generic scalers, with
      Q0GenericSrcOverDst as the default for unknown mode values.
      An empty destination rectangle draws nothing. *)
  PROCEDURE Scale*(src : Image; sr : Rectangle; dst : Image; dr : Rectangle; clip : Rectangle; copyMode, scaleMode : LONGINT);
  VAR dw, dh, sw, sh : LONGINT;
    fw, fh : LONGREAL; sx, sy, sdx, sdy : LONGINT;
    scaler : ScalerProc; xscaler : XScalerProc;
    mode : Raster.Mode;
    SSE2enabled : BOOLEAN;
  BEGIN
    ASSERT((clip.l >= 0) & (clip.t >= 0) & (clip.r <= dst.width) & (clip.b <= dst.height));
    ASSERT((sr.l >= 0) & (sr.t >= 0) & (sr.r <= src.width) & (sr.b <= src.height));
    dw := dr.r - dr.l; dh := dr.b - dr.t;
    sw := sr.r - sr.l; sh := sr.b - sr.t;

    IF (sw = dw) & (sh = dh) THEN (* optimize special case: no scaling, plain copy *)
      IF ~Rect.IsContained(clip, dr) THEN
        (* clipping on the left/top also moves the source origin by the same amount *)
        IF dr.l < clip.l THEN DEC(dw, (clip.l - dr.l)); INC(sr.l, (clip.l - dr.l)); dr.l := clip.l END;
        IF dr.t < clip.t THEN DEC(dh, (clip.t - dr.t)); INC(sr.t, (clip.t - dr.t)); dr.t := clip.t END;
        IF dr.r > clip.r THEN DEC(dw, (dr.r - clip.r)) END;
        IF dr.b > clip.b THEN DEC(dh, (dr.b - clip.b)) END;
      END;
      IF (dw > 0) & (dh > 0) THEN
        IF copyMode = ModeCopy THEN Raster.InitMode(mode, Raster.srcCopy)
        ELSE Raster.InitMode(mode, Raster.srcOverDst)
        END;
        Raster.Copy(src, dst, sr.l, sr.t, sr.l + dw, sr.t + dh, dr.l, dr.t, mode)
      END;
      RETURN
    END;

    (* nothing to draw; this also guards the divisions by dw and dh below *)
    IF (dw <= 0) OR (dh <= 0) THEN RETURN END;

    (* source pixels per destination pixel *)
    fw := sw / dw;
    fh := sh / dh;
    sdx := ENTIER(fw * 65536);
    sdy := ENTIER(fh * 65536);
    (* source start position in 16.16 fixed point *)
    sx := sr.l * 65536;
    sy := sr.t * 65536;

    (* clipping *)
    IF ~Rect.IsContained(clip, dr) THEN
      IF dr.r > clip.r THEN dr.r := clip.r END;
      IF dr.b > clip.b THEN dr.b := clip.b END;
      (* when cutting off on the left/top, advance the source start proportionally;
         dw and dh still hold the unclipped destination size *)
      IF dr.l < clip.l THEN sx := ENTIER(65536 * (sr.l + sw * (clip.l - dr.l) / dw)); dr.l := clip.l END;
      IF dr.t < clip.t THEN sy := ENTIER(65536 * (sr.t + sh * (clip.t - dr.t) / dh)); dr.t := clip.t END;
    END;
    IF Rect.RectEmpty(dr) THEN RETURN END;

    xscaler := NIL;
    SSE2enabled := Raster.SSE2enabled; (*Machine.SSE2Support; *)
    (* SSE2 dispatch is disabled:
    IF SSE2enabled THEN
      IF (src.fmt.code = Raster.bgr565) & (dst.fmt.code = Raster.bgr565) THEN
        IF copyMode = ModeCopy THEN
          IF scaleMode = 0 THEN xscaler := SSE2Q0BGR565BGR565;
          ELSIF scaleMode = 1 THEN xscaler:= SSE2Q1BGR565BGR565;
          END;
        END;
      ELSIF (src.fmt.code = Raster.bgra8888) & (dst.fmt.code = Raster.bgr565) THEN
        IF copyMode = ModeSrcOverDst THEN
          IF scaleMode = 0 THEN xscaler := SSE2Q0BGRA8888BGR565;
          ELSIF scaleMode = 1 THEN xscaler := SSE2Q1BGRA8888BGR565;
          END;
        END;
      END;
    END;*)

    (* format-specific scalers working directly on the pixel buffers *)
    IF (xscaler = NIL) THEN
      IF (src.fmt.code = Raster.bgr565) & (dst.fmt.code = Raster.bgr565) THEN
        IF copyMode = ModeCopy THEN
          IF scaleMode = ScaleBox THEN xscaler := XQ0BGR565BGR565;
          ELSIF scaleMode = ScaleBilinear THEN xscaler := Q1BGR565BGR565;
          END;
        END;
      ELSIF (src.fmt.code = Raster.bgra8888) & (dst.fmt.code = Raster.bgr565) THEN
        IF copyMode = ModeSrcOverDst THEN
          IF scaleMode = ScaleBox THEN xscaler := Q0BGRA8888BGR565;
          ELSIF scaleMode = ScaleBilinear THEN xscaler := Q1BGRA8888BGR565;
          END;
        END;
      ELSIF (src.fmt.code = Raster.bgra8888) & (dst.fmt.code = Raster.bgra8888) THEN
        IF (copyMode = ModeSrcOverDst) THEN
          IF (scaleMode = ScaleBox) THEN xscaler := Q0BGRA8888BGRA8888;
          ELSIF (scaleMode = ScaleBilinear) THEN xscaler := Q1BGRA8888BGRA8888;
          END;
        ELSIF (copyMode = ModeCopy) THEN
          IF (scaleMode = ScaleBox) THEN xscaler := Q0BGRA8888BGRA8888Copy;
          ELSIF (scaleMode = ScaleBilinear) THEN xscaler := Q1BGRA8888BGRA8888Copy;
          END;
        END;
      END;
    END;

    IF xscaler # NIL THEN
      xscaler(src.adr, dst.adr, src.bpr, dst.bpr, dr.l, dr.t, dr.r, dr.b, sx, sy,
        sdx, sdy, src.width, src.height)
    ELSE
      scaler := Q0GenericSrcOverDst; (* fallback case *)
      IF copyMode = ModeCopy THEN
        IF scaleMode = ScaleBox THEN scaler := Q0GenericCopy
        ELSIF scaleMode = ScaleBilinear THEN scaler := Q1GenericCopy
        END
      ELSIF copyMode = ModeSrcOverDst THEN
        IF scaleMode = ScaleBox THEN scaler := Q0GenericSrcOverDst
        ELSIF scaleMode = ScaleBilinear THEN scaler := Q1GenericSrcOverDst
        END;
      END;
      scaler(src, dst, dr, sx, sy, sdx, sdy);
    END;
  END Scale;
  (** Limit val to the range [min, max]: values below min yield min, values above max yield max.
      The lower limit is tested first. *)
  PROCEDURE Bounds(val, min, max : LONGINT) : LONGINT;
  VAR limited : LONGINT;
  BEGIN
    limited := val;
    IF val < min THEN
      limited := min
    ELSIF val > max THEN
      limited := max
    END;
    RETURN limited
  END Bounds;
  1554. END WMRasterScale.
  1555. SpeedTest.Mod