
AMD64.Shortreal.Mod

(**
    AUTHOR: Alexey Morozov
    PURPOSE: AMD64 half precision floating point runtime
*)
MODULE Shortreal;

IMPORT
    SYSTEM, FoxArrayBase;

CONST
    MinValue = 0xFBFF; (* minimal SHORTREAL value *)
    MaxValue = 0x7BFF; (* maximal SHORTREAL value *)
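    (* 0xFBFF and 0x7BFF encode -65504 and +65504, the most negative and the largest
       finite IEEE 754 binary16 values (1 sign, 5 exponent, 10 mantissa bits) *)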
TYPE
    Real = REAL;

    Vector4* = ARRAY [4] OF SHORTREAL;
    Matrix4* = ARRAY [4,4] OF SHORTREAL;

OPERATOR "SHORT"*(x: Real): SHORTREAL;
VAR y: SHORTREAL;
BEGIN
    y.value := RealToShortreal(x); RETURN y;
END "SHORT";

OPERATOR "LONG"*(x: SHORTREAL): REAL;
BEGIN
    RETURN ShortrealToReal(x.value);
END "LONG";

OPERATOR ":="*(VAR y: REAL; x: SHORTREAL);
BEGIN
    y := ShortrealToReal(x.value);
END ":=";
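(*
    Usage sketch (hypothetical client code, not part of this module):

        VAR s: SHORTREAL; r: REAL;
        s := SHORT(1.5); (* REAL -> SHORTREAL, rounded *)
        r := LONG(s);    (* SHORTREAL -> REAL, exact *)
        r := s;          (* implicit widening via the ":=" operator above *)

    Widening to REAL is always exact; with the hardware conversions used in the
    kernels below, narrowing rounds to nearest even and overflows to infinity
    beyond +-65504.
*)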
OPERATOR "+"*(x, y: SHORTREAL): SHORTREAL;
VAR z: SHORTREAL;
BEGIN
    z.value := Add(x.value,y.value);
    RETURN z;
END "+";

OPERATOR "-"*(x, y: SHORTREAL): SHORTREAL;
VAR z: SHORTREAL;
BEGIN
    z.value := Sub(x.value,y.value);
    RETURN z;
END "-";

OPERATOR "*"*(x, y: SHORTREAL): SHORTREAL;
VAR z: SHORTREAL;
BEGIN
    z.value := Mul(x.value,y.value);
    RETURN z;
END "*";

OPERATOR "/"*(x, y: SHORTREAL): SHORTREAL;
VAR z: SHORTREAL;
BEGIN
    z.value := Div(x.value,y.value);
    RETURN z;
END "/";

OPERATOR "-"*(x: SHORTREAL): SHORTREAL;
VAR y: SHORTREAL;
BEGIN
    y.value := Negate(x.value);
    RETURN y;
END "-";

OPERATOR "ABS"*(x: SHORTREAL): SHORTREAL;
VAR y: SHORTREAL;
BEGIN
    y.value := Abs(x.value);
    RETURN y;
END "ABS";

OPERATOR "MIN"*(x, y: SHORTREAL): SHORTREAL;
VAR z: SHORTREAL;
BEGIN
    IF x < y THEN z.value := x.value;
    ELSE z.value := y.value;
    END;
    RETURN z;
END "MIN";

OPERATOR "MAX"*(x, y: SHORTREAL): SHORTREAL;
VAR z: SHORTREAL;
BEGIN
    IF x > y THEN z.value := x.value;
    ELSE z.value := y.value;
    END;
    RETURN z;
END "MAX";

OPERATOR "="*(x, y: SHORTREAL): BOOLEAN;
BEGIN
    RETURN Equal(x.value,y.value);
END "=";

OPERATOR "#"*(x, y: SHORTREAL): BOOLEAN;
BEGIN
    RETURN ~Equal(x.value,y.value);
END "#";

OPERATOR "<"*(x, y: SHORTREAL): BOOLEAN;
BEGIN
    RETURN LessThan(x.value,y.value);
END "<";

OPERATOR "<="*(x, y: SHORTREAL): BOOLEAN;
BEGIN
    RETURN ~GreaterThan(x.value,y.value);
END "<=";

OPERATOR ">"*(x, y: SHORTREAL): BOOLEAN;
BEGIN
    RETURN GreaterThan(x.value,y.value);
END ">";

OPERATOR ">="*(x, y: SHORTREAL): BOOLEAN;
BEGIN
    RETURN ~LessThan(x.value,y.value);
END ">=";
OPERATOR ":="*(VAR y: ARRAY {UNSAFE} [?] OF SHORTREAL; x: SHORTREAL);
BEGIN
    FoxArrayBase.ApplyUnarySAOp(y,ADDRESS OF x,AssignScalarLoop);
END ":=";

OPERATOR "+"*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(SHORTREAL),AddLoop);
    RETURN RESULT;
END "+";

OPERATOR "+"*(CONST x: ARRAY [?] OF SHORTREAL; y: SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,x,ADDRESS OF y,SIZEOF(SHORTREAL),AddScalarLoop);
    RETURN RESULT;
END "+";

OPERATOR "+"*(x: SHORTREAL; CONST y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,y,ADDRESS OF x,SIZEOF(SHORTREAL),AddScalarLoop);
    RETURN RESULT;
END "+";

OPERATOR "-"*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(SHORTREAL),SubLoop);
    RETURN RESULT;
END "-";

OPERATOR "-"*(CONST x: ARRAY [?] OF SHORTREAL; y: SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    y.value := Negate(y.value);
    FoxArrayBase.ApplyBinaryASAOp(RESULT,x,ADDRESS OF y,SIZEOF(SHORTREAL),AddScalarLoop);
    RETURN RESULT;
END "-";

OPERATOR "-"*(x: SHORTREAL; CONST y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,y,ADDRESS OF x,SIZEOF(SHORTREAL),ScalarSubLoop);
    RETURN RESULT;
END "-";

OPERATOR ".*"*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(SHORTREAL),MulLoop);
    RETURN RESULT;
END ".*";

OPERATOR ".*"*(CONST x: ARRAY [?] OF SHORTREAL; y: SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,x,ADDRESS OF y,SIZEOF(SHORTREAL),MulScalarLoop);
    RETURN RESULT;
END ".*";

OPERATOR ".*"*(x: SHORTREAL; CONST y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,y,ADDRESS OF x,SIZEOF(SHORTREAL),MulScalarLoop);
    RETURN RESULT;
END ".*";

OPERATOR "*"*(CONST x: ARRAY [?] OF SHORTREAL; y: SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,x,ADDRESS OF y,SIZEOF(SHORTREAL),MulScalarLoop);
    RETURN RESULT;
END "*";

OPERATOR "*"*(x: SHORTREAL; CONST y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,y,ADDRESS OF x,SIZEOF(SHORTREAL),MulScalarLoop);
    RETURN RESULT;
END "*";

OPERATOR "./"*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(SHORTREAL),DivLoop);
    RETURN RESULT;
END "./";

OPERATOR "./"*(CONST x: ARRAY [?] OF SHORTREAL; y: SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,x,ADDRESS OF y,SIZEOF(SHORTREAL),DivScalarLoop);
    RETURN RESULT;
END "./";

OPERATOR "./"*(x: SHORTREAL; CONST y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,y,ADDRESS OF x,SIZEOF(SHORTREAL),ScalarDivLoop);
    RETURN RESULT;
END "./";

OPERATOR "/"*(CONST x: ARRAY [?] OF SHORTREAL; y: SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyBinaryASAOp(RESULT,x,ADDRESS OF y,SIZEOF(SHORTREAL),DivScalarLoop);
    RETURN RESULT;
END "/";

OPERATOR "+*"*(CONST x, y: ARRAY [?] OF SHORTREAL): REAL;
VAR acc: REAL;
BEGIN
    acc := 0;
    FoxArrayBase.ApplyBinaryAASOp(ADDRESSOF(acc),x,y,InnerProdLoop);
    RETURN acc;
END "+*";

OPERATOR "*"*(CONST x, y: ARRAY [*,*] OF SHORTREAL): ARRAY {UNSAFE} [*,*] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyMatMulLoop(ADDRESS OF RESULT,ADDRESS OF x,ADDRESS OF y,SIZEOF(SHORTREAL),MatMulLoop,NIL);
    RETURN RESULT;
END "*";

OPERATOR "="*(CONST x, y: ARRAY [?] OF SHORTREAL): BOOLEAN;
BEGIN
    RETURN FoxArrayBase.ApplyBinaryAABOp(x,y,EqualLoop,FALSE);
END "=";

OPERATOR "<"*(CONST x, y: ARRAY [?] OF SHORTREAL): BOOLEAN;
BEGIN
    RETURN FoxArrayBase.ApplyBinaryAABOp(x,y,LessThanLoop,FALSE);
END "<";

OPERATOR "<="*(CONST x, y: ARRAY [?] OF SHORTREAL): BOOLEAN;
BEGIN
    RETURN ~FoxArrayBase.ApplyBinaryAABOp(x,y,GreaterThanLoop,FALSE);
END "<=";

OPERATOR ">"*(CONST x, y: ARRAY [?] OF SHORTREAL): BOOLEAN;
BEGIN
    RETURN FoxArrayBase.ApplyBinaryAABOp(x,y,GreaterThanLoop,FALSE);
END ">";

OPERATOR ">="*(CONST x, y: ARRAY [?] OF SHORTREAL): BOOLEAN;
BEGIN
    RETURN ~FoxArrayBase.ApplyBinaryAABOp(x,y,LessThanLoop,FALSE);
END ">=";

OPERATOR ".="*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF BOOLEAN;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(BOOLEAN),EwEqualLoop);
    RETURN RESULT;
END ".=";

OPERATOR ".<"*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF BOOLEAN;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(BOOLEAN),EwLessThanLoop);
    RETURN RESULT;
END ".<";

OPERATOR ".<="*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF BOOLEAN;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(BOOLEAN),EwLessOrEqualThanLoop);
    RETURN RESULT;
END ".<=";

OPERATOR ".>"*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF BOOLEAN;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(BOOLEAN),EwGreaterThanLoop);
    RETURN RESULT;
END ".>";

OPERATOR ".>="*(CONST x, y: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF BOOLEAN;
BEGIN
    FoxArrayBase.ApplyBinaryAAAOp(RESULT,x,y,SIZEOF(BOOLEAN),EwGreaterOrEqualThanLoop);
    RETURN RESULT;
END ".>=";
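(*
    The FoxArrayBase.Apply* helpers drive the loop kernels defined towards the
    end of this module: they walk the (possibly strided) array descriptors,
    allocate RESULT where needed, and invoke the kernel for each innermost run
    with element addresses, byte increments and a length, matching the kernel
    signatures below.
*)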
OPERATOR "+"*(CONST x, y: Vector4): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV RBX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVQ XMM1, [RBX]
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        ADDPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END "+";
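(*
    All Vector4 kernels follow the pattern above: MOVQ loads four packed halves
    (64 bits), VCVTPH2PS widens them to four singles, the arithmetic is done in
    single precision, and VCVTPS2PH with immediate 0 rounds back to half
    precision (round to nearest even). VCVTPH2PS/VCVTPS2PH require the F16C
    instruction set extension.
*)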
OPERATOR "+"*(CONST x: Vector4; y: SHORTREAL): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV BX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVD XMM1, EBX
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        SHUFPS XMM1, XMM1, 0
        ADDPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END "+";

OPERATOR "-"*(CONST x, y: Vector4): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV RBX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVQ XMM1, [RBX]
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        SUBPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END "-";

OPERATOR "-"*(CONST x: Vector4; y: SHORTREAL): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV BX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVD XMM1, EBX
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        SHUFPS XMM1, XMM1, 0
        SUBPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END "-";

OPERATOR ".*"*(CONST x, y: Vector4): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV RBX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVQ XMM1, [RBX]
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        MULPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END ".*";

OPERATOR ".*"*(CONST x: Vector4; y: SHORTREAL): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV BX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVD XMM1, EBX
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        SHUFPS XMM1, XMM1, 0
        MULPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END ".*";

OPERATOR "./"*(CONST x, y: Vector4): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV RBX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVQ XMM1, [RBX]
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        DIVPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END "./";

OPERATOR "./"*(CONST x: Vector4; y: SHORTREAL): Vector4;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOV BX, [RBP+y]
        MOV RCX, [RBP+RESULT]
        MOVQ XMM0, [RAX]
        MOVD XMM1, EBX
        VCVTPH2PS XMM0, XMM0
        VCVTPH2PS XMM1, XMM1
        SHUFPS XMM1, XMM1, 0
        DIVPS XMM0, XMM1
        VCVTPS2PH XMM0, XMM0, 0
        MOVQ [RCX], XMM0
    END;
    RETURN RESULT;
END "./";

OPERATOR "+*"*(CONST x, y: Vector4): REAL;
CODE
    MOV RAX, [RBP+x]
    MOV RBX, [RBP+y]
    MOVQ XMM0, [RAX]
    MOVQ XMM1, [RBX]
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    MULPS XMM0, XMM1
    HADDPS XMM0, XMM0
    HADDPS XMM0, XMM0
END "+*";
OPERATOR "SUM"*(CONST x: Vector4): REAL;
CODE
    MOV RAX, [RBP+x]
    MOVQ XMM0, [RAX]
    VCVTPH2PS XMM0, XMM0
    HADDPS XMM0, XMM0
    HADDPS XMM0, XMM0
END "SUM";
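(*
    "+*" and "SUM" reduce with two HADDPS steps, folding the four single
    precision lanes into lane 0; the REAL result is left in XMM0, which is
    presumably where these code procedures return it, hence no explicit RETURN.
*)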
OPERATOR "MAX"*(CONST x: Vector4): SHORTREAL;
VAR y: SHORTREAL;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOVQ XMM0, [RAX]
        VCVTPH2PS XMM0, XMM0
        SHUFPS XMM1, XMM0, 044H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s1, s0, d1, d0]
        MAXPS XMM0, XMM1
        SHUFPS XMM1, XMM0, 0F4H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s3, s3, d1, d0]
        MAXPS XMM0, XMM1
        MOVHLPS XMM0, XMM0
        VCVTPS2PH XMM0, XMM0, 0
        MOVD EAX, XMM0
        MOV [RBP+y], AX
    END;
    RETURN y;
END "MAX";

OPERATOR "MIN"*(CONST x: Vector4): SHORTREAL;
VAR y: SHORTREAL;
BEGIN
    CODE
        MOV RAX, [RBP+x]
        MOVQ XMM0, [RAX]
        VCVTPH2PS XMM0, XMM0
        SHUFPS XMM1, XMM0, 044H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s1, s0, d1, d0]
        MINPS XMM0, XMM1
        SHUFPS XMM1, XMM0, 0F4H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s3, s3, d1, d0]
        MINPS XMM0, XMM1
        MOVHLPS XMM0, XMM0
        VCVTPS2PH XMM0, XMM0, 0
        MOVD EAX, XMM0
        MOV [RBP+y], AX
    END;
    RETURN y;
END "MIN";

OPERATOR "ABS"*(CONST x: Vector4): Vector4;
CODE
    MOV RAX, [RBP+x]
    MOV RBX, [RBP+RESULT]
    MOV EDX, 7FFF7FFFH
    SHL RDX, 32
    OR RDX, 7FFF7FFFH
    MOV RAX, [RAX]
    AND RAX, RDX
    MOV [RBX], RAX
END "ABS";

OPERATOR "-"*(CONST x: Vector4): Vector4;
CODE
    MOV RAX, [RBP+x]
    MOV RBX, [RBP+RESULT]
    MOV EDX, 80008000H
    SHL RDX, 32
    OR RDX, 80008000H
    MOV RAX, [RAX]
    XOR RAX, RDX
    MOV [RBX], RAX
END "-";
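(*
    ABS and unary "-" need no conversion at all: AND with 7FFF7FFF7FFF7FFFH
    clears and XOR with 8000800080008000H flips the sign bit of each of the
    four 16-bit lanes directly in a general purpose register.
*)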
OPERATOR "SUM"*(CONST x: ARRAY [?] OF SHORTREAL): REAL;
VAR acc: REAL;
BEGIN
    acc := 0;
    FoxArrayBase.ApplyUnaryASOp(ADDRESSOF(acc),x,SumLoop);
    RETURN acc;
END "SUM";

OPERATOR "MAX"*(CONST x: ARRAY [?] OF SHORTREAL): SHORTREAL;
VAR max: SHORTREAL;
BEGIN
    max.value := MinValue;
    FoxArrayBase.ApplyUnaryASOp(ADDRESSOF(max),x,MaxLoop);
    RETURN max;
END "MAX";
OPERATOR "MIN"*(CONST x: ARRAY [?] OF SHORTREAL): SHORTREAL;
VAR min: SHORTREAL;
BEGIN
    min.value := MaxValue;
    FoxArrayBase.ApplyUnaryASOp(ADDRESSOF(min),x,MinLoop);
    RETURN min;
END "MIN";
OPERATOR "ABS"*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),AbsLoop);
    RETURN RESULT;
END "ABS";

OPERATOR "-"*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),NegateLoop);
    RETURN RESULT;
END "-";
(*!TODO: replace by a SHORT operator once the compiler bug that prevents compiling the operator code is fixed *)
PROCEDURE Short*(CONST x: ARRAY [?] OF Real): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),ShortLoop);
    RETURN RESULT;
END Short;

(*
OPERATOR "SHORT"*(CONST x: ARRAY [?] OF Real): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),ShortLoop);
    RETURN RESULT;
END "SHORT";
*)

OPERATOR "LONG"*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF REAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(REAL),LongLoop);
    RETURN RESULT;
END "LONG";

PROCEDURE Sqrt*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),SqrtLoop);
    RETURN RESULT;
END Sqrt;

PROCEDURE Sin*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),SinLoop);
    RETURN RESULT;
END Sin;

PROCEDURE Cos*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),CosLoop);
    RETURN RESULT;
END Cos;

PROCEDURE Arctan*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),ArctanLoop);
    RETURN RESULT;
END Arctan;

PROCEDURE Ln*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),LnLoop);
    RETURN RESULT;
END Ln;

PROCEDURE Exp*(CONST x: ARRAY [?] OF SHORTREAL): ARRAY {UNSAFE} [?] OF SHORTREAL;
BEGIN
    FoxArrayBase.ApplyUnaryAAOp(RESULT,x,SIZEOF(SHORTREAL),ExpLoop);
    RETURN RESULT;
END Exp;
PROCEDURE Sqrt0(x: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x] ; load U16
    MOVD XMM0, EAX
    VCVTPH2PS XMM0, XMM0
    SQRTSS XMM0, XMM0
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
END Sqrt0;
PROCEDURE Sin0(x: UNSIGNED16): UNSIGNED16;
CODE
    SUB RSP, 4 ; create a local variable of type REAL
    MOV AX, [RBP+x] ; load U16
    MOVD XMM0, EAX
    VCVTPH2PS XMM0, XMM0
    MOVD [RSP], XMM0
    FLD [RSP]
    FSIN
    FSTP [RSP] ; store and pop, keeping the x87 stack empty across calls
    MOVSS XMM0, [RSP]
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
    ADD RSP, 4 ; remove local variable
END Sin0;

PROCEDURE Cos0(x: UNSIGNED16): UNSIGNED16;
CODE
    SUB RSP, 4 ; create a local variable of type REAL
    MOV AX, [RBP+x] ; load U16
    MOVD XMM0, EAX
    VCVTPH2PS XMM0, XMM0
    MOVD [RSP], XMM0
    FLD [RSP]
    FCOS
    FSTP [RSP] ; store and pop
    MOVSS XMM0, [RSP]
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
    ADD RSP, 4 ; remove local variable
END Cos0;

PROCEDURE Arctan0(x: UNSIGNED16): UNSIGNED16;
CODE
    SUB RSP, 4 ; create a local variable of type REAL
    MOV AX, [RBP+x] ; load U16
    MOVD XMM0, EAX
    VCVTPH2PS XMM0, XMM0
    MOVD [RSP], XMM0
    FLD [RSP]
    FLD1
    FPATAN ; arctan(x/1)
    FSTP [RSP] ; store and pop
    MOVSS XMM0, [RSP]
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
    ADD RSP, 4 ; remove local variable
END Arctan0;

PROCEDURE Ln0(x: UNSIGNED16): UNSIGNED16;
CODE
    SUB RSP, 4 ; create a local variable of type REAL
    MOV AX, [RBP+x] ; load U16
    MOVD XMM0, EAX
    VCVTPH2PS XMM0, XMM0
    MOVD [RSP], XMM0
    FLD1
    FLDL2E
    FDIVP ; ln(2) = 1/log2(e)
    FLD [RSP]
    FYL2X ; ln(x) = ln(2)*log2(x)
    FSTP [RSP] ; store and pop
    MOVSS XMM0, [RSP]
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
    ADD RSP, 4 ; remove local variable
END Ln0;

PROCEDURE Exp0(x: UNSIGNED16): UNSIGNED16;
CODE
    SUB RSP, 4 ; create a local variable of type REAL
    MOV AX, [RBP+x] ; load U16
    MOVD XMM0, EAX
    VCVTPH2PS XMM0, XMM0
    MOVD [RSP], XMM0
    FLD [RSP]
    FLDL2E
    FMULP ; y = x*log2(e)
    FLD ST0
    FRNDINT ; integer part of y
    FXCH ST1
    FSUB ST0, ST1 ; fractional part of y
    F2XM1
    FLD1
    FADDP ; 2^frac
    FSCALE ; 2^frac * 2^int
    FSTP ST1
    FSTP [RSP] ; store and pop
    MOVSS XMM0, [RSP]
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
    ADD RSP, 4 ; remove local variable
END Exp0;
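(*
    The scalar transcendental kernels go through the x87 FPU (FSIN, FCOS,
    FPATAN, FYL2X, F2XM1), since SSE offers no such instructions: the half
    value is widened to single precision, pushed onto the x87 stack, computed
    there and rounded back.
*)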
PROCEDURE Abs(x: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x] ; load U16
    AND RAX, 7FFFH
END Abs;

PROCEDURE Negate(x: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x] ; load U16
    XOR AX, 8000H
END Negate;

PROCEDURE Add(x, y: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    ADDSS XMM0, XMM1
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
END Add;

PROCEDURE Sub(x, y: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    SUBSS XMM0, XMM1
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
END Sub;

PROCEDURE Mul(x, y: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    MULSS XMM0, XMM1
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
END Mul;

PROCEDURE Div(x, y: UNSIGNED16): UNSIGNED16;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    DIVSS XMM0, XMM1
    VCVTPS2PH XMM0, XMM0, 0
    MOVD RAX, XMM0
END Div;
PROCEDURE Equal(x, y: UNSIGNED16): BOOLEAN;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    CMPSS XMM0, XMM1, 0
    MOVD EAX, XMM0
    AND EAX, 1
END Equal;

PROCEDURE LessThan(x, y: UNSIGNED16): BOOLEAN;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    CMPSS XMM0, XMM1, 1
    MOVD EAX, XMM0
    AND EAX, 1
END LessThan;

PROCEDURE GreaterThan(x, y: UNSIGNED16): BOOLEAN;
CODE
    MOV AX, [RBP+x]
    MOV BX, [RBP+y]
    MOVD XMM0, EAX
    MOVD XMM1, EBX
    VCVTPH2PS XMM0, XMM0
    VCVTPH2PS XMM1, XMM1
    CMPSS XMM0, XMM1, 6
    MOVD EAX, XMM0
    AND EAX, 1
END GreaterThan;
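(*
    CMPSS predicates used above: 0 = EQ, 1 = LT, 6 = NLE (not less or equal).
    NLE is also true for unordered operands, so GreaterThan, and with it ">",
    returns TRUE when either argument is NaN, while "<=" (its negation) returns
    FALSE; strict IEEE 754 comparison semantics are not guaranteed for NaNs.
*)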
PROCEDURE AssignScalarLoop(laddr, daddr: ADDRESS; dinc, len: SIZE);
BEGIN
    (*!TODO: optimize contiguous case *)
    IF FALSE(*dinc = 2*) THEN
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV AX, [RAX]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+dinc]
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV [RBX], AX
            ADD RBX, RDX
            SUB RCX, 1
            JMP Loop
        Exit:
        END;
    END;
END AssignScalarLoop;
PROCEDURE SumLoop(laddr, daddr: ADDRESS; linc, len: SIZE);
BEGIN
    IF linc = 2 THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+daddr]
            MOVD XMM0, [RDX]
        Loop4:
            CMP RCX, 4
            JL Reminder2
            MOVQ XMM1, [RAX]
            VCVTPH2PS XMM1, XMM1
            ADDPS XMM0, XMM1
            ADD RAX, 8
            SUB RCX, 4
            JMP Loop4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOV EDX, [RAX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM1, XMM1
            ADDPS XMM0, XMM1
            ADD RAX, 4
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM1, EAX
            VCVTPH2PS XMM1, XMM1
            ADDSS XMM0, XMM1
        Exit:
            HADDPS XMM0, XMM0
            HADDPS XMM0, XMM0
            MOV RDX, [RBP+daddr]
            MOVD [RDX], XMM0
        END;
    ELSE
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+linc]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+daddr]
            MOVD XMM0, [RDX]
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM1, XMM1
            ADDSS XMM0, XMM1
            ADD RAX, RBX
            SUB RCX, 1
            JMP Loop
        Exit:
            MOV RDX, [RBP+daddr]
            MOVD [RDX], XMM0
        END;
    END;
END SumLoop;
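(*
    The remaining kernels all share the structure of SumLoop: a fast path for
    contiguous data (increments of 2 bytes, one SHORTREAL) processing four
    elements per iteration with packed SSE, the Reminder2/Reminder1 labels
    handling the two- and one-element tails, and a generic strided path that
    converts and processes one element at a time.
*)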
PROCEDURE MaxLoop(laddr, daddr: ADDRESS; linc, len: SIZE);
BEGIN
    IF linc = 2 THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+daddr]
            MOV DX, [RDX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
            SHUFPS XMM0, XMM0, 0 ; [0, 0, 0, x0] => [x0, x0, x0, x0]
        Loop4:
            CMP RCX, 4
            JL Reminder2
            MOVQ XMM1, [RAX]
            VCVTPH2PS XMM1, XMM1
            MAXPS XMM0, XMM1
            ADD RAX, 8
            SUB RCX, 4
            JMP Loop4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOV EDX, [RAX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 044H ; [0, 0, x1, x0] => [x1, x0, x1, x0]
            MAXPS XMM0, XMM1
            ADD RAX, 4
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM1, EAX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 0 ; [0, 0, 0, x0] => [x0, x0, x0, x0]
            MAXPS XMM0, XMM1
        Exit:
            SHUFPS XMM1, XMM0, 044H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s1, s0, d1, d0]
            MAXPS XMM0, XMM1
            SHUFPS XMM1, XMM0, 0F4H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s3, s3, d1, d0]
            MAXPS XMM0, XMM1
            MOVHLPS XMM0, XMM0
            MOV RDX, [RBP+daddr]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EAX, XMM0
            MOV [RDX], AX
        END;
    ELSE
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+linc]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+daddr]
            MOV DX, [RDX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM1, XMM1
            MAXSS XMM0, XMM1
            ADD RAX, RBX
            SUB RCX, 1
            JMP Loop
        Exit:
            MOV RDX, [RBP+daddr]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EAX, XMM0
            MOV [RDX], AX
        END;
    END;
END MaxLoop;
PROCEDURE MinLoop(laddr, daddr: ADDRESS; linc, len: SIZE);
BEGIN
    IF linc = 2 THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+daddr]
            MOV DX, [RDX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
            SHUFPS XMM0, XMM0, 0 ; [0, 0, 0, x0] => [x0, x0, x0, x0]
        Loop4:
            CMP RCX, 4
            JL Reminder2
            MOVQ XMM1, [RAX]
            VCVTPH2PS XMM1, XMM1
            MINPS XMM0, XMM1
            ADD RAX, 8
            SUB RCX, 4
            JMP Loop4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOV EDX, [RAX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 044H ; [0, 0, x1, x0] => [x1, x0, x1, x0]
            MINPS XMM0, XMM1
            ADD RAX, 4
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM1, EAX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 0 ; [0, 0, 0, x0] => [x0, x0, x0, x0]
            MINPS XMM0, XMM1
        Exit:
            SHUFPS XMM1, XMM0, 044H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s1, s0, d1, d0]
            MINPS XMM0, XMM1
            SHUFPS XMM1, XMM0, 0F4H ; XMM0=[s3, s2, s1, s0], XMM1=[d3, d2, d1, d0] => XMM1 = [s3, s3, d1, d0]
            MINPS XMM0, XMM1
            MOVHLPS XMM0, XMM0
            MOV RDX, [RBP+daddr]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EAX, XMM0
            MOV [RDX], AX
        END;
    ELSE
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+linc]
            MOV RCX, [RBP+len]
            MOV RDX, [RBP+daddr]
            MOV DX, [RDX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM1, XMM1
            MINSS XMM0, XMM1
            ADD RAX, RBX
            SUB RCX, 1
            JMP Loop
        Exit:
            MOV RDX, [RBP+daddr]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EAX, XMM0
            MOV [RDX], AX
        END;
    END;
END MinLoop;
PROCEDURE AbsLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
            MOV EDX, 7FFF7FFFH
            MOVD XMM1, EDX
            SHUFPS XMM1, XMM1, 0 ; mask
        Loop8:
            CMP RCX, 8
            JL Reminder4
            MOVUPS XMM0, [RAX]
            ANDPS XMM0, XMM1
            MOVUPS [RBX], XMM0
            ADD RAX, 16
            ADD RBX, 16
            SUB RCX, 8
            JMP Loop8
        Reminder4:
            CMP RCX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            ANDPS XMM0, XMM1
            MOVQ [RBX], XMM0
            ADD RAX, 8
            ADD RBX, 8
            SUB RCX, 4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOV EDX, [RAX]
            AND RDX, 7FFF7FFFH
            MOV [RBX], EDX
            ADD RAX, 4
            ADD RBX, 4
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            AND RDX, 7FFFH
            MOV [RBX], DX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            AND RDX, 7FFFH
            MOV [RBX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+dinc]
            SUB RCX, 1
            JMP Loop
        Exit:
        END;
    END;
END AbsLoop;
PROCEDURE NegateLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
            MOV EDX, 80008000H ; mask
            MOVD XMM1, EDX
            SHUFPS XMM1, XMM1, 0
        Loop8:
            CMP RCX, 8
            JL Reminder4
            MOVUPS XMM0, [RAX]
            XORPS XMM0, XMM1
            MOVUPS [RBX], XMM0
            ADD RAX, 16
            ADD RBX, 16
            SUB RCX, 8
            JMP Loop8
        Reminder4:
            CMP RCX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            XORPS XMM0, XMM1
            MOVQ [RBX], XMM0
            ADD RAX, 8
            ADD RBX, 8
            SUB RCX, 4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOV EDX, [RAX]
            XOR RDX, 80008000H
            MOV [RBX], EDX
            ADD RAX, 4
            ADD RBX, 4
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            XOR RDX, 8000H
            MOV [RBX], DX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            XOR RDX, 8000H
            MOV [RBX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+dinc]
            SUB RCX, 1
            JMP Loop
        Exit:
        END;
    END;
END NegateLoop;
PROCEDURE ShortLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 4) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
        Loop4:
            CMP RCX, 4
            JL Reminder2
            MOVUPS XMM0, [RAX]
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RBX], XMM0
            ADD RAX, 16
            ADD RBX, 8
            SUB RCX, 4
            JMP Loop4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOVQ XMM0, [RAX]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RBX], XMM0
            ADD RAX, 8
            ADD RBX, 4
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOVD XMM0, [RAX]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EDX, XMM0
            MOV [RBX], DX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
        Loop:
            CMP RCX, 0
            JLE Exit
            MOVD XMM0, [RAX]
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EDX, XMM0
            MOV [RBX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+dinc]
            SUB RCX, 1
            JMP Loop
        Exit:
        END;
    END;
END ShortLoop;
PROCEDURE LongLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (dinc = 4) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
        Loop4:
            CMP RCX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVUPS [RBX], XMM0
            ADD RAX, 8
            ADD RBX, 16
            SUB RCX, 4
            JMP Loop4
        Reminder2:
            CMP RCX, 2
            JL Reminder1
            MOV EDX, [RAX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
            MOVQ [RBX], XMM0
            ADD RAX, 4
            ADD RBX, 8
            SUB RCX, 2
        Reminder1:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
            MOVD [RBX], XMM0
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+daddr]
            MOV RCX, [RBP+len]
        Loop:
            CMP RCX, 0
            JLE Exit
            MOV DX, [RAX]
            MOVD XMM0, EDX
            VCVTPH2PS XMM0, XMM0
            MOVD [RBX], XMM0
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+dinc]
            SUB RCX, 1
            JMP Loop
        Exit:
        END;
    END;
END LongLoop;
PROCEDURE AddLoop(laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (rinc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVQ XMM1, [RBX]
            VCVTPH2PS XMM1, XMM1
            ADDPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RBX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOV [RBP+len], RDX
            MOVD XMM0, [RAX]
            MOVD XMM1, [RBX]
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            ADDPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RBX, 4
            ADD RCX, 4
            MOV RDX, [RBP+len]
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            ADDSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
        Loop:
            MOV RDX, [RBP+len]
            CMP RDX, 0
            JLE Exit
            SUB RDX, 1
            MOV [RBP+len], RDX
            MOV DX, [RAX]
            MOVD XMM0, EDX
            MOV DX, [RBX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            ADDSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RDX, XMM0
            MOV [RCX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+rinc]
            ADD RCX, [RBP+dinc]
            JMP Loop
        Exit:
        END;
    END;
END AddLoop;
(* array@daddr := array@laddr + scalar@raddr *)
PROCEDURE AddScalarLoop(laddr, raddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 0
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            ADDPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOVD XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            ADDPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RCX, 4
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            VCVTPH2PS XMM0, XMM0
            ADDSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM1, XMM1
        Loop:
            CMP RDX, 0
            JLE Exit
            MOV BX, [RAX]
            MOVD XMM0, EBX
            VCVTPH2PS XMM0, XMM0
            ADDSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EBX, XMM0
            MOV [RCX], BX
            ADD RAX, [RBP+linc]
            ADD RCX, [RBP+dinc]
            SUB RDX, 1
            JMP Loop
        Exit:
        END;
    END;
END AddScalarLoop;
PROCEDURE SubLoop(laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (rinc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVQ XMM1, [RBX]
            VCVTPH2PS XMM1, XMM1
            SUBPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RBX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOV [RBP+len], RDX
            MOVD XMM0, [RAX]
            MOVD XMM1, [RBX]
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            SUBPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RBX, 4
            ADD RCX, 4
            MOV RDX, [RBP+len]
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            SUBSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
        Loop:
            MOV RDX, [RBP+len]
            CMP RDX, 0
            JLE Exit
            SUB RDX, 1
            MOV [RBP+len], RDX
            MOV DX, [RAX]
            MOVD XMM0, EDX
            MOV DX, [RBX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            SUBSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RDX, XMM0
            MOV [RCX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+rinc]
            ADD RCX, [RBP+dinc]
            JMP Loop
        Exit:
        END;
    END;
END SubLoop;
(* array@daddr := scalar@raddr - array@laddr *)
PROCEDURE ScalarSubLoop(laddr, raddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 0
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVAPS XMM2, XMM1
            SUBPS XMM2, XMM0
            VCVTPS2PH XMM0, XMM2, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOVD XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVAPS XMM2, XMM1
            SUBPS XMM2, XMM0
            VCVTPS2PH XMM0, XMM2, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RCX, 4
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            VCVTPH2PS XMM0, XMM0
            MOVAPS XMM2, XMM1
            SUBSS XMM2, XMM0
            VCVTPS2PH XMM0, XMM2, 0
            MOVD EAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM1, XMM1
        Loop:
            CMP RDX, 0
            JLE Exit
            MOV BX, [RAX]
            MOVD XMM0, EBX
            VCVTPH2PS XMM0, XMM0
            MOVAPS XMM2, XMM1
            SUBSS XMM2, XMM0
            VCVTPS2PH XMM0, XMM2, 0
            MOVD EBX, XMM0
            MOV [RCX], BX
            ADD RAX, [RBP+linc]
            ADD RCX, [RBP+dinc]
            SUB RDX, 1
            JMP Loop
        Exit:
        END;
    END;
END ScalarSubLoop;
PROCEDURE MulLoop(laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (rinc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVQ XMM1, [RBX]
            VCVTPH2PS XMM1, XMM1
            MULPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RBX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOV [RBP+len], RDX
            MOVD XMM0, [RAX]
            MOVD XMM1, [RBX]
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            MULPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RBX, 4
            ADD RCX, 4
            MOV RDX, [RBP+len]
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            MULSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
        Loop:
            MOV RDX, [RBP+len]
            CMP RDX, 0
            JLE Exit
            SUB RDX, 1
            MOV [RBP+len], RDX
            MOV DX, [RAX]
            MOVD XMM0, EDX
            MOV DX, [RBX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            MULSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RDX, XMM0
            MOV [RCX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+rinc]
            ADD RCX, [RBP+dinc]
            JMP Loop
        Exit:
        END;
    END;
END MulLoop;
PROCEDURE MulScalarLoop(laddr, raddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM1, XMM1
            SHUFPS XMM1, XMM1, 0
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MULPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOVD XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MULPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RCX, 4
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            VCVTPH2PS XMM0, XMM0
            MULSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM1, XMM1
        Loop:
            CMP RDX, 0
            JLE Exit
            MOV BX, [RAX]
            MOVD XMM0, EBX
            VCVTPH2PS XMM0, XMM0
            MULSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD EBX, XMM0
            MOV [RCX], BX
            ADD RAX, [RBP+linc]
            ADD RCX, [RBP+dinc]
            SUB RDX, 1
            JMP Loop
        Exit:
        END;
    END;
END MulScalarLoop;
PROCEDURE DivLoop(laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE);
BEGIN
    IF (linc = 2) & (rinc = 2) & (dinc = 2) THEN
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
            MOV RDX, [RBP+len]
        Loop4:
            CMP RDX, 4
            JL Reminder2
            MOVQ XMM0, [RAX]
            VCVTPH2PS XMM0, XMM0
            MOVQ XMM1, [RBX]
            VCVTPH2PS XMM1, XMM1
            DIVPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVQ [RCX], XMM0
            ADD RAX, 8
            ADD RBX, 8
            ADD RCX, 8
            SUB RDX, 4
            JMP Loop4
        Reminder2:
            CMP RDX, 2
            JL Reminder1
            MOV [RBP+len], RDX
            MOVD XMM0, [RAX]
            MOVD XMM1, [RBX]
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            DIVPS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD [RCX], XMM0
            ADD RAX, 4
            ADD RBX, 4
            ADD RCX, 4
            MOV RDX, [RBP+len]
            SUB RDX, 2
        Reminder1:
            CMP RDX, 0
            JLE Exit
            MOV AX, [RAX]
            MOVD XMM0, EAX
            MOV BX, [RBX]
            MOVD XMM1, EBX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            DIVSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RAX, XMM0
            MOV [RCX], AX
        Exit:
        END;
    ELSE (* striding single element access *)
        CODE
            MOV RAX, [RBP+laddr]
            MOV RBX, [RBP+raddr]
            MOV RCX, [RBP+daddr]
        Loop:
            MOV RDX, [RBP+len]
            CMP RDX, 0
            JLE Exit
            SUB RDX, 1
            MOV [RBP+len], RDX
            MOV DX, [RAX]
            MOVD XMM0, EDX
            MOV DX, [RBX]
            MOVD XMM1, EDX
            VCVTPH2PS XMM0, XMM0
            VCVTPH2PS XMM1, XMM1
            DIVSS XMM0, XMM1
            VCVTPS2PH XMM0, XMM0, 0
            MOVD RDX, XMM0
            MOV [RCX], DX
            ADD RAX, [RBP+linc]
            ADD RBX, [RBP+rinc]
            ADD RCX, [RBP+dinc]
            JMP Loop
        Exit:
        END;
    END;
END DivLoop;
(* array@daddr := array@laddr / scalar@raddr *)
PROCEDURE DivScalarLoop(laddr, raddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
	IF (linc = 2) & (dinc = 2) THEN
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
			MOV RDX, [RBP+len]
			MOV BX, [RBX]
			MOVD XMM1, EBX
			VCVTPH2PS XMM1, XMM1
			SHUFPS XMM1, XMM1, 0 ; broadcast the scalar divisor to all four lanes
		Loop4:
			CMP RDX, 4
			JL Remainder2
			MOVQ XMM0, [RAX]
			VCVTPH2PS XMM0, XMM0
			DIVPS XMM0, XMM1
			VCVTPS2PH XMM0, XMM0, 0
			MOVQ [RCX], XMM0
			ADD RAX, 8
			ADD RCX, 8
			SUB RDX, 4
			JMP Loop4
		Remainder2:
			CMP RDX, 2
			JL Remainder1
			MOVD XMM0, [RAX]
			VCVTPH2PS XMM0, XMM0
			DIVPS XMM0, XMM1
			VCVTPS2PH XMM0, XMM0, 0
			MOVD [RCX], XMM0
			ADD RAX, 4
			ADD RCX, 4
			SUB RDX, 2
		Remainder1:
			CMP RDX, 0
			JLE Exit
			MOV AX, [RAX]
			MOVD XMM0, EAX
			VCVTPH2PS XMM0, XMM0
			DIVSS XMM0, XMM1
			VCVTPS2PH XMM0, XMM0, 0
			MOVD EAX, XMM0
			MOV [RCX], AX
		Exit:
		END;
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
			MOV RDX, [RBP+len]
			MOV BX, [RBX]
			MOVD XMM1, EBX
			VCVTPH2PS XMM1, XMM1
		Loop:
			CMP RDX, 0
			JLE Exit
			MOV BX, [RAX]
			MOVD XMM0, EBX
			VCVTPH2PS XMM0, XMM0
			DIVSS XMM0, XMM1
			VCVTPS2PH XMM0, XMM0, 0
			MOVD EBX, XMM0
			MOV [RCX], BX
			ADD RAX, [RBP+linc]
			ADD RCX, [RBP+dinc]
			SUB RDX, 1
			JMP Loop
		Exit:
		END;
	END;
END DivScalarLoop;
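(* array@daddr := scalar@raddr / array@laddr *)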
PROCEDURE ScalarDivLoop(laddr, raddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
	IF (linc = 2) & (dinc = 2) THEN
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
			MOV RDX, [RBP+len]
			MOV BX, [RBX]
			MOVD XMM1, EBX
			VCVTPH2PS XMM1, XMM1
			SHUFPS XMM1, XMM1, 0 ; broadcast the scalar dividend to all four lanes
		Loop4:
			CMP RDX, 4
			JL Remainder2
			MOVQ XMM0, [RAX]
			VCVTPH2PS XMM0, XMM0
			MOVAPS XMM2, XMM1
			DIVPS XMM2, XMM0
			VCVTPS2PH XMM0, XMM2, 0
			MOVQ [RCX], XMM0
			ADD RAX, 8
			ADD RCX, 8
			SUB RDX, 4
			JMP Loop4
		Remainder2:
			CMP RDX, 2
			JL Remainder1
			MOVD XMM0, [RAX]
			VCVTPH2PS XMM0, XMM0
			MOVAPS XMM2, XMM1
			DIVPS XMM2, XMM0
			VCVTPS2PH XMM0, XMM2, 0
			MOVD [RCX], XMM0
			ADD RAX, 4
			ADD RCX, 4
			SUB RDX, 2
		Remainder1:
			CMP RDX, 0
			JLE Exit
			MOV AX, [RAX]
			MOVD XMM0, EAX
			VCVTPH2PS XMM0, XMM0
			MOVAPS XMM2, XMM1
			DIVSS XMM2, XMM0
			VCVTPS2PH XMM0, XMM2, 0
			MOVD EAX, XMM0
			MOV [RCX], AX
		Exit:
		END;
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
			MOV RDX, [RBP+len]
			MOV BX, [RBX]
			MOVD XMM1, EBX
			VCVTPH2PS XMM1, XMM1
		Loop:
			CMP RDX, 0
			JLE Exit
			MOV BX, [RAX]
			MOVD XMM0, EBX
			VCVTPH2PS XMM0, XMM0
			MOVAPS XMM2, XMM1
			DIVSS XMM2, XMM0
			VCVTPS2PH XMM0, XMM2, 0
			MOVD EBX, XMM0
			MOV [RCX], BX
			ADD RAX, [RBP+linc]
			ADD RCX, [RBP+dinc]
			SUB RDX, 1
			JMP Loop
		Exit:
		END;
	END;
END ScalarDivLoop;
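(* scalar@daddr := scalar@daddr + sum(array@laddr * array@raddr); the accumulator at daddr is a 32-bit REAL *)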
PROCEDURE InnerProdLoop(laddr, raddr, daddr: ADDRESS; linc, rinc, len: SIZE);
BEGIN
	IF (linc = 2) & (rinc = 2) THEN
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+len]
			MOV RDX, [RBP+daddr]
			MOVD XMM0, [RDX] ; initialize the accumulator from the destination
		Loop4:
			CMP RCX, 4
			JL Remainder2
			MOVQ XMM1, [RAX]
			MOVQ XMM2, [RBX]
			VCVTPH2PS XMM1, XMM1
			VCVTPH2PS XMM2, XMM2
			MULPS XMM1, XMM2
			ADDPS XMM0, XMM1
			ADD RAX, 8
			ADD RBX, 8
			SUB RCX, 4
			JMP Loop4
		Remainder2:
			CMP RCX, 2
			JL Remainder1
			MOV EDX, [RAX]
			MOVD XMM1, EDX
			MOV EDX, [RBX]
			MOVD XMM2, EDX
			VCVTPH2PS XMM1, XMM1
			VCVTPH2PS XMM2, XMM2
			MULPS XMM1, XMM2
			ADDPS XMM0, XMM1
			ADD RAX, 4
			ADD RBX, 4
			SUB RCX, 2
		Remainder1:
			CMP RCX, 0
			JLE Exit
			MOV AX, [RAX]
			MOV BX, [RBX]
			MOVD XMM1, EAX
			MOVD XMM2, EBX
			VCVTPH2PS XMM1, XMM1
			VCVTPH2PS XMM2, XMM2
			MULSS XMM1, XMM2
			ADDSS XMM0, XMM1
		Exit:
			HADDPS XMM0, XMM0 ; reduce the four partial sums to a single value
			HADDPS XMM0, XMM0
			MOV RDX, [RBP+daddr]
			MOVD [RDX], XMM0
		END;
	ELSE
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+len]
			MOV RDX, [RBP+daddr]
			MOVD XMM0, [RDX]
		Loop:
			CMP RCX, 0
			JLE Exit
			MOV DX, [RAX]
			MOVD XMM1, EDX
			MOV DX, [RBX]
			MOVD XMM2, EDX
			VCVTPH2PS XMM1, XMM1
			VCVTPH2PS XMM2, XMM2
			MULSS XMM1, XMM2
			ADDSS XMM0, XMM1
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			SUB RCX, 1
			JMP Loop
		Exit:
			MOV RDX, [RBP+daddr]
			MOVD [RDX], XMM0
		END;
	END;
END InnerProdLoop;
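(* one output element of a matrix product: the inner product is accumulated in single precision and rounded to shortreal only once, at the end *)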
PROCEDURE MatMulLoop(laddr, raddr, daddr: ADDRESS; linc, rinc, len: SIZE);
VAR y: REAL;
BEGIN
	y := 0; (* InnerProdLoop accumulates on top of the destination value *)
	InnerProdLoop(laddr, raddr, ADDRESS OF y, linc, rinc, len);
	SYSTEM.PUT16(daddr, RealToShortreal(y));
END MatMulLoop;
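(* the following three reductions return TRUE only if the relation holds for every element pair; they exit early on the first counterexample *)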
PROCEDURE EqualLoop( laddr, raddr: ADDRESS; linc, rinc, len: SIZE ): BOOLEAN;
VAR
	b: BOOLEAN;
BEGIN
	b := FALSE;
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+len]
		Loop:
			CMP RCX, 0
			JLE EQ
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			COMISS XMM0, XMM1
			JNE Exit
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			SUB RCX, 1
			JMP Loop
		EQ:
			MOV [RBP+b], 1
		Exit:
		END;
	END;
	RETURN b;
END EqualLoop;
PROCEDURE LessThanLoop( laddr, raddr: ADDRESS; linc, rinc, len: SIZE ): BOOLEAN;
VAR
	b: BOOLEAN;
BEGIN
	b := FALSE;
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+len]
		Loop:
			CMP RCX, 0
			JLE LT
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			COMISS XMM0, XMM1
			JAE Exit ; COMISS sets the unsigned condition flags
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			SUB RCX, 1
			JMP Loop
		LT:
			MOV [RBP+b], 1
		Exit:
		END;
	END;
	RETURN b;
END LessThanLoop;
PROCEDURE GreaterThanLoop( laddr, raddr: ADDRESS; linc, rinc, len: SIZE ): BOOLEAN;
VAR
	b: BOOLEAN;
BEGIN
	b := FALSE;
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+len]
		Loop:
			CMP RCX, 0
			JLE GT
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			COMISS XMM0, XMM1
			JBE Exit ; COMISS sets the unsigned condition flags
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			SUB RCX, 1
			JMP Loop
		GT:
			MOV [RBP+b], 1
		Exit:
		END;
	END;
	RETURN b;
END GreaterThanLoop;
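(* elementwise comparisons: CMPSS with predicate 0/1/2/6/5 computes EQ/LT/LE/NLE(GT)/NLT(GE) and yields an all-ones or all-zeros mask, whose low 16 bits are stored per destination element *)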
PROCEDURE EwEqualLoop( laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE );
BEGIN
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2) & (dinc = 1)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
		Loop:
			MOV RDX, [RBP+len]
			CMP RDX, 0
			JLE Exit
			SUB RDX, 1
			MOV [RBP+len], RDX
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			CMPSS XMM0, XMM1, 0
			MOVD EDX, XMM0
			MOV [RCX], DX
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			ADD RCX, [RBP+dinc]
			JMP Loop
		Exit:
		END;
	END;
END EwEqualLoop;
PROCEDURE EwLessThanLoop( laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE );
BEGIN
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2) & (dinc = 1)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
		Loop:
			MOV RDX, [RBP+len]
			CMP RDX, 0
			JLE Exit
			SUB RDX, 1
			MOV [RBP+len], RDX
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			CMPSS XMM0, XMM1, 1
			MOVD EDX, XMM0
			MOV [RCX], DX
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			ADD RCX, [RBP+dinc]
			JMP Loop
		Exit:
		END;
	END;
END EwLessThanLoop;
PROCEDURE EwLessOrEqualThanLoop( laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE );
BEGIN
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2) & (dinc = 1)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
		Loop:
			MOV RDX, [RBP+len]
			CMP RDX, 0
			JLE Exit
			SUB RDX, 1
			MOV [RBP+len], RDX
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			CMPSS XMM0, XMM1, 2
			MOVD EDX, XMM0
			MOV [RCX], DX
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			ADD RCX, [RBP+dinc]
			JMP Loop
		Exit:
		END;
	END;
END EwLessOrEqualThanLoop;
PROCEDURE EwGreaterThanLoop( laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE );
BEGIN
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2) & (dinc = 1)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
		Loop:
			MOV RDX, [RBP+len]
			CMP RDX, 0
			JLE Exit
			SUB RDX, 1
			MOV [RBP+len], RDX
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			CMPSS XMM0, XMM1, 6
			MOVD EDX, XMM0
			MOV [RCX], DX
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			ADD RCX, [RBP+dinc]
			JMP Loop
		Exit:
		END;
	END;
END EwGreaterThanLoop;
PROCEDURE EwGreaterOrEqualThanLoop( laddr, raddr, daddr: ADDRESS; linc, rinc, dinc, len: SIZE );
BEGIN
	(*!TODO: optimize contiguous case *)
	IF FALSE(*(linc = 2) & (rinc = 2) & (dinc = 1)*) THEN
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RBX, [RBP+raddr]
			MOV RCX, [RBP+daddr]
		Loop:
			MOV RDX, [RBP+len]
			CMP RDX, 0
			JLE Exit
			SUB RDX, 1
			MOV [RBP+len], RDX
			MOV DX, [RAX]
			MOVD XMM0, EDX
			MOV DX, [RBX]
			MOVD XMM1, EDX
			VCVTPH2PS XMM0, XMM0
			VCVTPH2PS XMM1, XMM1
			CMPSS XMM0, XMM1, 5
			MOVD EDX, XMM0
			MOV [RCX], DX
			ADD RAX, [RBP+linc]
			ADD RBX, [RBP+rinc]
			ADD RCX, [RBP+dinc]
			JMP Loop
		Exit:
		END;
	END;
END EwGreaterOrEqualThanLoop;
PROCEDURE SqrtLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
BEGIN
	IF (linc = 2) & (dinc = 2) THEN
		CODE
			MOV RAX, [RBP+laddr]
			MOV RCX, [RBP+daddr]
			MOV RDX, [RBP+len]
		Loop4:
			CMP RDX, 4
			JL Remainder2
			MOVQ XMM0, [RAX]
			VCVTPH2PS XMM0, XMM0
			SQRTPS XMM0, XMM0
			VCVTPS2PH XMM0, XMM0, 0
			MOVQ [RCX], XMM0
			ADD RAX, 8
			ADD RCX, 8
			SUB RDX, 4
			JMP Loop4
		Remainder2:
			CMP RDX, 2
			JL Remainder1
			MOVD XMM0, [RAX]
			VCVTPH2PS XMM0, XMM0
			SQRTPS XMM0, XMM0
			VCVTPS2PH XMM0, XMM0, 0
			MOVD [RCX], XMM0
			ADD RAX, 4
			ADD RCX, 4
			SUB RDX, 2
		Remainder1:
			CMP RDX, 0
			JLE Exit
			MOV AX, [RAX]
			MOVD XMM0, EAX
			VCVTPH2PS XMM0, XMM0
			SQRTSS XMM0, XMM0
			VCVTPS2PH XMM0, XMM0, 0
			MOVD EAX, XMM0
			MOV [RCX], AX
		Exit:
		END;
	ELSE (* striding single element access *)
		CODE
			MOV RAX, [RBP+laddr]
			MOV RCX, [RBP+daddr]
			MOV RDX, [RBP+len]
		Loop:
			CMP RDX, 0
			JLE Exit
			MOV BX, [RAX]
			MOVD XMM0, EBX
			VCVTPH2PS XMM0, XMM0
			SQRTSS XMM0, XMM0
			VCVTPS2PH XMM0, XMM0, 0
			MOVD EBX, XMM0
			MOV [RCX], BX
			ADD RAX, [RBP+linc]
			ADD RCX, [RBP+dinc]
			SUB RDX, 1
			JMP Loop
		Exit:
		END;
	END;
END SqrtLoop;
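(* the transcendental loops have no packed SSE form: each element is widened to single precision, evaluated on the x87 FPU and rounded back to shortreal *)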
PROCEDURE SinLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
CODE
	MOV RAX, [RBP+laddr]
	MOV RCX, [RBP+daddr]
	MOV RDX, [RBP+len]
	SUB RSP, 4 ; create a local variable of type REAL
Loop:
	CMP RDX, 0
	JLE Exit
	MOV BX, [RAX]
	MOVD XMM0, EBX
	VCVTPH2PS XMM0, XMM0
	MOVD [RSP], XMM0
	FLD [RSP]
	FSIN
	FSTP [RSP] ; pop so that the x87 stack stays balanced across iterations
	MOVSS XMM0, [RSP]
	VCVTPS2PH XMM0, XMM0, 0
	MOVD EBX, XMM0
	MOV [RCX], BX
	ADD RAX, [RBP+linc]
	ADD RCX, [RBP+dinc]
	SUB RDX, 1
	JMP Loop
Exit:
	ADD RSP, 4 ; remove local variable
END SinLoop;
PROCEDURE CosLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
CODE
	MOV RAX, [RBP+laddr]
	MOV RCX, [RBP+daddr]
	MOV RDX, [RBP+len]
	SUB RSP, 4 ; create a local variable of type REAL
Loop:
	CMP RDX, 0
	JLE Exit
	MOV BX, [RAX]
	MOVD XMM0, EBX
	VCVTPH2PS XMM0, XMM0
	MOVD [RSP], XMM0
	FLD [RSP]
	FCOS
	FSTP [RSP] ; pop so that the x87 stack stays balanced across iterations
	MOVSS XMM0, [RSP]
	VCVTPS2PH XMM0, XMM0, 0
	MOVD EBX, XMM0
	MOV [RCX], BX
	ADD RAX, [RBP+linc]
	ADD RCX, [RBP+dinc]
	SUB RDX, 1
	JMP Loop
Exit:
	ADD RSP, 4 ; remove local variable
END CosLoop;
PROCEDURE ArctanLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
CODE
	MOV RAX, [RBP+laddr]
	MOV RCX, [RBP+daddr]
	MOV RDX, [RBP+len]
	SUB RSP, 4 ; create a local variable of type REAL
Loop:
	CMP RDX, 0
	JLE Exit
	MOV BX, [RAX]
	MOVD XMM0, EBX
	VCVTPH2PS XMM0, XMM0
	MOVD [RSP], XMM0
	FLD [RSP]
	FLD1
	FPATAN ; arctan(x/1)
	FSTP [RSP] ; pop so that the x87 stack stays balanced across iterations
	MOVSS XMM0, [RSP]
	VCVTPS2PH XMM0, XMM0, 0
	MOVD EBX, XMM0
	MOV [RCX], BX
	ADD RAX, [RBP+linc]
	ADD RCX, [RBP+dinc]
	SUB RDX, 1
	JMP Loop
Exit:
	ADD RSP, 4 ; remove local variable
END ArctanLoop;
PROCEDURE LnLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
CODE
	MOV RAX, [RBP+laddr]
	MOV RCX, [RBP+daddr]
	MOV RDX, [RBP+len]
	SUB RSP, 4 ; create a local variable of type REAL
Loop:
	CMP RDX, 0
	JLE Exit
	MOV BX, [RAX]
	MOVD XMM0, EBX
	VCVTPH2PS XMM0, XMM0
	MOVD [RSP], XMM0
	FLD1
	FLDL2E
	FDIVP ; ln(2) = 1/log2(e)
	FLD [RSP]
	FYL2X ; ln(2)*log2(x) = ln(x)
	FSTP [RSP] ; pop so that the x87 stack stays balanced across iterations
	MOVSS XMM0, [RSP]
	VCVTPS2PH XMM0, XMM0, 0
	MOVD EBX, XMM0
	MOV [RCX], BX
	ADD RAX, [RBP+linc]
	ADD RCX, [RBP+dinc]
	SUB RDX, 1
	JMP Loop
Exit:
	ADD RSP, 4 ; remove local variable
END LnLoop;
PROCEDURE ExpLoop(laddr, daddr: ADDRESS; linc, dinc, len: SIZE);
CODE
	MOV RAX, [RBP+laddr]
	MOV RCX, [RBP+daddr]
	MOV RDX, [RBP+len]
	SUB RSP, 4 ; create a local variable of type REAL
Loop:
	CMP RDX, 0
	JLE Exit
	MOV BX, [RAX]
	MOVD XMM0, EBX
	VCVTPH2PS XMM0, XMM0
	MOVD [RSP], XMM0
	FLD [RSP]
	FLDL2E
	FMULP ; z := x*log2(e)
	FLD ST0
	FRNDINT ; n := round(z)
	FXCH ST1
	FSUB ST0, ST1 ; f := z-n
	F2XM1
	FLD1
	FADDP ; 2^f
	FSCALE ; 2^f * 2^n = e^x
	FSTP ST1
	FSTP [RSP] ; pop so that the x87 stack stays balanced across iterations
	MOVSS XMM0, [RSP]
	VCVTPS2PH XMM0, XMM0, 0
	MOVD EBX, XMM0
	MOV [RCX], BX
	ADD RAX, [RBP+linc]
	ADD RCX, [RBP+dinc]
	SUB RDX, 1
	JMP Loop
Exit:
	ADD RSP, 4 ; remove local variable
END ExpLoop;
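(* scalar conversion helpers; immediate 0 in VCVTPS2PH selects round-to-nearest-even *)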
PROCEDURE ShortrealToReal(x: UNSIGNED16): REAL;
CODE
	MOV AX, [RBP+x] ; load U16
	MOVD XMM0, EAX
	VCVTPH2PS XMM0, XMM0 ; result is returned in XMM0
END ShortrealToReal;
PROCEDURE RealToShortreal(x: REAL): UNSIGNED16;
CODE
	MOVD XMM0, [RBP+x]
	VCVTPS2PH XMM0, XMM0, 0
	MOVD RAX, XMM0 ; result is returned in RAX
END RealToShortreal;
VAR
	eps-: SHORTREAL;
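(* eps is initialized to 2^-10, the distance from 1.0 to the next larger shortreal (10 fraction bits) *)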
PROCEDURE InitMod;
VAR i: SIZE;
BEGIN
	eps := SHORT(1.0);
	FOR i := 0 TO 9 DO
		eps := eps / SHORT(2.0);
	END;
END InitMod;
TYPE
	SHORTREAL* = RECORD
		value*: UNSIGNED16;

		PROCEDURE Sqrt*(): SHORTREAL;
		VAR y: SHORTREAL;
		BEGIN
			y.value := Sqrt0(value);
			RETURN y;
		END Sqrt;

		PROCEDURE Sin*(): SHORTREAL;
		VAR y: SHORTREAL;
		BEGIN
			y.value := Sin0(value);
			RETURN y;
		END Sin;

		PROCEDURE Cos*(): SHORTREAL;
		VAR y: SHORTREAL;
		BEGIN
			y.value := Cos0(value);
			RETURN y;
		END Cos;

		PROCEDURE Arctan*(): SHORTREAL;
		VAR y: SHORTREAL;
		BEGIN
			y.value := Arctan0(value);
			RETURN y;
		END Arctan;

		PROCEDURE Ln*(): SHORTREAL;
		VAR y: SHORTREAL;
		BEGIN
			y.value := Ln0(value);
			RETURN y;
		END Ln;

		PROCEDURE Exp*(): SHORTREAL;
		VAR y: SHORTREAL;
		BEGIN
			y.value := Exp0(value);
			RETURN y;
		END Exp;
	END;
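(* Usage sketch (illustrative only; assumes code running inside this module):
	VAR x, y: SHORTREAL;
	x.value := RealToShortreal(2.0);
	y := x.Sqrt();	(* y holds the half-precision square root of 2 *)
*)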
BEGIN
	ASSERT(SIZEOF(SHORTREAL) = 2);
	InitMod;
END Shortreal.

System.FreeDownTo Shortreal ~