|
@@ -1,222 +0,0 @@
|
|
|
-package in.ocsf.bee.freigeld.core.cl;
|
|
|
-
|
|
|
-import com.aparapi.Kernel;
|
|
|
-import com.aparapi.Range;
|
|
|
-import com.aparapi.device.Device;
|
|
|
-import com.aparapi.device.OpenCLDevice;
|
|
|
-import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
-
|
|
|
-import java.util.HashMap;
|
|
|
-import java.util.Map;
|
|
|
-import java.util.concurrent.atomic.AtomicInteger;
|
|
|
-import java.util.logging.Logger;
|
|
|
-
|
|
|
-public class Sample1 implements Runnable {
|
|
|
-
|
|
|
- private final ObjectMapper objectMapper = new ObjectMapper();
|
|
|
-
|
|
|
- private final Logger log = Logger.getLogger(getClass().getName());
|
|
|
- private final Integer TOTAL_MEM;
|
|
|
- private OpenCLDevice device;
|
|
|
-
|
|
|
- public Sample1() {
|
|
|
- Device _device = Device.best();
|
|
|
- if (_device instanceof OpenCLDevice)
|
|
|
- this.device = (OpenCLDevice) _device;
|
|
|
- else
|
|
|
- throw new RuntimeException("no opencl device");
|
|
|
-
|
|
|
- log.info(device.getShortDescription() + " " + device.getName());
|
|
|
- TOTAL_MEM = Long.valueOf(Math.min(128, device.getMaxMemAllocSize() / 1024 / 1024 / 2)).intValue() * 1024 * 1024;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void run() {
|
|
|
- final int N = 4;
|
|
|
- final int G = 32;
|
|
|
-
|
|
|
- final int pB = 4 * 1024;
|
|
|
- final int pBtotal = 0;
|
|
|
- final int pBfrom = 1;
|
|
|
- final int pBto = 2;
|
|
|
- final int pBpos = 3;
|
|
|
- final int pBlen = 4;
|
|
|
-
|
|
|
- final byte[] mm = new byte[TOTAL_MEM];
|
|
|
- final long[] arg = new long[3];
|
|
|
- final long[][] mem = new long[N][G];
|
|
|
- final long[][] res = new long[2][16];
|
|
|
- final AtomicInteger[] at = new AtomicInteger[]{new AtomicInteger(0), new AtomicInteger(0), new AtomicInteger(0)};
|
|
|
-
|
|
|
- Kernel kernel = new DryRunnable() {
|
|
|
-
|
|
|
- @Constant
|
|
|
- boolean[] dryRun = new boolean[]{false};
|
|
|
-
|
|
|
- @PrivateMemorySpace(pB)
|
|
|
- byte[] membuf = new byte[pB];
|
|
|
- @PrivateMemorySpace(5)
|
|
|
- int[] bufposlimits = new int[5];
|
|
|
-
|
|
|
- private void mem2buf() {
|
|
|
- int p0 = bufposlimits[pBpos];
|
|
|
- int len = bufposlimits[pBlen];
|
|
|
- int from = bufposlimits[pBfrom];
|
|
|
- int p = p0;
|
|
|
- while (p < p0 + len - 1) {
|
|
|
- membuf[p - p0] = mm[from + p];
|
|
|
- p++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private void buf2mem() {
|
|
|
- int p0 = bufposlimits[pBpos];
|
|
|
- int len = bufposlimits[pBlen];
|
|
|
- int from = bufposlimits[pBfrom];
|
|
|
- int p = p0;
|
|
|
- while (p < p0 + len - 1) {
|
|
|
- mm[from + p] = membuf[p - p0];
|
|
|
- p++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private void initbuf() {
|
|
|
- int x = dryRun[0] ? 1 : getGlobalId(0);
|
|
|
- int xN = dryRun[0] ? G : getGlobalSize(0);
|
|
|
- int block = mm.length / xN;
|
|
|
- bufposlimits[pBtotal] = block;
|
|
|
- int from = x * block;
|
|
|
- int to = (x + 1) * block - 1;
|
|
|
- bufposlimits[pBfrom] = from;
|
|
|
- bufposlimits[pBto] = to;
|
|
|
- bufpos(0);
|
|
|
- }
|
|
|
-
|
|
|
- private void bufpos(int pos) {
|
|
|
- if (pos >= bufposlimits[pBpos] && pos < (bufposlimits[pBpos] + bufposlimits[pBlen]))
|
|
|
- return;
|
|
|
-
|
|
|
- bufposlimits[pBpos] = max(0, min(bufposlimits[pBtotal] - pB, pos));
|
|
|
- bufposlimits[pBlen] = min(bufposlimits[pBtotal] - bufposlimits[pBpos], pB);
|
|
|
- mem2buf();
|
|
|
- }
|
|
|
-
|
|
|
- private void memset(int pos, byte val) {
|
|
|
- bufpos(pos);
|
|
|
- int pos0 = bufposlimits[pBpos];
|
|
|
- membuf[pos - pos0] = val;
|
|
|
- }
|
|
|
-
|
|
|
- private byte memget(int pos) {
|
|
|
- bufpos(pos);
|
|
|
- int pos0 = bufposlimits[pBpos];
|
|
|
- return membuf[pos - pos0];
|
|
|
- }
|
|
|
-
|
|
|
- private void test() {
|
|
|
- int x = getGlobalId(0);
|
|
|
- int y = getGlobalId(1);
|
|
|
- int z = getGlobalId(2);
|
|
|
- int xN = getGlobalSize(0);
|
|
|
- int yN = getGlobalSize(1);
|
|
|
- int zN = getGlobalSize(2);
|
|
|
- int gx = getGroupId(0);
|
|
|
- int gy = getGroupId(1);
|
|
|
- int gz = getGroupId(2);
|
|
|
- int gxN = getNumGroups(0);
|
|
|
- int gyN = getNumGroups(1);
|
|
|
- int gzN = getNumGroups(2);
|
|
|
- int lx = getLocalId(0);
|
|
|
- int ly = getLocalId(1);
|
|
|
- int lz = getLocalId(2);
|
|
|
- int lxN = getLocalSize(0);
|
|
|
- int lyN = getLocalSize(1);
|
|
|
- int lzN = getLocalSize(2);
|
|
|
- int p = getPassId();
|
|
|
-
|
|
|
- mem[0][x] = (x + 1) * 1000000 + (gx + 1) * 1000 + (lx + 1);
|
|
|
- mem[1][y] = (y + 1) * 1000000 + (gy + 1) * 1000 + (ly + 1);
|
|
|
- mem[2][z] = (z + 1) * 1000000 + (gz + 1) * 1000 + (lz + 1);
|
|
|
-
|
|
|
- res[0][0] = p;
|
|
|
- res[0][1] = xN;
|
|
|
- res[0][2] = yN;
|
|
|
- res[0][3] = zN;
|
|
|
- res[0][4] = gxN;
|
|
|
- res[0][5] = gyN;
|
|
|
- res[0][6] = gzN;
|
|
|
- res[0][7] = lxN;
|
|
|
- res[0][8] = lyN;
|
|
|
- res[0][9] = lzN;
|
|
|
-
|
|
|
- atomicInc(at[0]);
|
|
|
- atomicInc(at[1]);
|
|
|
- atomicInc(at[2]);
|
|
|
- int block = mm.length / xN;
|
|
|
- int from = x * block;
|
|
|
- int to = (x + 1) * block - 1;
|
|
|
- mem[3][x] = to;
|
|
|
- int i = from;
|
|
|
- while (i <= to) {
|
|
|
- mm[i] = (byte) (x + 1);
|
|
|
- i++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- @Override
|
|
|
- public void run() {
|
|
|
- int x = getGlobalId(0);
|
|
|
- initbuf();
|
|
|
- memset(0, (byte) 1);
|
|
|
- byte val = memget(0);
|
|
|
- if (val == 1) memset(1, (byte) 1);
|
|
|
- buf2mem();
|
|
|
- }
|
|
|
-
|
|
|
- public void dryRun() {
|
|
|
- dryRun[0] = true;
|
|
|
- run();
|
|
|
- dryRun[0] = false;
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- Range range = Range.create(device, G);
|
|
|
- /*switch (device.getMaxWorkItemDimensions()){
|
|
|
- case 1:
|
|
|
- range = Range.create(device, G);
|
|
|
- break;
|
|
|
- case 2:
|
|
|
- range = Range.create2D(device, G, G);
|
|
|
- break;
|
|
|
- case 3:
|
|
|
- range = Range.create3D(device, G, G, G);
|
|
|
- break;
|
|
|
- default:
|
|
|
- throw new RuntimeException("not supported");
|
|
|
- }*/
|
|
|
- try {
|
|
|
- ((DryRunnable) kernel).dryRun();
|
|
|
- kernel.execute(range, 1);
|
|
|
- } catch (Exception e) {
|
|
|
- throw new RuntimeException("dry run failed", e);
|
|
|
- }
|
|
|
- Map<String, Object> info = new HashMap<>();
|
|
|
- info.put("time", kernel.getAccumulatedExecutionTime() / 1000.0);
|
|
|
- info.put("memg", TOTAL_MEM / 1024.0 / 1024.0);
|
|
|
- info.put("meml", device.getLocalMemSize() / 1024.0);
|
|
|
- try {
|
|
|
- log.info(objectMapper.writer().withDefaultPrettyPrinter().writeValueAsString(info));
|
|
|
- log.info(objectMapper.writeValueAsString(mem[0]));
|
|
|
- log.info(objectMapper.writeValueAsString(mem[1]));
|
|
|
- log.info(objectMapper.writeValueAsString(mem[2]));
|
|
|
- log.info(objectMapper.writeValueAsString(res[0]));
|
|
|
- log.info(objectMapper.writeValueAsString(res[1]));
|
|
|
- log.info(objectMapper.writeValueAsString(at));
|
|
|
- } catch (Exception e) {
|
|
|
- throw new RuntimeException(e);
|
|
|
- }
|
|
|
- kernel.dispose();
|
|
|
- }
|
|
|
-
|
|
|
-}
|