- 01
 - 02
 - 03
 - 04
 - 05
 - 06
 - 07
 - 08
 - 09
 - 10
 - 11
 - 12
 - 13
 - 14
 - 15
 - 16
 - 17
 - 18
 - 19
 - 20
 - 21
 - 22
 - 23
 - 24
 - 25
 - 26
 - 27
 - 28
 - 29
 - 30
 - 31
 - 32
 - 33
 - 34
 - 35
 - 36
 - 37
 - 38
 - 39
 - 40
 - 41
 - 42
 - 43
 - 44
 - 45
 - 46
 - 47
 - 48
 - 49
 - 50
 - 51
 - 52
 - 53
 - 54
 - 55
 - 56
 - 57
 - 58
 - 59
 - 60
 - 61
 - 62
 - 63
 - 64
 - 65
 - 66
 - 67
 - 68
 - 69
 - 70
 - 71
 - 72
 - 73
 - 74
 - 75
 - 76
 - 77
 - 78
 - 79
 - 80
 - 81
 - 82
 - 83
 - 84
 - 85
 - 86
 - 87
 - 88
 - 89
 - 90
 - 91
 - 92
 
                        // OpenCL C source of the `add1` kernel.
                        //
                        // Kernel arguments:
                        //   A   - input plane, read as `A[i * m + j]`
                        //   BBB - output plane, written as `BBB[i * m + j]`
                        //   B   - scratch storage with 64 layers, indexed `B[k * m * n + i * m + j]`
                        //   m   - row stride / bound for `j`
                        //   n   - bound for `i`
                        //
                        // Each work-item loads one sample into the 64-entry local tile `Blo`,
                        // evaluates the 8x8 cosine-transform sum for its local position, stores the
                        // scaled coefficient into layer `k` of `B`, and finally writes the mean of
                        // the 64 layers into `BBB`.
                        //
                        // The `x * 8 + y` tile indexing assumes an 8x8 work-group in dimensions 0/1.
                        let src1 = r#"
        __kernel void add1(__global float* A, __global float* BBB, __global float* B, int m, int n)
        {
            __local float Blo[64];
            int x = get_local_id(0);
            int y = get_local_id(1);
            int i = get_global_id(0);
            int j = get_global_id(1);
            int k = get_global_id(2);
            // Layer k operates on coordinates shifted by (k / 8, k % 8).
            i += k / 8;
            j += k % 8;
            // Do NOT return here: every work-item of the work-group has to reach
            // both barriers below. Out-of-range work-items contribute a zero
            // sample instead of leaving their tile slot uninitialised.
            int inside = (i < n && j < m);
            Blo[x * 8 + y] = inside ? A[i * m + j] : 0.0f;
            barrier(CLK_LOCAL_MEM_FENCE);
            float BB = 0;
            for (int xx = 0; xx < 8; ++xx)
                for (int yy = 0; yy < 8; ++yy)
                {
                    float c = (2 * xx + 1) * x * 3.1415926535 / 16;
                    float cc = (2 * yy + 1) * y * 3.1415926535 / 16;
                    c = cos(c);
                    cc = cos(cc);
                    BB += Blo[xx * 8 + yy] * c * cc;
                }
            // Normalisation factors: 1/sqrt(2) for index 0, 1 otherwise.
            float Ci, Cj;
            if (x == 0)
                Ci = 1 / 1.4142135623;
            else
                Ci = 1;
            if (y == 0)
                Cj = 1 / 1.4142135623;
            else
                Cj = 1;
            if (inside)
                B[k * m * n + i * m + j] = Ci * Cj / 4 * BB;
            barrier(CLK_LOCAL_MEM_FENCE);
            // Past the last barrier an early exit is safe.
            if (!inside)
                return;
            // NOTE(review): the loop below reads B entries written by work-items
            // with other get_global_id(2) values, which may belong to other
            // work-groups. A work-group barrier does not order those writes;
            // a separate kernel launch for the averaging step would.
            i = get_global_id(0);
            j = get_global_id(1);
            float summ = 0;
            for (int ii = 0; ii < 64; ++ii)
                summ += B[ii * m * n + i * m + j];
            BBB[i * m + j] = summ / 64;
        }
    "#;
// Build the `ProQue` from the kernel source `src1` with dims (hi, wi, 64).
// `expect` is used instead of a bare `unwrap` so a failure names the step
// that went wrong.
let pro_que = ProQue::builder()
    .src(src1)
    .dims((hi, wi, 64))
    .build()
    .expect("failed to build the OpenCL ProQue from kernel source `src1`");

// Input buffer over `Resr`: (hi, wi) elements, flagged read-only and
// `use_host_ptr`, attached to the ProQue's queue.
let matr11 = Buffer::builder()
    .queue(pro_que.queue().clone())
    .flags(MemFlags::new().read_only().use_host_ptr())
    .dims((hi, wi))
    .host_data(&Resr)
    .build()
    .expect("failed to create OpenCL buffer `matr11` from `Resr`");

// Input buffer over `Resg`, configured the same way as `matr11`.
let matg11 = Buffer::builder()
    .queue(pro_que.queue().clone())
    .flags(MemFlags::new().read_only().use_host_ptr())
    .dims((hi, wi))
    .host_data(&Resg)
    .build()
    .expect("failed to create OpenCL buffer `matg11` from `Resg`");
...
    // Launch `add1` three times, once per (input, output, scratch) buffer set.
    // `kernel` is declared outside the scope and reassigned for every launch.
    let mut kernel;
    {
        // The kernel declares `int m, int n`, so the dimensions are passed as i32.
        // NOTE(review): `as` truncates silently if a dimension exceeds i32::MAX —
        // confirm the types/ranges of `wi` and `hi` at the declaration site.
        let wi = wi as i32;
        let hi = hi as i32;

        // Argument order matches the kernel signature: A, BBB, B, m (= wi), n (= hi).
        kernel = pro_que
            .create_kernel("add1")
            .expect("failed to create kernel `add1` for `matr11`")
            .arg_buf(&matr11)
            .arg_buf(&resr11)
            .arg_buf(&bor1)
            .arg_scl(wi)
            .arg_scl(hi);
        // Local work size (8, 8) in dimensions 0/1.
        kernel
            .lws((8, 8))
            .enq()
            .expect("failed to enqueue kernel `add1` for `matr11`");

        kernel = pro_que
            .create_kernel("add1")
            .expect("failed to create kernel `add1` for `matg11`")
            .arg_buf(&matg11)
            .arg_buf(&resg11)
            .arg_buf(&bog1)
            .arg_scl(wi)
            .arg_scl(hi);
        kernel
            .lws((8, 8))
            .enq()
            .expect("failed to enqueue kernel `add1` for `matg11`");

        kernel = pro_que
            .create_kernel("add1")
            .expect("failed to create kernel `add1` for `matb11`")
            .arg_buf(&matb11)
            .arg_buf(&resb11)
            .arg_buf(&bob1)
            .arg_scl(wi)
            .arg_scl(hi);
        kernel
            .lws((8, 8))
            .enq()
            .expect("failed to enqueue kernel `add1` for `matb11`");
    }
                                 
        
Комментарии (8) RSS
Добавить комментарий