Subversion Repositories dashGPS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 mjames 1
/* ----------------------------------------------------------------------
2
* Copyright (C) 2010-2018 Arm Limited. All rights reserved.
3
*
4
*
5
* Project:       CMSIS NN Library
6
* Title:         arm_nnexamples_nn_test.cpp
7
*
8
* Description:   Example code for NN kernel testing.
9
*
10
* Target Processor: Cortex-M cores
11
*
12
* Redistribution and use in source and binary forms, with or without
13
* modification, are permitted provided that the following conditions
14
* are met:
15
*   - Redistributions of source code must retain the above copyright
16
*     notice, this list of conditions and the following disclaimer.
17
*   - Redistributions in binary form must reproduce the above copyright
18
*     notice, this list of conditions and the following disclaimer in
19
*     the documentation and/or other materials provided with the
20
*     distribution.
21
*   - Neither the name of ARM LIMITED nor the names of its contributors
22
*     may be used to endorse or promote products derived from this
23
*     software without specific prior written permission.
24
*
25
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
* POSSIBILITY OF SUCH DAMAGE.
37
* -------------------------------------------------------------------- */
38
 
39
#include "arm_nnexamples_nn_test.h"
40
 
41
// Test selection switches: main() compiles one "#ifdef TEST_<name>" section
// per macro below.  The sigmoid and tanh sections are disabled here; they
// only print input/output pairs instead of comparing against a reference.
//#define TEST_SIGMOID
42
//#define TEST_TANH
43
#define TEST_POOL
44
#define TEST_RELU
45
#define TEST_IP
46
#define TEST_CONV
47
#define TEST_NONSQUARE
48
#define TEST_NNMULT
49
 
50
// Cursor into test_flags.  main() uses it to fill the table and to scan it
// at the end.  NOTE(review): presumably the verify_results_* helpers write
// their verdict at this index and advance it -- confirm in the test header.
int test_index = 0;
51
// Result table with 50 slots.  main() presets every slot to -1 ("unused");
// during the final scan a non-zero slot marks a failed comparison and the
// first -1 ends the scan.
q7_t test_flags[50];
52
// Overall verdict, set by main() once the result table has been scanned.
bool test_pass;
53
 
54
int main()
55
{
56
    printf("start tests\n");
57
 
58
    srand(1);
59
 
60
    // common pointers for testing data
61
    q7_t     *test1;
62
    q15_t    *test2;
63
    q7_t     *test3;
64
    q15_t    *test4;
65
 
66
    for (test_index = 0; test_index<50; test_index++) {
67
        test_flags[test_index] = -1;
68
    }
69
    test_index = 0;
70
 
71
#ifdef TEST_NNMULT
72
#define NNMULT_DIM 128
73
    test1 = new q7_t[NNMULT_DIM*2];
74
    test2 = new q15_t[NNMULT_DIM*2];
75
    test3 = new q7_t[NNMULT_DIM*2];
76
    test4 = new q15_t[NNMULT_DIM*2];
77
 
78
    q7_t * mult_out_q7 = test3;
79
    q7_t * mult_ref_q7 = test3 + NNMULT_DIM;
80
    q15_t * mult_out_q15 = test4;
81
    q15_t * mult_ref_q15 = test4 + NNMULT_DIM;
82
 
83
    for (int i=0;i<NNMULT_DIM*2;i++) {
84
        test1[i] = (rand() % 256 - 128);
85
        test2[i] = (rand() % 65536 - 32768);
86
    }
87
 
88
    // Test q7
89
    arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 5, NNMULT_DIM);
90
 
91
    arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 5, NNMULT_DIM);
92
 
93
    verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
94
 
95
    arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 9, NNMULT_DIM);
96
 
97
    arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 9, NNMULT_DIM);
98
 
99
    verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
100
 
101
    // Test q15
102
    arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 13, NNMULT_DIM);
103
 
104
    arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 13, NNMULT_DIM);
105
 
106
    verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
107
 
108
    arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 18, NNMULT_DIM);
109
 
110
    arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 18, NNMULT_DIM);
111
 
112
    verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
113
 
114
#endif
115
 
116
#ifdef TEST_SIGMOID
117
 
118
#define SIGMOID_DIM 128
119
 
120
    /* This part tests the running of sigmoid functions */
121
 
122
    test1 = new q7_t[SIGMOID_DIM];
123
    test2 = new q15_t[SIGMOID_DIM];
124
    test3 = new q7_t[SIGMOID_DIM];
125
    test4 = new q15_t[SIGMOID_DIM];
126
 
127
    srand(1);
128
 
129
    for (int i = 0; i < SIGMOID_DIM; i++)
130
    {
131
        test1[i] = (rand() % 256 - 128);
132
        test2[i] = (rand() % 65536 - 32768);
133
        test3[i] = test1[i];
134
        test4[i] = test2[i];
135
    }
136
 
137
    arm_nn_activations_direct_q7(test3, SIGMOID_DIM, 3, ARM_SIGMOID);
138
 
139
    for (int i = 0; i < SIGMOID_DIM; i++)
140
    {
141
        printf("in: %d  out: %d\n", test1[i], test3[i]);
142
    }
143
 
144
    printf("start testing q15_t sigmoid\n\n");
145
 
146
    arm_nn_activations_direct_q15(test4, SIGMOID_DIM, 3, ARM_SIGMOID);
147
 
148
    for (int i = 0; i < SIGMOID_DIM; i++)
149
    {
150
        printf("in: %d  out: %d\n", test2[i], test4[i]);
151
    }
152
 
153
    delete[]test1;
154
    delete[]test2;
155
    delete[]test3;
156
    delete[]test4;
157
 
158
#endif
159
 
160
#ifdef TEST_TANH
161
 
162
#define TANH_DIM 128
163
 
164
    /* This part tests the running of sigmoid functions */
165
 
166
    test1 = new q7_t[TANH_DIM];
167
    test2 = new q15_t[TANH_DIM];
168
    test3 = new q7_t[TANH_DIM];
169
    test4 = new q15_t[TANH_DIM];
170
 
171
    srand(1);
172
 
173
    for (int i = 0; i < TANH_DIM; i++)
174
    {
175
        test1[i] = (rand() % 256 - 128);
176
        test2[i] = (rand() % 65536 - 32768);
177
        test3[i] = test1[i];
178
        test4[i] = test2[i];
179
    }
180
 
181
    arm_nn_activations_direct_q7(test3, TANH_DIM, 3, ARM_TANH);
182
 
183
    printf("start testing q7_t tanh\n\n");
184
 
185
    for (int i = 0; i < TANH_DIM; i++)
186
    {
187
        printf("in: %d  out: %d\n", test1[i], test3[i]);
188
    }
189
 
190
    printf("start testing q15_t tanh\n\n");
191
 
192
    arm_nn_activations_direct_q15(test4, TANH_DIM, 3, ARM_TANH);
193
 
194
    for (int i = 0; i < TANH_DIM; i++)
195
    {
196
        printf("in: %d  out: %d\n", test2[i], test4[i]);
197
    }
198
 
199
    delete[]test1;
200
    delete[]test2;
201
    delete[]test3;
202
    delete[]test4;
203
 
204
#endif
205
 
206
#ifdef TEST_POOL
207
 
208
#define POOL_IM_DIM 32
209
#define POOL_IM_CH 8
210
 
211
    test1 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH * 2];
212
    test2 = new q15_t[POOL_IM_DIM * POOL_IM_CH];
213
    test3 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH];
214
 
215
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
216
    {
217
        test1[i] = (rand() % 256 - 128);
218
    }
219
 
220
    q7_t     *img_in = test1 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH;
221
    q7_t     *pool_out_ref = test3;
222
    q7_t     *pool_out_opt = test3 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH / 2;
223
 
224
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
225
    {
226
        test3[i] = 0;
227
    }
228
 
229
    // copy over the img input
230
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
231
    {
232
        img_in[i] = test1[i];
233
    }
234
 
235
    initialize_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
236
 
237
    printf("Start maxpool reference implementation\n");
238
 
239
    arm_maxpool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
240
 
241
    // copy over the img input
242
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
243
    {
244
        img_in[i] = test1[i];
245
    }
246
 
247
    printf("Start maxpool opt implementation\n");
248
 
249
    arm_maxpool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
250
 
251
    verify_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
252
 
253
    // copy over the img input
254
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
255
    {
256
        img_in[i] = test1[i];
257
    }
258
 
259
    // copy over the img input
260
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
261
    {
262
        img_in[i] = test1[i];
263
    }
264
 
265
    printf("Start avepool ref implementation\n");
266
 
267
    arm_avepool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
268
 
269
    // copy over the img input
270
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
271
    {
272
        img_in[i] = test1[i];
273
    }
274
 
275
    printf("Start avepool opt implementation\n");
276
 
277
    arm_avepool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
278
 
279
    // special check here
280
    bool      if_ave_pool_match = true;
281
    for (int i = 0; i < POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH; i++)
282
    {
283
        // we tolerate at most difference of 1 here because of rounding errors
284
        if (pool_out_ref[i] - pool_out_opt[i] >= 2 || pool_out_opt[i] - pool_out_ref[i] >= 2)
285
        {
286
            printf("Output mismatch at %d, expected %d, actual %d\n", i, pool_out_ref[i], pool_out_opt[i]);
287
            if_ave_pool_match = false;
288
        }
289
    }
290
    if (if_ave_pool_match == true)
291
    {
292
        printf("Outputs match.\n");
293
    }
294
 
295
    delete[]test1;
296
    delete[]test2;
297
    delete[]test3;
298
 
299
#endif
300
 
301
#ifdef TEST_RELU
302
 
303
#define RELU_DIM 127
304
 
305
    test1 = new q7_t[RELU_DIM];
306
    test2 = new q15_t[RELU_DIM];
307
    test3 = new q7_t[RELU_DIM];
308
    test4 = new q15_t[RELU_DIM];
309
 
310
    for (int i = 0; i < RELU_DIM; i++)
311
    {
312
        test1[i] = (rand() % 256 - 128);
313
        test2[i] = (rand() % 65536 - 32768);
314
        test3[i] = test1[i];
315
        test4[i] = test2[i];
316
    }
317
 
318
    q7_t     *relu_ref_data_q7 = test1;
319
    q7_t     *relu_opt_data_q7 = test3;
320
    q15_t    *relu_ref_data_q15 = test2;
321
    q15_t    *relu_opt_data_q15 = test4;
322
 
323
    printf("Start ref relu q7 implementation\n");
324
 
325
    arm_relu_q7_ref(relu_ref_data_q7, RELU_DIM);
326
 
327
    printf("Start opt relu q7 implementation\n");
328
 
329
    arm_relu_q7(relu_opt_data_q7, RELU_DIM);
330
 
331
    verify_results_q7(relu_ref_data_q7, relu_opt_data_q7, RELU_DIM);
332
 
333
    printf("Start ref relu q15 implementation\n");
334
 
335
    arm_relu_q15_ref(relu_ref_data_q15, RELU_DIM);
336
 
337
    printf("Start opt relu q15 implementation\n");
338
 
339
    arm_relu_q15(relu_opt_data_q15, RELU_DIM);
340
 
341
    verify_results_q15(relu_ref_data_q15, relu_opt_data_q15, RELU_DIM);
342
 
343
    delete[]test1;
344
    delete[]test2;
345
    delete[]test3;
346
    delete[]test4;
347
 
348
#endif
349
 
350
#ifdef TEST_IP
351
 
352
#define IP_ROW_DIM 127
353
#define IP_COL_DIM 127
354
 
355
    q7_t      ip_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
356
    q7_t      ip_q7_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT;
357
    q7_t      ip_q7_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_q7_q15_WEIGHT;
358
    q15_t     ip_q15_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
359
    q15_t     ip_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT_Q15;
360
 
361
    test1 = new q7_t[IP_COL_DIM + IP_ROW_DIM];
362
    test2 = new q15_t[IP_COL_DIM];
363
    test3 = new q7_t[IP_ROW_DIM * 3];
364
    test4 = new q15_t[IP_COL_DIM + IP_ROW_DIM * 2];
365
 
366
    for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
367
    {
368
        test1[i] = rand() % 256 - 100;
369
    }
370
    for (int i = 0; i < IP_ROW_DIM * 3; i++)
371
    {
372
        test3[i] = 0;
373
    }
374
 
375
    q7_t     *ip_bias_q7 = test1 + IP_COL_DIM;
376
 
377
    q7_t     *ip_out_q7_ref = test3;
378
    q7_t     *ip_out_q7_opt = test3 + IP_ROW_DIM;
379
    q7_t     *ip_out_q7_opt_fast = test3 + 2 * IP_ROW_DIM;
380
    q15_t    *ip_out_q15_ref = test4 + IP_COL_DIM;
381
    q15_t    *ip_out_q15_opt = test4 + IP_COL_DIM + IP_ROW_DIM;
382
 
383
    initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
384
    initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
385
    initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
386
 
387
    printf("Start ref q7 implementation\n");
388
 
389
    arm_fully_connected_q7_ref(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_ref, test2);
390
 
391
    printf("Start q7 implementation\n");
392
 
393
    arm_fully_connected_q7(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt, test2);
394
 
395
    verify_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
396
 
397
    printf("Start q7 ref opt implementation\n");
398
 
399
    arm_fully_connected_q7_opt_ref(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
400
                                   ip_out_q7_opt_fast, test2);
401
 
402
    verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
403
 
404
    printf("Start q7 opt implementation\n");
405
 
406
    arm_fully_connected_q7_opt(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt_fast,
407
                               test2);
408
 
409
    verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
410
 
411
    for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
412
    {
413
        test4[i] = (rand() % 65536 - 32768);
414
    }
415
 
416
    initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
417
 
418
    printf("Start ref q15 implementation\n");
419
 
420
    arm_fully_connected_q15_ref(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_ref, NULL);
421
 
422
    printf("Start q15 implementation\n");
423
 
424
    arm_fully_connected_q15(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
425
 
426
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
427
 
428
    printf("Start ref opt q15 implementation\n");
429
 
430
    arm_fully_connected_q15_opt_ref(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt,
431
                                    NULL);
432
 
433
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
434
 
435
    printf("Start opt q15 implementation\n");
436
 
437
    arm_fully_connected_q15_opt(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
438
 
439
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
440
 
441
    initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
442
 
443
    printf("Start ref q7_q15 implementation\n");
444
 
445
    arm_fully_connected_mat_q7_vec_q15_ref(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_ref,
446
                                           test2);
447
 
448
    printf("Start q7_q15 implementation\n");
449
 
450
    arm_fully_connected_mat_q7_vec_q15(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_opt,
451
                                       test2);
452
 
453
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
454
 
455
    printf("Start ref opt q7_q15 implementation\n");
456
 
457
    arm_fully_connected_mat_q7_vec_q15_opt_ref(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
458
                                               ip_out_q15_opt, test2);
459
 
460
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
461
 
462
    printf("Start opt q7_q15 implementation\n");
463
 
464
    arm_fully_connected_mat_q7_vec_q15_opt(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
465
                                           ip_out_q15_opt, test2);
466
 
467
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
468
 
469
    delete[]test1;
470
    delete[]test2;
471
    delete[]test3;
472
    delete[]test4;
473
 
474
#endif
475
 
476
#ifdef TEST_NONSQUARE
477
 
478
/* Use RCONV to differential with square CONV */
479
 
480
#define RCONV_IM_DIM_X 10
481
#define RCONV_IM_DIM_Y 8
482
#define RCONV_IM_CH 4
483
#define RCONV_KER_DIM_X 5
484
#define RCONV_KER_DIM_Y 3
485
#define RCONV_STRIDE_X 1
486
#define RCONV_STRIDE_Y 1
487
#define RCONV_PADDING_X 2
488
#define RCONV_PADDING_Y 1
489
#define RCONV_OUT_CH 4
490
#define RCONV_OUT_DIM_X 10
491
#define RCONV_OUT_DIM_Y 8
492
 
493
    test1 = new q7_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH];
494
    test2 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH];
495
    test3 =
496
        new q7_t[RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH];
497
 
498
    for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
499
    {
500
        test1[i] = rand() % 256 - 100;
501
    }
502
 
503
    for (int i = 0;
504
         i < RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; i++)
505
    {
506
        test3[i] = rand() % 256 - 100;
507
    }
508
 
509
    q7_t     *rconv_weight_q7 = test1;
510
    q7_t     *rconv_bias_q7 = test1 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
511
 
512
    q15_t    *rconv_buf = test2;
513
 
514
    q7_t     *rconv_im_in_q7 = test3;
515
    q7_t     *rconv_im_out_ref_q7 = test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
516
    q7_t     *rconv_im_out_opt_q7 =
517
        test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
518
 
519
    initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
520
 
521
    printf("start conv q7 nonsquare ref implementation\n");
522
    arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
523
                                      RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
524
                                      RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
525
                                      RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
526
 
527
    printf("start conv q7 nonsquare opt implementation\n");
528
    arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
529
                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
530
                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
531
                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
532
 
533
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
534
 
535
    initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
536
 
537
    printf("start conv q7 nonsquare ref implementation\n");
538
    arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
539
                                      RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
540
                                      RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
541
                                      RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
542
 
543
    printf("start conv q7 nonsquare basic implementation\n");
544
    arm_convolve_HWC_q7_basic_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
545
                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
546
                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
547
                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
548
 
549
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
550
 
551
    initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
552
 
553
    printf("start 1x1 conv q7 nonsquare fast implementation\n");
554
    arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
555
                                       RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
556
                                       RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
557
                                       RCONV_OUT_DIM_Y, rconv_buf, NULL);
558
 
559
    printf("start 1x1 conv q7 nonsquare dedicated function implementation\n");
560
    arm_convolve_1x1_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
561
                                           RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
562
                                           RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
563
                                           RCONV_OUT_DIM_Y, rconv_buf, NULL);
564
 
565
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
566
 
567
    printf("start depthwise separable conv q7 nonsquare ref implementation\n");
568
    arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
569
                                                      rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
570
                                                      RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
571
                                                      rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
572
                                                      RCONV_OUT_DIM_Y, rconv_buf, NULL);
573
 
574
    printf("start depthwise separable conv q7 nonsquare opt implementation\n");
575
    arm_depthwise_separable_conv_HWC_q7_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
576
                                                  rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
577
                                                  RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
578
                                                  rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
579
                                                  RCONV_OUT_DIM_Y, rconv_buf, NULL);
580
 
581
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
582
 
583
    delete[]test1;
584
    delete[]test2;
585
    delete[]test3;
586
 
587
        test2 = new q15_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; // weights + bias
588
        test4 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH   //buffer
589
                 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; // i/o
590
 
591
    for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
592
    {
593
        test2[i] = rand() % 256 - 100;
594
    }
595
 
596
    for (int i = 0;
597
         i < 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH
598
         + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
599
        i++)
600
    {
601
        test4[i] = rand() % 256 - 100;
602
    }
603
 
604
    q15_t     *rconv_weight_q15 = test2;
605
    q15_t     *rconv_bias_q15 = test2 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
606
 
607
    rconv_buf = test4;
608
 
609
    q15_t     *rconv_im_in_q15 = test4 + 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH;
610
    q15_t     *rconv_im_out_ref_q15 = rconv_im_in_q15 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
611
    q15_t     *rconv_im_out_opt_q15 = rconv_im_out_ref_q15 + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
612
 
613
    initialize_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
614
 
615
    printf("start conv q15 nonsquare ref implementation\n");
616
    arm_convolve_HWC_q15_nonsquare_ref(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
617
                                      RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
618
                                      RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_ref_q15,
619
                                      RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
620
 
621
    printf("start conv q5 nonsquare opt implementation\n");
622
    arm_convolve_HWC_q15_fast_nonsquare(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
623
                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
624
                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_opt_q15,
625
                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
626
 
627
    verify_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
628
 
629
    delete [] test2;
630
    delete [] test4;
631
#endif
632
 
633
#ifdef TEST_CONV
634
 
635
#define CONV_IM_DIM 16
636
#define CONV_IM_CH 16
637
#define CONV_KER_DIM 5
638
#define CONV_OUT_CH 16
639
#define CONV_OUT_DIM 16
640
 
641
    test1 = new q7_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
642
    test2 =
643
        new q15_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
644
                  2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
645
    test3 = new q7_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
646
    test4 = new q15_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
647
 
648
    for (int i = 0; i < CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
649
    {
650
        test1[i] = rand() % 256 - 100;
651
    }
652
 
653
    for (int i = 0;
654
         i <
655
         CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
656
         2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
657
    {
658
        test2[i] = (rand() % 65536 - 32768);
659
    }
660
 
661
    for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
662
    {
663
        test3[i] = rand() % 256 - 100;
664
    }
665
 
666
    for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
667
    {
668
        test4[i] = (rand() % 65536 - 32768);
669
    }
670
 
671
    q7_t     *conv_weight_q7 = test1;
672
    q7_t     *conv_bias_q7 = test1 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
673
 
674
    q15_t    *conv_weight_q15 = test2;
675
    q15_t    *conv_buf = test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
676
    q15_t    *conv_bias_q15 =
677
        test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
678
        2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
679
 
680
    q7_t     *conv_im_in_q7 = test3;
681
    q7_t     *conv_im_out_ref_q7 = test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
682
    q7_t     *conv_im_out_opt_q7 =
683
        test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
684
 
685
    q15_t    *conv_im_in_q15 = test4;
686
    q15_t    *conv_im_out_ref_q15 = test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
687
    q15_t    *conv_im_out_opt_q15 =
688
        test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
689
 
690
    initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
691
 
692
    printf("start q7 ref implementation\n");
693
 
694
    arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
695
                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
696
                            CONV_OUT_DIM, conv_buf, NULL);
697
 
698
    printf("start q7 basic implementation\n");
699
 
700
    arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
701
                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
702
                              CONV_OUT_DIM, conv_buf, NULL);
703
 
704
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
705
 
706
    printf("start q7 fast implementation\n");
707
 
708
    arm_convolve_HWC_q7_fast(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
709
                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
710
                             CONV_OUT_DIM, conv_buf, NULL);
711
 
712
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
713
 
714
    // testing with RGB
715
    printf("start q7 ref implementation for RGB\n");
716
 
717
    arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
718
                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
719
                            CONV_OUT_DIM, conv_buf, NULL);
720
 
721
    printf("start q7 basic implementation for RGB\n");
722
 
723
    arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
724
                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
725
                              CONV_OUT_DIM, conv_buf, NULL);
726
 
727
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
728
 
729
    printf("start q7 RGB implementation for RGB\n");
730
 
731
    arm_convolve_HWC_q7_RGB(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
732
                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
733
                            CONV_OUT_DIM, conv_buf, NULL);
734
 
735
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
736
 
737
    // testing q15
738
    initialize_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
739
 
740
    printf("start q15 ref implementation\n");
741
 
742
    arm_convolve_HWC_q15_ref(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
743
                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_ref_q15,
744
                             CONV_OUT_DIM, conv_buf, NULL);
745
 
746
    printf("start q15 basic implementation\n");
747
 
748
    arm_convolve_HWC_q15_basic(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
749
                               CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
750
                               CONV_OUT_DIM, conv_buf, NULL);
751
 
752
    verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
753
 
754
    printf("start q15 fast implementation\n");
755
 
756
    arm_convolve_HWC_q15_fast(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
757
                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
758
                              CONV_OUT_DIM, conv_buf, NULL);
759
 
760
    verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
761
 
762
    // depthwise separable conv
763
    initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
764
 
765
    printf("start q7 depthwise_separable_conv ref implementation\n");
766
 
767
    arm_depthwise_separable_conv_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
768
                                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
769
                                            CONV_OUT_DIM, conv_buf, NULL);
770
 
771
    printf("start q7 depthwise_separable_conv implementation\n");
772
 
773
    arm_depthwise_separable_conv_HWC_q7(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
774
                                        CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
775
                                        CONV_OUT_DIM, conv_buf, NULL);
776
 
777
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
778
 
779
    delete[]test1;
780
    delete[]test2;
781
    delete[]test3;
782
    delete[]test4;
783
 
784
#endif
785
 
786
    test_pass = true;
787
    test_index = 0;
788
    while (test_flags[test_index] != -1) {
789
        if (test_flags[test_index]) {
790
             test_pass = false;
791
        }
792
        test_index ++;
793
    }
794
    if (test_pass) {
795
        printf("All tests passed\n");
796
    } else {
797
        printf("Test failed passed\n");
798
    }
799
 
800
    return 0;
801
}