Subversion Repositories dashGPS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
  3.  *
  4.  * SPDX-License-Identifier: Apache-2.0
  5.  *
  6.  * Licensed under the Apache License, Version 2.0 (the License); you may
  7.  * not use this file except in compliance with the License.
  8.  * You may obtain a copy of the License at
  9.  *
  10.  * www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing, software
  13.  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14.  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15.  * See the License for the specific language governing permissions and
  16.  * limitations under the License.
  17.  */
  18.  
  19. /* ----------------------------------------------------------------------
  20.  * Project:      CMSIS NN Library
  21.  * Title:        arm_relu_q7.c
  22.  * Description:  Q7 version of ReLU
  23.  *
  24.  * $Date:        17. January 2018
  25.  * $Revision:    V.1.0.0
  26.  *
  27.  * Target Processor:  Cortex-M cores
  28.  *
  29.  * -------------------------------------------------------------------- */
  30.  
#include <string.h>

#include "arm_math.h"
#include "arm_nnfunctions.h"
  33.  
  34. /**
  35.  *  @ingroup groupNN
  36.  */
  37.  
  38. /**
  39.  * @addtogroup Acti
  40.  * @{
  41.  */
  42.  
  43.   /**
  44.    * @brief Q7 RELU function
  45.    * @param[in,out]   data        pointer to input
  46.    * @param[in]       size        number of elements
  47.    * @return none.
  48.    *
  49.    * @details
  50.    *
  51.    * Optimized relu with QSUB instructions.
  52.    *
  53.    */
  54.  
  55. void arm_relu_q7(q7_t * data, uint16_t size)
  56. {
  57.  
  58. #if defined (ARM_MATH_DSP)
  59.     /* Run the following code for Cortex-M4 and Cortex-M7 */
  60.  
  61.     uint16_t  i = size >> 2;
  62.     q7_t     *pIn = data;
  63.     q7_t     *pOut = data;
  64.     q31_t     in;
  65.     q31_t     buf;
  66.     q31_t     mask;
  67.  
  68.     while (i)
  69.     {
  70.         in = *__SIMD32(pIn)++;
  71.  
  72.         /* extract the first bit */
  73.         buf = __ROR(in & 0x80808080, 7);
  74.  
  75.         /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
  76.         mask = __QSUB8(0x00000000, buf);
  77.  
  78.         *__SIMD32(pOut)++ = in & (~mask);
  79.         i--;
  80.     }
  81.  
  82.     i = size & 0x3;
  83.     while (i)
  84.     {
  85.         if (*pIn < 0)
  86.         {
  87.             *pIn = 0;
  88.         }
  89.         pIn++;
  90.         i--;
  91.     }
  92.  
  93. #else
  94.     /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
  95.  
  96.     uint16_t  i;
  97.  
  98.     for (i = 0; i < size; i++)
  99.     {
  100.         if (data[i] < 0)
  101.             data[i] = 0;
  102.     }
  103.  
  104. #endif                          /* ARM_MATH_DSP */
  105.  
  106. }
  107.  
  108. /**
  109.  * @} end of Acti group
  110.  */
  111.