Subversion Repositories dashGPS

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------
  2.  * Project:      CMSIS DSP Library
  3.  * Title:        arm_shift_q7.c
  4.  * Description:  Processing function for the Q7 Shifting
  5.  *
  6.  * $Date:        27. January 2017
  7.  * $Revision:    V.1.5.1
  8.  *
  9.  * Target Processor: Cortex-M cores
  10.  * -------------------------------------------------------------------- */
  11. /*
  12.  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13.  *
  14.  * SPDX-License-Identifier: Apache-2.0
  15.  *
  16.  * Licensed under the Apache License, Version 2.0 (the License); you may
  17.  * not use this file except in compliance with the License.
  18.  * You may obtain a copy of the License at
  19.  *
  20.  * www.apache.org/licenses/LICENSE-2.0
  21.  *
  22.  * Unless required by applicable law or agreed to in writing, software
  23.  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24.  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25.  * See the License for the specific language governing permissions and
  26.  * limitations under the License.
  27.  */
  28.  
  29. #include "arm_math.h"
  30.  
  31. /**
  32.  * @ingroup groupMath
  33.  */
  34.  
  35. /**
  36.  * @addtogroup shift
  37.  * @{
  38.  */
  39.  
  40.  
  41. /**
  42.  * @brief  Shifts the elements of a Q7 vector a specified number of bits.
  43.  * @param[in]  *pSrc points to the input vector
  44.  * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
  45.  * @param[out]  *pDst points to the output vector
  46.  * @param[in]  blockSize number of samples in the vector
  47.  * @return none.
  48.  *
  49.  * \par Conditions for optimum performance
  50.  *  Input and output buffers should be aligned by 32-bit
  51.  *
  52.  *
  53.  * <b>Scaling and Overflow Behavior:</b>
  54.  * \par
  55.  * The function uses saturating arithmetic.
  56.  * Results outside of the allowable Q7 range [0x8 0x7F] will be saturated.
  57.  */
  58.  
  59. void arm_shift_q7(
  60.   q7_t * pSrc,
  61.   int8_t shiftBits,
  62.   q7_t * pDst,
  63.   uint32_t blockSize)
  64. {
  65.   uint32_t blkCnt;                               /* loop counter */
  66.   uint8_t sign;                                  /* Sign of shiftBits */
  67.  
  68. #if defined (ARM_MATH_DSP)
  69.  
  70. /* Run the below code for Cortex-M4 and Cortex-M3 */
  71.   q7_t in1;                                      /* Input value1 */
  72.   q7_t in2;                                      /* Input value2 */
  73.   q7_t in3;                                      /* Input value3 */
  74.   q7_t in4;                                      /* Input value4 */
  75.  
  76.  
  77.   /*loop Unrolling */
  78.   blkCnt = blockSize >> 2U;
  79.  
  80.   /* Getting the sign of shiftBits */
  81.   sign = (shiftBits & 0x80);
  82.  
  83.   /* If the shift value is positive then do right shift else left shift */
  84.   if (sign == 0U)
  85.   {
  86.     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  87.      ** a second loop below computes the remaining 1 to 3 samples. */
  88.     while (blkCnt > 0U)
  89.     {
  90.       /* C = A << shiftBits */
  91.       /* Read 4 inputs */
  92.       in1 = *pSrc;
  93.       in2 = *(pSrc + 1);
  94.       in3 = *(pSrc + 2);
  95.       in4 = *(pSrc + 3);
  96.  
  97.       /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
  98.       *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
  99.                                    __SSAT((in2 << shiftBits), 8),
  100.                                    __SSAT((in3 << shiftBits), 8),
  101.                                    __SSAT((in4 << shiftBits), 8));
  102.       /* Update source pointer to process next sampels */
  103.       pSrc += 4U;
  104.  
  105.       /* Decrement the loop counter */
  106.       blkCnt--;
  107.     }
  108.  
  109.     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
  110.      ** No loop unrolling is used. */
  111.     blkCnt = blockSize % 0x4U;
  112.  
  113.     while (blkCnt > 0U)
  114.     {
  115.       /* C = A << shiftBits */
  116.       /* Shift the input and then store the result in the destination buffer. */
  117.       *pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
  118.  
  119.       /* Decrement the loop counter */
  120.       blkCnt--;
  121.     }
  122.   }
  123.   else
  124.   {
  125.     shiftBits = -shiftBits;
  126.     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  127.      ** a second loop below computes the remaining 1 to 3 samples. */
  128.     while (blkCnt > 0U)
  129.     {
  130.       /* C = A >> shiftBits */
  131.       /* Read 4 inputs */
  132.       in1 = *pSrc;
  133.       in2 = *(pSrc + 1);
  134.       in3 = *(pSrc + 2);
  135.       in4 = *(pSrc + 3);
  136.  
  137.       /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
  138.       *__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
  139.                                    (in3 >> shiftBits), (in4 >> shiftBits));
  140.  
  141.  
  142.       pSrc += 4U;
  143.  
  144.       /* Decrement the loop counter */
  145.       blkCnt--;
  146.     }
  147.  
  148.     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
  149.      ** No loop unrolling is used. */
  150.     blkCnt = blockSize % 0x4U;
  151.  
  152.     while (blkCnt > 0U)
  153.     {
  154.       /* C = A >> shiftBits */
  155.       /* Shift the input and then store the result in the destination buffer. */
  156.       in1 = *pSrc++;
  157.       *pDst++ = (in1 >> shiftBits);
  158.  
  159.       /* Decrement the loop counter */
  160.       blkCnt--;
  161.     }
  162.   }
  163.  
  164. #else
  165.  
  166.   /* Run the below code for Cortex-M0 */
  167.  
  168.   /* Getting the sign of shiftBits */
  169.   sign = (shiftBits & 0x80);
  170.  
  171.   /* If the shift value is positive then do right shift else left shift */
  172.   if (sign == 0U)
  173.   {
  174.     /* Initialize blkCnt with number of samples */
  175.     blkCnt = blockSize;
  176.  
  177.     while (blkCnt > 0U)
  178.     {
  179.       /* C = A << shiftBits */
  180.       /* Shift the input and then store the result in the destination buffer. */
  181.       *pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
  182.  
  183.       /* Decrement the loop counter */
  184.       blkCnt--;
  185.     }
  186.   }
  187.   else
  188.   {
  189.     /* Initialize blkCnt with number of samples */
  190.     blkCnt = blockSize;
  191.  
  192.     while (blkCnt > 0U)
  193.     {
  194.       /* C = A >> shiftBits */
  195.       /* Shift the input and then store the result in the destination buffer. */
  196.       *pDst++ = (*pSrc++ >> -shiftBits);
  197.  
  198.       /* Decrement the loop counter */
  199.       blkCnt--;
  200.     }
  201.   }
  202.  
  203. #endif /* #if defined (ARM_MATH_DSP) */
  204. }
  205.  
  206. /**
  207.  * @} end of shift group
  208.  */
  209.