Subversion Repositories AFRtranscoder

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------
  2.  * Project:      CMSIS DSP Library
  3.  * Title:        arm_lms_q15.c
  4.  * Description:  Processing function for the Q15 LMS filter
  5.  *
  6.  * $Date:        27. January 2017
  7.  * $Revision:    V.1.5.1
  8.  *
  9.  * Target Processor: Cortex-M cores
  10.  * -------------------------------------------------------------------- */
  11. /*
  12.  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13.  *
  14.  * SPDX-License-Identifier: Apache-2.0
  15.  *
  16.  * Licensed under the Apache License, Version 2.0 (the License); you may
  17.  * not use this file except in compliance with the License.
  18.  * You may obtain a copy of the License at
  19.  *
  20.  * www.apache.org/licenses/LICENSE-2.0
  21.  *
  22.  * Unless required by applicable law or agreed to in writing, software
  23.  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24.  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25.  * See the License for the specific language governing permissions and
  26.  * limitations under the License.
  27.  */
  28.  
  29. #include "arm_math.h"
  30. /**
  31.  * @ingroup groupFilters
  32.  */
  33.  
  34. /**
  35.  * @addtogroup LMS
  36.  * @{
  37.  */
  38.  
  39.  /**
  40.  * @brief Processing function for Q15 LMS filter.
  41.  * @param[in] *S points to an instance of the Q15 LMS filter structure.
  42.  * @param[in] *pSrc points to the block of input data.
  43.  * @param[in] *pRef points to the block of reference data.
  44.  * @param[out] *pOut points to the block of output data.
  45.  * @param[out] *pErr points to the block of error data.
  46.  * @param[in] blockSize number of samples to process.
  47.  * @return none.
  48.  *
  49.  * \par Scaling and Overflow Behavior:
  50.  * The function is implemented using a 64-bit internal accumulator.
  51.  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  52.  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  53.  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  54.  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  55.  * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  56.  *
  57.  * \par
  58.  *      In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  59.  *
  60.  */
  61.  
  62. void arm_lms_q15(
  63.   const arm_lms_instance_q15 * S,
  64.   q15_t * pSrc,
  65.   q15_t * pRef,
  66.   q15_t * pOut,
  67.   q15_t * pErr,
  68.   uint32_t blockSize)
  69. {
  70.   q15_t *pState = S->pState;                     /* State pointer */
  71.   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  72.   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  73.   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
  74.   q15_t mu = S->mu;                              /* Adaptive factor */
  75.   q15_t *px;                                     /* Temporary pointer for state */
  76.   q15_t *pb;                                     /* Temporary pointer for coefficient buffer */
  77.   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  78.   q63_t acc;                                     /* Accumulator */
  79.   q15_t e = 0;                                   /* error of data sample */
  80.   q15_t alpha;                                   /* Intermediate constant for taps update */
  81.   q31_t coef;                                    /* Teporary variable for coefficient */
  82.   q31_t acc_l, acc_h;
  83.   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Post shift  */
  84.   int32_t uShift = (32 - lShift);
  85.  
  86.  
  87. #if defined (ARM_MATH_DSP)
  88.  
  89.   /* Run the below code for Cortex-M4 and Cortex-M3 */
  90.  
  91.  
  92.   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  93.   /* pStateCurnt points to the location where the new input data should be written */
  94.   pStateCurnt = &(S->pState[(numTaps - 1U)]);
  95.  
  96.   /* Initializing blkCnt with blockSize */
  97.   blkCnt = blockSize;
  98.  
  99.   while (blkCnt > 0U)
  100.   {
  101.     /* Copy the new input sample into the state buffer */
  102.     *pStateCurnt++ = *pSrc++;
  103.  
  104.     /* Initialize state pointer */
  105.     px = pState;
  106.  
  107.     /* Initialize coefficient pointer */
  108.     pb = pCoeffs;
  109.  
  110.     /* Set the accumulator to zero */
  111.     acc = 0;
  112.  
  113.     /* Loop unrolling.  Process 4 taps at a time. */
  114.     tapCnt = numTaps >> 2U;
  115.  
  116.     while (tapCnt > 0U)
  117.     {
  118.       /* acc +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */
  119.       /* Perform the multiply-accumulate */
  120. #ifndef UNALIGNED_SUPPORT_DISABLE
  121.  
  122.       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
  123.       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
  124.  
  125. #else
  126.  
  127.       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
  128.       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
  129.       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
  130.       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
  131.  
  132.  
  133. #endif  /*      #ifndef UNALIGNED_SUPPORT_DISABLE       */
  134.  
  135.       /* Decrement the loop counter */
  136.       tapCnt--;
  137.     }
  138.  
  139.     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  140.     tapCnt = numTaps % 0x4U;
  141.  
  142.     while (tapCnt > 0U)
  143.     {
  144.       /* Perform the multiply-accumulate */
  145.       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
  146.  
  147.       /* Decrement the loop counter */
  148.       tapCnt--;
  149.     }
  150.  
  151.     /* Calc lower part of acc */
  152.     acc_l = acc & 0xffffffff;
  153.  
  154.     /* Calc upper part of acc */
  155.     acc_h = (acc >> 32) & 0xffffffff;
  156.  
  157.     /* Apply shift for lower part of acc and upper part of acc */
  158.     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
  159.  
  160.     /* Converting the result to 1.15 format and saturate the output */
  161.     acc = __SSAT(acc, 16);
  162.  
  163.     /* Store the result from accumulator into the destination buffer. */
  164.     *pOut++ = (q15_t) acc;
  165.  
  166.     /* Compute and store error */
  167.     e = *pRef++ - (q15_t) acc;
  168.  
  169.     *pErr++ = (q15_t) e;
  170.  
  171.     /* Compute alpha i.e. intermediate constant for taps update */
  172.     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
  173.  
  174.     /* Initialize state pointer */
  175.     /* Advance state pointer by 1 for the next sample */
  176.     px = pState++;
  177.  
  178.     /* Initialize coefficient pointer */
  179.     pb = pCoeffs;
  180.  
  181.     /* Loop unrolling.  Process 4 taps at a time. */
  182.     tapCnt = numTaps >> 2U;
  183.  
  184.     /* Update filter coefficients */
  185.     while (tapCnt > 0U)
  186.     {
  187.       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
  188.       *pb++ = (q15_t) __SSAT((coef), 16);
  189.       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
  190.       *pb++ = (q15_t) __SSAT((coef), 16);
  191.       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
  192.       *pb++ = (q15_t) __SSAT((coef), 16);
  193.       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
  194.       *pb++ = (q15_t) __SSAT((coef), 16);
  195.  
  196.       /* Decrement the loop counter */
  197.       tapCnt--;
  198.     }
  199.  
  200.     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  201.     tapCnt = numTaps % 0x4U;
  202.  
  203.     while (tapCnt > 0U)
  204.     {
  205.       /* Perform the multiply-accumulate */
  206.       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
  207.       *pb++ = (q15_t) __SSAT((coef), 16);
  208.  
  209.       /* Decrement the loop counter */
  210.       tapCnt--;
  211.     }
  212.  
  213.     /* Decrement the loop counter */
  214.     blkCnt--;
  215.  
  216.   }
  217.  
  218.   /* Processing is complete. Now copy the last numTaps - 1 samples to the
  219.      satrt of the state buffer. This prepares the state buffer for the
  220.      next function call. */
  221.  
  222.   /* Points to the start of the pState buffer */
  223.   pStateCurnt = S->pState;
  224.  
  225.   /* Calculation of count for copying integer writes */
  226.   tapCnt = (numTaps - 1U) >> 2;
  227.  
  228.   while (tapCnt > 0U)
  229.   {
  230.  
  231. #ifndef UNALIGNED_SUPPORT_DISABLE
  232.  
  233.     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  234.     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  235. #else
  236.     *pStateCurnt++ = *pState++;
  237.     *pStateCurnt++ = *pState++;
  238.     *pStateCurnt++ = *pState++;
  239.     *pStateCurnt++ = *pState++;
  240. #endif
  241.  
  242.     tapCnt--;
  243.  
  244.   }
  245.  
  246.   /* Calculation of count for remaining q15_t data */
  247.   tapCnt = (numTaps - 1U) % 0x4U;
  248.  
  249.   /* copy data */
  250.   while (tapCnt > 0U)
  251.   {
  252.     *pStateCurnt++ = *pState++;
  253.  
  254.     /* Decrement the loop counter */
  255.     tapCnt--;
  256.   }
  257.  
  258. #else
  259.  
  260.   /* Run the below code for Cortex-M0 */
  261.  
  262.   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  263.   /* pStateCurnt points to the location where the new input data should be written */
  264.   pStateCurnt = &(S->pState[(numTaps - 1U)]);
  265.  
  266.   /* Loop over blockSize number of values */
  267.   blkCnt = blockSize;
  268.  
  269.   while (blkCnt > 0U)
  270.   {
  271.     /* Copy the new input sample into the state buffer */
  272.     *pStateCurnt++ = *pSrc++;
  273.  
  274.     /* Initialize pState pointer */
  275.     px = pState;
  276.  
  277.     /* Initialize pCoeffs pointer */
  278.     pb = pCoeffs;
  279.  
  280.     /* Set the accumulator to zero */
  281.     acc = 0;
  282.  
  283.     /* Loop over numTaps number of values */
  284.     tapCnt = numTaps;
  285.  
  286.     while (tapCnt > 0U)
  287.     {
  288.       /* Perform the multiply-accumulate */
  289.       acc += (q63_t) ((q31_t) (*px++) * (*pb++));
  290.  
  291.       /* Decrement the loop counter */
  292.       tapCnt--;
  293.     }
  294.  
  295.     /* Calc lower part of acc */
  296.     acc_l = acc & 0xffffffff;
  297.  
  298.     /* Calc upper part of acc */
  299.     acc_h = (acc >> 32) & 0xffffffff;
  300.  
  301.     /* Apply shift for lower part of acc and upper part of acc */
  302.     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
  303.  
  304.     /* Converting the result to 1.15 format and saturate the output */
  305.     acc = __SSAT(acc, 16);
  306.  
  307.     /* Store the result from accumulator into the destination buffer. */
  308.     *pOut++ = (q15_t) acc;
  309.  
  310.     /* Compute and store error */
  311.     e = *pRef++ - (q15_t) acc;
  312.  
  313.     *pErr++ = (q15_t) e;
  314.  
  315.     /* Compute alpha i.e. intermediate constant for taps update */
  316.     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
  317.  
  318.     /* Initialize pState pointer */
  319.     /* Advance state pointer by 1 for the next sample */
  320.     px = pState++;
  321.  
  322.     /* Initialize pCoeffs pointer */
  323.     pb = pCoeffs;
  324.  
  325.     /* Loop over numTaps number of values */
  326.     tapCnt = numTaps;
  327.  
  328.     while (tapCnt > 0U)
  329.     {
  330.       /* Perform the multiply-accumulate */
  331.       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
  332.       *pb++ = (q15_t) __SSAT((coef), 16);
  333.  
  334.       /* Decrement the loop counter */
  335.       tapCnt--;
  336.     }
  337.  
  338.     /* Decrement the loop counter */
  339.     blkCnt--;
  340.  
  341.   }
  342.  
  343.   /* Processing is complete. Now copy the last numTaps - 1 samples to the
  344.      start of the state buffer. This prepares the state buffer for the
  345.      next function call. */
  346.  
  347.   /* Points to the start of the pState buffer */
  348.   pStateCurnt = S->pState;
  349.  
  350.   /*  Copy (numTaps - 1U) samples  */
  351.   tapCnt = (numTaps - 1U);
  352.  
  353.   /* Copy the data */
  354.   while (tapCnt > 0U)
  355.   {
  356.     *pStateCurnt++ = *pState++;
  357.  
  358.     /* Decrement the loop counter */
  359.     tapCnt--;
  360.   }
  361.  
  362. #endif /*   #if defined (ARM_MATH_DSP) */
  363.  
  364. }
  365.  
  366. /**
  367.    * @} end of LMS group
  368.    */
  369.