Subversion Repositories ScreenTimer

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------
  2.  * Project:      CMSIS DSP Library
  3.  * Title:        arm_lms_q31.c
  4.  * Description:  Processing function for the Q31 LMS filter
  5.  *
  6.  * $Date:        27. January 2017
  7.  * $Revision:    V.1.5.1
  8.  *
  9.  * Target Processor: Cortex-M cores
  10.  * -------------------------------------------------------------------- */
  11. /*
  12.  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13.  *
  14.  * SPDX-License-Identifier: Apache-2.0
  15.  *
  16.  * Licensed under the Apache License, Version 2.0 (the License); you may
  17.  * not use this file except in compliance with the License.
  18.  * You may obtain a copy of the License at
  19.  *
  20.  * www.apache.org/licenses/LICENSE-2.0
  21.  *
  22.  * Unless required by applicable law or agreed to in writing, software
  23.  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24.  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25.  * See the License for the specific language governing permissions and
  26.  * limitations under the License.
  27.  */
  28.  
  29. #include "arm_math.h"
  30. /**
  31.  * @ingroup groupFilters
  32.  */
  33.  
  34. /**
  35.  * @addtogroup LMS
  36.  * @{
  37.  */
  38.  
  39.  /**
  40.  * @brief Processing function for Q31 LMS filter.
  41.  * @param[in]  *S points to an instance of the Q15 LMS filter structure.
  42.  * @param[in]  *pSrc points to the block of input data.
  43.  * @param[in]  *pRef points to the block of reference data.
  44.  * @param[out] *pOut points to the block of output data.
  45.  * @param[out] *pErr points to the block of error data.
  46.  * @param[in]  blockSize number of samples to process.
  47.  * @return     none.
  48.  *
  49.  * \par Scaling and Overflow Behavior:
  50.  * The function is implemented using an internal 64-bit accumulator.
  51.  * The accumulator has a 2.62 format and maintains full precision of the intermediate
  52.  * multiplication results but provides only a single guard bit.
  53.  * Thus, if the accumulator result overflows it wraps around rather than clips.
  54.  * In order to avoid overflows completely the input signal must be scaled down by
  55.  * log2(numTaps) bits.
  56.  * The reference signal should not be scaled down.
  57.  * After all multiply-accumulates are performed, the 2.62 accumulator is shifted
  58.  * and saturated to 1.31 format to yield the final result.
  59.  * The output signal and error signal are in 1.31 format.
  60.  *
  61.  * \par
  62.  *      In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  63.  */
  64.  
  65. void arm_lms_q31(
  66.   const arm_lms_instance_q31 * S,
  67.   q31_t * pSrc,
  68.   q31_t * pRef,
  69.   q31_t * pOut,
  70.   q31_t * pErr,
  71.   uint32_t blockSize)
  72. {
  73.   q31_t *pState = S->pState;                     /* State pointer */
  74.   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  75.   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  76.   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
  77.   q31_t mu = S->mu;                              /* Adaptive factor */
  78.   q31_t *px;                                     /* Temporary pointer for state */
  79.   q31_t *pb;                                     /* Temporary pointer for coefficient buffer */
  80.   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  81.   q63_t acc;                                     /* Accumulator */
  82.   q31_t e = 0;                                   /* error of data sample */
  83.   q31_t alpha;                                   /* Intermediate constant for taps update */
  84.   q31_t coef;                                    /* Temporary variable for coef */
  85.   q31_t acc_l, acc_h;                            /*  temporary input */
  86.   uint32_t uShift = ((uint32_t) S->postShift + 1U);
  87.   uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */
  88.  
  89.   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  90.   /* pStateCurnt points to the location where the new input data should be written */
  91.   pStateCurnt = &(S->pState[(numTaps - 1U)]);
  92.  
  93.   /* Initializing blkCnt with blockSize */
  94.   blkCnt = blockSize;
  95.  
  96.  
  97. #if defined (ARM_MATH_DSP)
  98.  
  99.   /* Run the below code for Cortex-M4 and Cortex-M3 */
  100.  
  101.   while (blkCnt > 0U)
  102.   {
  103.     /* Copy the new input sample into the state buffer */
  104.     *pStateCurnt++ = *pSrc++;
  105.  
  106.     /* Initialize state pointer */
  107.     px = pState;
  108.  
  109.     /* Initialize coefficient pointer */
  110.     pb = pCoeffs;
  111.  
  112.     /* Set the accumulator to zero */
  113.     acc = 0;
  114.  
  115.     /* Loop unrolling.  Process 4 taps at a time. */
  116.     tapCnt = numTaps >> 2;
  117.  
  118.     while (tapCnt > 0U)
  119.     {
  120.       /* Perform the multiply-accumulate */
  121.       /* acc +=  b[N] * x[n-N] */
  122.       acc += ((q63_t) (*px++)) * (*pb++);
  123.  
  124.       /* acc +=  b[N-1] * x[n-N-1] */
  125.       acc += ((q63_t) (*px++)) * (*pb++);
  126.  
  127.       /* acc +=  b[N-2] * x[n-N-2] */
  128.       acc += ((q63_t) (*px++)) * (*pb++);
  129.  
  130.       /* acc +=  b[N-3] * x[n-N-3] */
  131.       acc += ((q63_t) (*px++)) * (*pb++);
  132.  
  133.       /* Decrement the loop counter */
  134.       tapCnt--;
  135.     }
  136.  
  137.     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  138.     tapCnt = numTaps % 0x4U;
  139.  
  140.     while (tapCnt > 0U)
  141.     {
  142.       /* Perform the multiply-accumulate */
  143.       acc += ((q63_t) (*px++)) * (*pb++);
  144.  
  145.       /* Decrement the loop counter */
  146.       tapCnt--;
  147.     }
  148.  
  149.     /* Converting the result to 1.31 format */
  150.     /* Calc lower part of acc */
  151.     acc_l = acc & 0xffffffff;
  152.  
  153.     /* Calc upper part of acc */
  154.     acc_h = (acc >> 32) & 0xffffffff;
  155.  
  156.     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
  157.  
  158.     /* Store the result from accumulator into the destination buffer. */
  159.     *pOut++ = (q31_t) acc;
  160.  
  161.     /* Compute and store error */
  162.     e = *pRef++ - (q31_t) acc;
  163.  
  164.     *pErr++ = (q31_t) e;
  165.  
  166.     /* Compute alpha i.e. intermediate constant for taps update */
  167.     alpha = (q31_t) (((q63_t) e * mu) >> 31);
  168.  
  169.     /* Initialize state pointer */
  170.     /* Advance state pointer by 1 for the next sample */
  171.     px = pState++;
  172.  
  173.     /* Initialize coefficient pointer */
  174.     pb = pCoeffs;
  175.  
  176.     /* Loop unrolling.  Process 4 taps at a time. */
  177.     tapCnt = numTaps >> 2;
  178.  
  179.     /* Update filter coefficients */
  180.     while (tapCnt > 0U)
  181.     {
  182.       /* coef is in 2.30 format */
  183.       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
  184.       /* get coef in 1.31 format by left shifting */
  185.       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
  186.       /* update coefficient buffer to next coefficient */
  187.       pb++;
  188.  
  189.       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
  190.       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
  191.       pb++;
  192.  
  193.       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
  194.       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
  195.       pb++;
  196.  
  197.       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
  198.       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
  199.       pb++;
  200.  
  201.       /* Decrement the loop counter */
  202.       tapCnt--;
  203.     }
  204.  
  205.     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  206.     tapCnt = numTaps % 0x4U;
  207.  
  208.     while (tapCnt > 0U)
  209.     {
  210.       /* Perform the multiply-accumulate */
  211.       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
  212.       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
  213.       pb++;
  214.  
  215.       /* Decrement the loop counter */
  216.       tapCnt--;
  217.     }
  218.  
  219.     /* Decrement the loop counter */
  220.     blkCnt--;
  221.   }
  222.  
  223.   /* Processing is complete. Now copy the last numTaps - 1 samples to the
  224.      satrt of the state buffer. This prepares the state buffer for the
  225.      next function call. */
  226.  
  227.   /* Points to the start of the pState buffer */
  228.   pStateCurnt = S->pState;
  229.  
  230.   /* Loop unrolling for (numTaps - 1U) samples copy */
  231.   tapCnt = (numTaps - 1U) >> 2U;
  232.  
  233.   /* copy data */
  234.   while (tapCnt > 0U)
  235.   {
  236.     *pStateCurnt++ = *pState++;
  237.     *pStateCurnt++ = *pState++;
  238.     *pStateCurnt++ = *pState++;
  239.     *pStateCurnt++ = *pState++;
  240.  
  241.     /* Decrement the loop counter */
  242.     tapCnt--;
  243.   }
  244.  
  245.   /* Calculate remaining number of copies */
  246.   tapCnt = (numTaps - 1U) % 0x4U;
  247.  
  248.   /* Copy the remaining q31_t data */
  249.   while (tapCnt > 0U)
  250.   {
  251.     *pStateCurnt++ = *pState++;
  252.  
  253.     /* Decrement the loop counter */
  254.     tapCnt--;
  255.   }
  256.  
  257. #else
  258.  
  259.   /* Run the below code for Cortex-M0 */
  260.  
  261.   while (blkCnt > 0U)
  262.   {
  263.     /* Copy the new input sample into the state buffer */
  264.     *pStateCurnt++ = *pSrc++;
  265.  
  266.     /* Initialize pState pointer */
  267.     px = pState;
  268.  
  269.     /* Initialize pCoeffs pointer */
  270.     pb = pCoeffs;
  271.  
  272.     /* Set the accumulator to zero */
  273.     acc = 0;
  274.  
  275.     /* Loop over numTaps number of values */
  276.     tapCnt = numTaps;
  277.  
  278.     while (tapCnt > 0U)
  279.     {
  280.       /* Perform the multiply-accumulate */
  281.       acc += ((q63_t) (*px++)) * (*pb++);
  282.  
  283.       /* Decrement the loop counter */
  284.       tapCnt--;
  285.     }
  286.  
  287.     /* Converting the result to 1.31 format */
  288.     /* Store the result from accumulator into the destination buffer. */
  289.     /* Calc lower part of acc */
  290.     acc_l = acc & 0xffffffff;
  291.  
  292.     /* Calc upper part of acc */
  293.     acc_h = (acc >> 32) & 0xffffffff;
  294.  
  295.     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
  296.  
  297.     *pOut++ = (q31_t) acc;
  298.  
  299.     /* Compute and store error */
  300.     e = *pRef++ - (q31_t) acc;
  301.  
  302.     *pErr++ = (q31_t) e;
  303.  
  304.     /* Weighting factor for the LMS version */
  305.     alpha = (q31_t) (((q63_t) e * mu) >> 31);
  306.  
  307.     /* Initialize pState pointer */
  308.     /* Advance state pointer by 1 for the next sample */
  309.     px = pState++;
  310.  
  311.     /* Initialize pCoeffs pointer */
  312.     pb = pCoeffs;
  313.  
  314.     /* Loop over numTaps number of values */
  315.     tapCnt = numTaps;
  316.  
  317.     while (tapCnt > 0U)
  318.     {
  319.       /* Perform the multiply-accumulate */
  320.       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
  321.       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1U));
  322.       pb++;
  323.  
  324.       /* Decrement the loop counter */
  325.       tapCnt--;
  326.     }
  327.  
  328.     /* Decrement the loop counter */
  329.     blkCnt--;
  330.   }
  331.  
  332.   /* Processing is complete. Now copy the last numTaps - 1 samples to the
  333.      start of the state buffer. This prepares the state buffer for the
  334.      next function call. */
  335.  
  336.   /* Points to the start of the pState buffer */
  337.   pStateCurnt = S->pState;
  338.  
  339.   /*  Copy (numTaps - 1U) samples  */
  340.   tapCnt = (numTaps - 1U);
  341.  
  342.   /* Copy the data */
  343.   while (tapCnt > 0U)
  344.   {
  345.     *pStateCurnt++ = *pState++;
  346.  
  347.     /* Decrement the loop counter */
  348.     tapCnt--;
  349.   }
  350.  
  351. #endif /*   #if defined (ARM_MATH_DSP) */
  352.  
  353. }
  354.  
  355. /**
  356.    * @} end of LMS group
  357.    */
  358.