Subversion Repositories dashGPS

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------
  2.  * Project:      CMSIS DSP Library
  3.  * Title:        arm_cmplx_mult_cmplx_f32.c
  4.  * Description:  Floating-point complex-by-complex multiplication
  5.  *
  6.  * $Date:        27. January 2017
  7.  * $Revision:    V.1.5.1
  8.  *
  9.  * Target Processor: Cortex-M cores
  10.  * -------------------------------------------------------------------- */
  11. /*
  12.  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13.  *
  14.  * SPDX-License-Identifier: Apache-2.0
  15.  *
  16.  * Licensed under the Apache License, Version 2.0 (the License); you may
  17.  * not use this file except in compliance with the License.
  18.  * You may obtain a copy of the License at
  19.  *
  20.  * www.apache.org/licenses/LICENSE-2.0
  21.  *
  22.  * Unless required by applicable law or agreed to in writing, software
  23.  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24.  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25.  * See the License for the specific language governing permissions and
  26.  * limitations under the License.
  27.  */
  28.  
  29. #include "arm_math.h"
  30.  
  31. /**
  32.  * @ingroup groupCmplxMath
  33.  */
  34.  
  35. /**
  36.  * @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
  37.  *
  38.  * Multiplies a complex vector by another complex vector and generates a complex result.
  39.  * The data in the complex arrays is stored in an interleaved fashion
  40.  * (real, imag, real, imag, ...).
  41.  * The parameter <code>numSamples</code> represents the number of complex
  42.  * samples processed.  The complex arrays have a total of <code>2*numSamples</code>
  43.  * real values.
  44.  *
  45.  * The underlying algorithm is:
  46.  *
  47.  * <pre>
  48.  * for(n=0; n<numSamples; n++) {
  49.  *     pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
  50.  *     pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
  51.  * }
  52.  * </pre>
  53.  *
  54.  * There are separate functions for floating-point, Q15, and Q31 data types.
  55.  */
  56.  
  57. /**
  58.  * @addtogroup CmplxByCmplxMult
  59.  * @{
  60.  */
  61.  
  62.  
  63. /**
  64.  * @brief  Floating-point complex-by-complex multiplication: pDst[n] = pSrcA[n] * pSrcB[n] for each complex sample n.
  65.  * @param[in]  *pSrcA points to the first input vector (interleaved real, imag; 2*numSamples values are read)
  66.  * @param[in]  *pSrcB points to the second input vector (interleaved real, imag; 2*numSamples values are read)
  67.  * @param[out]  *pDst  points to the output vector (interleaved real, imag; 2*numSamples values are written)
  68.  * @param[in]  numSamples number of complex samples in each vector; when 0, both loops are skipped and pDst is not written
  69.  * @return none.
  70.  */
  71.  
  72. void arm_cmplx_mult_cmplx_f32(
  73.   float32_t * pSrcA,
  74.   float32_t * pSrcB,
  75.   float32_t * pDst,
  76.   uint32_t numSamples)
  77. {
  78.   float32_t a1, b1, c1, d1;                      /* a/b hold the real/imag parts of an A sample, c/d the real/imag parts of a B sample */
  79.   uint32_t blkCnt;                               /* Loop counter for the unrolled loop (when compiled) and the per-sample loop */
  80.  
  81. #if defined (ARM_MATH_DSP)
  82.  
  83.   /* Unrolled path, compiled only when ARM_MATH_DSP is defined (original note: Cortex-M4 and Cortex-M3) */
  84.   float32_t a2, b2, c2, d2;                      /* Second set of real/imag temporaries, used for the 2nd and 4th sample of each group */
  85.   float32_t acc1, acc2, acc3, acc4;              /* acc1/acc3 build real outputs, acc2/acc4 build imaginary outputs */
  86.   /* Loads, multiplies and stores are interleaved in the loop below; within one iteration each input index k is read before pDst[k] is written. */
  87.   /* NOTE(review): presumably ordered this way for pipeline overlap - confirm before reordering any statement. */
  88.   /* Number of unrolled iterations: numSamples / 4 */
  89.   blkCnt = numSamples >> 2U;
  90.  
  91.   /* First part of the processing with loop unrolling.  Compute 4 complex outputs (8 float values) per iteration.
  92.    ** The per-sample loop further below computes the remaining 0 to 3 samples. */
  93.   while (blkCnt > 0U)
  94.   {
  95.     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  96.     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  97.     a1 = *pSrcA;                /* A[2 * i] */
  98.     c1 = *pSrcB;                /* B[2 * i] */
  99.  
  100.     b1 = *(pSrcA + 1);          /* A[2 * i + 1] */
  101.     acc1 = a1 * c1;             /* acc1 = A[2 * i] * B[2 * i] */
  102.  
  103.     a2 = *(pSrcA + 2);          /* A[2 * i + 2] */
  104.     acc2 = (b1 * c1);           /* acc2 = A[2 * i + 1] * B[2 * i] */
  105.  
  106.     d1 = *(pSrcB + 1);          /* B[2 * i + 1] */
  107.     c2 = *(pSrcB + 2);          /* B[2 * i + 2] */
  108.     acc1 -= b1 * d1;            /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
  109.  
  110.     d2 = *(pSrcB + 3);          /* B[2 * i + 3] */
  111.     acc3 = a2 * c2;             /* acc3 = A[2 * i + 2] * B[2 * i + 2] */
  112.  
  113.     b2 = *(pSrcA + 3);          /* A[2 * i + 3] */
  114.     acc2 += (a1 * d1);          /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
  115.  
  116.     a1 = *(pSrcA + 4);          /* A[2 * i + 4]; a1 is reused for the third sample from here on */
  117.     acc4 = (a2 * d2);           /* acc4 = A[2 * i + 2] * B[2 * i + 3] */
  118.  
  119.     c1 = *(pSrcB + 4);          /* B[2 * i + 4]; c1 is reused for the third sample from here on */
  120.     acc3 -= (b2 * d2);          /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
  121.     *pDst = acc1;               /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
  122.  
  123.     b1 = *(pSrcA + 5);          /* A[2 * i + 5] */
  124.     acc4 += b2 * c2;            /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
  125.  
  126.     *(pDst + 1) = acc2;         /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1]  */
  127.     acc1 = (a1 * c1);           /* acc1 = A[2 * i + 4] * B[2 * i + 4] */
  128.  
  129.     d1 = *(pSrcB + 5);          /* B[2 * i + 5] */
  130.     acc2 = (b1 * c1);           /* acc2 = A[2 * i + 5] * B[2 * i + 4] */
  131.  
  132.     *(pDst + 2) = acc3;         /* C[2 * i + 2]: real part of the second output sample */
  133.     *(pDst + 3) = acc4;         /* C[2 * i + 3]: imaginary part of the second output sample */
  134.  
  135.     a2 = *(pSrcA + 6);          /* A[2 * i + 6]; a2 is reused for the fourth sample from here on */
  136.     acc1 -= (b1 * d1);          /* acc1 = A[2 * i + 4] * B[2 * i + 4] - A[2 * i + 5] * B[2 * i + 5] */
  137.  
  138.     c2 = *(pSrcB + 6);          /* B[2 * i + 6] */
  139.     acc2 += (a1 * d1);          /* acc2 = A[2 * i + 5] * B[2 * i + 4] + A[2 * i + 4] * B[2 * i + 5] */
  140.  
  141.     b2 = *(pSrcA + 7);          /* A[2 * i + 7] */
  142.     acc3 = (a2 * c2);           /* acc3 = A[2 * i + 6] * B[2 * i + 6] */
  143.  
  144.     d2 = *(pSrcB + 7);          /* B[2 * i + 7] */
  145.     acc4 = (b2 * c2);           /* acc4 = A[2 * i + 7] * B[2 * i + 6] */
  146.  
  147.     *(pDst + 4) = acc1;         /* C[2 * i + 4]: real part of the third output sample */
  148.     pSrcA += 8U;                /* Step past the 4 complex samples (8 values) just read from A */
  149.  
  150.     acc3 -= (b2 * d2);          /* acc3 = A[2 * i + 6] * B[2 * i + 6] - A[2 * i + 7] * B[2 * i + 7] */
  151.     acc4 += (a2 * d2);          /* acc4 = A[2 * i + 7] * B[2 * i + 6] + A[2 * i + 6] * B[2 * i + 7] */
  152.  
  153.     *(pDst + 5) = acc2;         /* C[2 * i + 5]: imaginary part of the third output sample */
  154.     pSrcB += 8U;                /* Step past the 4 complex samples (8 values) just read from B */
  155.  
  156.     *(pDst + 6) = acc3;         /* C[2 * i + 6]: real part of the fourth output sample */
  157.     *(pDst + 7) = acc4;         /* C[2 * i + 7]: imaginary part of the fourth output sample */
  158.  
  159.     pDst += 8U;                 /* Step past the 4 complex samples (8 values) just written */
  160.  
  161.     /* One group of 4 samples is done: decrement the unrolled-loop counter */
  162.     blkCnt--;
  163.   }
  164.  
  165.   /* If numSamples is not a multiple of 4, the remaining samples (numSamples % 4) are computed below.
  166.    ** No loop unrolling is used. */
  167.   blkCnt = numSamples % 0x4U;
  168.  
  169. #else
  170.  
  171.   /* ARM_MATH_DSP is not defined (original note: Cortex-M0): the loop below processes every sample */
  172.   blkCnt = numSamples;
  173.  
  174. #endif /* #if defined (ARM_MATH_DSP) */
  175.   /* Per-sample loop: one complex product per iteration; the pointers continue from wherever the code above left them */
  176.   while (blkCnt > 0U)
  177.   {
  178.     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  179.     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  180.     a1 = *pSrcA++;              /* real part of A */
  181.     b1 = *pSrcA++;              /* imaginary part of A */
  182.     c1 = *pSrcB++;              /* real part of B */
  183.     d1 = *pSrcB++;              /* imaginary part of B */
  184.  
  185.     /* Store the real part, then the imaginary part, in the destination buffer. */
  186.     *pDst++ = (a1 * c1) - (b1 * d1);
  187.     *pDst++ = (a1 * d1) + (b1 * c1);
  188.  
  189.     /* One sample is done: decrement the loop counter */
  190.     blkCnt--;
  191.   }
  192. }
  193.  
  194. /**
  195.  * @} end of CmplxByCmplxMult group
  196.  */
  197.