Subversion Repositories dashGPS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------
  2.  * Project:      CMSIS DSP Library
  3.  * Title:        arm_cmplx_dot_prod_f32.c
  4.  * Description:  Floating-point complex dot product
  5.  *
  6.  * $Date:        27. January 2017
  7.  * $Revision:    V.1.5.1
  8.  *
  9.  * Target Processor: Cortex-M cores
  10.  * -------------------------------------------------------------------- */
  11. /*
  12.  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13.  *
  14.  * SPDX-License-Identifier: Apache-2.0
  15.  *
  16.  * Licensed under the Apache License, Version 2.0 (the License); you may
  17.  * not use this file except in compliance with the License.
  18.  * You may obtain a copy of the License at
  19.  *
  20.  * www.apache.org/licenses/LICENSE-2.0
  21.  *
  22.  * Unless required by applicable law or agreed to in writing, software
  23.  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24.  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25.  * See the License for the specific language governing permissions and
  26.  * limitations under the License.
  27.  */
  28.  
  29. #include "arm_math.h"
  30.  
  31. /**
  32.  * @ingroup groupCmplxMath
  33.  */
  34.  
  35. /**
  36.  * @defgroup cmplx_dot_prod Complex Dot Product
  37.  *
  38.  * Computes the dot product of two complex vectors.
  39.  * The vectors are multiplied element-by-element and then summed.
  40.  *
  41.  * The <code>pSrcA</code> points to the first complex input vector and
  42.  * <code>pSrcB</code> points to the second complex input vector.
  43.  * <code>numSamples</code> specifies the number of complex samples
  44.  * and the data in each array is stored in an interleaved fashion
  45.  * (real, imag, real, imag, ...).
  46.  * Each array has a total of <code>2*numSamples</code> values.
  47.  *
  48.  * The following algorithm is used:
  49.  * <pre>
  50.  * realResult=0;
  51.  * imagResult=0;
  52.  * for(n=0; n<numSamples; n++) {
  53.  *     realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
  54.  *     imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
  55.  * }
  56.  * </pre>
  57.  *
  58.  * There are separate functions for floating-point, Q15, and Q31 data types.
  59.  */
  60.  
  61. /**
  62.  * @addtogroup cmplx_dot_prod
  63.  * @{
  64.  */
  65.  
  66. /**
  67.  * @brief  Floating-point complex dot product
  68.  * @param  *pSrcA points to the first input vector
  69.  * @param  *pSrcB points to the second input vector
  70.  * @param  numSamples number of complex samples in each vector
  71.  * @param  *realResult real part of the result returned here
  72.  * @param  *imagResult imaginary part of the result returned here
  73.  * @return none.
  74.  */
  75.  
  76. void arm_cmplx_dot_prod_f32(
  77.   float32_t * pSrcA,
  78.   float32_t * pSrcB,
  79.   uint32_t numSamples,
  80.   float32_t * realResult,
  81.   float32_t * imagResult)
  82. {
  83.   float32_t real_sum = 0.0f, imag_sum = 0.0f;    /* Temporary result storage */
  84.   float32_t a0,b0,c0,d0;
  85.  
  86. #if defined (ARM_MATH_DSP)
  87.  
  88.   /* Run the below code for Cortex-M4 and Cortex-M3 */
  89.   uint32_t blkCnt;                               /* loop counter */
  90.  
  91.   /*loop Unrolling */
  92.   blkCnt = numSamples >> 2U;
  93.  
  94.   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
  95.    ** a second loop below computes the remaining 1 to 3 samples. */
  96.   while (blkCnt > 0U)
  97.   {
  98.       a0 = *pSrcA++;
  99.       b0 = *pSrcA++;
  100.       c0 = *pSrcB++;
  101.       d0 = *pSrcB++;
  102.  
  103.       real_sum += a0 * c0;
  104.       imag_sum += a0 * d0;
  105.       real_sum -= b0 * d0;
  106.       imag_sum += b0 * c0;
  107.  
  108.       a0 = *pSrcA++;
  109.       b0 = *pSrcA++;
  110.       c0 = *pSrcB++;
  111.       d0 = *pSrcB++;
  112.  
  113.       real_sum += a0 * c0;
  114.       imag_sum += a0 * d0;
  115.       real_sum -= b0 * d0;
  116.       imag_sum += b0 * c0;
  117.  
  118.       a0 = *pSrcA++;
  119.       b0 = *pSrcA++;
  120.       c0 = *pSrcB++;
  121.       d0 = *pSrcB++;
  122.  
  123.       real_sum += a0 * c0;
  124.       imag_sum += a0 * d0;
  125.       real_sum -= b0 * d0;
  126.       imag_sum += b0 * c0;
  127.  
  128.       a0 = *pSrcA++;
  129.       b0 = *pSrcA++;
  130.       c0 = *pSrcB++;
  131.       d0 = *pSrcB++;
  132.  
  133.       real_sum += a0 * c0;
  134.       imag_sum += a0 * d0;
  135.       real_sum -= b0 * d0;
  136.       imag_sum += b0 * c0;
  137.  
  138.       /* Decrement the loop counter */
  139.       blkCnt--;
  140.   }
  141.  
  142.   /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
  143.    ** No loop unrolling is used. */
  144.   blkCnt = numSamples & 0x3U;
  145.  
  146.   while (blkCnt > 0U)
  147.   {
  148.       a0 = *pSrcA++;
  149.       b0 = *pSrcA++;
  150.       c0 = *pSrcB++;
  151.       d0 = *pSrcB++;
  152.  
  153.       real_sum += a0 * c0;
  154.       imag_sum += a0 * d0;
  155.       real_sum -= b0 * d0;
  156.       imag_sum += b0 * c0;
  157.  
  158.       /* Decrement the loop counter */
  159.       blkCnt--;
  160.   }
  161.  
  162. #else
  163.  
  164.   /* Run the below code for Cortex-M0 */
  165.  
  166.   while (numSamples > 0U)
  167.   {
  168.       a0 = *pSrcA++;
  169.       b0 = *pSrcA++;
  170.       c0 = *pSrcB++;
  171.       d0 = *pSrcB++;
  172.  
  173.       real_sum += a0 * c0;
  174.       imag_sum += a0 * d0;
  175.       real_sum -= b0 * d0;
  176.       imag_sum += b0 * c0;
  177.  
  178.       /* Decrement the loop counter */
  179.       numSamples--;
  180.   }
  181.  
  182. #endif /* #if defined (ARM_MATH_DSP) */
  183.  
  184.   /* Store the real and imaginary results in the destination buffers */
  185.   *realResult = real_sum;
  186.   *imagResult = imag_sum;
  187. }
  188.  
  189. /**
  190.  * @} end of cmplx_dot_prod group
  191.  */
  192.