Subversion Repositories DashDisplay

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------    
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
  3. *    
  4. * $Date:        19. March 2015
  5. * $Revision:    V.1.4.5
  6. *    
  7. * Project:          CMSIS DSP Library    
  8. * Title:            arm_cmplx_mult_cmplx_q31.c    
  9. *    
  10. * Description:  Q31 complex-by-complex multiplication    
  11. *    
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *  
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *   - Redistributions of source code must retain the above copyright
  18. *     notice, this list of conditions and the following disclaimer.
  19. *   - Redistributions in binary form must reproduce the above copyright
  20. *     notice, this list of conditions and the following disclaimer in
  21. *     the documentation and/or other materials provided with the
  22. *     distribution.
  23. *   - Neither the name of ARM LIMITED nor the names of its contributors
  24. *     may be used to endorse or promote products derived from this
  25. *     software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * -------------------------------------------------------------------- */
  40.  
  41. #include "arm_math.h"
  42.  
  43. /**    
  44.  * @ingroup groupCmplxMath    
  45.  */
  46.  
  47. /**    
  48.  * @addtogroup CmplxByCmplxMult    
  49.  * @{    
  50.  */
  51.  
  52.  
  53. /**    
  54.  * @brief  Q31 complex-by-complex multiplication    
  55.  * @param[in]  *pSrcA points to the first input vector    
  56.  * @param[in]  *pSrcB points to the second input vector    
  57.  * @param[out]  *pDst  points to the output vector    
  58.  * @param[in]  numSamples number of complex samples in each vector    
  59.  * @return none.    
  60.  *    
  61.  * <b>Scaling and Overflow Behavior:</b>    
  62.  * \par    
  63.  * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.    
  64.  * Input down scaling is not required.    
  65.  */
  66.  
  67. void arm_cmplx_mult_cmplx_q31(
  68.   q31_t * pSrcA,
  69.   q31_t * pSrcB,
  70.   q31_t * pDst,
  71.   uint32_t numSamples)
  72. {
  73.   q31_t a, b, c, d;                              /* Temporary variables to store real and imaginary values */
  74.   uint32_t blkCnt;                               /* loop counters */
  75.   q31_t mul1, mul2, mul3, mul4;
  76.   q31_t out1, out2;
  77.  
  78. #ifndef ARM_MATH_CM0_FAMILY
  79.  
  80.   /* Run the below code for Cortex-M4 and Cortex-M3 */
  81.  
  82.   /* loop Unrolling */
  83.   blkCnt = numSamples >> 2u;
  84.  
  85.   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
  86.    ** a second loop below computes the remaining 1 to 3 samples. */
  87.   while(blkCnt > 0u)
  88.   {
  89.     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  90.     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  91.     a = *pSrcA++;
  92.     b = *pSrcA++;
  93.     c = *pSrcB++;
  94.     d = *pSrcB++;
  95.  
  96.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  97.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  98.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  99.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  100.  
  101.     mul1 = (mul1 >> 1);
  102.     mul2 = (mul2 >> 1);
  103.     mul3 = (mul3 >> 1);
  104.     mul4 = (mul4 >> 1);
  105.  
  106.     out1 = mul1 - mul2;
  107.     out2 = mul3 + mul4;
  108.  
  109.     /* store the real result in 3.29 format in the destination buffer. */
  110.     *pDst++ = out1;
  111.     /* store the imag result in 3.29 format in the destination buffer. */
  112.     *pDst++ = out2;
  113.  
  114.     a = *pSrcA++;
  115.     b = *pSrcA++;
  116.     c = *pSrcB++;
  117.     d = *pSrcB++;
  118.  
  119.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  120.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  121.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  122.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  123.  
  124.     mul1 = (mul1 >> 1);
  125.     mul2 = (mul2 >> 1);
  126.     mul3 = (mul3 >> 1);
  127.     mul4 = (mul4 >> 1);
  128.  
  129.     out1 = mul1 - mul2;
  130.     out2 = mul3 + mul4;
  131.  
  132.     /* store the real result in 3.29 format in the destination buffer. */
  133.     *pDst++ = out1;
  134.     /* store the imag result in 3.29 format in the destination buffer. */
  135.     *pDst++ = out2;
  136.  
  137.     a = *pSrcA++;
  138.     b = *pSrcA++;
  139.     c = *pSrcB++;
  140.     d = *pSrcB++;
  141.  
  142.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  143.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  144.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  145.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  146.  
  147.     mul1 = (mul1 >> 1);
  148.     mul2 = (mul2 >> 1);
  149.     mul3 = (mul3 >> 1);
  150.     mul4 = (mul4 >> 1);
  151.  
  152.     out1 = mul1 - mul2;
  153.     out2 = mul3 + mul4;
  154.  
  155.     /* store the real result in 3.29 format in the destination buffer. */
  156.     *pDst++ = out1;
  157.     /* store the imag result in 3.29 format in the destination buffer. */
  158.     *pDst++ = out2;
  159.  
  160.     a = *pSrcA++;
  161.     b = *pSrcA++;
  162.     c = *pSrcB++;
  163.     d = *pSrcB++;
  164.  
  165.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  166.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  167.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  168.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  169.  
  170.     mul1 = (mul1 >> 1);
  171.     mul2 = (mul2 >> 1);
  172.     mul3 = (mul3 >> 1);
  173.     mul4 = (mul4 >> 1);
  174.  
  175.     out1 = mul1 - mul2;
  176.     out2 = mul3 + mul4;
  177.  
  178.     /* store the real result in 3.29 format in the destination buffer. */
  179.     *pDst++ = out1;
  180.     /* store the imag result in 3.29 format in the destination buffer. */
  181.     *pDst++ = out2;
  182.  
  183.     /* Decrement the blockSize loop counter */
  184.     blkCnt--;
  185.   }
  186.  
  187.   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
  188.    ** No loop unrolling is used. */
  189.   blkCnt = numSamples % 0x4u;
  190.  
  191.   while(blkCnt > 0u)
  192.   {
  193.     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  194.     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  195.     a = *pSrcA++;
  196.     b = *pSrcA++;
  197.     c = *pSrcB++;
  198.     d = *pSrcB++;
  199.  
  200.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  201.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  202.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  203.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  204.  
  205.     mul1 = (mul1 >> 1);
  206.     mul2 = (mul2 >> 1);
  207.     mul3 = (mul3 >> 1);
  208.     mul4 = (mul4 >> 1);
  209.  
  210.     out1 = mul1 - mul2;
  211.     out2 = mul3 + mul4;
  212.  
  213.     /* store the real result in 3.29 format in the destination buffer. */
  214.     *pDst++ = out1;
  215.     /* store the imag result in 3.29 format in the destination buffer. */
  216.     *pDst++ = out2;
  217.  
  218.     /* Decrement the blockSize loop counter */
  219.     blkCnt--;
  220.   }
  221.  
  222. #else
  223.  
  224.   /* Run the below code for Cortex-M0 */
  225.  
  226.   /* loop Unrolling */
  227.   blkCnt = numSamples >> 1u;
  228.  
  229.   /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.    
  230.    ** a second loop below computes the remaining 1 sample. */
  231.   while(blkCnt > 0u)
  232.   {
  233.     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  234.     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  235.     a = *pSrcA++;
  236.     b = *pSrcA++;
  237.     c = *pSrcB++;
  238.     d = *pSrcB++;
  239.  
  240.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  241.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  242.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  243.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  244.  
  245.     mul1 = (mul1 >> 1);
  246.     mul2 = (mul2 >> 1);
  247.     mul3 = (mul3 >> 1);
  248.     mul4 = (mul4 >> 1);
  249.  
  250.     out1 = mul1 - mul2;
  251.     out2 = mul3 + mul4;
  252.  
  253.     /* store the real result in 3.29 format in the destination buffer. */
  254.     *pDst++ = out1;
  255.     /* store the imag result in 3.29 format in the destination buffer. */
  256.     *pDst++ = out2;
  257.  
  258.     a = *pSrcA++;
  259.     b = *pSrcA++;
  260.     c = *pSrcB++;
  261.     d = *pSrcB++;
  262.  
  263.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  264.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  265.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  266.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  267.  
  268.     mul1 = (mul1 >> 1);
  269.     mul2 = (mul2 >> 1);
  270.     mul3 = (mul3 >> 1);
  271.     mul4 = (mul4 >> 1);
  272.  
  273.     out1 = mul1 - mul2;
  274.     out2 = mul3 + mul4;
  275.  
  276.     /* store the real result in 3.29 format in the destination buffer. */
  277.     *pDst++ = out1;
  278.     /* store the imag result in 3.29 format in the destination buffer. */
  279.     *pDst++ = out2;
  280.  
  281.     /* Decrement the blockSize loop counter */
  282.     blkCnt--;
  283.   }
  284.  
  285.   /* If the blockSize is not a multiple of 2, compute any remaining output samples here.    
  286.    ** No loop unrolling is used. */
  287.   blkCnt = numSamples % 0x2u;
  288.  
  289.   while(blkCnt > 0u)
  290.   {
  291.     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
  292.     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
  293.     a = *pSrcA++;
  294.     b = *pSrcA++;
  295.     c = *pSrcB++;
  296.     d = *pSrcB++;
  297.  
  298.     mul1 = (q31_t) (((q63_t) a * c) >> 32);
  299.     mul2 = (q31_t) (((q63_t) b * d) >> 32);
  300.     mul3 = (q31_t) (((q63_t) a * d) >> 32);
  301.     mul4 = (q31_t) (((q63_t) b * c) >> 32);
  302.  
  303.     mul1 = (mul1 >> 1);
  304.     mul2 = (mul2 >> 1);
  305.     mul3 = (mul3 >> 1);
  306.     mul4 = (mul4 >> 1);
  307.  
  308.     out1 = mul1 - mul2;
  309.     out2 = mul3 + mul4;
  310.  
  311.     /* store the real result in 3.29 format in the destination buffer. */
  312.     *pDst++ = out1;
  313.     /* store the imag result in 3.29 format in the destination buffer. */
  314.     *pDst++ = out2;
  315.  
  316.     /* Decrement the blockSize loop counter */
  317.     blkCnt--;
  318.   }
  319.  
  320. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  321.  
  322. }
  323.  
  324. /**    
  325.  * @} end of CmplxByCmplxMult group    
  326.  */
  327.