Subversion Repositories DashDisplay

Rev

Rev 2 | Rev 49 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------    
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
  3. *    
  4. * $Date:        19. March 2015
  5. * $Revision:    V.1.4.5
  6. *    
  7. * Project:          CMSIS DSP Library    
  8. * Title:            arm_mult_f32.c    
  9. *    
  10. * Description:  Floating-point vector multiplication.    
  11. *    
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *  
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *   - Redistributions of source code must retain the above copyright
  18. *     notice, this list of conditions and the following disclaimer.
  19. *   - Redistributions in binary form must reproduce the above copyright
  20. *     notice, this list of conditions and the following disclaimer in
  21. *     the documentation and/or other materials provided with the
  22. *     distribution.
  23. *   - Neither the name of ARM LIMITED nor the names of its contributors
  24. *     may be used to endorse or promote products derived from this
  25. *     software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.  
  39. * -------------------------------------------------------------------- */
  40.  
  41. #include "arm_math.h"
  42.  
  43. /**        
  44.  * @ingroup groupMath        
  45.  */
  46.  
  47. /**        
  48.  * @defgroup BasicMult Vector Multiplication        
  49.  *        
  50.  * Element-by-element multiplication of two vectors.        
  51.  *        
  52.  * <pre>        
  53.  *     pDst[n] = pSrcA[n] * pSrcB[n],   0 <= n < blockSize.        
  54.  * </pre>        
  55.  *        
  56.  * There are separate functions for floating-point, Q7, Q15, and Q31 data types.        
  57.  */
  58.  
  59. /**        
  60.  * @addtogroup BasicMult        
  61.  * @{        
  62.  */
  63.  
  64. /**        
  65.  * @brief Floating-point vector multiplication.        
  66.  * @param[in]       *pSrcA points to the first input vector        
  67.  * @param[in]       *pSrcB points to the second input vector        
  68.  * @param[out]      *pDst points to the output vector        
  69.  * @param[in]       blockSize number of samples in each vector        
  70.  * @return none.        
  71.  */
  72.  
  73. void arm_mult_f32(
  74.   float32_t * pSrcA,
  75.   float32_t * pSrcB,
  76.   float32_t * pDst,
  77.   uint32_t blockSize)
  78. {
  79.   uint32_t blkCnt;                               /* loop counters */
  80. #ifndef ARM_MATH_CM0_FAMILY
  81.  
  82.   /* Run the below code for Cortex-M4 and Cortex-M3 */
  83.   float32_t inA1, inA2, inA3, inA4;              /* temporary input variables */
  84.   float32_t inB1, inB2, inB3, inB4;              /* temporary input variables */
  85.   float32_t out1, out2, out3, out4;              /* temporary output variables */
  86.  
  87.   /* loop Unrolling */
  88.   blkCnt = blockSize >> 2u;
  89.  
  90.   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.        
  91.    ** a second loop below computes the remaining 1 to 3 samples. */
  92.   while(blkCnt > 0u)
  93.   {
  94.     /* C = A * B */
  95.     /* Multiply the inputs and store the results in output buffer */
  96.     /* read sample from sourceA */
  97.     inA1 = *pSrcA;
  98.     /* read sample from sourceB */
  99.     inB1 = *pSrcB;
  100.     /* read sample from sourceA */
  101.     inA2 = *(pSrcA + 1);
  102.     /* read sample from sourceB */
  103.     inB2 = *(pSrcB + 1);
  104.  
  105.     /* out = sourceA * sourceB */
  106.     out1 = inA1 * inB1;
  107.  
  108.     /* read sample from sourceA */
  109.     inA3 = *(pSrcA + 2);
  110.     /* read sample from sourceB */
  111.     inB3 = *(pSrcB + 2);
  112.  
  113.     /* out = sourceA * sourceB */
  114.     out2 = inA2 * inB2;
  115.  
  116.     /* read sample from sourceA */
  117.     inA4 = *(pSrcA + 3);
  118.  
  119.     /* store result to destination buffer */
  120.     *pDst = out1;
  121.  
  122.     /* read sample from sourceB */
  123.     inB4 = *(pSrcB + 3);
  124.  
  125.     /* out = sourceA * sourceB */
  126.     out3 = inA3 * inB3;
  127.  
  128.     /* store result to destination buffer */
  129.     *(pDst + 1) = out2;
  130.  
  131.     /* out = sourceA * sourceB */
  132.     out4 = inA4 * inB4;
  133.     /* store result to destination buffer */
  134.     *(pDst + 2) = out3;
  135.     /* store result to destination buffer */
  136.     *(pDst + 3) = out4;
  137.  
  138.  
  139.     /* update pointers to process next samples */
  140.     pSrcA += 4u;
  141.     pSrcB += 4u;
  142.     pDst += 4u;
  143.  
  144.     /* Decrement the blockSize loop counter */
  145.     blkCnt--;
  146.   }
  147.  
  148.   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.        
  149.    ** No loop unrolling is used. */
  150.   blkCnt = blockSize % 0x4u;
  151.  
  152. #else
  153.  
  154.   /* Run the below code for Cortex-M0 */
  155.  
  156.   /* Initialize blkCnt with number of samples */
  157.   blkCnt = blockSize;
  158.  
  159. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  160.  
  161.   while(blkCnt > 0u)
  162.   {
  163.     /* C = A * B */
  164.     /* Multiply the inputs and store the results in output buffer */
  165.     *pDst++ = (*pSrcA++) * (*pSrcB++);
  166.  
  167.     /* Decrement the blockSize loop counter */
  168.     blkCnt--;
  169.   }
  170. }
  171.  
  172. /**        
  173.  * @} end of BasicMult group        
  174.  */
  175.