Subversion Repositories DashDisplay

Rev

Rev 28 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 mjames 1
/* ----------------------------------------------------------------------    
2
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
3
*    
4
* $Date:        19. March 2015
5
* $Revision:    V.1.4.5
6
*    
7
* Project:          CMSIS DSP Library    
8
* Title:                arm_dot_prod_f32.c    
9
*    
10
* Description:  Floating-point dot product.    
11
*    
12
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13
*  
14
* Redistribution and use in source and binary forms, with or without
15
* modification, are permitted provided that the following conditions
16
* are met:
17
*   - Redistributions of source code must retain the above copyright
18
*     notice, this list of conditions and the following disclaimer.
19
*   - Redistributions in binary form must reproduce the above copyright
20
*     notice, this list of conditions and the following disclaimer in
21
*     the documentation and/or other materials provided with the
22
*     distribution.
23
*   - Neither the name of ARM LIMITED nor the names of its contributors
24
*     may be used to endorse or promote products derived from this
25
*     software without specific prior written permission.
26
*
27
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
* POSSIBILITY OF SUCH DAMAGE.  
39
* ---------------------------------------------------------------------------- */
40
 
41
#include "arm_math.h"
42
 
43
/**    
44
 * @ingroup groupMath
45
 */
46
 
47
/**
 * @defgroup dot_prod Vector Dot Product
 *
 * Computes the dot product of two vectors.
 * Corresponding elements of the two vectors are multiplied together and the
 * products are summed into a single scalar.
 *
 * <pre>
 *     sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
 * </pre>
 *
 * There are separate functions for floating-point, Q7, Q15, and Q31 data types.
 */
59
 
60
/**    
61
 * @addtogroup dot_prod    
62
 * @{    
63
 */
64
 
65
/**    
66
 * @brief Dot product of floating-point vectors.    
67
 * @param[in]       *pSrcA points to the first input vector    
68
 * @param[in]       *pSrcB points to the second input vector    
69
 * @param[in]       blockSize number of samples in each vector    
70
 * @param[out]      *result output result returned here    
71
 * @return none.    
72
 */
73
 
74
 
75
void arm_dot_prod_f32(
76
  float32_t * pSrcA,
77
  float32_t * pSrcB,
78
  uint32_t blockSize,
79
  float32_t * result)
80
{
81
  float32_t sum = 0.0f;                          /* Temporary result storage */
82
  uint32_t blkCnt;                               /* loop counter */
83
 
84
 
85
#ifndef ARM_MATH_CM0_FAMILY
86
 
87
/* Run the below code for Cortex-M4 and Cortex-M3 */
88
  /*loop Unrolling */
89
  blkCnt = blockSize >> 2u;
90
 
91
  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
92
   ** a second loop below computes the remaining 1 to 3 samples. */
93
  while(blkCnt > 0u)
94
  {
95
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
96
    /* Calculate dot product and then store the result in a temporary buffer */
97
    sum += (*pSrcA++) * (*pSrcB++);
98
    sum += (*pSrcA++) * (*pSrcB++);
99
    sum += (*pSrcA++) * (*pSrcB++);
100
    sum += (*pSrcA++) * (*pSrcB++);
101
 
102
    /* Decrement the loop counter */
103
    blkCnt--;
104
  }
105
 
106
  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
107
   ** No loop unrolling is used. */
108
  blkCnt = blockSize % 0x4u;
109
 
110
#else
111
 
112
  /* Run the below code for Cortex-M0 */
113
 
114
  /* Initialize blkCnt with number of samples */
115
  blkCnt = blockSize;
116
 
117
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
118
 
119
 
120
  while(blkCnt > 0u)
121
  {
122
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
123
    /* Calculate dot product and then store the result in a temporary buffer. */
124
    sum += (*pSrcA++) * (*pSrcB++);
125
 
126
    /* Decrement the loop counter */
127
    blkCnt--;
128
  }
129
  /* Store the result back in the destination buffer */
130
  *result = sum;
131
}
132
 
133
/**    
134
 * @} end of dot_prod group    
135
 */