/* Source: Subversion repository "LedShow", rev 2, author mjames. */

/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date:        19. March 2015
* $Revision:    V.1.4.5
*
* Project:          CMSIS DSP Library
* Title:            arm_cmplx_mult_cmplx_q31.c
*
* Description:  Q31 complex-by-complex multiplication
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*   - Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*   - Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in
*     the documentation and/or other materials provided with the
*     distribution.
*   - Neither the name of ARM LIMITED nor the names of its contributors
*     may be used to endorse or promote products derived from this
*     software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */

#include "arm_math.h"

/**
 * @ingroup groupCmplxMath
 */

/**
 * @addtogroup CmplxByCmplxMult
 * @{
 */


/**    
54
 * @brief  Q31 complex-by-complex multiplication    
55
 * @param[in]  *pSrcA points to the first input vector    
56
 * @param[in]  *pSrcB points to the second input vector    
57
 * @param[out]  *pDst  points to the output vector    
58
 * @param[in]  numSamples number of complex samples in each vector    
59
 * @return none.    
60
 *    
61
 * <b>Scaling and Overflow Behavior:</b>    
62
 * \par    
63
 * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.    
64
 * Input down scaling is not required.    
65
 */
66
 
67
void arm_cmplx_mult_cmplx_q31(
68
  q31_t * pSrcA,
69
  q31_t * pSrcB,
70
  q31_t * pDst,
71
  uint32_t numSamples)
72
{
73
  q31_t a, b, c, d;                              /* Temporary variables to store real and imaginary values */
74
  uint32_t blkCnt;                               /* loop counters */
75
  q31_t mul1, mul2, mul3, mul4;
76
  q31_t out1, out2;
77
 
78
#ifndef ARM_MATH_CM0_FAMILY
79
 
80
  /* Run the below code for Cortex-M4 and Cortex-M3 */
81
 
82
  /* loop Unrolling */
83
  blkCnt = numSamples >> 2u;
84
 
85
  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
86
   ** a second loop below computes the remaining 1 to 3 samples. */
87
  while(blkCnt > 0u)
88
  {
89
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
90
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
91
    a = *pSrcA++;
92
    b = *pSrcA++;
93
    c = *pSrcB++;
94
    d = *pSrcB++;
95
 
96
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
97
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
98
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
99
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
100
 
101
    mul1 = (mul1 >> 1);
102
    mul2 = (mul2 >> 1);
103
    mul3 = (mul3 >> 1);
104
    mul4 = (mul4 >> 1);
105
 
106
    out1 = mul1 - mul2;
107
    out2 = mul3 + mul4;
108
 
109
    /* store the real result in 3.29 format in the destination buffer. */
110
    *pDst++ = out1;
111
    /* store the imag result in 3.29 format in the destination buffer. */
112
    *pDst++ = out2;
113
 
114
    a = *pSrcA++;
115
    b = *pSrcA++;
116
    c = *pSrcB++;
117
    d = *pSrcB++;
118
 
119
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
120
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
121
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
122
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
123
 
124
    mul1 = (mul1 >> 1);
125
    mul2 = (mul2 >> 1);
126
    mul3 = (mul3 >> 1);
127
    mul4 = (mul4 >> 1);
128
 
129
    out1 = mul1 - mul2;
130
    out2 = mul3 + mul4;
131
 
132
    /* store the real result in 3.29 format in the destination buffer. */
133
    *pDst++ = out1;
134
    /* store the imag result in 3.29 format in the destination buffer. */
135
    *pDst++ = out2;
136
 
137
    a = *pSrcA++;
138
    b = *pSrcA++;
139
    c = *pSrcB++;
140
    d = *pSrcB++;
141
 
142
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
143
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
144
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
145
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
146
 
147
    mul1 = (mul1 >> 1);
148
    mul2 = (mul2 >> 1);
149
    mul3 = (mul3 >> 1);
150
    mul4 = (mul4 >> 1);
151
 
152
    out1 = mul1 - mul2;
153
    out2 = mul3 + mul4;
154
 
155
    /* store the real result in 3.29 format in the destination buffer. */
156
    *pDst++ = out1;
157
    /* store the imag result in 3.29 format in the destination buffer. */
158
    *pDst++ = out2;
159
 
160
    a = *pSrcA++;
161
    b = *pSrcA++;
162
    c = *pSrcB++;
163
    d = *pSrcB++;
164
 
165
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
166
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
167
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
168
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
169
 
170
    mul1 = (mul1 >> 1);
171
    mul2 = (mul2 >> 1);
172
    mul3 = (mul3 >> 1);
173
    mul4 = (mul4 >> 1);
174
 
175
    out1 = mul1 - mul2;
176
    out2 = mul3 + mul4;
177
 
178
    /* store the real result in 3.29 format in the destination buffer. */
179
    *pDst++ = out1;
180
    /* store the imag result in 3.29 format in the destination buffer. */
181
    *pDst++ = out2;
182
 
183
    /* Decrement the blockSize loop counter */
184
    blkCnt--;
185
  }
186
 
187
  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
188
   ** No loop unrolling is used. */
189
  blkCnt = numSamples % 0x4u;
190
 
191
  while(blkCnt > 0u)
192
  {
193
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
194
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
195
    a = *pSrcA++;
196
    b = *pSrcA++;
197
    c = *pSrcB++;
198
    d = *pSrcB++;
199
 
200
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
201
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
202
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
203
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
204
 
205
    mul1 = (mul1 >> 1);
206
    mul2 = (mul2 >> 1);
207
    mul3 = (mul3 >> 1);
208
    mul4 = (mul4 >> 1);
209
 
210
    out1 = mul1 - mul2;
211
    out2 = mul3 + mul4;
212
 
213
    /* store the real result in 3.29 format in the destination buffer. */
214
    *pDst++ = out1;
215
    /* store the imag result in 3.29 format in the destination buffer. */
216
    *pDst++ = out2;
217
 
218
    /* Decrement the blockSize loop counter */
219
    blkCnt--;
220
  }
221
 
222
#else
223
 
224
  /* Run the below code for Cortex-M0 */
225
 
226
  /* loop Unrolling */
227
  blkCnt = numSamples >> 1u;
228
 
229
  /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.    
230
   ** a second loop below computes the remaining 1 sample. */
231
  while(blkCnt > 0u)
232
  {
233
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
234
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
235
    a = *pSrcA++;
236
    b = *pSrcA++;
237
    c = *pSrcB++;
238
    d = *pSrcB++;
239
 
240
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
241
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
242
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
243
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
244
 
245
    mul1 = (mul1 >> 1);
246
    mul2 = (mul2 >> 1);
247
    mul3 = (mul3 >> 1);
248
    mul4 = (mul4 >> 1);
249
 
250
    out1 = mul1 - mul2;
251
    out2 = mul3 + mul4;
252
 
253
    /* store the real result in 3.29 format in the destination buffer. */
254
    *pDst++ = out1;
255
    /* store the imag result in 3.29 format in the destination buffer. */
256
    *pDst++ = out2;
257
 
258
    a = *pSrcA++;
259
    b = *pSrcA++;
260
    c = *pSrcB++;
261
    d = *pSrcB++;
262
 
263
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
264
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
265
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
266
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
267
 
268
    mul1 = (mul1 >> 1);
269
    mul2 = (mul2 >> 1);
270
    mul3 = (mul3 >> 1);
271
    mul4 = (mul4 >> 1);
272
 
273
    out1 = mul1 - mul2;
274
    out2 = mul3 + mul4;
275
 
276
    /* store the real result in 3.29 format in the destination buffer. */
277
    *pDst++ = out1;
278
    /* store the imag result in 3.29 format in the destination buffer. */
279
    *pDst++ = out2;
280
 
281
    /* Decrement the blockSize loop counter */
282
    blkCnt--;
283
  }
284
 
285
  /* If the blockSize is not a multiple of 2, compute any remaining output samples here.    
286
   ** No loop unrolling is used. */
287
  blkCnt = numSamples % 0x2u;
288
 
289
  while(blkCnt > 0u)
290
  {
291
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
292
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
293
    a = *pSrcA++;
294
    b = *pSrcA++;
295
    c = *pSrcB++;
296
    d = *pSrcB++;
297
 
298
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
299
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
300
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
301
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
302
 
303
    mul1 = (mul1 >> 1);
304
    mul2 = (mul2 >> 1);
305
    mul3 = (mul3 >> 1);
306
    mul4 = (mul4 >> 1);
307
 
308
    out1 = mul1 - mul2;
309
    out2 = mul3 + mul4;
310
 
311
    /* store the real result in 3.29 format in the destination buffer. */
312
    *pDst++ = out1;
313
    /* store the imag result in 3.29 format in the destination buffer. */
314
    *pDst++ = out2;
315
 
316
    /* Decrement the blockSize loop counter */
317
    blkCnt--;
318
  }
319
 
320
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
321
 
322
}

/**
 * @} end of CmplxByCmplxMult group
 */