WebSVN – LedShow – Blame – /trunk/Drivers/CMSIS/DSP_Lib/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c

Rev	Author	Line No.	Line
2	mjames	1	/* ----------------------------------------------------------------------
		2	* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
		3	*
		4	* $Date: 19. March 2015
		5	* $Revision: V.1.4.5
		6	*
		7	* Project: CMSIS DSP Library
		8	* Title: arm_cmplx_mult_cmplx_q31.c
		9	*
		10	* Description: Q31 complex-by-complex multiplication
		11	*
		12	* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
		13	*
		14	* Redistribution and use in source and binary forms, with or without
		15	* modification, are permitted provided that the following conditions
		16	* are met:
		17	* - Redistributions of source code must retain the above copyright
		18	* notice, this list of conditions and the following disclaimer.
		19	* - Redistributions in binary form must reproduce the above copyright
		20	* notice, this list of conditions and the following disclaimer in
		21	* the documentation and/or other materials provided with the
		22	* distribution.
		23	* - Neither the name of ARM LIMITED nor the names of its contributors
		24	* may be used to endorse or promote products derived from this
		25	* software without specific prior written permission.
		26	*
		27	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
		28	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
		29	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
		30	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
		31	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
		32	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
		33	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
		34	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
		35	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
		37	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
		38	* POSSIBILITY OF SUCH DAMAGE.
		39	* -------------------------------------------------------------------- */
		40
		41	#include "arm_math.h"
		42
		43	/**
		44	* @ingroup groupCmplxMath
		45	*/
		46
		47	/**
		48	* @addtogroup CmplxByCmplxMult
		49	* @{
		50	*/
		51
		52
		53	/**
		54	* @brief Q31 complex-by-complex multiplication
		55	* @param[in] *pSrcA points to the first input vector
		56	* @param[in] *pSrcB points to the second input vector
		57	* @param[out] *pDst points to the output vector
		58	* @param[in] numSamples number of complex samples in each vector
		59	* @return none.
		60	*
		61	* <b>Scaling and Overflow Behavior:</b>
		62	* \par
		63	* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
		64	* Input down scaling is not required.
		65	*/
		66
		67	void arm_cmplx_mult_cmplx_q31(
		68	q31_t * pSrcA,
		69	q31_t * pSrcB,
		70	q31_t * pDst,
		71	uint32_t numSamples)
		72	{
		73	q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
		74	uint32_t blkCnt; /* loop counters */
		75	q31_t mul1, mul2, mul3, mul4;
		76	q31_t out1, out2;
		77
		78	#ifndef ARM_MATH_CM0_FAMILY
		79
		80	/* Run the below code for Cortex-M4 and Cortex-M3 */
		81
		82	/* loop Unrolling */
		83	blkCnt = numSamples >> 2u;
		84
		85	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
		86	** a second loop below computes the remaining 1 to 3 samples. */
		87	while(blkCnt > 0u)
		88	{
		89	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
		90	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
		91	a = *pSrcA++;
		92	b = *pSrcA++;
		93	c = *pSrcB++;
		94	d = *pSrcB++;
		95
		96	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		97	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		98	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		99	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		100
		101	mul1 = (mul1 >> 1);
		102	mul2 = (mul2 >> 1);
		103	mul3 = (mul3 >> 1);
		104	mul4 = (mul4 >> 1);
		105
		106	out1 = mul1 - mul2;
		107	out2 = mul3 + mul4;
		108
		109	/* store the real result in 3.29 format in the destination buffer. */
		110	*pDst++ = out1;
		111	/* store the imag result in 3.29 format in the destination buffer. */
		112	*pDst++ = out2;
		113
		114	a = *pSrcA++;
		115	b = *pSrcA++;
		116	c = *pSrcB++;
		117	d = *pSrcB++;
		118
		119	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		120	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		121	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		122	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		123
		124	mul1 = (mul1 >> 1);
		125	mul2 = (mul2 >> 1);
		126	mul3 = (mul3 >> 1);
		127	mul4 = (mul4 >> 1);
		128
		129	out1 = mul1 - mul2;
		130	out2 = mul3 + mul4;
		131
		132	/* store the real result in 3.29 format in the destination buffer. */
		133	*pDst++ = out1;
		134	/* store the imag result in 3.29 format in the destination buffer. */
		135	*pDst++ = out2;
		136
		137	a = *pSrcA++;
		138	b = *pSrcA++;
		139	c = *pSrcB++;
		140	d = *pSrcB++;
		141
		142	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		143	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		144	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		145	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		146
		147	mul1 = (mul1 >> 1);
		148	mul2 = (mul2 >> 1);
		149	mul3 = (mul3 >> 1);
		150	mul4 = (mul4 >> 1);
		151
		152	out1 = mul1 - mul2;
		153	out2 = mul3 + mul4;
		154
		155	/* store the real result in 3.29 format in the destination buffer. */
		156	*pDst++ = out1;
		157	/* store the imag result in 3.29 format in the destination buffer. */
		158	*pDst++ = out2;
		159
		160	a = *pSrcA++;
		161	b = *pSrcA++;
		162	c = *pSrcB++;
		163	d = *pSrcB++;
		164
		165	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		166	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		167	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		168	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		169
		170	mul1 = (mul1 >> 1);
		171	mul2 = (mul2 >> 1);
		172	mul3 = (mul3 >> 1);
		173	mul4 = (mul4 >> 1);
		174
		175	out1 = mul1 - mul2;
		176	out2 = mul3 + mul4;
		177
		178	/* store the real result in 3.29 format in the destination buffer. */
		179	*pDst++ = out1;
		180	/* store the imag result in 3.29 format in the destination buffer. */
		181	*pDst++ = out2;
		182
		183	/* Decrement the blockSize loop counter */
		184	blkCnt--;
		185	}
		186
		187	/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
		188	** No loop unrolling is used. */
		189	blkCnt = numSamples % 0x4u;
		190
		191	while(blkCnt > 0u)
		192	{
		193	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
		194	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
		195	a = *pSrcA++;
		196	b = *pSrcA++;
		197	c = *pSrcB++;
		198	d = *pSrcB++;
		199
		200	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		201	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		202	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		203	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		204
		205	mul1 = (mul1 >> 1);
		206	mul2 = (mul2 >> 1);
		207	mul3 = (mul3 >> 1);
		208	mul4 = (mul4 >> 1);
		209
		210	out1 = mul1 - mul2;
		211	out2 = mul3 + mul4;
		212
		213	/* store the real result in 3.29 format in the destination buffer. */
		214	*pDst++ = out1;
		215	/* store the imag result in 3.29 format in the destination buffer. */
		216	*pDst++ = out2;
		217
		218	/* Decrement the blockSize loop counter */
		219	blkCnt--;
		220	}
		221
		222	#else
		223
		224	/* Run the below code for Cortex-M0 */
		225
		226	/* loop Unrolling */
		227	blkCnt = numSamples >> 1u;
		228
		229	/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
		230	** a second loop below computes the remaining 1 sample. */
		231	while(blkCnt > 0u)
		232	{
		233	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
		234	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
		235	a = *pSrcA++;
		236	b = *pSrcA++;
		237	c = *pSrcB++;
		238	d = *pSrcB++;
		239
		240	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		241	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		242	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		243	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		244
		245	mul1 = (mul1 >> 1);
		246	mul2 = (mul2 >> 1);
		247	mul3 = (mul3 >> 1);
		248	mul4 = (mul4 >> 1);
		249
		250	out1 = mul1 - mul2;
		251	out2 = mul3 + mul4;
		252
		253	/* store the real result in 3.29 format in the destination buffer. */
		254	*pDst++ = out1;
		255	/* store the imag result in 3.29 format in the destination buffer. */
		256	*pDst++ = out2;
		257
		258	a = *pSrcA++;
		259	b = *pSrcA++;
		260	c = *pSrcB++;
		261	d = *pSrcB++;
		262
		263	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		264	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		265	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		266	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		267
		268	mul1 = (mul1 >> 1);
		269	mul2 = (mul2 >> 1);
		270	mul3 = (mul3 >> 1);
		271	mul4 = (mul4 >> 1);
		272
		273	out1 = mul1 - mul2;
		274	out2 = mul3 + mul4;
		275
		276	/* store the real result in 3.29 format in the destination buffer. */
		277	*pDst++ = out1;
		278	/* store the imag result in 3.29 format in the destination buffer. */
		279	*pDst++ = out2;
		280
		281	/* Decrement the blockSize loop counter */
		282	blkCnt--;
		283	}
		284
		285	/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
		286	** No loop unrolling is used. */
		287	blkCnt = numSamples % 0x2u;
		288
		289	while(blkCnt > 0u)
		290	{
		291	/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
		292	/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
		293	a = *pSrcA++;
		294	b = *pSrcA++;
		295	c = *pSrcB++;
		296	d = *pSrcB++;
		297
		298	mul1 = (q31_t) (((q63_t) a * c) >> 32);
		299	mul2 = (q31_t) (((q63_t) b * d) >> 32);
		300	mul3 = (q31_t) (((q63_t) a * d) >> 32);
		301	mul4 = (q31_t) (((q63_t) b * c) >> 32);
		302
		303	mul1 = (mul1 >> 1);
		304	mul2 = (mul2 >> 1);
		305	mul3 = (mul3 >> 1);
		306	mul4 = (mul4 >> 1);
		307
		308	out1 = mul1 - mul2;
		309	out2 = mul3 + mul4;
		310
		311	/* store the real result in 3.29 format in the destination buffer. */
		312	*pDst++ = out1;
		313	/* store the imag result in 3.29 format in the destination buffer. */
		314	*pDst++ = out2;
		315
		316	/* Decrement the blockSize loop counter */
		317	blkCnt--;
		318	}
		319
		320	#endif /* #ifndef ARM_MATH_CM0_FAMILY */
		321
		322	}
		323
		324	/**
		325	* @} end of CmplxByCmplxMult group
		326	*/

Subversion Repositories LedShow

(root)/trunk/Drivers/CMSIS/DSP_Lib/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c – Rev 2