WebSVN – dashGPS – Blame – /branches/dashGPS-bmp/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c

Rev	Author	Line No.	Line
2	mjames	1	/* ----------------------------------------------------------------------
		2	* Project: CMSIS DSP Library
		3	* Title: arm_mat_cmplx_mult_f32.c
		4	* Description: Floating-point complex matrix multiplication
		5	*
		6	* $Date: 27. January 2017
		7	* $Revision: V.1.5.1
		8	*
		9	* Target Processor: Cortex-M cores
		10	* -------------------------------------------------------------------- */
		11	/*
		12	* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
		13	*
		14	* SPDX-License-Identifier: Apache-2.0
		15	*
		16	* Licensed under the Apache License, Version 2.0 (the License); you may
		17	* not use this file except in compliance with the License.
		18	* You may obtain a copy of the License at
		19	*
		20	* www.apache.org/licenses/LICENSE-2.0
		21	*
		22	* Unless required by applicable law or agreed to in writing, software
		23	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
		24	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		25	* See the License for the specific language governing permissions and
		26	* limitations under the License.
		27	*/
		28
		29	#include "arm_math.h"
		30
		31	/**
		32	* @ingroup groupMatrix
		33	*/
		34
		35	/**
		36	* @defgroup CmplxMatrixMult Complex Matrix Multiplication
		37	*
		38	* Complex Matrix multiplication is only defined if the number of columns of the
		39	* first matrix equals the number of rows of the second matrix.
		40	* Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results
		41	* in an <code>M x P</code> matrix.
		42	* When matrix size checking is enabled, the functions check: (1) that the inner dimensions of
		43	* <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output
		44	* matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>.
		45	*/
		46
		47
		48	/**
		49	* @addtogroup CmplxMatrixMult
		50	* @{
		51	*/
		52
		53	/**
		54	* @brief Floating-point Complex matrix multiplication.
		55	* @param[in] *pSrcA points to the first input complex matrix structure
		56	* @param[in] *pSrcB points to the second input complex matrix structure
		57	* @param[out] *pDst points to output complex matrix structure
		58	* @return The function returns either
		59	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		60	*/
		61
		62	arm_status arm_mat_cmplx_mult_f32(
		63	const arm_matrix_instance_f32 * pSrcA,
		64	const arm_matrix_instance_f32 * pSrcB,
		65	arm_matrix_instance_f32 * pDst)
		66	{
		67	float32_t pIn1 = pSrcA->pData; / input data matrix pointer A */
		68	float32_t pIn2 = pSrcB->pData; / input data matrix pointer B */
		69	float32_t pInA = pSrcA->pData; / input data matrix pointer A */
		70	float32_t pOut = pDst->pData; / output data matrix pointer */
		71	float32_t px; / Temporary output data matrix pointer */
		72	uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
		73	uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
		74	uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
		75	float32_t sumReal1, sumImag1; /* accumulator */
		76	float32_t a0, b0, c0, d0;
		77	float32_t a1, b1, c1, d1;
		78	float32_t sumReal2, sumImag2; /* accumulator */
		79
		80
		81	/* Run the below code for Cortex-M4 and Cortex-M3 */
		82
		83	uint16_t col, i = 0U, j, row = numRowsA, colCnt; /* loop counters */
		84	arm_status status; /* status of matrix multiplication */
		85
		86	#ifdef ARM_MATH_MATRIX_CHECK
		87
		88
		89	/* Check for matrix mismatch condition */
		90	if ((pSrcA->numCols != pSrcB->numRows) \|\|
		91	(pSrcA->numRows != pDst->numRows) \|\| (pSrcB->numCols != pDst->numCols))
		92	{
		93
		94	/* Set status as ARM_MATH_SIZE_MISMATCH */
		95	status = ARM_MATH_SIZE_MISMATCH;
		96	}
		97	else
		98	#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
		99
		100	{
		101	/* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
		102	/* row loop */
		103	do
		104	{
		105	/* Output pointer is set to starting address of the row being processed */
		106	px = pOut + 2 * i;
		107
		108	/* For every row wise process, the column loop counter is to be initiated */
		109	col = numColsB;
		110
		111	/* For every row wise process, the pIn2 pointer is set
		112	** to the starting address of the pSrcB data */
		113	pIn2 = pSrcB->pData;
		114
		115	j = 0U;
		116
		117	/* column loop */
		118	do
		119	{
		120	/* Set the variable sum, that acts as accumulator, to zero */
		121	sumReal1 = 0.0f;
		122	sumImag1 = 0.0f;
		123
		124	sumReal2 = 0.0f;
		125	sumImag2 = 0.0f;
		126
		127	/* Initiate the pointer pIn1 to point to the starting address of the column being processed */
		128	pIn1 = pInA;
		129
		130	/* Apply loop unrolling and compute 4 MACs simultaneously. */
		131	colCnt = numColsA >> 2;
		132
		133	/* matrix multiplication */
		134	while (colCnt > 0U)
		135	{
		136
		137	/* Reading real part of complex matrix A */
		138	a0 = *pIn1;
		139
		140	/* Reading real part of complex matrix B */
		141	c0 = *pIn2;
		142
		143	/* Reading imaginary part of complex matrix A */
		144	b0 = *(pIn1 + 1U);
		145
		146	/* Reading imaginary part of complex matrix B */
		147	d0 = *(pIn2 + 1U);
		148
		149	sumReal1 += a0 * c0;
		150	sumImag1 += b0 * c0;
		151
		152	pIn1 += 2U;
		153	pIn2 += 2 * numColsB;
		154
		155	sumReal2 -= b0 * d0;
		156	sumImag2 += a0 * d0;
		157
		158	/* c(m,n) = a(1,1)b(1,1) + a(1,2) b(2,1) + .... + a(m,p)b(p,n) /
		159
		160	a1 = *pIn1;
		161	c1 = *pIn2;
		162
		163	b1 = *(pIn1 + 1U);
		164	d1 = *(pIn2 + 1U);
		165
		166	sumReal1 += a1 * c1;
		167	sumImag1 += b1 * c1;
		168
		169	pIn1 += 2U;
		170	pIn2 += 2 * numColsB;
		171
		172	sumReal2 -= b1 * d1;
		173	sumImag2 += a1 * d1;
		174
		175	a0 = *pIn1;
		176	c0 = *pIn2;
		177
		178	b0 = *(pIn1 + 1U);
		179	d0 = *(pIn2 + 1U);
		180
		181	sumReal1 += a0 * c0;
		182	sumImag1 += b0 * c0;
		183
		184	pIn1 += 2U;
		185	pIn2 += 2 * numColsB;
		186
		187	sumReal2 -= b0 * d0;
		188	sumImag2 += a0 * d0;
		189
		190	/* c(m,n) = a(1,1)b(1,1) + a(1,2) b(2,1) + .... + a(m,p)b(p,n) /
		191
		192	a1 = *pIn1;
		193	c1 = *pIn2;
		194
		195	b1 = *(pIn1 + 1U);
		196	d1 = *(pIn2 + 1U);
		197
		198	sumReal1 += a1 * c1;
		199	sumImag1 += b1 * c1;
		200
		201	pIn1 += 2U;
		202	pIn2 += 2 * numColsB;
		203
		204	sumReal2 -= b1 * d1;
		205	sumImag2 += a1 * d1;
		206
		207	/* Decrement the loop count */
		208	colCnt--;
		209	}
		210
		211	/* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
		212	** No loop unrolling is used. */
		213	colCnt = numColsA % 0x4U;
		214
		215	while (colCnt > 0U)
		216	{
		217	/* c(m,n) = a(1,1)b(1,1) + a(1,2) b(2,1) + .... + a(m,p)b(p,n) /
		218	a1 = *pIn1;
		219	c1 = *pIn2;
		220
		221	b1 = *(pIn1 + 1U);
		222	d1 = *(pIn2 + 1U);
		223
		224	sumReal1 += a1 * c1;
		225	sumImag1 += b1 * c1;
		226
		227	pIn1 += 2U;
		228	pIn2 += 2 * numColsB;
		229
		230	sumReal2 -= b1 * d1;
		231	sumImag2 += a1 * d1;
		232
		233	/* Decrement the loop counter */
		234	colCnt--;
		235	}
		236
		237	sumReal1 += sumReal2;
		238	sumImag1 += sumImag2;
		239
		240	/* Store the result in the destination buffer */
		241	*px++ = sumReal1;
		242	*px++ = sumImag1;
		243
		244	/* Update the pointer pIn2 to point to the starting address of the next column */
		245	j++;
		246	pIn2 = pSrcB->pData + 2U * j;
		247
		248	/* Decrement the column loop counter */
		249	col--;
		250
		251	} while (col > 0U);
		252
		253	/* Update the pointer pInA to point to the starting address of the next row */
		254	i = i + numColsB;
		255	pInA = pInA + 2 * numColsA;
		256
		257	/* Decrement the row loop counter */
		258	row--;
		259
		260	} while (row > 0U);
		261
		262	/* Set status as ARM_MATH_SUCCESS */
		263	status = ARM_MATH_SUCCESS;
		264	}
		265
		266	/* Return to application */
		267	return (status);
		268	}
		269
		270	/**
		271	* @} end of CmplxMatrixMult group
		272	*/

Subversion Repositories dashGPS

(root)/branches/dashGPS-bmp/Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c – Rev 18