WebSVN – dashGPS – Blame – /trunk/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c

Rev	Author	Line No.	Line
2	mjames	1	/*
		2	* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
		3	*
		4	* SPDX-License-Identifier: Apache-2.0
		5	*
		6	* Licensed under the Apache License, Version 2.0 (the License); you may
		7	* not use this file except in compliance with the License.
		8	* You may obtain a copy of the License at
		9	*
		10	* www.apache.org/licenses/LICENSE-2.0
		11	*
		12	* Unless required by applicable law or agreed to in writing, software
		13	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
		14	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		15	* See the License for the specific language governing permissions and
		16	* limitations under the License.
		17	*/
		18
		19	/* ----------------------------------------------------------------------
		20	* Project: CMSIS NN Library
		21	* Title: arm_fully_connected_mat_q7_vec_q15.c
		22	* Description: Mixed Q15-Q7 fully-connected layer function
		23	*
		24	* $Date: 17. January 2018
		25	* $Revision: V.1.0.0
		26	*
		27	* Target Processor: Cortex-M cores
		28	*
		29	* -------------------------------------------------------------------- */
		30
		31	#include "arm_math.h"
		32	#include "arm_nnfunctions.h"
		33
		34	/**
		35	* @ingroup groupNN
		36	*/
		37
		38	/**
		39	* @addtogroup FC
		40	* @{
		41	*/
		42
		43	/**
		44	* @brief Mixed Q15-Q7 fully-connected layer function
		45	* @param[in] pV pointer to input vector
		46	* @param[in] pM pointer to matrix weights
		47	* @param[in] dim_vec length of the vector
		48	* @param[in] num_of_rows number of rows in weight matrix
		49	* @param[in] bias_shift amount of left-shift for bias
		50	* @param[in] out_shift amount of right-shift for output
		51	* @param[in] bias pointer to bias
		52	* @param[in,out] pOut pointer to output vector
		53	* @param[in,out] vec_buffer pointer to buffer space for input
		54	* @return The function returns <code>ARM_MATH_SUCCESS</code>
		55	*
		56	* @details
		57	*
		58	* <b>Buffer size:</b>
		59	*
		60	* vec_buffer size: 0
		61	*
		62	* Q7_Q15 version of the fully connected layer
		63	*
		64	* Weights are in q7_t and Activations are in q15_t
		65	*
		66	*/
		67
		68	arm_status
		69	arm_fully_connected_mat_q7_vec_q15(const q15_t * pV,
		70	const q7_t * pM,
		71	const uint16_t dim_vec,
		72	const uint16_t num_of_rows,
		73	const uint16_t bias_shift,
		74	const uint16_t out_shift,
		75	const q7_t * bias,
		76	q15_t * pOut,
		77	q15_t * vec_buffer)
		78	{
		79
		80	#if defined (ARM_MATH_DSP)
		81	/* Run the following code for Cortex-M4 and Cortex-M7 */
		82
		83	const q7_t *pB = pM;
		84	const q7_t *pB2;
		85	q15_t *pO = pOut;
		86	const q7_t *pBias = bias;
		87	const q15_t *pA = pV;
		88
		89	uint16_t rowCnt = num_of_rows >> 1;
		90
		91	while (rowCnt)
		92	{
		93	q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
		94	q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
		95	uint16_t colCnt = dim_vec >> 2;
		96
		97	pA = pV;
		98	pB2 = pB + dim_vec;
		99
		100	while (colCnt)
		101	{
		102	q31_t inV, inM11, inM12, inM21, inM22;
		103	pB = (q7_t ) read_and_pad((void )pB, &inM11, &inM12);
		104	pB2 = (q7_t ) read_and_pad((void )pB2, &inM21, &inM22);
		105
		106	inV = *__SIMD32(pA)++;
		107
		108	sum = __SMLAD(inV, inM11, sum);
		109	sum2 = __SMLAD(inV, inM21, sum2);
		110
		111	inV = *__SIMD32(pA)++;
		112
		113	sum = __SMLAD(inV, inM12, sum);
		114	sum2 = __SMLAD(inV, inM22, sum2);
		115
		116	colCnt--;
		117	}
		118	colCnt = dim_vec & 0x3;
		119	while (colCnt)
		120	{
		121	q15_t inV = *pA++;
		122	q7_t inM = *pB++;
		123	q7_t inM2 = *pB2++;
		124
		125	sum += inV * inM;
		126	sum2 += inV * inM2;
		127	colCnt--;
		128	} /* while over colCnt */
		129	*pO++ = (q15_t) (__SSAT((sum >> out_shift), 16));
		130	*pO++ = (q15_t) (__SSAT((sum2 >> out_shift), 16));
		131
		132	/adjust the pointers and counters /
		133	pB += dim_vec;
		134	rowCnt--;
		135	}
		136
		137	/* left-over part of the rows */
		138	rowCnt = num_of_rows & 0x1;
		139
		140	while (rowCnt)
		141	{
		142	q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
		143	uint16_t colCnt = dim_vec >> 2;
		144
		145	pA = pV;
		146
		147	while (colCnt)
		148	{
		149	q31_t inV1, inV2, inM11, inM12;
		150
		151	pB = (q7_t ) read_and_pad((void )pB, &inM11, &inM12);
		152
		153	inV1 = *__SIMD32(pA)++;
		154	sum = __SMLAD(inV1, inM11, sum);
		155
		156	inV2 = *__SIMD32(pA)++;
		157	sum = __SMLAD(inV2, inM12, sum);
		158
		159	colCnt--;
		160	}
		161
		162	/* left-over of the vector */
		163	colCnt = dim_vec & 0x3;
		164	while (colCnt)
		165	{
		166	q15_t inV = *pA++;
		167	q7_t inM = *pB++;
		168	sum += inV * inM;
		169	colCnt--;
		170	}
		171
		172	*pO++ = (q15_t) (__SSAT((sum >> out_shift), 16));
		173
		174	rowCnt--;
		175	}
		176
		177	#else
		178	int i, j;
		179	/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
		180	for (i = 0; i < num_of_rows; i++)
		181	{
		182	int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
		183	for (j = 0; j < dim_vec; j++)
		184	{
		185	ip_out += pV[j] * pM[i * dim_vec + j];
		186	}
		187	pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
		188	}
		189
		190	#endif /* ARM_MATH_DSP */
		191
		192	/* Return to ARM_MATH_SUCCESS */
		193	return (ARM_MATH_SUCCESS);
		194
		195	}
		196
		197	/**
		198	* @} end of FC group
		199	*/

Subversion Repositories dashGPS

(root)/trunk/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c – Rev 2