WebSVN – DashDisplay – Blame – /trunk/Drivers/CMSIS/Include/arm_math.h

Rev	Author	Line No.	Line
2	mjames	1	/* ----------------------------------------------------------------------
		2	* Copyright (C) 2010-2015 ARM Limited. All rights reserved.
		3	*
5	mjames	4	* $Date: 20. October 2015
		5	* $Revision: V1.4.5 b
2	mjames	6	*
5	mjames	7	* Project: CMSIS DSP Library
		8	* Title: arm_math.h
2	mjames	9	*
5	mjames	10	* Description: Public header file for CMSIS DSP Library
2	mjames	11	*
		12	* Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
		13	*
		14	* Redistribution and use in source and binary forms, with or without
		15	* modification, are permitted provided that the following conditions
		16	* are met:
		17	* - Redistributions of source code must retain the above copyright
		18	* notice, this list of conditions and the following disclaimer.
		19	* - Redistributions in binary form must reproduce the above copyright
		20	* notice, this list of conditions and the following disclaimer in
		21	* the documentation and/or other materials provided with the
		22	* distribution.
		23	* - Neither the name of ARM LIMITED nor the names of its contributors
		24	* may be used to endorse or promote products derived from this
		25	* software without specific prior written permission.
		26	*
		27	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
		28	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
		29	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
		30	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
		31	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
		32	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
		33	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
		34	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
		35	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
		37	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
		38	* POSSIBILITY OF SUCH DAMAGE.
		39	* -------------------------------------------------------------------- */
		40
		41	/**
		42	\mainpage CMSIS DSP Software Library
		43	*
		44	* Introduction
		45	* ------------
		46	*
		47	* This user manual describes the CMSIS DSP software library,
		48	* a suite of common signal processing functions for use on Cortex-M processor based devices.
		49	*
		50	* The library is divided into a number of functions each covering a specific category:
		51	* - Basic math functions
		52	* - Fast math functions
		53	* - Complex math functions
		54	* - Filters
		55	* - Matrix functions
		56	* - Transforms
		57	* - Motor control functions
		58	* - Statistical functions
		59	* - Support functions
		60	* - Interpolation functions
		61	*
		62	* The library has separate functions for operating on 8-bit integers, 16-bit integers,
		63	* 32-bit integer and 32-bit floating-point values.
		64	*
		65	* Using the Library
		66	* ------------
		67	*
		68	* The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
		69	* - arm_cortexM7lfdp_math.lib (Little endian and Double Precision Floating Point Unit on Cortex-M7)
		70	* - arm_cortexM7bfdp_math.lib (Big endian and Double Precision Floating Point Unit on Cortex-M7)
		71	* - arm_cortexM7lfsp_math.lib (Little endian and Single Precision Floating Point Unit on Cortex-M7)
		72	* - arm_cortexM7bfsp_math.lib (Big endian and Single Precision Floating Point Unit on Cortex-M7)
		73	* - arm_cortexM7l_math.lib (Little endian on Cortex-M7)
		74	* - arm_cortexM7b_math.lib (Big endian on Cortex-M7)
		75	* - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
		76	* - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
		77	* - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
		78	* - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
		79	* - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
		80	* - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
		81	* - arm_cortexM0l_math.lib (Little endian on Cortex-M0 / CortexM0+)
		82	* - arm_cortexM0b_math.lib (Big endian on Cortex-M0 / CortexM0+)
		83	*
		84	* The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
		85	* Simply include this file and link the appropriate library in the application and begin calling the library functions. The library supports a single
		86	* public header file <code>arm_math.h</code> for Cortex-M7/M4/M3/M0/M0+ in both little endian and big endian configurations. The same header file is used for the floating point unit (FPU) variants.
		87	* Define the appropriate pre processor MACRO ARM_MATH_CM7 or ARM_MATH_CM4 or ARM_MATH_CM3 or
		88	* ARM_MATH_CM0 or ARM_MATH_CM0PLUS depending on the target processor in the application.
		89	*
		90	* Examples
		91	* --------
		92	*
		93	* The library ships with a number of examples which demonstrate how to use the library functions.
		94	*
		95	* Toolchain Support
		96	* ------------
		97	*
		98	* The library has been developed and tested with MDK-ARM version 5.14.0.0
		99	* The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
		100	*
		101	* Building the Library
		102	* ------------
		103	*
		104	* The library installer contains a project file to re build libraries on MDK-ARM Tool chain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
		105	* - arm_cortexM_math.uvprojx
		106	*
		107	*
		108	* The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional pre processor MACROs detailed above.
		109	*
		110	* Pre-processor Macros
		111	* ------------
		112	*
		113	* Each library project has different pre-processor macros.
		114	*
		115	* - UNALIGNED_SUPPORT_DISABLE:
		116	*
		117	* Define macro UNALIGNED_SUPPORT_DISABLE if the silicon does not support unaligned memory access.
		118	*
		119	* - ARM_MATH_BIG_ENDIAN:
		120	*
		121	* Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
		122	*
		123	* - ARM_MATH_MATRIX_CHECK:
		124	*
		125	* Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
		126	*
		127	* - ARM_MATH_ROUNDING:
		128	*
		129	* Define macro ARM_MATH_ROUNDING for rounding on support functions
		130	*
		131	* - ARM_MATH_CMx:
		132	*
		133	* Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
		134	* and ARM_MATH_CM0 for building library on Cortex-M0 target, ARM_MATH_CM0PLUS for building library on Cortex-M0+ target, and
		135	* ARM_MATH_CM7 for building the library on cortex-M7.
		136	*
		137	* - __FPU_PRESENT:
		138	*
		139	* Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
		140	*
		141	* <hr>
		142	* CMSIS-DSP in ARM::CMSIS Pack
		143	* -----------------------------
5	mjames	144	*
2	mjames	145	* The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
		146	* |File/Folder |Content |
		147	* |------------------------------|------------------------------------------------------------------------|
		148	* |\b CMSIS\\Documentation\\DSP | This documentation |
		149	* |\b CMSIS\\DSP_Lib | Software license agreement (license.txt) |
		150	* |\b CMSIS\\DSP_Lib\\Examples | Example projects demonstrating the usage of the library functions |
		151	* |\b CMSIS\\DSP_Lib\\Source | Source files for rebuilding the library |
5	mjames	152	*
2	mjames	153	* <hr>
		154	* Revision History of CMSIS-DSP
		155	* ------------
		156	* Please refer to \ref ChangeLog_pg.
		157	*
		158	* Copyright Notice
		159	* ------------
		160	*
		161	* Copyright (C) 2010-2015 ARM Limited. All rights reserved.
		162	*/
		163
		164
		165	/**
		166	* @defgroup groupMath Basic Math Functions
		167	*/
		168
		169	/**
		170	* @defgroup groupFastMath Fast Math Functions
		171	* This set of functions provides a fast approximation to sine, cosine, and square root.
		172	* As compared to most of the other functions in the CMSIS math library, the fast math functions
		173	* operate on individual values and not arrays.
		174	* There are separate functions for Q15, Q31, and floating-point data.
		175	*
		176	*/
		177
		178	/**
		179	* @defgroup groupCmplxMath Complex Math Functions
		180	* This set of functions operates on complex data vectors.
		181	* The data in the complex arrays is stored in an interleaved fashion
		182	* (real, imag, real, imag, ...).
		183	* In the API functions, the number of samples in a complex array refers
		184	* to the number of complex values; the array contains twice this number of
		185	* real values.
		186	*/
		187
		188	/**
		189	* @defgroup groupFilters Filtering Functions
		190	*/
		191
		192	/**
		193	* @defgroup groupMatrix Matrix Functions
		194	*
		195	* This set of functions provides basic matrix math operations.
		196	* The functions operate on matrix data structures. For example,
		197	* the type
		198	* definition for the floating-point matrix structure is shown
		199	* below:
		200	* <pre>
		201	* typedef struct
		202	* {
		203	* uint16_t numRows; // number of rows of the matrix.
		204	* uint16_t numCols; // number of columns of the matrix.
		205	* float32_t *pData; // points to the data of the matrix.
		206	* } arm_matrix_instance_f32;
		207	* </pre>
		208	* There are similar definitions for Q15 and Q31 data types.
		209	*
		210	* The structure specifies the size of the matrix and then points to
		211	* an array of data. The array is of size <code>numRows X numCols</code>
		212	* and the values are arranged in row order. That is, the
		213	* matrix element (i, j) is stored at:
		214	* <pre>
		215	* pData[i*numCols + j]
		216	* </pre>
		217	*
		218	* \par Init Functions
		219	* There is an associated initialization function for each type of matrix
		220	* data structure.
		221	* The initialization function sets the values of the internal structure fields.
		222	* Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
		223	* and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types, respectively.
		224	*
		225	* \par
		226	* Use of the initialization function is optional. However, if initialization function is used
		227	* then the instance structure cannot be placed into a const data section.
		228	* To place the instance structure in a const data
		229	* section, manually initialize the data structure. For example:
		230	* <pre>
		231	* <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
		232	* <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
		233	* <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
		234	* </pre>
		235	* where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
		236	* specifies the number of columns, and <code>pData</code> points to the
		237	* data array.
		238	*
		239	* \par Size Checking
		240	* By default all of the matrix functions perform size checking on the input and
		241	* output matrices. For example, the matrix addition function verifies that the
		242	* two input matrices and the output matrix all have the same number of rows and
		243	* columns. If the size check fails the functions return:
		244	* <pre>
		245	* ARM_MATH_SIZE_MISMATCH
		246	* </pre>
		247	* Otherwise the functions return
		248	* <pre>
		249	* ARM_MATH_SUCCESS
		250	* </pre>
		251	* There is some overhead associated with this matrix size checking.
		252	* The matrix size checking is enabled via the \#define
		253	* <pre>
		254	* ARM_MATH_MATRIX_CHECK
		255	* </pre>
		256	* within the library project settings. By default this macro is defined
		257	* and size checking is enabled. By changing the project settings and
		258	* undefining this macro size checking is eliminated and the functions
		259	* run a bit faster. With size checking disabled the functions always
		260	* return <code>ARM_MATH_SUCCESS</code>.
		261	*/
		262
		263	/**
		264	* @defgroup groupTransforms Transform Functions
		265	*/
		266
		267	/**
		268	* @defgroup groupController Controller Functions
		269	*/
		270
		271	/**
		272	* @defgroup groupStats Statistics Functions
		273	*/
		274	/**
		275	* @defgroup groupSupport Support Functions
		276	*/
		277
		278	/**
		279	* @defgroup groupInterpolation Interpolation Functions
		280	* These functions perform 1- and 2-dimensional interpolation of data.
		281	* Linear interpolation is used for 1-dimensional data and
		282	* bilinear interpolation is used for 2-dimensional data.
		283	*/
		284
		285	/**
		286	* @defgroup groupExamples Examples
		287	*/
		288	#ifndef _ARM_MATH_H
		289	#define _ARM_MATH_H
		290
5	mjames	291	/* ignore some GCC warnings */
		292	#if defined ( __GNUC__ )
		293	#pragma GCC diagnostic push
		294	#pragma GCC diagnostic ignored "-Wsign-conversion"
		295	#pragma GCC diagnostic ignored "-Wconversion"
		296	#pragma GCC diagnostic ignored "-Wunused-parameter"
		297	#endif
		298
2	mjames	299	#define __CMSIS_GENERIC /* disable NVIC and Systick functions */
		300
		301	#if defined(ARM_MATH_CM7)
		302	#include "core_cm7.h"
		303	#elif defined (ARM_MATH_CM4)
		304	#include "core_cm4.h"
		305	#elif defined (ARM_MATH_CM3)
		306	#include "core_cm3.h"
		307	#elif defined (ARM_MATH_CM0)
		308	#include "core_cm0.h"
		309	#define ARM_MATH_CM0_FAMILY
5	mjames	310	#elif defined (ARM_MATH_CM0PLUS)
		311	#include "core_cm0plus.h"
		312	#define ARM_MATH_CM0_FAMILY
2	mjames	313	#else
		314	#error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS or ARM_MATH_CM0"
		315	#endif
		316
		317	#undef __CMSIS_GENERIC /* enable NVIC and Systick functions */
		318	#include "string.h"
		319	#include "math.h"
5	mjames	320	#ifdef __cplusplus
2	mjames	321	extern "C"
		322	{
		323	#endif
		324
		325
		326	/**
		327	* @brief Macros required for reciprocal calculation in Normalized LMS
		328	*/
		329
5	mjames	330	#define DELTA_Q31 (0x100)
		331	#define DELTA_Q15 0x5
		332	#define INDEX_MASK 0x0000003F
2	mjames	333	#ifndef PI
5	mjames	334	#define PI 3.14159265358979f
2	mjames	335	#endif
		336
		337	/**
		338	* @brief Macros required for SINE and COSINE Fast math approximations
		339	*/
		340
		341	#define FAST_MATH_TABLE_SIZE 512
		342	#define FAST_MATH_Q31_SHIFT (32 - 10)
		343	#define FAST_MATH_Q15_SHIFT (16 - 10)
		344	#define CONTROLLER_Q31_SHIFT (32 - 9)
		345	#define TABLE_SIZE 256
5	mjames	346	#define TABLE_SPACING_Q31 0x400000
		347	#define TABLE_SPACING_Q15 0x80
2	mjames	348
		349	/**
		350	* @brief Macros required for SINE and COSINE Controller functions
		351	*/
		352	/* 1.31(q31) Fixed value of 2/360 */
		353	/* -1 to +1 is divided into 360 values so total spacing is (2/360) */
5	mjames	354	#define INPUT_SPACING 0xB60B61
2	mjames	355
		356	/**
		357	* @brief Macro for Unaligned Support
		358	*/
		359	#ifndef UNALIGNED_SUPPORT_DISABLE
		360	#define ALIGN4
		361	#else
		362	#if defined (__GNUC__)
		363	#define ALIGN4 __attribute__((aligned(4)))
		364	#else
		365	#define ALIGN4 __align(4)
		366	#endif
5	mjames	367	#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
2	mjames	368
		369	/**
		370	* @brief Error status returned by some functions in the library.
		371	*/
		372
		373	typedef enum
		374	{
		375	ARM_MATH_SUCCESS = 0, /*< No error /
		376	ARM_MATH_ARGUMENT_ERROR = -1, /*< One or more arguments are incorrect /
		377	ARM_MATH_LENGTH_ERROR = -2, /*< Length of data buffer is incorrect /
		378	ARM_MATH_SIZE_MISMATCH = -3, /*< Size of matrices is not compatible with the operation. /
		379	ARM_MATH_NANINF = -4, /*< Not-a-number (NaN) or infinity is generated /
		380	ARM_MATH_SINGULAR = -5, /*< Generated by matrix inversion if the input matrix is singular and cannot be inverted. /
		381	ARM_MATH_TEST_FAILURE = -6 /*< Test Failed /
		382	} arm_status;
		383
		384	/**
		385	* @brief 8-bit fractional data type in 1.7 format.
		386	*/
		387	typedef int8_t q7_t;
		388
		389	/**
		390	* @brief 16-bit fractional data type in 1.15 format.
		391	*/
		392	typedef int16_t q15_t;
		393
		394	/**
		395	* @brief 32-bit fractional data type in 1.31 format.
		396	*/
		397	typedef int32_t q31_t;
		398
		399	/**
		400	* @brief 64-bit fractional data type in 1.63 format.
		401	*/
		402	typedef int64_t q63_t;
		403
		404	/**
		405	* @brief 32-bit floating-point type definition.
		406	*/
		407	typedef float float32_t;
		408
		409	/**
		410	* @brief 64-bit floating-point type definition.
		411	*/
		412	typedef double float64_t;
		413
		414	/**
		415	* @brief definition to read/write two 16 bit values.
		416	*/
		417	#if defined __CC_ARM
		418	#define __SIMD32_TYPE int32_t __packed
		419	#define CMSIS_UNUSED __attribute__((unused))
5	mjames	420
		421	#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
		422	#define __SIMD32_TYPE int32_t
		423	#define CMSIS_UNUSED __attribute__((unused))
		424
		425	#elif defined __GNUC__
		426	#define __SIMD32_TYPE int32_t
		427	#define CMSIS_UNUSED __attribute__((unused))
		428
2	mjames	429	#elif defined __ICCARM__
		430	#define __SIMD32_TYPE int32_t __packed
		431	#define CMSIS_UNUSED
5	mjames	432
		433	#elif defined __CSMC__
2	mjames	434	#define __SIMD32_TYPE int32_t
		435	#define CMSIS_UNUSED
5	mjames	436
2	mjames	437	#elif defined __TASKING__
		438	#define __SIMD32_TYPE __unaligned int32_t
		439	#define CMSIS_UNUSED
5	mjames	440
2	mjames	441	#else
		442	#error Unknown compiler
		443	#endif
		444
/*
 * Pointer-cast helpers:
 *   __SIMD32(addr)        address of the object 'addr', viewed as __SIMD32_TYPE *
 *   __SIMD32_CONST(addr)  the pointer value 'addr', viewed as __SIMD32_TYPE *
 *   _SIMD32_OFFSET(addr)  the value 'addr' converted to __SIMD32_TYPE
 *   __SIMD64(addr)        address of the object 'addr', viewed as int64_t *
 */
#define __SIMD32(addr)          ((__SIMD32_TYPE *) & (addr))
#define __SIMD32_CONST(addr)    ((__SIMD32_TYPE *)(addr))
#define _SIMD32_OFFSET(addr)    ((__SIMD32_TYPE ) (addr))
#define __SIMD64(addr)          ((int64_t *) & (addr))
2	mjames	449
		450	#if defined (ARM_MATH_CM3) \|\| defined (ARM_MATH_CM0_FAMILY)
		451	/**
		452	* @brief definition to pack two 16 bit values.
		453	*/
		454	#define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) \| \
		455	(((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) )
		456	#define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) \| \
		457	(((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) )
		458
		459	#endif
		460
		461
		462	/**
		463	* @brief definition to pack four 8 bit values.
		464	*/
		465	#ifndef ARM_MATH_BIG_ENDIAN
		466
5	mjames	467	#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) \| \
		468	(((int32_t)(v1) << 8) & (int32_t)0x0000FF00) \| \
		469	(((int32_t)(v2) << 16) & (int32_t)0x00FF0000) \| \
		470	(((int32_t)(v3) << 24) & (int32_t)0xFF000000) )
2	mjames	471	#else
		472
5	mjames	473	#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) \| \
		474	(((int32_t)(v2) << 8) & (int32_t)0x0000FF00) \| \
		475	(((int32_t)(v1) << 16) & (int32_t)0x00FF0000) \| \
		476	(((int32_t)(v0) << 24) & (int32_t)0xFF000000) )
2	mjames	477
		478	#endif
		479
		480
		481	/**
		482	* @brief Clips Q63 to Q31 values.
		483	*/
		484	static __INLINE q31_t clip_q63_to_q31(
		485	q63_t x)
		486	{
		487	return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
		488	((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
		489	}
		490
		491	/**
		492	* @brief Clips Q63 to Q15 values.
		493	*/
		494	static __INLINE q15_t clip_q63_to_q15(
		495	q63_t x)
		496	{
		497	return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
		498	((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
		499	}
		500
		501	/**
		502	* @brief Clips Q31 to Q7 values.
		503	*/
		504	static __INLINE q7_t clip_q31_to_q7(
		505	q31_t x)
		506	{
		507	return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
		508	((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
		509	}
		510
		511	/**
		512	* @brief Clips Q31 to Q15 values.
		513	*/
		514	static __INLINE q15_t clip_q31_to_q15(
		515	q31_t x)
		516	{
		517	return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
		518	((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
		519	}
		520
		521	/**
		522	* @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
		523	*/
		524
		525	static __INLINE q63_t mult32x64(
		526	q63_t x,
		527	q31_t y)
		528	{
		529	return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
		530	(((q63_t) (x >> 32) * y)));
		531	}
		532
5	mjames	533	/*
		534	#if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM )
		535	#define __CLZ __clz
		536	#endif
		537	*/
		538	/* note: function can be removed when all toolchain support __CLZ for Cortex-M0 */
2	mjames	539	#if defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__)) )
		540	static __INLINE uint32_t __CLZ(
		541	q31_t data);
		542
		543	static __INLINE uint32_t __CLZ(
		544	q31_t data)
		545	{
		546	uint32_t count = 0;
		547	uint32_t mask = 0x80000000;
		548
		549	while((data & mask) == 0)
		550	{
		551	count += 1u;
		552	mask = mask >> 1u;
		553	}
		554
		555	return (count);
		556	}
		557	#endif
		558
		559	/**
		560	* @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
		561	*/
		562
		563	static __INLINE uint32_t arm_recip_q31(
		564	q31_t in,
		565	q31_t * dst,
		566	q31_t * pRecipTable)
		567	{
5	mjames	568	q31_t out;
		569	uint32_t tempVal;
2	mjames	570	uint32_t index, i;
		571	uint32_t signBits;
		572
		573	if(in > 0)
		574	{
5	mjames	575	signBits = ((uint32_t) (__CLZ( in) - 1));
2	mjames	576	}
		577	else
		578	{
5	mjames	579	signBits = ((uint32_t) (__CLZ(-in) - 1));
2	mjames	580	}
		581
		582	/* Convert input sample to 1.31 format */
5	mjames	583	in = (in << signBits);
2	mjames	584
		585	/* calculation of index for initial approximated Val */
5	mjames	586	index = (uint32_t)(in >> 24);
2	mjames	587	index = (index & INDEX_MASK);
		588
		589	/* 1.31 with exp 1 */
		590	out = pRecipTable[index];
		591
		592	/* calculation of reciprocal value */
		593	/* running approximation for two iterations */
		594	for (i = 0u; i < 2u; i++)
		595	{
5	mjames	596	tempVal = (uint32_t) (((q63_t) in * out) >> 31);
		597	tempVal = 0x7FFFFFFFu - tempVal;
2	mjames	598	/* 1.31 with exp 1 */
5	mjames	599	/* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
		600	out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
2	mjames	601	}
		602
		603	/* write output */
		604	*dst = out;
		605
		606	/* return num of signbits of out = 1/in value */
		607	return (signBits + 1u);
		608	}
		609
5	mjames	610
2	mjames	611	/**
		612	* @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
		613	*/
		614	static __INLINE uint32_t arm_recip_q15(
		615	q15_t in,
		616	q15_t * dst,
		617	q15_t * pRecipTable)
		618	{
5	mjames	619	q15_t out = 0;
		620	uint32_t tempVal = 0;
2	mjames	621	uint32_t index = 0, i = 0;
		622	uint32_t signBits = 0;
		623
		624	if(in > 0)
		625	{
5	mjames	626	signBits = ((uint32_t)(__CLZ( in) - 17));
2	mjames	627	}
		628	else
		629	{
5	mjames	630	signBits = ((uint32_t)(__CLZ(-in) - 17));
2	mjames	631	}
		632
		633	/* Convert input sample to 1.15 format */
5	mjames	634	in = (in << signBits);
2	mjames	635
		636	/* calculation of index for initial approximated Val */
5	mjames	637	index = (uint32_t)(in >> 8);
2	mjames	638	index = (index & INDEX_MASK);
		639
		640	/* 1.15 with exp 1 */
		641	out = pRecipTable[index];
		642
		643	/* calculation of reciprocal value */
		644	/* running approximation for two iterations */
5	mjames	645	for (i = 0u; i < 2u; i++)
2	mjames	646	{
5	mjames	647	tempVal = (uint32_t) (((q31_t) in * out) >> 15);
		648	tempVal = 0x7FFFu - tempVal;
2	mjames	649	/* 1.15 with exp 1 */
		650	out = (q15_t) (((q31_t) out * tempVal) >> 14);
5	mjames	651	/* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
2	mjames	652	}
		653
		654	/* write output */
		655	*dst = out;
		656
		657	/* return num of signbits of out = 1/in value */
		658	return (signBits + 1);
		659	}
		660
		661
		662	/*
		663	* @brief C custom defined intrinisic function for only M0 processors
		664	*/
		665	#if defined(ARM_MATH_CM0_FAMILY)
		666	static __INLINE q31_t __SSAT(
		667	q31_t x,
		668	uint32_t y)
		669	{
		670	int32_t posMax, negMin;
		671	uint32_t i;
		672
		673	posMax = 1;
		674	for (i = 0; i < (y - 1); i++)
		675	{
		676	posMax = posMax * 2;
		677	}
		678
		679	if(x > 0)
		680	{
		681	posMax = (posMax - 1);
		682
		683	if(x > posMax)
		684	{
		685	x = posMax;
		686	}
		687	}
		688	else
		689	{
		690	negMin = -posMax;
		691
		692	if(x < negMin)
		693	{
		694	x = negMin;
		695	}
		696	}
		697	return (x);
		698	}
		699	#endif /* end of ARM_MATH_CM0_FAMILY */
		700
		701
		702	/*
		703	* @brief C custom defined intrinsic function for M3 and M0 processors
		704	*/
		705	#if defined (ARM_MATH_CM3) \|\| defined (ARM_MATH_CM0_FAMILY)
		706
		707	/*
		708	* @brief C custom defined QADD8 for M3 and M0 processors
		709	*/
5	mjames	710	static __INLINE uint32_t __QADD8(
		711	uint32_t x,
		712	uint32_t y)
2	mjames	713	{
5	mjames	714	q31_t r, s, t, u;
2	mjames	715
5	mjames	716	r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
		717	s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
		718	t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
		719	u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
2	mjames	720
5	mjames	721	return ((uint32_t)((u << 24) \| (t << 16) \| (s << 8) \| (r )));
		722	}
2	mjames	723
		724
		725	/*
		726	* @brief C custom defined QSUB8 for M3 and M0 processors
		727	*/
5	mjames	728	static __INLINE uint32_t __QSUB8(
		729	uint32_t x,
		730	uint32_t y)
2	mjames	731	{
		732	q31_t r, s, t, u;
		733
5	mjames	734	r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
		735	s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
		736	t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
		737	u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
2	mjames	738
5	mjames	739	return ((uint32_t)((u << 24) \| (t << 16) \| (s << 8) \| (r )));
2	mjames	740	}
		741
		742
		743	/*
		744	* @brief C custom defined QADD16 for M3 and M0 processors
		745	*/
5	mjames	746	static __INLINE uint32_t __QADD16(
		747	uint32_t x,
		748	uint32_t y)
2	mjames	749	{
5	mjames	750	/* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */
		751	q31_t r = 0, s = 0;
2	mjames	752
5	mjames	753	r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
		754	s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
2	mjames	755
5	mjames	756	return ((uint32_t)((s << 16) \| (r )));
		757	}
2	mjames	758
		759
		760	/*
		761	* @brief C custom defined SHADD16 for M3 and M0 processors
		762	*/
5	mjames	763	static __INLINE uint32_t __SHADD16(
		764	uint32_t x,
		765	uint32_t y)
2	mjames	766	{
		767	q31_t r, s;
		768
5	mjames	769	r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
		770	s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
2	mjames	771
5	mjames	772	return ((uint32_t)((s << 16) \| (r )));
		773	}
2	mjames	774
		775
		776	/*
		777	* @brief C custom defined QSUB16 for M3 and M0 processors
		778	*/
5	mjames	779	static __INLINE uint32_t __QSUB16(
		780	uint32_t x,
		781	uint32_t y)
2	mjames	782	{
		783	q31_t r, s;
		784
5	mjames	785	r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
		786	s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
2	mjames	787
5	mjames	788	return ((uint32_t)((s << 16) \| (r )));
		789	}
2	mjames	790
		791
		792	/*
		793	* @brief C custom defined SHSUB16 for M3 and M0 processors
		794	*/
5	mjames	795	static __INLINE uint32_t __SHSUB16(
		796	uint32_t x,
		797	uint32_t y)
2	mjames	798	{
		799	q31_t r, s;
		800
5	mjames	801	r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
		802	s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
2	mjames	803
5	mjames	804	return ((uint32_t)((s << 16) \| (r )));
		805	}
2	mjames	806
		807
		808	/*
		809	* @brief C custom defined QASX for M3 and M0 processors
		810	*/
5	mjames	811	static __INLINE uint32_t __QASX(
		812	uint32_t x,
		813	uint32_t y)
2	mjames	814	{
5	mjames	815	q31_t r, s;
2	mjames	816
5	mjames	817	r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
		818	s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
2	mjames	819
5	mjames	820	return ((uint32_t)((s << 16) \| (r )));
2	mjames	821	}
		822
5	mjames	823
2	mjames	824	/*
		825	* @brief C custom defined SHASX for M3 and M0 processors
		826	*/
5	mjames	827	static __INLINE uint32_t __SHASX(
		828	uint32_t x,
		829	uint32_t y)
2	mjames	830	{
		831	q31_t r, s;
		832
5	mjames	833	r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
		834	s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
2	mjames	835
5	mjames	836	return ((uint32_t)((s << 16) \| (r )));
2	mjames	837	}
		838
		839
		840	/*
		841	* @brief C custom defined QSAX for M3 and M0 processors
		842	*/
5	mjames	843	static __INLINE uint32_t __QSAX(
		844	uint32_t x,
		845	uint32_t y)
2	mjames	846	{
5	mjames	847	q31_t r, s;
2	mjames	848
5	mjames	849	r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
		850	s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
2	mjames	851
5	mjames	852	return ((uint32_t)((s << 16) \| (r )));
2	mjames	853	}
		854
5	mjames	855
2	mjames	856	/*
		857	* @brief C custom defined SHSAX for M3 and M0 processors
		858	*/
5	mjames	859	static __INLINE uint32_t __SHSAX(
		860	uint32_t x,
		861	uint32_t y)
2	mjames	862	{
		863	q31_t r, s;
		864
5	mjames	865	r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
		866	s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
2	mjames	867
5	mjames	868	return ((uint32_t)((s << 16) \| (r )));
		869	}
2	mjames	870
		871
		872	/*
		873	* @brief C custom defined SMUSDX for M3 and M0 processors
		874	*/
5	mjames	875	static __INLINE uint32_t __SMUSDX(
		876	uint32_t x,
		877	uint32_t y)
2	mjames	878	{
5	mjames	879	return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
		880	((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
2	mjames	881	}
		882
		883	/*
		884	* @brief C custom defined SMUADX for M3 and M0 processors
		885	*/
5	mjames	886	static __INLINE uint32_t __SMUADX(
		887	uint32_t x,
		888	uint32_t y)
2	mjames	889	{
5	mjames	890	return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
		891	((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
2	mjames	892	}
		893
5	mjames	894
2	mjames	895	/*
		896	* @brief C custom defined QADD for M3 and M0 processors
		897	*/
5	mjames	898	static __INLINE int32_t __QADD(
		899	int32_t x,
		900	int32_t y)
2	mjames	901	{
5	mjames	902	return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
2	mjames	903	}
		904
5	mjames	905
2	mjames	906	/*
		907	* @brief C custom defined QSUB for M3 and M0 processors
		908	*/
5	mjames	909	static __INLINE int32_t __QSUB(
		910	int32_t x,
		911	int32_t y)
2	mjames	912	{
5	mjames	913	return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
2	mjames	914	}
		915
5	mjames	916
/*
 * @brief C custom defined SMLAD for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns (x.low * y.low) + (x.high * y.high) + sum, wrapped to 32 bits.
 *
 * @param x    first operand (packed signed halfwords)
 * @param y    second operand (packed signed halfwords)
 * @param sum  accumulator value added to the two products
 * @return     32-bit accumulated result
 */
static __INLINE uint32_t __SMLAD(
  uint32_t x,
  uint32_t y,
  uint32_t sum)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Accumulate as unsigned so that overflow wraps instead of being
     undefined behaviour. */
  return (uint32_t)(xLo * yLo) + (uint32_t)(xHi * yHi) + sum;
}
		929
5	mjames	930
/*
 * @brief C custom defined SMLADX for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns (x.low * y.high) + (x.high * y.low) + sum, wrapped to 32 bits.
 *
 * @param x    first operand (packed signed halfwords)
 * @param y    second operand (packed signed halfwords)
 * @param sum  accumulator value added to the two cross products
 * @return     32-bit accumulated result
 */
static __INLINE uint32_t __SMLADX(
  uint32_t x,
  uint32_t y,
  uint32_t sum)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Accumulate as unsigned so that overflow wraps instead of being
     undefined behaviour. */
  return (uint32_t)(xLo * yHi) + (uint32_t)(xHi * yLo) + sum;
}
		943
5	mjames	944
/*
 * @brief C custom defined SMLSDX for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns (x.low * y.high) - (x.high * y.low) + sum, wrapped to 32 bits.
 *
 * @param x    first operand (packed signed halfwords)
 * @param y    second operand (packed signed halfwords)
 * @param sum  accumulator value added to the product difference
 * @return     32-bit accumulated result
 */
static __INLINE uint32_t __SMLSDX(
  uint32_t x,
  uint32_t y,
  uint32_t sum)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Accumulate as unsigned so that overflow wraps instead of being
     undefined behaviour. */
  return (uint32_t)(xLo * yHi) - (uint32_t)(xHi * yLo) + sum;
}
		957
5	mjames	958
/*
 * @brief C custom defined SMLALD for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns sum + (x.low * y.low) + (x.high * y.high), accumulated in
 * 64 bits.
 *
 * @param x    first operand (packed signed halfwords)
 * @param y    second operand (packed signed halfwords)
 * @param sum  64-bit accumulator
 * @return     64-bit accumulated result
 */
static __INLINE uint64_t __SMLALD(
  uint32_t x,
  uint32_t y,
  uint64_t sum)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Widen each product to 64 bits BEFORE adding: the two products can
     total 2^31, which does not fit in a signed 32-bit intermediate. */
  uint64_t prodLo = (uint64_t)(int64_t)(xLo * yLo);
  uint64_t prodHi = (uint64_t)(int64_t)(xHi * yHi);

  return prodLo + prodHi + sum;
}
		972
5	mjames	973
/*
 * @brief C custom defined SMLALDX for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns sum + (x.low * y.high) + (x.high * y.low), accumulated in
 * 64 bits.
 *
 * @param x    first operand (packed signed halfwords)
 * @param y    second operand (packed signed halfwords)
 * @param sum  64-bit accumulator
 * @return     64-bit accumulated result
 */
static __INLINE uint64_t __SMLALDX(
  uint32_t x,
  uint32_t y,
  uint64_t sum)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Widen each product to 64 bits BEFORE adding: the two products can
     total 2^31, which does not fit in a signed 32-bit intermediate. */
  uint64_t prodA = (uint64_t)(int64_t)(xLo * yHi);
  uint64_t prodB = (uint64_t)(int64_t)(xHi * yLo);

  return prodA + prodB + sum;
}
		987
5	mjames	988
/*
 * @brief C custom defined SMUAD for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns (x.low * y.low) + (x.high * y.high), wrapped to 32 bits.
 *
 * @param x  first operand (packed signed halfwords)
 * @param y  second operand (packed signed halfwords)
 * @return   32-bit sum of the two products
 */
static __INLINE uint32_t __SMUAD(
  uint32_t x,
  uint32_t y)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Add as unsigned: with all halfwords at -32768 the sum is 2^31, which
     would overflow a signed 32-bit addition. */
  return (uint32_t)(xLo * yLo) + (uint32_t)(xHi * yHi);
}
		999
5	mjames	1000
/*
 * @brief C custom defined SMUSD for M3 and M0 processors
 *
 * Interprets x and y as two packed signed 16-bit halfwords each and
 * returns (x.low * y.low) - (x.high * y.high).
 *
 * @param x  first operand (packed signed halfwords)
 * @param y  second operand (packed signed halfwords)
 * @return   32-bit difference of the two products
 */
static __INLINE uint32_t __SMUSD(
  uint32_t x,
  uint32_t y)
{
  /* Sign-extend with (v ^ 0x8000) - 0x8000: shifting a signed value left
     is undefined when the value is negative or the result overflows. */
  int32_t xLo = (int32_t)((x & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t xHi = (int32_t)((x >> 16)     ^ 0x8000u) - 0x8000;
  int32_t yLo = (int32_t)((y & 0xFFFFu) ^ 0x8000u) - 0x8000;
  int32_t yHi = (int32_t)((y >> 16)     ^ 0x8000u) - 0x8000;

  /* Each product fits in 32 bits; combine modulo 2^32. */
  return (uint32_t)(xLo * yLo) - (uint32_t)(xHi * yHi);
}
		1011
		1012
		1013	/*
		1014	* @brief C custom defined SXTB16 for M3 and M0 processors
		1015	*/
5	mjames	1016	static __INLINE uint32_t __SXTB16(
		1017	uint32_t x)
2	mjames	1018	{
5	mjames	1019	return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) \|
		1020	((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) ));
2	mjames	1021	}
		1022
		1023	#endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
		1024
		1025
		1026	/**
		1027	* @brief Instance structure for the Q7 FIR filter.
		1028	*/
		1029	typedef struct
		1030	{
		1031	uint16_t numTaps; /*< number of filter coefficients in the filter. /
		1032	q7_t pState; /< points to the state variable array. The array is of length numTaps+blockSize-1. /
		1033	q7_t pCoeffs; /< points to the coefficient array. The array is of length numTaps./
		1034	} arm_fir_instance_q7;
		1035
		1036	/**
		1037	* @brief Instance structure for the Q15 FIR filter.
		1038	*/
		1039	typedef struct
		1040	{
		1041	uint16_t numTaps; /*< number of filter coefficients in the filter. /
		1042	q15_t pState; /< points to the state variable array. The array is of length numTaps+blockSize-1. /
		1043	q15_t pCoeffs; /< points to the coefficient array. The array is of length numTaps./
		1044	} arm_fir_instance_q15;
		1045
		1046	/**
		1047	* @brief Instance structure for the Q31 FIR filter.
		1048	*/
		1049	typedef struct
		1050	{
		1051	uint16_t numTaps; /*< number of filter coefficients in the filter. /
		1052	q31_t pState; /< points to the state variable array. The array is of length numTaps+blockSize-1. /
		1053	q31_t pCoeffs; /< points to the coefficient array. The array is of length numTaps. /
		1054	} arm_fir_instance_q31;
		1055
		1056	/**
		1057	* @brief Instance structure for the floating-point FIR filter.
		1058	*/
		1059	typedef struct
		1060	{
		1061	uint16_t numTaps; /*< number of filter coefficients in the filter. /
		1062	float32_t pState; /< points to the state variable array. The array is of length numTaps+blockSize-1. /
		1063	float32_t pCoeffs; /< points to the coefficient array. The array is of length numTaps. /
		1064	} arm_fir_instance_f32;
		1065
		1066
		1067	/**
		1068	* @brief Processing function for the Q7 FIR filter.
5	mjames	1069	* @param[in] S points to an instance of the Q7 FIR filter structure.
		1070	* @param[in] pSrc points to the block of input data.
		1071	* @param[out] pDst points to the block of output data.
		1072	* @param[in] blockSize number of samples to process.
2	mjames	1073	*/
		1074	void arm_fir_q7(
		1075	const arm_fir_instance_q7 * S,
		1076	q7_t * pSrc,
		1077	q7_t * pDst,
		1078	uint32_t blockSize);
		1079
		1080
		1081	/**
		1082	* @brief Initialization function for the Q7 FIR filter.
5	mjames	1083	* @param[in,out] S points to an instance of the Q7 FIR structure.
		1084	* @param[in] numTaps Number of filter coefficients in the filter.
		1085	* @param[in] pCoeffs points to the filter coefficients.
		1086	* @param[in] pState points to the state buffer.
		1087	* @param[in] blockSize number of samples that are processed.
2	mjames	1088	*/
		1089	void arm_fir_init_q7(
		1090	arm_fir_instance_q7 * S,
		1091	uint16_t numTaps,
		1092	q7_t * pCoeffs,
		1093	q7_t * pState,
		1094	uint32_t blockSize);
		1095
		1096
		1097	/**
		1098	* @brief Processing function for the Q15 FIR filter.
5	mjames	1099	* @param[in] S points to an instance of the Q15 FIR structure.
		1100	* @param[in] pSrc points to the block of input data.
		1101	* @param[out] pDst points to the block of output data.
		1102	* @param[in] blockSize number of samples to process.
2	mjames	1103	*/
		1104	void arm_fir_q15(
		1105	const arm_fir_instance_q15 * S,
		1106	q15_t * pSrc,
		1107	q15_t * pDst,
		1108	uint32_t blockSize);
		1109
5	mjames	1110
2	mjames	1111	/**
		1112	* @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
5	mjames	1113	* @param[in] S points to an instance of the Q15 FIR filter structure.
		1114	* @param[in] pSrc points to the block of input data.
		1115	* @param[out] pDst points to the block of output data.
		1116	* @param[in] blockSize number of samples to process.
2	mjames	1117	*/
		1118	void arm_fir_fast_q15(
		1119	const arm_fir_instance_q15 * S,
		1120	q15_t * pSrc,
		1121	q15_t * pDst,
		1122	uint32_t blockSize);
		1123
5	mjames	1124
2	mjames	1125	/**
		1126	* @brief Initialization function for the Q15 FIR filter.
5	mjames	1127	* @param[in,out] S points to an instance of the Q15 FIR filter structure.
		1128	* @param[in] numTaps Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
		1129	* @param[in] pCoeffs points to the filter coefficients.
		1130	* @param[in] pState points to the state buffer.
		1131	* @param[in] blockSize number of samples that are processed at a time.
2	mjames	1132	* @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
		1133	* <code>numTaps</code> is not a supported value.
		1134	*/
		1135	arm_status arm_fir_init_q15(
		1136	arm_fir_instance_q15 * S,
		1137	uint16_t numTaps,
		1138	q15_t * pCoeffs,
		1139	q15_t * pState,
		1140	uint32_t blockSize);
		1141
5	mjames	1142
2	mjames	1143	/**
		1144	* @brief Processing function for the Q31 FIR filter.
5	mjames	1145	* @param[in] S points to an instance of the Q31 FIR filter structure.
		1146	* @param[in] pSrc points to the block of input data.
		1147	* @param[out] pDst points to the block of output data.
		1148	* @param[in] blockSize number of samples to process.
2	mjames	1149	*/
		1150	void arm_fir_q31(
		1151	const arm_fir_instance_q31 * S,
		1152	q31_t * pSrc,
		1153	q31_t * pDst,
		1154	uint32_t blockSize);
		1155
5	mjames	1156
2	mjames	1157	/**
		1158	* @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
5	mjames	1159	* @param[in] S points to an instance of the Q31 FIR structure.
		1160	* @param[in] pSrc points to the block of input data.
		1161	* @param[out] pDst points to the block of output data.
		1162	* @param[in] blockSize number of samples to process.
2	mjames	1163	*/
		1164	void arm_fir_fast_q31(
		1165	const arm_fir_instance_q31 * S,
		1166	q31_t * pSrc,
		1167	q31_t * pDst,
		1168	uint32_t blockSize);
		1169
5	mjames	1170
2	mjames	1171	/**
		1172	* @brief Initialization function for the Q31 FIR filter.
5	mjames	1173	* @param[in,out] S points to an instance of the Q31 FIR structure.
		1174	* @param[in] numTaps Number of filter coefficients in the filter.
		1175	* @param[in] pCoeffs points to the filter coefficients.
		1176	* @param[in] pState points to the state buffer.
		1177	* @param[in] blockSize number of samples that are processed at a time.
2	mjames	1178	*/
		1179	void arm_fir_init_q31(
		1180	arm_fir_instance_q31 * S,
		1181	uint16_t numTaps,
		1182	q31_t * pCoeffs,
		1183	q31_t * pState,
		1184	uint32_t blockSize);
		1185
5	mjames	1186
2	mjames	1187	/**
		1188	* @brief Processing function for the floating-point FIR filter.
5	mjames	1189	* @param[in] S points to an instance of the floating-point FIR structure.
		1190	* @param[in] pSrc points to the block of input data.
		1191	* @param[out] pDst points to the block of output data.
		1192	* @param[in] blockSize number of samples to process.
2	mjames	1193	*/
		1194	void arm_fir_f32(
		1195	const arm_fir_instance_f32 * S,
		1196	float32_t * pSrc,
		1197	float32_t * pDst,
		1198	uint32_t blockSize);
		1199
5	mjames	1200
2	mjames	1201	/**
		1202	* @brief Initialization function for the floating-point FIR filter.
5	mjames	1203	* @param[in,out] S points to an instance of the floating-point FIR filter structure.
		1204	* @param[in] numTaps Number of filter coefficients in the filter.
		1205	* @param[in] pCoeffs points to the filter coefficients.
		1206	* @param[in] pState points to the state buffer.
		1207	* @param[in] blockSize number of samples that are processed at a time.
2	mjames	1208	*/
		1209	void arm_fir_init_f32(
		1210	arm_fir_instance_f32 * S,
		1211	uint16_t numTaps,
		1212	float32_t * pCoeffs,
		1213	float32_t * pState,
		1214	uint32_t blockSize);
		1215
		1216
		1217	/**
		1218	* @brief Instance structure for the Q15 Biquad cascade filter.
		1219	*/
		1220	typedef struct
		1221	{
5	mjames	1222	int8_t numStages; /*< number of 2nd order stages in the filter. Overall order is 2numStages. */
		1223	q15_t pState; /< Points to the array of state coefficients. The array is of length 4numStages. */
		1224	q15_t pCoeffs; /< Points to the array of coefficients. The array is of length 5numStages. */
		1225	int8_t postShift; /*< Additional shift, in bits, applied to each output sample. /
2	mjames	1226	} arm_biquad_casd_df1_inst_q15;
		1227
		1228	/**
		1229	* @brief Instance structure for the Q31 Biquad cascade filter.
		1230	*/
		1231	typedef struct
		1232	{
		1233	uint32_t numStages; /*< number of 2nd order stages in the filter. Overall order is 2numStages. */
		1234	q31_t pState; /< Points to the array of state coefficients. The array is of length 4numStages. */
		1235	q31_t pCoeffs; /< Points to the array of coefficients. The array is of length 5numStages. */
		1236	uint8_t postShift; /*< Additional shift, in bits, applied to each output sample. /
		1237	} arm_biquad_casd_df1_inst_q31;
		1238
		1239	/**
		1240	* @brief Instance structure for the floating-point Biquad cascade filter.
		1241	*/
		1242	typedef struct
		1243	{
5	mjames	1244	uint32_t numStages; /*< number of 2nd order stages in the filter. Overall order is 2numStages. */
		1245	float32_t pState; /< Points to the array of state coefficients. The array is of length 4numStages. */
		1246	float32_t pCoeffs; /< Points to the array of coefficients. The array is of length 5numStages. */
2	mjames	1247	} arm_biquad_casd_df1_inst_f32;
		1248
		1249
		1250	/**
		1251	* @brief Processing function for the Q15 Biquad cascade filter.
5	mjames	1252	* @param[in] S points to an instance of the Q15 Biquad cascade structure.
		1253	* @param[in] pSrc points to the block of input data.
		1254	* @param[out] pDst points to the block of output data.
		1255	* @param[in] blockSize number of samples to process.
2	mjames	1256	*/
		1257	void arm_biquad_cascade_df1_q15(
		1258	const arm_biquad_casd_df1_inst_q15 * S,
		1259	q15_t * pSrc,
		1260	q15_t * pDst,
		1261	uint32_t blockSize);
		1262
5	mjames	1263
2	mjames	1264	/**
		1265	* @brief Initialization function for the Q15 Biquad cascade filter.
5	mjames	1266	* @param[in,out] S points to an instance of the Q15 Biquad cascade structure.
		1267	* @param[in] numStages number of 2nd order stages in the filter.
		1268	* @param[in] pCoeffs points to the filter coefficients.
		1269	* @param[in] pState points to the state buffer.
		1270	* @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format
2	mjames	1271	*/
		1272	void arm_biquad_cascade_df1_init_q15(
		1273	arm_biquad_casd_df1_inst_q15 * S,
		1274	uint8_t numStages,
		1275	q15_t * pCoeffs,
		1276	q15_t * pState,
		1277	int8_t postShift);
		1278
		1279
		1280	/**
		1281	* @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
5	mjames	1282	* @param[in] S points to an instance of the Q15 Biquad cascade structure.
		1283	* @param[in] pSrc points to the block of input data.
		1284	* @param[out] pDst points to the block of output data.
		1285	* @param[in] blockSize number of samples to process.
2	mjames	1286	*/
		1287	void arm_biquad_cascade_df1_fast_q15(
		1288	const arm_biquad_casd_df1_inst_q15 * S,
		1289	q15_t * pSrc,
		1290	q15_t * pDst,
		1291	uint32_t blockSize);
		1292
		1293
		1294	/**
		1295	* @brief Processing function for the Q31 Biquad cascade filter
5	mjames	1296	* @param[in] S points to an instance of the Q31 Biquad cascade structure.
		1297	* @param[in] pSrc points to the block of input data.
		1298	* @param[out] pDst points to the block of output data.
2	mjames	1299	* @param[in] blockSize number of samples to process.
		1300	*/
		1301	void arm_biquad_cascade_df1_q31(
		1302	const arm_biquad_casd_df1_inst_q31 * S,
		1303	q31_t * pSrc,
		1304	q31_t * pDst,
		1305	uint32_t blockSize);
		1306
5	mjames	1307
2	mjames	1308	/**
		1309	* @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
5	mjames	1310	* @param[in] S points to an instance of the Q31 Biquad cascade structure.
		1311	* @param[in] pSrc points to the block of input data.
		1312	* @param[out] pDst points to the block of output data.
2	mjames	1313	* @param[in] blockSize number of samples to process.
		1314	*/
		1315	void arm_biquad_cascade_df1_fast_q31(
		1316	const arm_biquad_casd_df1_inst_q31 * S,
		1317	q31_t * pSrc,
		1318	q31_t * pDst,
		1319	uint32_t blockSize);
		1320
5	mjames	1321
2	mjames	1322	/**
		1323	* @brief Initialization function for the Q31 Biquad cascade filter.
5	mjames	1324	* @param[in,out] S points to an instance of the Q31 Biquad cascade structure.
		1325	* @param[in] numStages number of 2nd order stages in the filter.
		1326	* @param[in] pCoeffs points to the filter coefficients.
		1327	* @param[in] pState points to the state buffer.
		1328	* @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format
2	mjames	1329	*/
		1330	void arm_biquad_cascade_df1_init_q31(
		1331	arm_biquad_casd_df1_inst_q31 * S,
		1332	uint8_t numStages,
		1333	q31_t * pCoeffs,
		1334	q31_t * pState,
		1335	int8_t postShift);
		1336
5	mjames	1337
2	mjames	1338	/**
		1339	* @brief Processing function for the floating-point Biquad cascade filter.
5	mjames	1340	* @param[in] S points to an instance of the floating-point Biquad cascade structure.
		1341	* @param[in] pSrc points to the block of input data.
		1342	* @param[out] pDst points to the block of output data.
2	mjames	1343	* @param[in] blockSize number of samples to process.
		1344	*/
		1345	void arm_biquad_cascade_df1_f32(
		1346	const arm_biquad_casd_df1_inst_f32 * S,
		1347	float32_t * pSrc,
		1348	float32_t * pDst,
		1349	uint32_t blockSize);
		1350
5	mjames	1351
2	mjames	1352	/**
		1353	* @brief Initialization function for the floating-point Biquad cascade filter.
5	mjames	1354	* @param[in,out] S points to an instance of the floating-point Biquad cascade structure.
		1355	* @param[in] numStages number of 2nd order stages in the filter.
		1356	* @param[in] pCoeffs points to the filter coefficients.
		1357	* @param[in] pState points to the state buffer.
2	mjames	1358	*/
		1359	void arm_biquad_cascade_df1_init_f32(
		1360	arm_biquad_casd_df1_inst_f32 * S,
		1361	uint8_t numStages,
		1362	float32_t * pCoeffs,
		1363	float32_t * pState);
		1364
		1365
		1366	/**
		1367	* @brief Instance structure for the floating-point matrix structure.
		1368	*/
		1369	typedef struct
		1370	{
		1371	uint16_t numRows; /*< number of rows of the matrix. /
		1372	uint16_t numCols; /*< number of columns of the matrix. /
		1373	float32_t pData; /< points to the data of the matrix. /
		1374	} arm_matrix_instance_f32;
		1375
		1376
		1377	/**
		1378	* @brief Instance structure for the floating-point matrix structure.
		1379	*/
		1380	typedef struct
		1381	{
		1382	uint16_t numRows; /*< number of rows of the matrix. /
		1383	uint16_t numCols; /*< number of columns of the matrix. /
		1384	float64_t pData; /< points to the data of the matrix. /
		1385	} arm_matrix_instance_f64;
		1386
		1387	/**
		1388	* @brief Instance structure for the Q15 matrix structure.
		1389	*/
		1390	typedef struct
		1391	{
		1392	uint16_t numRows; /*< number of rows of the matrix. /
		1393	uint16_t numCols; /*< number of columns of the matrix. /
		1394	q15_t pData; /< points to the data of the matrix. /
		1395	} arm_matrix_instance_q15;
		1396
		1397	/**
		1398	* @brief Instance structure for the Q31 matrix structure.
		1399	*/
		1400	typedef struct
		1401	{
		1402	uint16_t numRows; /*< number of rows of the matrix. /
		1403	uint16_t numCols; /*< number of columns of the matrix. /
		1404	q31_t pData; /< points to the data of the matrix. /
		1405	} arm_matrix_instance_q31;
		1406
		1407
		1408	/**
		1409	* @brief Floating-point matrix addition.
5	mjames	1410	* @param[in] pSrcA points to the first input matrix structure
		1411	* @param[in] pSrcB points to the second input matrix structure
		1412	* @param[out] pDst points to output matrix structure
2	mjames	1413	* @return The function returns either
		1414	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1415	*/
		1416	arm_status arm_mat_add_f32(
		1417	const arm_matrix_instance_f32 * pSrcA,
		1418	const arm_matrix_instance_f32 * pSrcB,
		1419	arm_matrix_instance_f32 * pDst);
		1420
5	mjames	1421
2	mjames	1422	/**
		1423	* @brief Q15 matrix addition.
5	mjames	1424	* @param[in] pSrcA points to the first input matrix structure
		1425	* @param[in] pSrcB points to the second input matrix structure
		1426	* @param[out] pDst points to output matrix structure
2	mjames	1427	* @return The function returns either
		1428	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1429	*/
		1430	arm_status arm_mat_add_q15(
		1431	const arm_matrix_instance_q15 * pSrcA,
		1432	const arm_matrix_instance_q15 * pSrcB,
		1433	arm_matrix_instance_q15 * pDst);
		1434
5	mjames	1435
2	mjames	1436	/**
		1437	* @brief Q31 matrix addition.
5	mjames	1438	* @param[in] pSrcA points to the first input matrix structure
		1439	* @param[in] pSrcB points to the second input matrix structure
		1440	* @param[out] pDst points to output matrix structure
2	mjames	1441	* @return The function returns either
		1442	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1443	*/
		1444	arm_status arm_mat_add_q31(
		1445	const arm_matrix_instance_q31 * pSrcA,
		1446	const arm_matrix_instance_q31 * pSrcB,
		1447	arm_matrix_instance_q31 * pDst);
		1448
5	mjames	1449
2	mjames	1450	/**
		1451	* @brief Floating-point, complex, matrix multiplication.
5	mjames	1452	* @param[in] pSrcA points to the first input matrix structure
		1453	* @param[in] pSrcB points to the second input matrix structure
		1454	* @param[out] pDst points to output matrix structure
2	mjames	1455	* @return The function returns either
		1456	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1457	*/
		1458	arm_status arm_mat_cmplx_mult_f32(
		1459	const arm_matrix_instance_f32 * pSrcA,
		1460	const arm_matrix_instance_f32 * pSrcB,
		1461	arm_matrix_instance_f32 * pDst);
		1462
5	mjames	1463
2	mjames	1464	/**
		1465	* @brief Q15, complex, matrix multiplication.
5	mjames	1466	* @param[in] pSrcA points to the first input matrix structure
		1467	* @param[in] pSrcB points to the second input matrix structure
		1468	* @param[out] pDst points to output matrix structure
2	mjames	1469	* @return The function returns either
		1470	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1471	*/
		1472	arm_status arm_mat_cmplx_mult_q15(
		1473	const arm_matrix_instance_q15 * pSrcA,
		1474	const arm_matrix_instance_q15 * pSrcB,
		1475	arm_matrix_instance_q15 * pDst,
		1476	q15_t * pScratch);
		1477
5	mjames	1478
2	mjames	1479	/**
		1480	* @brief Q31, complex, matrix multiplication.
5	mjames	1481	* @param[in] pSrcA points to the first input matrix structure
		1482	* @param[in] pSrcB points to the second input matrix structure
		1483	* @param[out] pDst points to output matrix structure
2	mjames	1484	* @return The function returns either
		1485	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1486	*/
		1487	arm_status arm_mat_cmplx_mult_q31(
		1488	const arm_matrix_instance_q31 * pSrcA,
		1489	const arm_matrix_instance_q31 * pSrcB,
		1490	arm_matrix_instance_q31 * pDst);
		1491
		1492
		1493	/**
		1494	* @brief Floating-point matrix transpose.
5	mjames	1495	* @param[in] pSrc points to the input matrix
		1496	* @param[out] pDst points to the output matrix
		1497	* @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
2	mjames	1498	* or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1499	*/
		1500	arm_status arm_mat_trans_f32(
		1501	const arm_matrix_instance_f32 * pSrc,
		1502	arm_matrix_instance_f32 * pDst);
		1503
		1504
		1505	/**
		1506	* @brief Q15 matrix transpose.
5	mjames	1507	* @param[in] pSrc points to the input matrix
		1508	* @param[out] pDst points to the output matrix
		1509	* @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
2	mjames	1510	* or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1511	*/
		1512	arm_status arm_mat_trans_q15(
		1513	const arm_matrix_instance_q15 * pSrc,
		1514	arm_matrix_instance_q15 * pDst);
		1515
5	mjames	1516
2	mjames	1517	/**
		1518	* @brief Q31 matrix transpose.
5	mjames	1519	* @param[in] pSrc points to the input matrix
		1520	* @param[out] pDst points to the output matrix
		1521	* @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
2	mjames	1522	* or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1523	*/
		1524	arm_status arm_mat_trans_q31(
		1525	const arm_matrix_instance_q31 * pSrc,
		1526	arm_matrix_instance_q31 * pDst);
		1527
		1528
		1529	/**
		1530	* @brief Floating-point matrix multiplication
5	mjames	1531	* @param[in] pSrcA points to the first input matrix structure
		1532	* @param[in] pSrcB points to the second input matrix structure
		1533	* @param[out] pDst points to output matrix structure
2	mjames	1534	* @return The function returns either
		1535	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1536	*/
		1537	arm_status arm_mat_mult_f32(
		1538	const arm_matrix_instance_f32 * pSrcA,
		1539	const arm_matrix_instance_f32 * pSrcB,
		1540	arm_matrix_instance_f32 * pDst);
		1541
5	mjames	1542
2	mjames	1543	/**
		1544	* @brief Q15 matrix multiplication
5	mjames	1545	* @param[in] pSrcA points to the first input matrix structure
		1546	* @param[in] pSrcB points to the second input matrix structure
		1547	* @param[out] pDst points to output matrix structure
		1548	* @param[in] pState points to the array for storing intermediate results
2	mjames	1549	* @return The function returns either
		1550	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1551	*/
		1552	arm_status arm_mat_mult_q15(
		1553	const arm_matrix_instance_q15 * pSrcA,
		1554	const arm_matrix_instance_q15 * pSrcB,
		1555	arm_matrix_instance_q15 * pDst,
		1556	q15_t * pState);
		1557
5	mjames	1558
2	mjames	1559	/**
		1560	* @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
5	mjames	1561	* @param[in] pSrcA points to the first input matrix structure
		1562	* @param[in] pSrcB points to the second input matrix structure
		1563	* @param[out] pDst points to output matrix structure
		1564	* @param[in] pState points to the array for storing intermediate results
2	mjames	1565	* @return The function returns either
		1566	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1567	*/
		1568	arm_status arm_mat_mult_fast_q15(
		1569	const arm_matrix_instance_q15 * pSrcA,
		1570	const arm_matrix_instance_q15 * pSrcB,
		1571	arm_matrix_instance_q15 * pDst,
		1572	q15_t * pState);
		1573
5	mjames	1574
2	mjames	1575	/**
		1576	* @brief Q31 matrix multiplication
5	mjames	1577	* @param[in] pSrcA points to the first input matrix structure
		1578	* @param[in] pSrcB points to the second input matrix structure
		1579	* @param[out] pDst points to output matrix structure
2	mjames	1580	* @return The function returns either
		1581	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1582	*/
		1583	arm_status arm_mat_mult_q31(
		1584	const arm_matrix_instance_q31 * pSrcA,
		1585	const arm_matrix_instance_q31 * pSrcB,
		1586	arm_matrix_instance_q31 * pDst);
		1587
5	mjames	1588
2	mjames	1589	/**
		1590	* @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
5	mjames	1591	* @param[in] pSrcA points to the first input matrix structure
		1592	* @param[in] pSrcB points to the second input matrix structure
		1593	* @param[out] pDst points to output matrix structure
2	mjames	1594	* @return The function returns either
		1595	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1596	*/
		1597	arm_status arm_mat_mult_fast_q31(
		1598	const arm_matrix_instance_q31 * pSrcA,
		1599	const arm_matrix_instance_q31 * pSrcB,
		1600	arm_matrix_instance_q31 * pDst);
		1601
		1602
		1603	/**
		1604	* @brief Floating-point matrix subtraction
5	mjames	1605	* @param[in] pSrcA points to the first input matrix structure
		1606	* @param[in] pSrcB points to the second input matrix structure
		1607	* @param[out] pDst points to output matrix structure
2	mjames	1608	* @return The function returns either
		1609	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1610	*/
		1611	arm_status arm_mat_sub_f32(
		1612	const arm_matrix_instance_f32 * pSrcA,
		1613	const arm_matrix_instance_f32 * pSrcB,
		1614	arm_matrix_instance_f32 * pDst);
		1615
5	mjames	1616
2	mjames	1617	/**
		1618	* @brief Q15 matrix subtraction
5	mjames	1619	* @param[in] pSrcA points to the first input matrix structure
		1620	* @param[in] pSrcB points to the second input matrix structure
		1621	* @param[out] pDst points to output matrix structure
2	mjames	1622	* @return The function returns either
		1623	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1624	*/
		1625	arm_status arm_mat_sub_q15(
		1626	const arm_matrix_instance_q15 * pSrcA,
		1627	const arm_matrix_instance_q15 * pSrcB,
		1628	arm_matrix_instance_q15 * pDst);
		1629
5	mjames	1630
2	mjames	1631	/**
		1632	* @brief Q31 matrix subtraction
5	mjames	1633	* @param[in] pSrcA points to the first input matrix structure
		1634	* @param[in] pSrcB points to the second input matrix structure
		1635	* @param[out] pDst points to output matrix structure
2	mjames	1636	* @return The function returns either
		1637	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1638	*/
		1639	arm_status arm_mat_sub_q31(
		1640	const arm_matrix_instance_q31 * pSrcA,
		1641	const arm_matrix_instance_q31 * pSrcB,
		1642	arm_matrix_instance_q31 * pDst);
		1643
5	mjames	1644
2	mjames	1645	/**
		1646	* @brief Floating-point matrix scaling.
5	mjames	1647	* @param[in] pSrc points to the input matrix
		1648	* @param[in] scale scale factor
		1649	* @param[out] pDst points to the output matrix
2	mjames	1650	* @return The function returns either
		1651	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1652	*/
		1653	arm_status arm_mat_scale_f32(
		1654	const arm_matrix_instance_f32 * pSrc,
		1655	float32_t scale,
		1656	arm_matrix_instance_f32 * pDst);
		1657
5	mjames	1658
2	mjames	1659	/**
		1660	* @brief Q15 matrix scaling.
5	mjames	1661	* @param[in] pSrc points to input matrix
		1662	* @param[in] scaleFract fractional portion of the scale factor
		1663	* @param[in] shift number of bits to shift the result by
		1664	* @param[out] pDst points to output matrix
2	mjames	1665	* @return The function returns either
		1666	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1667	*/
		1668	arm_status arm_mat_scale_q15(
		1669	const arm_matrix_instance_q15 * pSrc,
		1670	q15_t scaleFract,
		1671	int32_t shift,
		1672	arm_matrix_instance_q15 * pDst);
		1673
5	mjames	1674
2	mjames	1675	/**
		1676	* @brief Q31 matrix scaling.
5	mjames	1677	* @param[in] pSrc points to input matrix
		1678	* @param[in] scaleFract fractional portion of the scale factor
		1679	* @param[in] shift number of bits to shift the result by
		1680	* @param[out] pDst points to output matrix structure
2	mjames	1681	* @return The function returns either
		1682	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
		1683	*/
		1684	arm_status arm_mat_scale_q31(
		1685	const arm_matrix_instance_q31 * pSrc,
		1686	q31_t scaleFract,
		1687	int32_t shift,
		1688	arm_matrix_instance_q31 * pDst);
		1689
		1690
		1691	/**
		1692	* @brief Q31 matrix initialization.
5	mjames	1693	* @param[in,out] S points to an instance of the Q31 matrix structure.
		1694	* @param[in] nRows number of rows in the matrix.
		1695	* @param[in] nColumns number of columns in the matrix.
		1696	* @param[in] pData points to the matrix data array.
2	mjames	1697	*/
		1698	void arm_mat_init_q31(
		1699	arm_matrix_instance_q31 * S,
		1700	uint16_t nRows,
		1701	uint16_t nColumns,
		1702	q31_t * pData);
		1703
5	mjames	1704
2	mjames	1705	/**
		1706	* @brief Q15 matrix initialization.
5	mjames	1707	* @param[in,out] S points to an instance of the Q15 matrix structure.
		1708	* @param[in] nRows number of rows in the matrix.
		1709	* @param[in] nColumns number of columns in the matrix.
		1710	* @param[in] pData points to the matrix data array.
2	mjames	1711	*/
		1712	void arm_mat_init_q15(
		1713	arm_matrix_instance_q15 * S,
		1714	uint16_t nRows,
		1715	uint16_t nColumns,
		1716	q15_t * pData);
		1717
5	mjames	1718
2	mjames	1719	/**
		1720	* @brief Floating-point matrix initialization.
5	mjames	1721	* @param[in,out] S points to an instance of the floating-point matrix structure.
		1722	* @param[in] nRows number of rows in the matrix.
		1723	* @param[in] nColumns number of columns in the matrix.
		1724	* @param[in] pData points to the matrix data array.
2	mjames	1725	*/
		1726	void arm_mat_init_f32(
		1727	arm_matrix_instance_f32 * S,
		1728	uint16_t nRows,
		1729	uint16_t nColumns,
		1730	float32_t * pData);
		1731
		1732
		1733
		1734	/**
		1735	* @brief Instance structure for the Q15 PID Control.
		1736	*/
		1737	typedef struct
		1738	{
5	mjames	1739	q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
2	mjames	1740	#ifdef ARM_MATH_CM0_FAMILY
		1741	q15_t A1;
		1742	q15_t A2;
		1743	#else
		1744	q31_t A1; /**< The derived gain A1 = -Kp - 2Kd | Kd. */
		1745	#endif
5	mjames	1746	q15_t state[3]; /**< The state array of length 3. */
2	mjames	1747	q15_t Kp; /**< The proportional gain. */
		1748	q15_t Ki; /**< The integral gain. */
		1749	q15_t Kd; /**< The derivative gain. */
		1750	} arm_pid_instance_q15;
		1751
		1752	/**
		1753	* @brief Instance structure for the Q31 PID Control.
		1754	*/
		1755	typedef struct
		1756	{
		1757	q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
		1758	q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */
		1759	q31_t A2; /**< The derived gain, A2 = Kd . */
		1760	q31_t state[3]; /**< The state array of length 3. */
		1761	q31_t Kp; /**< The proportional gain. */
		1762	q31_t Ki; /**< The integral gain. */
		1763	q31_t Kd; /**< The derivative gain. */
		1764	} arm_pid_instance_q31;
		1765
		1766	/**
		1767	* @brief Instance structure for the floating-point PID Control.
		1768	*/
		1769	typedef struct
		1770	{
		1771	float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */
		1772	float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */
		1773	float32_t A2; /**< The derived gain, A2 = Kd . */
		1774	float32_t state[3]; /**< The state array of length 3. */
5	mjames	1775	float32_t Kp; /**< The proportional gain. */
		1776	float32_t Ki; /**< The integral gain. */
		1777	float32_t Kd; /**< The derivative gain. */
2	mjames	1778	} arm_pid_instance_f32;
		1779
		1780
		1781
		1782	/**
		1783	* @brief Initialization function for the floating-point PID Control.
5	mjames	1784	* @param[in,out] S points to an instance of the PID structure.
2	mjames	1785	* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
		1786	*/
		1787	void arm_pid_init_f32(
		1788	arm_pid_instance_f32 * S,
		1789	int32_t resetStateFlag);
		1790
5	mjames	1791
2	mjames	1792	/**
		1793	* @brief Reset function for the floating-point PID Control.
5	mjames	1794	* @param[in,out] S is an instance of the floating-point PID Control structure
2	mjames	1795	*/
		1796	void arm_pid_reset_f32(
		1797	arm_pid_instance_f32 * S);
		1798
		1799
		1800	/**
		1801	* @brief Initialization function for the Q31 PID Control.
5	mjames	1802	* @param[in,out] S points to an instance of the Q31 PID structure.
2	mjames	1803	* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
		1804	*/
		1805	void arm_pid_init_q31(
		1806	arm_pid_instance_q31 * S,
		1807	int32_t resetStateFlag);
		1808
		1809
		1810	/**
		1811	* @brief Reset function for the Q31 PID Control.
5	mjames	1812	* @param[in,out] S points to an instance of the Q31 PID Control structure
2	mjames	1813	*/
		1814
		1815	void arm_pid_reset_q31(
		1816	arm_pid_instance_q31 * S);
		1817
5	mjames	1818
2	mjames	1819	/**
		1820	* @brief Initialization function for the Q15 PID Control.
5	mjames	1821	* @param[in,out] S points to an instance of the Q15 PID structure.
		1822	* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
2	mjames	1823	*/
		1824	void arm_pid_init_q15(
		1825	arm_pid_instance_q15 * S,
		1826	int32_t resetStateFlag);
		1827
5	mjames	1828
2	mjames	1829	/**
		1830	* @brief Reset function for the Q15 PID Control.
5	mjames	1831	* @param[in,out] S points to an instance of the q15 PID Control structure
2	mjames	1832	*/
		1833	void arm_pid_reset_q15(
		1834	arm_pid_instance_q15 * S);
		1835
		1836
		1837	/**
		1838	* @brief Instance structure for the floating-point Linear Interpolate function.
		1839	*/
		1840	typedef struct
		1841	{
		1842	uint32_t nValues; /**< nValues */
		1843	float32_t x1; /**< x1 */
		1844	float32_t xSpacing; /**< xSpacing */
		1845	float32_t *pYData; /**< pointer to the table of Y values */
		1846	} arm_linear_interp_instance_f32;
		1847
		1848	/**
		1849	* @brief Instance structure for the floating-point bilinear interpolation function.
		1850	*/
		1851	typedef struct
		1852	{
		1853	uint16_t numRows; /**< number of rows in the data table. */
		1854	uint16_t numCols; /**< number of columns in the data table. */
		1855	float32_t *pData; /**< points to the data table. */
		1856	} arm_bilinear_interp_instance_f32;
		1857
		1858	/**
		1859	* @brief Instance structure for the Q31 bilinear interpolation function.
		1860	*/
		1861	typedef struct
		1862	{
		1863	uint16_t numRows; /**< number of rows in the data table. */
		1864	uint16_t numCols; /**< number of columns in the data table. */
		1865	q31_t *pData; /**< points to the data table. */
		1866	} arm_bilinear_interp_instance_q31;
		1867
		1868	/**
		1869	* @brief Instance structure for the Q15 bilinear interpolation function.
		1870	*/
		1871	typedef struct
		1872	{
		1873	uint16_t numRows; /**< number of rows in the data table. */
		1874	uint16_t numCols; /**< number of columns in the data table. */
		1875	q15_t *pData; /**< points to the data table. */
		1876	} arm_bilinear_interp_instance_q15;
		1877
		1878	/**
		1879	* @brief Instance structure for the Q7 bilinear interpolation function.
		1880	*/
		1881	typedef struct
		1882	{
		1883	uint16_t numRows; /**< number of rows in the data table. */
		1884	uint16_t numCols; /**< number of columns in the data table. */
5	mjames	1885	q7_t *pData; /**< points to the data table. */
2	mjames	1886	} arm_bilinear_interp_instance_q7;
		1887
		1888
		1889	/**
		1890	* @brief Q7 vector multiplication.
5	mjames	1891	* @param[in] pSrcA points to the first input vector
		1892	* @param[in] pSrcB points to the second input vector
		1893	* @param[out] pDst points to the output vector
		1894	* @param[in] blockSize number of samples in each vector
2	mjames	1895	*/
		1896	void arm_mult_q7(
		1897	q7_t * pSrcA,
		1898	q7_t * pSrcB,
		1899	q7_t * pDst,
		1900	uint32_t blockSize);
		1901
5	mjames	1902
2	mjames	1903	/**
		1904	* @brief Q15 vector multiplication.
5	mjames	1905	* @param[in] pSrcA points to the first input vector
		1906	* @param[in] pSrcB points to the second input vector
		1907	* @param[out] pDst points to the output vector
		1908	* @param[in] blockSize number of samples in each vector
2	mjames	1909	*/
		1910	void arm_mult_q15(
		1911	q15_t * pSrcA,
		1912	q15_t * pSrcB,
		1913	q15_t * pDst,
		1914	uint32_t blockSize);
		1915
5	mjames	1916
2	mjames	1917	/**
		1918	* @brief Q31 vector multiplication.
5	mjames	1919	* @param[in] pSrcA points to the first input vector
		1920	* @param[in] pSrcB points to the second input vector
		1921	* @param[out] pDst points to the output vector
		1922	* @param[in] blockSize number of samples in each vector
2	mjames	1923	*/
		1924	void arm_mult_q31(
		1925	q31_t * pSrcA,
		1926	q31_t * pSrcB,
		1927	q31_t * pDst,
		1928	uint32_t blockSize);
		1929
5	mjames	1930
2	mjames	1931	/**
		1932	* @brief Floating-point vector multiplication.
5	mjames	1933	* @param[in] pSrcA points to the first input vector
		1934	* @param[in] pSrcB points to the second input vector
		1935	* @param[out] pDst points to the output vector
		1936	* @param[in] blockSize number of samples in each vector
2	mjames	1937	*/
		1938	void arm_mult_f32(
		1939	float32_t * pSrcA,
		1940	float32_t * pSrcB,
		1941	float32_t * pDst,
		1942	uint32_t blockSize);
		1943
		1944
		1945	/**
		1946	* @brief Instance structure for the Q15 CFFT/CIFFT function.
		1947	*/
		1948	typedef struct
		1949	{
		1950	uint16_t fftLen; /**< length of the FFT. */
		1951	uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
		1952	uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
5	mjames	1953	q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */
2	mjames	1954	uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		1955	uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		1956	uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
		1957	} arm_cfft_radix2_instance_q15;
		1958
		1959	/* Deprecated */
		1960	arm_status arm_cfft_radix2_init_q15(
		1961	arm_cfft_radix2_instance_q15 * S,
		1962	uint16_t fftLen,
		1963	uint8_t ifftFlag,
		1964	uint8_t bitReverseFlag);
		1965
		1966	/* Deprecated */
		1967	void arm_cfft_radix2_q15(
		1968	const arm_cfft_radix2_instance_q15 * S,
		1969	q15_t * pSrc);
		1970
		1971
		1972	/**
		1973	* @brief Instance structure for the Q15 CFFT/CIFFT function.
		1974	*/
		1975	typedef struct
		1976	{
		1977	uint16_t fftLen; /**< length of the FFT. */
		1978	uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
		1979	uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
		1980	q15_t *pTwiddle; /**< points to the twiddle factor table. */
		1981	uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		1982	uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		1983	uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
		1984	} arm_cfft_radix4_instance_q15;
		1985
		1986	/* Deprecated */
		1987	arm_status arm_cfft_radix4_init_q15(
		1988	arm_cfft_radix4_instance_q15 * S,
		1989	uint16_t fftLen,
		1990	uint8_t ifftFlag,
		1991	uint8_t bitReverseFlag);
		1992
		1993	/* Deprecated */
		1994	void arm_cfft_radix4_q15(
		1995	const arm_cfft_radix4_instance_q15 * S,
		1996	q15_t * pSrc);
		1997
		1998	/**
		1999	* @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
		2000	*/
		2001	typedef struct
		2002	{
		2003	uint16_t fftLen; /**< length of the FFT. */
		2004	uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
		2005	uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
5	mjames	2006	q31_t *pTwiddle; /**< points to the Twiddle factor table. */
2	mjames	2007	uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2008	uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2009	uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
		2010	} arm_cfft_radix2_instance_q31;
		2011
		2012	/* Deprecated */
		2013	arm_status arm_cfft_radix2_init_q31(
		2014	arm_cfft_radix2_instance_q31 * S,
		2015	uint16_t fftLen,
		2016	uint8_t ifftFlag,
		2017	uint8_t bitReverseFlag);
		2018
		2019	/* Deprecated */
		2020	void arm_cfft_radix2_q31(
		2021	const arm_cfft_radix2_instance_q31 * S,
		2022	q31_t * pSrc);
		2023
		2024	/**
		2025	* @brief Instance structure for the Q31 CFFT/CIFFT function.
		2026	*/
		2027	typedef struct
		2028	{
		2029	uint16_t fftLen; /**< length of the FFT. */
		2030	uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
		2031	uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
		2032	q31_t *pTwiddle; /**< points to the twiddle factor table. */
		2033	uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2034	uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2035	uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
		2036	} arm_cfft_radix4_instance_q31;
		2037
		2038	/* Deprecated */
		2039	void arm_cfft_radix4_q31(
		2040	const arm_cfft_radix4_instance_q31 * S,
		2041	q31_t * pSrc);
		2042
		2043	/* Deprecated */
		2044	arm_status arm_cfft_radix4_init_q31(
		2045	arm_cfft_radix4_instance_q31 * S,
		2046	uint16_t fftLen,
		2047	uint8_t ifftFlag,
		2048	uint8_t bitReverseFlag);
		2049
		2050	/**
		2051	* @brief Instance structure for the floating-point CFFT/CIFFT function.
		2052	*/
		2053	typedef struct
		2054	{
		2055	uint16_t fftLen; /**< length of the FFT. */
		2056	uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
		2057	uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
		2058	float32_t *pTwiddle; /**< points to the Twiddle factor table. */
		2059	uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2060	uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2061	uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
5	mjames	2062	float32_t onebyfftLen; /**< value of 1/fftLen. */
2	mjames	2063	} arm_cfft_radix2_instance_f32;
		2064
		2065	/* Deprecated */
		2066	arm_status arm_cfft_radix2_init_f32(
		2067	arm_cfft_radix2_instance_f32 * S,
		2068	uint16_t fftLen,
		2069	uint8_t ifftFlag,
		2070	uint8_t bitReverseFlag);
		2071
		2072	/* Deprecated */
		2073	void arm_cfft_radix2_f32(
		2074	const arm_cfft_radix2_instance_f32 * S,
		2075	float32_t * pSrc);
		2076
		2077	/**
		2078	* @brief Instance structure for the floating-point CFFT/CIFFT function.
		2079	*/
		2080	typedef struct
		2081	{
		2082	uint16_t fftLen; /**< length of the FFT. */
		2083	uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
		2084	uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
		2085	float32_t *pTwiddle; /**< points to the Twiddle factor table. */
		2086	uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2087	uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2088	uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
5	mjames	2089	float32_t onebyfftLen; /**< value of 1/fftLen. */
2	mjames	2090	} arm_cfft_radix4_instance_f32;
		2091
		2092	/* Deprecated */
		2093	arm_status arm_cfft_radix4_init_f32(
		2094	arm_cfft_radix4_instance_f32 * S,
		2095	uint16_t fftLen,
		2096	uint8_t ifftFlag,
		2097	uint8_t bitReverseFlag);
		2098
		2099	/* Deprecated */
		2100	void arm_cfft_radix4_f32(
		2101	const arm_cfft_radix4_instance_f32 * S,
		2102	float32_t * pSrc);
		2103
		2104	/**
		2105	* @brief Instance structure for the fixed-point CFFT/CIFFT function.
		2106	*/
		2107	typedef struct
		2108	{
		2109	uint16_t fftLen; /**< length of the FFT. */
		2110	const q15_t *pTwiddle; /**< points to the Twiddle factor table. */
		2111	const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2112	uint16_t bitRevLength; /**< bit reversal table length. */
		2113	} arm_cfft_instance_q15;
		2114
5	mjames	2115	void arm_cfft_q15(
		2116	const arm_cfft_instance_q15 * S,
2	mjames	2117	q15_t * p1,
		2118	uint8_t ifftFlag,
5	mjames	2119	uint8_t bitReverseFlag);
2	mjames	2120
		2121	/**
		2122	* @brief Instance structure for the fixed-point CFFT/CIFFT function.
		2123	*/
		2124	typedef struct
		2125	{
		2126	uint16_t fftLen; /**< length of the FFT. */
		2127	const q31_t *pTwiddle; /**< points to the Twiddle factor table. */
		2128	const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2129	uint16_t bitRevLength; /**< bit reversal table length. */
		2130	} arm_cfft_instance_q31;
		2131
5	mjames	2132	void arm_cfft_q31(
		2133	const arm_cfft_instance_q31 * S,
2	mjames	2134	q31_t * p1,
		2135	uint8_t ifftFlag,
5	mjames	2136	uint8_t bitReverseFlag);
		2137
2	mjames	2138	/**
		2139	* @brief Instance structure for the floating-point CFFT/CIFFT function.
		2140	*/
		2141	typedef struct
		2142	{
		2143	uint16_t fftLen; /**< length of the FFT. */
		2144	const float32_t *pTwiddle; /**< points to the Twiddle factor table. */
		2145	const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
		2146	uint16_t bitRevLength; /**< bit reversal table length. */
		2147	} arm_cfft_instance_f32;
		2148
		2149	void arm_cfft_f32(
		2150	const arm_cfft_instance_f32 * S,
		2151	float32_t * p1,
		2152	uint8_t ifftFlag,
		2153	uint8_t bitReverseFlag);
		2154
		2155	/**
		2156	* @brief Instance structure for the Q15 RFFT/RIFFT function.
		2157	*/
		2158	typedef struct
		2159	{
		2160	uint32_t fftLenReal; /**< length of the real FFT. */
		2161	uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
		2162	uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
		2163	uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2164	q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
		2165	q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
		2166	const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */
		2167	} arm_rfft_instance_q15;
		2168
		2169	arm_status arm_rfft_init_q15(
		2170	arm_rfft_instance_q15 * S,
		2171	uint32_t fftLenReal,
		2172	uint32_t ifftFlagR,
		2173	uint32_t bitReverseFlag);
		2174
		2175	void arm_rfft_q15(
		2176	const arm_rfft_instance_q15 * S,
		2177	q15_t * pSrc,
		2178	q15_t * pDst);
		2179
		2180	/**
		2181	* @brief Instance structure for the Q31 RFFT/RIFFT function.
		2182	*/
		2183	typedef struct
		2184	{
		2185	uint32_t fftLenReal; /**< length of the real FFT. */
		2186	uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
		2187	uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
		2188	uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2189	q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
		2190	q31_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
		2191	const arm_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */
		2192	} arm_rfft_instance_q31;
		2193
		2194	arm_status arm_rfft_init_q31(
		2195	arm_rfft_instance_q31 * S,
		2196	uint32_t fftLenReal,
		2197	uint32_t ifftFlagR,
		2198	uint32_t bitReverseFlag);
		2199
		2200	void arm_rfft_q31(
		2201	const arm_rfft_instance_q31 * S,
		2202	q31_t * pSrc,
		2203	q31_t * pDst);
		2204
		2205	/**
		2206	* @brief Instance structure for the floating-point RFFT/RIFFT function.
		2207	*/
		2208	typedef struct
		2209	{
		2210	uint32_t fftLenReal; /**< length of the real FFT. */
		2211	uint16_t fftLenBy2; /**< length of the complex FFT. */
		2212	uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
		2213	uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
		2214	uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
		2215	float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
		2216	float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
		2217	arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
		2218	} arm_rfft_instance_f32;
		2219
		2220	arm_status arm_rfft_init_f32(
		2221	arm_rfft_instance_f32 * S,
		2222	arm_cfft_radix4_instance_f32 * S_CFFT,
		2223	uint32_t fftLenReal,
		2224	uint32_t ifftFlagR,
		2225	uint32_t bitReverseFlag);
		2226
		2227	void arm_rfft_f32(
		2228	const arm_rfft_instance_f32 * S,
		2229	float32_t * pSrc,
		2230	float32_t * pDst);
		2231
		2232	/**
		2233	* @brief Instance structure for the floating-point RFFT/RIFFT function.
		2234	*/
		2235	typedef struct
		2236	{
		2237	arm_cfft_instance_f32 Sint; /**< Internal CFFT structure. */
5	mjames	2238	uint16_t fftLenRFFT; /**< length of the real sequence */
		2239	float32_t * pTwiddleRFFT; /**< Twiddle factors real stage */
2	mjames	2240	} arm_rfft_fast_instance_f32 ;
		2241
		2242	arm_status arm_rfft_fast_init_f32 (
5	mjames	2243	arm_rfft_fast_instance_f32 * S,
		2244	uint16_t fftLen);
2	mjames	2245
		2246	void arm_rfft_fast_f32(
		2247	arm_rfft_fast_instance_f32 * S,
		2248	float32_t * p, float32_t * pOut,
		2249	uint8_t ifftFlag);
		2250
		2251	/**
		2252	* @brief Instance structure for the floating-point DCT4/IDCT4 function.
		2253	*/
		2254	typedef struct
		2255	{
5	mjames	2256	uint16_t N; /**< length of the DCT4. */
		2257	uint16_t Nby2; /**< half of the length of the DCT4. */
		2258	float32_t normalize; /**< normalizing factor. */
		2259	float32_t *pTwiddle; /**< points to the twiddle factor table. */
		2260	float32_t *pCosFactor; /**< points to the cosFactor table. */
2	mjames	2261	arm_rfft_instance_f32 *pRfft; /**< points to the real FFT instance. */
		2262	arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
		2263	} arm_dct4_instance_f32;
		2264
5	mjames	2265
2	mjames	2266	/**
		2267	* @brief Initialization function for the floating-point DCT4/IDCT4.
5	mjames	2268	* @param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure.
		2269	* @param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure.
		2270	* @param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure.
2	mjames	2271	* @param[in] N length of the DCT4.
		2272	* @param[in] Nby2 half of the length of the DCT4.
		2273	* @param[in] normalize normalizing factor.
5	mjames	2274	* @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2	mjames	2275	*/
		2276	arm_status arm_dct4_init_f32(
		2277	arm_dct4_instance_f32 * S,
		2278	arm_rfft_instance_f32 * S_RFFT,
		2279	arm_cfft_radix4_instance_f32 * S_CFFT,
		2280	uint16_t N,
		2281	uint16_t Nby2,
		2282	float32_t normalize);
		2283
5	mjames	2284
2	mjames	2285	/**
		2286	* @brief Processing function for the floating-point DCT4/IDCT4.
5	mjames	2287	* @param[in] S points to an instance of the floating-point DCT4/IDCT4 structure.
		2288	* @param[in] pState points to state buffer.
		2289	* @param[in,out] pInlineBuffer points to the in-place input and output buffer.
2	mjames	2290	*/
		2291	void arm_dct4_f32(
		2292	const arm_dct4_instance_f32 * S,
		2293	float32_t * pState,
		2294	float32_t * pInlineBuffer);
		2295
5	mjames	2296
2	mjames	2297	/**
		2298	* @brief Instance structure for the Q31 DCT4/IDCT4 function.
		2299	*/
		2300	typedef struct
		2301	{
5	mjames	2302	uint16_t N; /**< length of the DCT4. */
		2303	uint16_t Nby2; /**< half of the length of the DCT4. */
		2304	q31_t normalize; /**< normalizing factor. */
		2305	q31_t *pTwiddle; /**< points to the twiddle factor table. */
		2306	q31_t *pCosFactor; /**< points to the cosFactor table. */
2	mjames	2307	arm_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */
		2308	arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
		2309	} arm_dct4_instance_q31;
		2310
5	mjames	2311
2	mjames	2312	/**
		2313	* @brief Initialization function for the Q31 DCT4/IDCT4.
5	mjames	2314	* @param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure.
		2315	* @param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure
		2316	* @param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure
2	mjames	2317	* @param[in] N length of the DCT4.
		2318	* @param[in] Nby2 half of the length of the DCT4.
		2319	* @param[in] normalize normalizing factor.
5	mjames	2320	* @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2	mjames	2321	*/
		2322	arm_status arm_dct4_init_q31(
		2323	arm_dct4_instance_q31 * S,
		2324	arm_rfft_instance_q31 * S_RFFT,
		2325	arm_cfft_radix4_instance_q31 * S_CFFT,
		2326	uint16_t N,
		2327	uint16_t Nby2,
		2328	q31_t normalize);
		2329
5	mjames	2330
2	mjames	2331	/**
		2332	* @brief Processing function for the Q31 DCT4/IDCT4.
5	mjames	2333	* @param[in] S points to an instance of the Q31 DCT4 structure.
		2334	* @param[in] pState points to state buffer.
		2335	* @param[in,out] pInlineBuffer points to the in-place input and output buffer.
2	mjames	2336	*/
		2337	void arm_dct4_q31(
		2338	const arm_dct4_instance_q31 * S,
		2339	q31_t * pState,
		2340	q31_t * pInlineBuffer);
		2341
5	mjames	2342
2	mjames	2343	/**
		2344	* @brief Instance structure for the Q15 DCT4/IDCT4 function.
		2345	*/
		2346	typedef struct
		2347	{
5	mjames	2348	uint16_t N; /**< length of the DCT4. */
		2349	uint16_t Nby2; /**< half of the length of the DCT4. */
		2350	q15_t normalize; /**< normalizing factor. */
		2351	q15_t *pTwiddle; /**< points to the twiddle factor table. */
		2352	q15_t *pCosFactor; /**< points to the cosFactor table. */
2	mjames	2353	arm_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */
		2354	arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
		2355	} arm_dct4_instance_q15;
		2356
5	mjames	2357
2	mjames	2358	/**
		2359	* @brief Initialization function for the Q15 DCT4/IDCT4.
5	mjames	2360	* @param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure.
		2361	* @param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure.
		2362	* @param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure.
2	mjames	2363	* @param[in] N length of the DCT4.
		2364	* @param[in] Nby2 half of the length of the DCT4.
		2365	* @param[in] normalize normalizing factor.
5	mjames	2366	* @return arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2	mjames	2367	*/
		2368	arm_status arm_dct4_init_q15(
		2369	arm_dct4_instance_q15 * S,
		2370	arm_rfft_instance_q15 * S_RFFT,
		2371	arm_cfft_radix4_instance_q15 * S_CFFT,
		2372	uint16_t N,
		2373	uint16_t Nby2,
		2374	q15_t normalize);
		2375
5	mjames	2376
2	mjames	2377	/**
		2378	* @brief Processing function for the Q15 DCT4/IDCT4.
5	mjames	2379	* @param[in] S points to an instance of the Q15 DCT4 structure.
		2380	* @param[in] pState points to state buffer.
		2381	* @param[in,out] pInlineBuffer points to the in-place input and output buffer.
2	mjames	2382	*/
		2383	void arm_dct4_q15(
		2384	const arm_dct4_instance_q15 * S,
		2385	q15_t * pState,
		2386	q15_t * pInlineBuffer);
		2387
5	mjames	2388
2	mjames	2389	/**
		2390	* @brief Floating-point vector addition.
5	mjames	2391	* @param[in] pSrcA points to the first input vector
		2392	* @param[in] pSrcB points to the second input vector
		2393	* @param[out] pDst points to the output vector
		2394	* @param[in] blockSize number of samples in each vector
2	mjames	2395	*/
		2396	void arm_add_f32(
		2397	float32_t * pSrcA,
		2398	float32_t * pSrcB,
		2399	float32_t * pDst,
		2400	uint32_t blockSize);
		2401
5	mjames	2402
2	mjames	2403	/**
		2404	* @brief Q7 vector addition.
5	mjames	2405	* @param[in] pSrcA points to the first input vector
		2406	* @param[in] pSrcB points to the second input vector
		2407	* @param[out] pDst points to the output vector
		2408	* @param[in] blockSize number of samples in each vector
2	mjames	2409	*/
		2410	void arm_add_q7(
		2411	q7_t * pSrcA,
		2412	q7_t * pSrcB,
		2413	q7_t * pDst,
		2414	uint32_t blockSize);
		2415
5	mjames	2416
2	mjames	2417	/**
		2418	* @brief Q15 vector addition.
5	mjames	2419	* @param[in] pSrcA points to the first input vector
		2420	* @param[in] pSrcB points to the second input vector
		2421	* @param[out] pDst points to the output vector
		2422	* @param[in] blockSize number of samples in each vector
2	mjames	2423	*/
		2424	void arm_add_q15(
		2425	q15_t * pSrcA,
		2426	q15_t * pSrcB,
		2427	q15_t * pDst,
		2428	uint32_t blockSize);
		2429
5	mjames	2430
2	mjames	2431	/**
		2432	* @brief Q31 vector addition.
5	mjames	2433	* @param[in] pSrcA points to the first input vector
		2434	* @param[in] pSrcB points to the second input vector
		2435	* @param[out] pDst points to the output vector
		2436	* @param[in] blockSize number of samples in each vector
2	mjames	2437	*/
		2438	void arm_add_q31(
		2439	q31_t * pSrcA,
		2440	q31_t * pSrcB,
		2441	q31_t * pDst,
		2442	uint32_t blockSize);
		2443
5	mjames	2444
2	mjames	2445	/**
		2446	* @brief Floating-point vector subtraction.
5	mjames	2447	* @param[in] pSrcA points to the first input vector
		2448	* @param[in] pSrcB points to the second input vector
		2449	* @param[out] pDst points to the output vector
		2450	* @param[in] blockSize number of samples in each vector
2	mjames	2451	*/
		2452	void arm_sub_f32(
		2453	float32_t * pSrcA,
		2454	float32_t * pSrcB,
		2455	float32_t * pDst,
		2456	uint32_t blockSize);
		2457
5	mjames	2458
2	mjames	2459	/**
		2460	* @brief Q7 vector subtraction.
5	mjames	2461	* @param[in] pSrcA points to the first input vector
		2462	* @param[in] pSrcB points to the second input vector
		2463	* @param[out] pDst points to the output vector
		2464	* @param[in] blockSize number of samples in each vector
2	mjames	2465	*/
		2466	void arm_sub_q7(
		2467	q7_t * pSrcA,
		2468	q7_t * pSrcB,
		2469	q7_t * pDst,
		2470	uint32_t blockSize);
		2471
5	mjames	2472
2	mjames	2473	/**
		2474	* @brief Q15 vector subtraction.
5	mjames	2475	* @param[in] pSrcA points to the first input vector
		2476	* @param[in] pSrcB points to the second input vector
		2477	* @param[out] pDst points to the output vector
		2478	* @param[in] blockSize number of samples in each vector
2	mjames	2479	*/
		2480	void arm_sub_q15(
		2481	q15_t * pSrcA,
		2482	q15_t * pSrcB,
		2483	q15_t * pDst,
		2484	uint32_t blockSize);
		2485
5	mjames	2486
2	mjames	2487	/**
		2488	* @brief Q31 vector subtraction.
5	mjames	2489	* @param[in] pSrcA points to the first input vector
		2490	* @param[in] pSrcB points to the second input vector
		2491	* @param[out] pDst points to the output vector
		2492	* @param[in] blockSize number of samples in each vector
2	mjames	2493	*/
		2494	void arm_sub_q31(
		2495	q31_t * pSrcA,
		2496	q31_t * pSrcB,
		2497	q31_t * pDst,
		2498	uint32_t blockSize);
		2499
5	mjames	2500
2	mjames	2501	/**
		2502	* @brief Multiplies a floating-point vector by a scalar.
5	mjames	2503	* @param[in] pSrc points to the input vector
		2504	* @param[in] scale scale factor to be applied
		2505	* @param[out] pDst points to the output vector
		2506	* @param[in] blockSize number of samples in the vector
2	mjames	2507	*/
		2508	void arm_scale_f32(
		2509	float32_t * pSrc,
		2510	float32_t scale,
		2511	float32_t * pDst,
		2512	uint32_t blockSize);
		2513
5	mjames	2514
2	mjames	2515	/**
		2516	* @brief Multiplies a Q7 vector by a scalar.
5	mjames	2517	* @param[in] pSrc points to the input vector
		2518	* @param[in] scaleFract fractional portion of the scale value
		2519	* @param[in] shift number of bits to shift the result by
		2520	* @param[out] pDst points to the output vector
		2521	* @param[in] blockSize number of samples in the vector
2	mjames	2522	*/
		2523	void arm_scale_q7(
		2524	q7_t * pSrc,
		2525	q7_t scaleFract,
		2526	int8_t shift,
		2527	q7_t * pDst,
		2528	uint32_t blockSize);
		2529
5	mjames	2530
2	mjames	2531	/**
		2532	* @brief Multiplies a Q15 vector by a scalar.
5	mjames	2533	* @param[in] pSrc points to the input vector
		2534	* @param[in] scaleFract fractional portion of the scale value
		2535	* @param[in] shift number of bits to shift the result by
		2536	* @param[out] pDst points to the output vector
		2537	* @param[in] blockSize number of samples in the vector
2	mjames	2538	*/
		2539	void arm_scale_q15(
		2540	q15_t * pSrc,
		2541	q15_t scaleFract,
		2542	int8_t shift,
		2543	q15_t * pDst,
		2544	uint32_t blockSize);
		2545
5	mjames	2546
2	mjames	2547	/**
		2548	* @brief Multiplies a Q31 vector by a scalar.
5	mjames	2549	* @param[in] pSrc points to the input vector
		2550	* @param[in] scaleFract fractional portion of the scale value
		2551	* @param[in] shift number of bits to shift the result by
		2552	* @param[out] pDst points to the output vector
		2553	* @param[in] blockSize number of samples in the vector
2	mjames	2554	*/
		2555	void arm_scale_q31(
		2556	q31_t * pSrc,
		2557	q31_t scaleFract,
		2558	int8_t shift,
		2559	q31_t * pDst,
		2560	uint32_t blockSize);
		2561
5	mjames	2562
2	mjames	2563	/**
		2564	* @brief Q7 vector absolute value.
5	mjames	2565	* @param[in] pSrc points to the input buffer
		2566	* @param[out] pDst points to the output buffer
		2567	* @param[in] blockSize number of samples in each vector
2	mjames	2568	*/
		2569	void arm_abs_q7(
		2570	q7_t * pSrc,
		2571	q7_t * pDst,
		2572	uint32_t blockSize);
		2573
5	mjames	2574
2	mjames	2575	/**
		2576	* @brief Floating-point vector absolute value.
5	mjames	2577	* @param[in] pSrc points to the input buffer
		2578	* @param[out] pDst points to the output buffer
		2579	* @param[in] blockSize number of samples in each vector
2	mjames	2580	*/
		2581	void arm_abs_f32(
		2582	float32_t * pSrc,
		2583	float32_t * pDst,
		2584	uint32_t blockSize);
		2585
5	mjames	2586
2	mjames	2587	/**
		2588	* @brief Q15 vector absolute value.
5	mjames	2589	* @param[in] pSrc points to the input buffer
		2590	* @param[out] pDst points to the output buffer
		2591	* @param[in] blockSize number of samples in each vector
2	mjames	2592	*/
		2593	void arm_abs_q15(
		2594	q15_t * pSrc,
		2595	q15_t * pDst,
		2596	uint32_t blockSize);
		2597
5	mjames	2598
2	mjames	2599	/**
		2600	* @brief Q31 vector absolute value.
5	mjames	2601	* @param[in] pSrc points to the input buffer
		2602	* @param[out] pDst points to the output buffer
		2603	* @param[in] blockSize number of samples in each vector
2	mjames	2604	*/
		2605	void arm_abs_q31(
		2606	q31_t * pSrc,
		2607	q31_t * pDst,
		2608	uint32_t blockSize);
		2609
5	mjames	2610
2	mjames	2611	/**
		2612	* @brief Dot product of floating-point vectors.
5	mjames	2613	* @param[in] pSrcA points to the first input vector
		2614	* @param[in] pSrcB points to the second input vector
		2615	* @param[in] blockSize number of samples in each vector
		2616	* @param[out] result output result returned here
2	mjames	2617	*/
		2618	void arm_dot_prod_f32(
		2619	float32_t * pSrcA,
		2620	float32_t * pSrcB,
		2621	uint32_t blockSize,
		2622	float32_t * result);
		2623
5	mjames	2624
2	mjames	2625	/**
		2626	* @brief Dot product of Q7 vectors.
5	mjames	2627	* @param[in] pSrcA points to the first input vector
		2628	* @param[in] pSrcB points to the second input vector
		2629	* @param[in] blockSize number of samples in each vector
		2630	* @param[out] result output result returned here
2	mjames	2631	*/
		2632	void arm_dot_prod_q7(
		2633	q7_t * pSrcA,
		2634	q7_t * pSrcB,
		2635	uint32_t blockSize,
		2636	q31_t * result);
		2637
5	mjames	2638
2	mjames	2639	/**
		2640	* @brief Dot product of Q15 vectors.
5	mjames	2641	* @param[in] pSrcA points to the first input vector
		2642	* @param[in] pSrcB points to the second input vector
		2643	* @param[in] blockSize number of samples in each vector
		2644	* @param[out] result output result returned here
2	mjames	2645	*/
		2646	void arm_dot_prod_q15(
		2647	q15_t * pSrcA,
		2648	q15_t * pSrcB,
		2649	uint32_t blockSize,
		2650	q63_t * result);
		2651
5	mjames	2652
2	mjames	2653	/**
		2654	* @brief Dot product of Q31 vectors.
5	mjames	2655	* @param[in] pSrcA points to the first input vector
		2656	* @param[in] pSrcB points to the second input vector
		2657	* @param[in] blockSize number of samples in each vector
		2658	* @param[out] result output result returned here
2	mjames	2659	*/
		2660	void arm_dot_prod_q31(
		2661	q31_t * pSrcA,
		2662	q31_t * pSrcB,
		2663	uint32_t blockSize,
		2664	q63_t * result);
		2665
5	mjames	2666
2	mjames	2667	/**
		2668	* @brief Shifts the elements of a Q7 vector a specified number of bits.
5	mjames	2669	* @param[in] pSrc points to the input vector
		2670	* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
		2671	* @param[out] pDst points to the output vector
		2672	* @param[in] blockSize number of samples in the vector
2	mjames	2673	*/
		2674	void arm_shift_q7(
		2675	q7_t * pSrc,
		2676	int8_t shiftBits,
		2677	q7_t * pDst,
		2678	uint32_t blockSize);
		2679
5	mjames	2680
2	mjames	2681	/**
		2682	* @brief Shifts the elements of a Q15 vector a specified number of bits.
5	mjames	2683	* @param[in] pSrc points to the input vector
		2684	* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
		2685	* @param[out] pDst points to the output vector
		2686	* @param[in] blockSize number of samples in the vector
2	mjames	2687	*/
		2688	void arm_shift_q15(
		2689	q15_t * pSrc,
		2690	int8_t shiftBits,
		2691	q15_t * pDst,
		2692	uint32_t blockSize);
		2693
5	mjames	2694
2	mjames	2695	/**
		2696	* @brief Shifts the elements of a Q31 vector a specified number of bits.
5	mjames	2697	* @param[in] pSrc points to the input vector
		2698	* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
		2699	* @param[out] pDst points to the output vector
		2700	* @param[in] blockSize number of samples in the vector
2	mjames	2701	*/
		2702	void arm_shift_q31(
		2703	q31_t * pSrc,
		2704	int8_t shiftBits,
		2705	q31_t * pDst,
		2706	uint32_t blockSize);
		2707
5	mjames	2708
2	mjames	2709	/**
		2710	* @brief Adds a constant offset to a floating-point vector.
5	mjames	2711	* @param[in] pSrc points to the input vector
		2712	* @param[in] offset is the offset to be added
		2713	* @param[out] pDst points to the output vector
		2714	* @param[in] blockSize number of samples in the vector
2	mjames	2715	*/
		2716	void arm_offset_f32(
		2717	float32_t * pSrc,
		2718	float32_t offset,
		2719	float32_t * pDst,
		2720	uint32_t blockSize);
		2721
5	mjames	2722
2	mjames	2723	/**
		2724	* @brief Adds a constant offset to a Q7 vector.
5	mjames	2725	* @param[in] pSrc points to the input vector
		2726	* @param[in] offset is the offset to be added
		2727	* @param[out] pDst points to the output vector
		2728	* @param[in] blockSize number of samples in the vector
2	mjames	2729	*/
		2730	void arm_offset_q7(
		2731	q7_t * pSrc,
		2732	q7_t offset,
		2733	q7_t * pDst,
		2734	uint32_t blockSize);
		2735
5	mjames	2736
2	mjames	2737	/**
		2738	* @brief Adds a constant offset to a Q15 vector.
5	mjames	2739	* @param[in] pSrc points to the input vector
		2740	* @param[in] offset is the offset to be added
		2741	* @param[out] pDst points to the output vector
		2742	* @param[in] blockSize number of samples in the vector
2	mjames	2743	*/
		2744	void arm_offset_q15(
		2745	q15_t * pSrc,
		2746	q15_t offset,
		2747	q15_t * pDst,
		2748	uint32_t blockSize);
		2749
5	mjames	2750
2	mjames	2751	/**
		2752	* @brief Adds a constant offset to a Q31 vector.
5	mjames	2753	* @param[in] pSrc points to the input vector
		2754	* @param[in] offset is the offset to be added
		2755	* @param[out] pDst points to the output vector
		2756	* @param[in] blockSize number of samples in the vector
2	mjames	2757	*/
		2758	void arm_offset_q31(
		2759	q31_t * pSrc,
		2760	q31_t offset,
		2761	q31_t * pDst,
		2762	uint32_t blockSize);
		2763
5	mjames	2764
2	mjames	2765	/**
		2766	* @brief Negates the elements of a floating-point vector.
5	mjames	2767	* @param[in] pSrc points to the input vector
		2768	* @param[out] pDst points to the output vector
		2769	* @param[in] blockSize number of samples in the vector
2	mjames	2770	*/
		2771	void arm_negate_f32(
		2772	float32_t * pSrc,
		2773	float32_t * pDst,
		2774	uint32_t blockSize);
		2775
5	mjames	2776
2	mjames	2777	/**
		2778	* @brief Negates the elements of a Q7 vector.
5	mjames	2779	* @param[in] pSrc points to the input vector
		2780	* @param[out] pDst points to the output vector
		2781	* @param[in] blockSize number of samples in the vector
2	mjames	2782	*/
		2783	void arm_negate_q7(
		2784	q7_t * pSrc,
		2785	q7_t * pDst,
		2786	uint32_t blockSize);
		2787
5	mjames	2788
2	mjames	2789	/**
		2790	* @brief Negates the elements of a Q15 vector.
5	mjames	2791	* @param[in] pSrc points to the input vector
		2792	* @param[out] pDst points to the output vector
		2793	* @param[in] blockSize number of samples in the vector
2	mjames	2794	*/
		2795	void arm_negate_q15(
		2796	q15_t * pSrc,
		2797	q15_t * pDst,
		2798	uint32_t blockSize);
		2799
5	mjames	2800
2	mjames	2801	/**
		2802	* @brief Negates the elements of a Q31 vector.
5	mjames	2803	* @param[in] pSrc points to the input vector
		2804	* @param[out] pDst points to the output vector
		2805	* @param[in] blockSize number of samples in the vector
2	mjames	2806	*/
		2807	void arm_negate_q31(
		2808	q31_t * pSrc,
		2809	q31_t * pDst,
		2810	uint32_t blockSize);
5	mjames	2811
		2812
2	mjames	2813	/**
		2814	* @brief Copies the elements of a floating-point vector.
5	mjames	2815	* @param[in] pSrc input pointer
		2816	* @param[out] pDst output pointer
		2817	* @param[in] blockSize number of samples to process
2	mjames	2818	*/
		2819	void arm_copy_f32(
		2820	float32_t * pSrc,
		2821	float32_t * pDst,
		2822	uint32_t blockSize);
		2823
5	mjames	2824
2	mjames	2825	/**
		2826	* @brief Copies the elements of a Q7 vector.
5	mjames	2827	* @param[in] pSrc input pointer
		2828	* @param[out] pDst output pointer
		2829	* @param[in] blockSize number of samples to process
2	mjames	2830	*/
		2831	void arm_copy_q7(
		2832	q7_t * pSrc,
		2833	q7_t * pDst,
		2834	uint32_t blockSize);
		2835
5	mjames	2836
2	mjames	2837	/**
		2838	* @brief Copies the elements of a Q15 vector.
5	mjames	2839	* @param[in] pSrc input pointer
		2840	* @param[out] pDst output pointer
		2841	* @param[in] blockSize number of samples to process
2	mjames	2842	*/
		2843	void arm_copy_q15(
		2844	q15_t * pSrc,
		2845	q15_t * pDst,
		2846	uint32_t blockSize);
		2847
5	mjames	2848
2	mjames	2849	/**
		2850	* @brief Copies the elements of a Q31 vector.
5	mjames	2851	* @param[in] pSrc input pointer
		2852	* @param[out] pDst output pointer
		2853	* @param[in] blockSize number of samples to process
2	mjames	2854	*/
		2855	void arm_copy_q31(
		2856	q31_t * pSrc,
		2857	q31_t * pDst,
		2858	uint32_t blockSize);
5	mjames	2859
		2860
2	mjames	2861	/**
		2862	* @brief Fills a constant value into a floating-point vector.
5	mjames	2863	* @param[in] value input value to be filled
		2864	* @param[out] pDst output pointer
		2865	* @param[in] blockSize number of samples to process
2	mjames	2866	*/
		2867	void arm_fill_f32(
		2868	float32_t value,
		2869	float32_t * pDst,
		2870	uint32_t blockSize);
		2871
5	mjames	2872
2	mjames	2873	/**
		2874	* @brief Fills a constant value into a Q7 vector.
5	mjames	2875	* @param[in] value input value to be filled
		2876	* @param[out] pDst output pointer
		2877	* @param[in] blockSize number of samples to process
2	mjames	2878	*/
		2879	void arm_fill_q7(
		2880	q7_t value,
		2881	q7_t * pDst,
		2882	uint32_t blockSize);
		2883
5	mjames	2884
2	mjames	2885	/**
		2886	* @brief Fills a constant value into a Q15 vector.
5	mjames	2887	* @param[in] value input value to be filled
		2888	* @param[out] pDst output pointer
		2889	* @param[in] blockSize number of samples to process
2	mjames	2890	*/
		2891	void arm_fill_q15(
		2892	q15_t value,
		2893	q15_t * pDst,
		2894	uint32_t blockSize);
		2895
5	mjames	2896
2	mjames	2897	/**
		2898	* @brief Fills a constant value into a Q31 vector.
5	mjames	2899	* @param[in] value input value to be filled
		2900	* @param[out] pDst output pointer
		2901	* @param[in] blockSize number of samples to process
2	mjames	2902	*/
		2903	void arm_fill_q31(
		2904	q31_t value,
		2905	q31_t * pDst,
		2906	uint32_t blockSize);
		2907
5	mjames	2908
2	mjames	2909	/**
		2910	* @brief Convolution of floating-point sequences.
5	mjames	2911	* @param[in] pSrcA points to the first input sequence.
		2912	* @param[in] srcALen length of the first input sequence.
		2913	* @param[in] pSrcB points to the second input sequence.
		2914	* @param[in] srcBLen length of the second input sequence.
		2915	* @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
2	mjames	2916	*/
		2917	void arm_conv_f32(
		2918	float32_t * pSrcA,
		2919	uint32_t srcALen,
		2920	float32_t * pSrcB,
		2921	uint32_t srcBLen,
		2922	float32_t * pDst);
		2923
		2924
		2925	/**
		2926	* @brief Convolution of Q15 sequences (variant using caller-supplied scratch buffers).
5	mjames	2927	* @param[in] pSrcA points to the first input sequence.
		2928	* @param[in] srcALen length of the first input sequence.
		2929	* @param[in] pSrcB points to the second input sequence.
		2930	* @param[in] srcBLen length of the second input sequence.
		2931	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
		2932	* @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		2933	* @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
2	mjames	2934	*/
		2935	void arm_conv_opt_q15(
		2936	q15_t * pSrcA,
		2937	uint32_t srcALen,
		2938	q15_t * pSrcB,
		2939	uint32_t srcBLen,
		2940	q15_t * pDst,
		2941	q15_t * pScratch1,
		2942	q15_t * pScratch2);
		2943
		2944
		2945	/**
		2946	* @brief Convolution of Q15 sequences.
5	mjames	2947	* @param[in] pSrcA points to the first input sequence.
		2948	* @param[in] srcALen length of the first input sequence.
		2949	* @param[in] pSrcB points to the second input sequence.
		2950	* @param[in] srcBLen length of the second input sequence.
		2951	* @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
2	mjames	2952	*/
		2953	void arm_conv_q15(
		2954	q15_t * pSrcA,
		2955	uint32_t srcALen,
		2956	q15_t * pSrcB,
		2957	uint32_t srcBLen,
		2958	q15_t * pDst);
		2959
5	mjames	2960
2	mjames	2961	/**
		2962	* @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
5	mjames	2963	* @param[in] pSrcA points to the first input sequence.
		2964	* @param[in] srcALen length of the first input sequence.
		2965	* @param[in] pSrcB points to the second input sequence.
		2966	* @param[in] srcBLen length of the second input sequence.
		2967	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
2	mjames	2968	*/
		2969	void arm_conv_fast_q15(
5	mjames	2970	q15_t * pSrcA,
		2971	uint32_t srcALen,
		2972	q15_t * pSrcB,
		2973	uint32_t srcBLen,
		2974	q15_t * pDst);
2	mjames	2975
5	mjames	2976
2	mjames	2977	/**
		2978	* @brief Convolution of Q15 sequences (fast version, using caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
5	mjames	2979	* @param[in] pSrcA points to the first input sequence.
		2980	* @param[in] srcALen length of the first input sequence.
		2981	* @param[in] pSrcB points to the second input sequence.
		2982	* @param[in] srcBLen length of the second input sequence.
		2983	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
		2984	* @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		2985	* @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
2	mjames	2986	*/
		2987	void arm_conv_fast_opt_q15(
		2988	q15_t * pSrcA,
		2989	uint32_t srcALen,
		2990	q15_t * pSrcB,
		2991	uint32_t srcBLen,
		2992	q15_t * pDst,
		2993	q15_t * pScratch1,
		2994	q15_t * pScratch2);
		2995
		2996
		2997	/**
		2998	* @brief Convolution of Q31 sequences.
5	mjames	2999	* @param[in] pSrcA points to the first input sequence.
		3000	* @param[in] srcALen length of the first input sequence.
		3001	* @param[in] pSrcB points to the second input sequence.
		3002	* @param[in] srcBLen length of the second input sequence.
		3003	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
2	mjames	3004	*/
		3005	void arm_conv_q31(
		3006	q31_t * pSrcA,
		3007	uint32_t srcALen,
		3008	q31_t * pSrcB,
		3009	uint32_t srcBLen,
		3010	q31_t * pDst);
		3011
5	mjames	3012
2	mjames	3013	/**
		3014	* @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
5	mjames	3015	* @param[in] pSrcA points to the first input sequence.
		3016	* @param[in] srcALen length of the first input sequence.
		3017	* @param[in] pSrcB points to the second input sequence.
		3018	* @param[in] srcBLen length of the second input sequence.
		3019	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
2	mjames	3020	*/
		3021	void arm_conv_fast_q31(
		3022	q31_t * pSrcA,
		3023	uint32_t srcALen,
		3024	q31_t * pSrcB,
		3025	uint32_t srcBLen,
		3026	q31_t * pDst);
		3027
		3028
		3029	/**
		3030	* @brief Convolution of Q7 sequences.
5	mjames	3031	* @param[in] pSrcA points to the first input sequence.
		3032	* @param[in] srcALen length of the first input sequence.
		3033	* @param[in] pSrcB points to the second input sequence.
		3034	* @param[in] srcBLen length of the second input sequence.
		3035	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
		3036	* @param[in] pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		3037	* @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
2	mjames	3038	*/
		3039	void arm_conv_opt_q7(
		3040	q7_t * pSrcA,
		3041	uint32_t srcALen,
		3042	q7_t * pSrcB,
		3043	uint32_t srcBLen,
		3044	q7_t * pDst,
		3045	q15_t * pScratch1,
		3046	q15_t * pScratch2);
		3047
		3048
		3049	/**
		3050	* @brief Convolution of Q7 sequences.
5	mjames	3051	* @param[in] pSrcA points to the first input sequence.
		3052	* @param[in] srcALen length of the first input sequence.
		3053	* @param[in] pSrcB points to the second input sequence.
		3054	* @param[in] srcBLen length of the second input sequence.
		3055	* @param[out] pDst points to the block of output data. Length srcALen+srcBLen-1.
2	mjames	3056	*/
		3057	void arm_conv_q7(
		3058	q7_t * pSrcA,
		3059	uint32_t srcALen,
		3060	q7_t * pSrcB,
		3061	uint32_t srcBLen,
		3062	q7_t * pDst);
		3063
		3064
		3065	/**
		3066	* @brief Partial convolution of floating-point sequences.
5	mjames	3067	* @param[in] pSrcA points to the first input sequence.
		3068	* @param[in] srcALen length of the first input sequence.
		3069	* @param[in] pSrcB points to the second input sequence.
		3070	* @param[in] srcBLen length of the second input sequence.
		3071	* @param[out] pDst points to the block of output data
		3072	* @param[in] firstIndex is the first output sample to start with.
		3073	* @param[in] numPoints is the number of output points to be computed.
2	mjames	3074	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3075	*/
		3076	arm_status arm_conv_partial_f32(
		3077	float32_t * pSrcA,
		3078	uint32_t srcALen,
		3079	float32_t * pSrcB,
		3080	uint32_t srcBLen,
		3081	float32_t * pDst,
		3082	uint32_t firstIndex,
		3083	uint32_t numPoints);
		3084
5	mjames	3085
		3086	/**
2	mjames	3087	* @brief Partial convolution of Q15 sequences (variant using caller-supplied scratch buffers).
5	mjames	3088	* @param[in] pSrcA points to the first input sequence.
		3089	* @param[in] srcALen length of the first input sequence.
		3090	* @param[in] pSrcB points to the second input sequence.
		3091	* @param[in] srcBLen length of the second input sequence.
		3092	* @param[out] pDst points to the block of output data
		3093	* @param[in] firstIndex is the first output sample to start with.
		3094	* @param[in] numPoints is the number of output points to be computed.
		3095	* @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		3096	* @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
2	mjames	3097	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3098	*/
		3099	arm_status arm_conv_partial_opt_q15(
		3100	q15_t * pSrcA,
		3101	uint32_t srcALen,
		3102	q15_t * pSrcB,
		3103	uint32_t srcBLen,
		3104	q15_t * pDst,
		3105	uint32_t firstIndex,
		3106	uint32_t numPoints,
		3107	q15_t * pScratch1,
		3108	q15_t * pScratch2);
		3109
		3110
5	mjames	3111	/**
2	mjames	3112	* @brief Partial convolution of Q15 sequences.
5	mjames	3113	* @param[in] pSrcA points to the first input sequence.
		3114	* @param[in] srcALen length of the first input sequence.
		3115	* @param[in] pSrcB points to the second input sequence.
		3116	* @param[in] srcBLen length of the second input sequence.
		3117	* @param[out] pDst points to the block of output data
		3118	* @param[in] firstIndex is the first output sample to start with.
		3119	* @param[in] numPoints is the number of output points to be computed.
2	mjames	3120	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3121	*/
		3122	arm_status arm_conv_partial_q15(
		3123	q15_t * pSrcA,
		3124	uint32_t srcALen,
		3125	q15_t * pSrcB,
		3126	uint32_t srcBLen,
		3127	q15_t * pDst,
		3128	uint32_t firstIndex,
		3129	uint32_t numPoints);
		3130
5	mjames	3131
2	mjames	3132	/**
		3133	* @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
5	mjames	3134	* @param[in] pSrcA points to the first input sequence.
		3135	* @param[in] srcALen length of the first input sequence.
		3136	* @param[in] pSrcB points to the second input sequence.
		3137	* @param[in] srcBLen length of the second input sequence.
		3138	* @param[out] pDst points to the block of output data
		3139	* @param[in] firstIndex is the first output sample to start with.
		3140	* @param[in] numPoints is the number of output points to be computed.
2	mjames	3141	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3142	*/
		3143	arm_status arm_conv_partial_fast_q15(
5	mjames	3144	q15_t * pSrcA,
		3145	uint32_t srcALen,
		3146	q15_t * pSrcB,
		3147	uint32_t srcBLen,
		3148	q15_t * pDst,
		3149	uint32_t firstIndex,
		3150	uint32_t numPoints);
2	mjames	3151
		3152
		3153	/**
		3154	* @brief Partial convolution of Q15 sequences (fast version, using caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
5	mjames	3155	* @param[in] pSrcA points to the first input sequence.
		3156	* @param[in] srcALen length of the first input sequence.
		3157	* @param[in] pSrcB points to the second input sequence.
		3158	* @param[in] srcBLen length of the second input sequence.
		3159	* @param[out] pDst points to the block of output data
		3160	* @param[in] firstIndex is the first output sample to start with.
		3161	* @param[in] numPoints is the number of output points to be computed.
		3162	* @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		3163	* @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
2	mjames	3164	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3165	*/
		3166	arm_status arm_conv_partial_fast_opt_q15(
		3167	q15_t * pSrcA,
		3168	uint32_t srcALen,
		3169	q15_t * pSrcB,
		3170	uint32_t srcBLen,
		3171	q15_t * pDst,
		3172	uint32_t firstIndex,
		3173	uint32_t numPoints,
		3174	q15_t * pScratch1,
		3175	q15_t * pScratch2);
		3176
		3177
		3178	/**
		3179	* @brief Partial convolution of Q31 sequences.
5	mjames	3180	* @param[in] pSrcA points to the first input sequence.
		3181	* @param[in] srcALen length of the first input sequence.
		3182	* @param[in] pSrcB points to the second input sequence.
		3183	* @param[in] srcBLen length of the second input sequence.
		3184	* @param[out] pDst points to the block of output data
		3185	* @param[in] firstIndex is the first output sample to start with.
		3186	* @param[in] numPoints is the number of output points to be computed.
2	mjames	3187	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3188	*/
		3189	arm_status arm_conv_partial_q31(
		3190	q31_t * pSrcA,
		3191	uint32_t srcALen,
		3192	q31_t * pSrcB,
		3193	uint32_t srcBLen,
		3194	q31_t * pDst,
		3195	uint32_t firstIndex,
		3196	uint32_t numPoints);
		3197
		3198
		3199	/**
		3200	* @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
5	mjames	3201	* @param[in] pSrcA points to the first input sequence.
		3202	* @param[in] srcALen length of the first input sequence.
		3203	* @param[in] pSrcB points to the second input sequence.
		3204	* @param[in] srcBLen length of the second input sequence.
		3205	* @param[out] pDst points to the block of output data
		3206	* @param[in] firstIndex is the first output sample to start with.
		3207	* @param[in] numPoints is the number of output points to be computed.
2	mjames	3208	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3209	*/
		3210	arm_status arm_conv_partial_fast_q31(
		3211	q31_t * pSrcA,
		3212	uint32_t srcALen,
		3213	q31_t * pSrcB,
		3214	uint32_t srcBLen,
		3215	q31_t * pDst,
		3216	uint32_t firstIndex,
		3217	uint32_t numPoints);
		3218
		3219
		3220	/**
		3221	* @brief Partial convolution of Q7 sequences
5	mjames	3222	* @param[in] pSrcA points to the first input sequence.
		3223	* @param[in] srcALen length of the first input sequence.
		3224	* @param[in] pSrcB points to the second input sequence.
		3225	* @param[in] srcBLen length of the second input sequence.
		3226	* @param[out] pDst points to the block of output data
		3227	* @param[in] firstIndex is the first output sample to start with.
		3228	* @param[in] numPoints is the number of output points to be computed.
		3229	* @param[in] pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		3230	* @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
2	mjames	3231	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3232	*/
		3233	arm_status arm_conv_partial_opt_q7(
		3234	q7_t * pSrcA,
		3235	uint32_t srcALen,
		3236	q7_t * pSrcB,
		3237	uint32_t srcBLen,
		3238	q7_t * pDst,
		3239	uint32_t firstIndex,
		3240	uint32_t numPoints,
		3241	q15_t * pScratch1,
		3242	q15_t * pScratch2);
		3243
		3244
		3245	/**
		3246	* @brief Partial convolution of Q7 sequences.
5	mjames	3247	* @param[in] pSrcA points to the first input sequence.
		3248	* @param[in] srcALen length of the first input sequence.
		3249	* @param[in] pSrcB points to the second input sequence.
		3250	* @param[in] srcBLen length of the second input sequence.
		3251	* @param[out] pDst points to the block of output data
		3252	* @param[in] firstIndex is the first output sample to start with.
		3253	* @param[in] numPoints is the number of output points to be computed.
2	mjames	3254	* @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
		3255	*/
		3256	arm_status arm_conv_partial_q7(
		3257	q7_t * pSrcA,
		3258	uint32_t srcALen,
		3259	q7_t * pSrcB,
		3260	uint32_t srcBLen,
		3261	q7_t * pDst,
		3262	uint32_t firstIndex,
		3263	uint32_t numPoints);
		3264
		3265
		3266	/**
		3267	* @brief Instance structure for the Q15 FIR decimator.
		3268	*/
		3269	typedef struct
		3270	{
5	mjames	3271	uint8_t M; /**< decimation factor. */
		3272	uint16_t numTaps; /**< number of coefficients in the filter. */
		3273	q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		3274	q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2	mjames	3275	} arm_fir_decimate_instance_q15;
		3276
		3277	/**
		3278	* @brief Instance structure for the Q31 FIR decimator.
		3279	*/
		3280	typedef struct
		3281	{
		3282	uint8_t M; /**< decimation factor. */
		3283	uint16_t numTaps; /**< number of coefficients in the filter. */
5	mjames	3284	q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		3285	q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2	mjames	3286	} arm_fir_decimate_instance_q31;
		3287
		3288	/**
		3289	* @brief Instance structure for the floating-point FIR decimator.
		3290	*/
		3291	typedef struct
		3292	{
5	mjames	3293	uint8_t M; /**< decimation factor. */
		3294	uint16_t numTaps; /**< number of coefficients in the filter. */
		3295	float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		3296	float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
2	mjames	3297	} arm_fir_decimate_instance_f32;
		3298
		3299
		3300	/**
		3301	* @brief Processing function for the floating-point FIR decimator.
5	mjames	3302	* @param[in] S points to an instance of the floating-point FIR decimator structure.
		3303	* @param[in] pSrc points to the block of input data.
		3304	* @param[out] pDst points to the block of output data
		3305	* @param[in] blockSize number of input samples to process per call.
2	mjames	3306	*/
		3307	void arm_fir_decimate_f32(
		3308	const arm_fir_decimate_instance_f32 * S,
		3309	float32_t * pSrc,
		3310	float32_t * pDst,
		3311	uint32_t blockSize);
		3312
		3313
		3314	/**
		3315	* @brief Initialization function for the floating-point FIR decimator.
5	mjames	3316	* @param[in,out] S points to an instance of the floating-point FIR decimator structure.
		3317	* @param[in] numTaps number of coefficients in the filter.
		3318	* @param[in] M decimation factor.
		3319	* @param[in] pCoeffs points to the filter coefficients.
		3320	* @param[in] pState points to the state buffer.
		3321	* @param[in] blockSize number of input samples to process per call.
2	mjames	3322	* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
		3323	* <code>blockSize</code> is not a multiple of <code>M</code>.
		3324	*/
		3325	arm_status arm_fir_decimate_init_f32(
		3326	arm_fir_decimate_instance_f32 * S,
		3327	uint16_t numTaps,
		3328	uint8_t M,
		3329	float32_t * pCoeffs,
		3330	float32_t * pState,
		3331	uint32_t blockSize);
		3332
5	mjames	3333
2	mjames	3334	/**
		3335	* @brief Processing function for the Q15 FIR decimator.
5	mjames	3336	* @param[in] S points to an instance of the Q15 FIR decimator structure.
		3337	* @param[in] pSrc points to the block of input data.
		3338	* @param[out] pDst points to the block of output data
		3339	* @param[in] blockSize number of input samples to process per call.
2	mjames	3340	*/
		3341	void arm_fir_decimate_q15(
		3342	const arm_fir_decimate_instance_q15 * S,
		3343	q15_t * pSrc,
		3344	q15_t * pDst,
		3345	uint32_t blockSize);
		3346
5	mjames	3347
2	mjames	3348	/**
		3349	* @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
5	mjames	3350	* @param[in] S points to an instance of the Q15 FIR decimator structure.
		3351	* @param[in] pSrc points to the block of input data.
		3352	* @param[out] pDst points to the block of output data
		3353	* @param[in] blockSize number of input samples to process per call.
2	mjames	3354	*/
		3355	void arm_fir_decimate_fast_q15(
		3356	const arm_fir_decimate_instance_q15 * S,
		3357	q15_t * pSrc,
		3358	q15_t * pDst,
		3359	uint32_t blockSize);
		3360
		3361
		3362	/**
		3363	* @brief Initialization function for the Q15 FIR decimator.
5	mjames	3364	* @param[in,out] S points to an instance of the Q15 FIR decimator structure.
		3365	* @param[in] numTaps number of coefficients in the filter.
		3366	* @param[in] M decimation factor.
		3367	* @param[in] pCoeffs points to the filter coefficients.
		3368	* @param[in] pState points to the state buffer.
		3369	* @param[in] blockSize number of input samples to process per call.
2	mjames	3370	* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
		3371	* <code>blockSize</code> is not a multiple of <code>M</code>.
		3372	*/
		3373	arm_status arm_fir_decimate_init_q15(
		3374	arm_fir_decimate_instance_q15 * S,
		3375	uint16_t numTaps,
		3376	uint8_t M,
		3377	q15_t * pCoeffs,
		3378	q15_t * pState,
		3379	uint32_t blockSize);
		3380
5	mjames	3381
2	mjames	3382	/**
		3383	* @brief Processing function for the Q31 FIR decimator.
5	mjames	3384	* @param[in] S points to an instance of the Q31 FIR decimator structure.
		3385	* @param[in] pSrc points to the block of input data.
		3386	* @param[out] pDst points to the block of output data
2	mjames	3387	* @param[in] blockSize number of input samples to process per call.
		3388	*/
		3389	void arm_fir_decimate_q31(
		3390	const arm_fir_decimate_instance_q31 * S,
		3391	q31_t * pSrc,
		3392	q31_t * pDst,
		3393	uint32_t blockSize);
		3394
		3395	/**
		3396	* @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
5	mjames	3397	* @param[in] S points to an instance of the Q31 FIR decimator structure.
		3398	* @param[in] pSrc points to the block of input data.
		3399	* @param[out] pDst points to the block of output data
		3400	* @param[in] blockSize number of input samples to process per call.
2	mjames	3401	*/
		3402	void arm_fir_decimate_fast_q31(
		3403	arm_fir_decimate_instance_q31 * S,
		3404	q31_t * pSrc,
		3405	q31_t * pDst,
		3406	uint32_t blockSize);
		3407
		3408
		3409	/**
		3410	* @brief Initialization function for the Q31 FIR decimator.
5	mjames	3411	* @param[in,out] S points to an instance of the Q31 FIR decimator structure.
		3412	* @param[in] numTaps number of coefficients in the filter.
		3413	* @param[in] M decimation factor.
		3414	* @param[in] pCoeffs points to the filter coefficients.
		3415	* @param[in] pState points to the state buffer.
		3416	* @param[in] blockSize number of input samples to process per call.
2	mjames	3417	* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
		3418	* <code>blockSize</code> is not a multiple of <code>M</code>.
		3419	*/
		3420	arm_status arm_fir_decimate_init_q31(
		3421	arm_fir_decimate_instance_q31 * S,
		3422	uint16_t numTaps,
		3423	uint8_t M,
		3424	q31_t * pCoeffs,
		3425	q31_t * pState,
		3426	uint32_t blockSize);
		3427
		3428
		3429	/**
		3430	* @brief Instance structure for the Q15 FIR interpolator.
		3431	*/
		3432	typedef struct
		3433	{
		3434	uint8_t L; /**< upsample factor. */
		3435	uint16_t phaseLength; /**< length of each polyphase filter component. */
		3436	q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
		3437	q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
		3438	} arm_fir_interpolate_instance_q15;
		3439
		3440	/**
		3441	* @brief Instance structure for the Q31 FIR interpolator.
		3442	*/
		3443	typedef struct
		3444	{
		3445	uint8_t L; /**< upsample factor. */
		3446	uint16_t phaseLength; /**< length of each polyphase filter component. */
5	mjames	3447	q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
		3448	q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2	mjames	3449	} arm_fir_interpolate_instance_q31;
		3450
		3451	/**
		3452	* @brief Instance structure for the floating-point FIR interpolator.
		3453	*/
		3454	typedef struct
		3455	{
		3456	uint8_t L; /**< upsample factor. */
		3457	uint16_t phaseLength; /**< length of each polyphase filter component. */
5	mjames	3458	float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */
		3459	float32_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
2	mjames	3460	} arm_fir_interpolate_instance_f32;
		3461
		3462
		3463	/**
		3464	* @brief Processing function for the Q15 FIR interpolator.
5	mjames	3465	* @param[in] S points to an instance of the Q15 FIR interpolator structure.
		3466	* @param[in] pSrc points to the block of input data.
		3467	* @param[out] pDst points to the block of output data.
		3468	* @param[in] blockSize number of input samples to process per call.
2	mjames	3469	*/
		3470	void arm_fir_interpolate_q15(
		3471	const arm_fir_interpolate_instance_q15 * S,
		3472	q15_t * pSrc,
		3473	q15_t * pDst,
		3474	uint32_t blockSize);
		3475
		3476
		3477	/**
		3478	* @brief Initialization function for the Q15 FIR interpolator.
5	mjames	3479	* @param[in,out] S points to an instance of the Q15 FIR interpolator structure.
		3480	* @param[in] L upsample factor.
		3481	* @param[in] numTaps number of filter coefficients in the filter.
		3482	* @param[in] pCoeffs points to the filter coefficient buffer.
		3483	* @param[in] pState points to the state buffer.
		3484	* @param[in] blockSize number of input samples to process per call.
2	mjames	3485	* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
		3486	* the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
		3487	*/
		3488	arm_status arm_fir_interpolate_init_q15(
		3489	arm_fir_interpolate_instance_q15 * S,
		3490	uint8_t L,
		3491	uint16_t numTaps,
		3492	q15_t * pCoeffs,
		3493	q15_t * pState,
		3494	uint32_t blockSize);
		3495
5	mjames	3496
2	mjames	3497	/**
		3498	* @brief Processing function for the Q31 FIR interpolator.
5	mjames	3499	* @param[in] S points to an instance of the Q31 FIR interpolator structure.
		3500	* @param[in] pSrc points to the block of input data.
		3501	* @param[out] pDst points to the block of output data.
		3502	* @param[in] blockSize number of input samples to process per call.
2	mjames	3503	*/
		3504	void arm_fir_interpolate_q31(
		3505	const arm_fir_interpolate_instance_q31 * S,
		3506	q31_t * pSrc,
		3507	q31_t * pDst,
		3508	uint32_t blockSize);
		3509
5	mjames	3510
2	mjames	3511	/**
		3512	* @brief Initialization function for the Q31 FIR interpolator.
5	mjames	3513	* @param[in,out] S points to an instance of the Q31 FIR interpolator structure.
		3514	* @param[in] L upsample factor.
		3515	* @param[in] numTaps number of filter coefficients in the filter.
		3516	* @param[in] pCoeffs points to the filter coefficient buffer.
		3517	* @param[in] pState points to the state buffer.
		3518	* @param[in] blockSize number of input samples to process per call.
2	mjames	3519	* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
		3520	* the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
		3521	*/
		3522	arm_status arm_fir_interpolate_init_q31(
		3523	arm_fir_interpolate_instance_q31 * S,
		3524	uint8_t L,
		3525	uint16_t numTaps,
		3526	q31_t * pCoeffs,
		3527	q31_t * pState,
		3528	uint32_t blockSize);
		3529
		3530
		3531	/**
		3532	* @brief Processing function for the floating-point FIR interpolator.
5	mjames	3533	* @param[in] S points to an instance of the floating-point FIR interpolator structure.
		3534	* @param[in] pSrc points to the block of input data.
		3535	* @param[out] pDst points to the block of output data.
		3536	* @param[in] blockSize number of input samples to process per call.
2	mjames	3537	*/
		3538	void arm_fir_interpolate_f32(
		3539	const arm_fir_interpolate_instance_f32 * S,
		3540	float32_t * pSrc,
		3541	float32_t * pDst,
		3542	uint32_t blockSize);
		3543
5	mjames	3544
2	mjames	3545	/**
		3546	* @brief Initialization function for the floating-point FIR interpolator.
5	mjames	3547	* @param[in,out] S points to an instance of the floating-point FIR interpolator structure.
		3548	* @param[in] L upsample factor.
		3549	* @param[in] numTaps number of filter coefficients in the filter.
		3550	* @param[in] pCoeffs points to the filter coefficient buffer.
		3551	* @param[in] pState points to the state buffer.
		3552	* @param[in] blockSize number of input samples to process per call.
2	mjames	3553	* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
		3554	* the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
		3555	*/
		3556	arm_status arm_fir_interpolate_init_f32(
		3557	arm_fir_interpolate_instance_f32 * S,
		3558	uint8_t L,
		3559	uint16_t numTaps,
		3560	float32_t * pCoeffs,
		3561	float32_t * pState,
		3562	uint32_t blockSize);
		3563
5	mjames	3564
2	mjames	3565	/**
		3566	* @brief Instance structure for the high precision Q31 Biquad cascade filter.
		3567	*/
		3568	typedef struct
		3569	{
		3570	uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
		3571	q63_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */
		3572	q31_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
		3573	uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */
		3574	} arm_biquad_cas_df1_32x64_ins_q31;
		3575
		3576
		3577	/**
5	mjames	3578	* @param[in] S points to an instance of the high precision Q31 Biquad cascade filter structure.
		3579	* @param[in] pSrc points to the block of input data.
		3580	* @param[out] pDst points to the block of output data
		3581	* @param[in] blockSize number of samples to process.
2	mjames	3582	*/
		3583	void arm_biquad_cas_df1_32x64_q31(
		3584	const arm_biquad_cas_df1_32x64_ins_q31 * S,
		3585	q31_t * pSrc,
		3586	q31_t * pDst,
		3587	uint32_t blockSize);
		3588
		3589
		3590	/**
5	mjames	3591	* @param[in,out] S points to an instance of the high precision Q31 Biquad cascade filter structure.
		3592	* @param[in] numStages number of 2nd order stages in the filter.
		3593	* @param[in] pCoeffs points to the filter coefficients.
		3594	* @param[in] pState points to the state buffer.
		3595	* @param[in] postShift shift to be applied to the output. Varies according to the coefficients format
2	mjames	3596	*/
		3597	void arm_biquad_cas_df1_32x64_init_q31(
		3598	arm_biquad_cas_df1_32x64_ins_q31 * S,
		3599	uint8_t numStages,
		3600	q31_t * pCoeffs,
		3601	q63_t * pState,
		3602	uint8_t postShift);
		3603
		3604
		3605	/**
		3606	* @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
		3607	*/
		3608	typedef struct
		3609	{
		3610	uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
		3611	float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */
		3612	float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
		3613	} arm_biquad_cascade_df2T_instance_f32;
		3614
		3615	/**
		3616	* @brief Instance structure for the stereo (2-channel) floating-point transposed direct form II Biquad cascade filter.
		3617	*/
		3618	typedef struct
		3619	{
		3620	uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
		3621	float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */
		3622	float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
		3623	} arm_biquad_cascade_stereo_df2T_instance_f32;
		3624
		3625	/**
		3626	* @brief Instance structure for the 64-bit floating-point transposed direct form II Biquad cascade filter.
		3627	*/
		3628	typedef struct
		3629	{
		3630	uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */
		3631	float64_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */
		3632	float64_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */
		3633	} arm_biquad_cascade_df2T_instance_f64;
		3634
		3635
		3636	/**
		3637	* @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
5	mjames	3638	* @param[in] S points to an instance of the filter data structure.
		3639	* @param[in] pSrc points to the block of input data.
		3640	* @param[out] pDst points to the block of output data
		3641	* @param[in] blockSize number of samples to process.
2	mjames	3642	*/
		3643	void arm_biquad_cascade_df2T_f32(
		3644	const arm_biquad_cascade_df2T_instance_f32 * S,
		3645	float32_t * pSrc,
		3646	float32_t * pDst,
		3647	uint32_t blockSize);
		3648
		3649
		3650	/**
		3651	* @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels
5	mjames	3652	* @param[in] S points to an instance of the filter data structure.
		3653	* @param[in] pSrc points to the block of input data.
		3654	* @param[out] pDst points to the block of output data
		3655	* @param[in] blockSize number of samples to process.
2	mjames	3656	*/
		3657	void arm_biquad_cascade_stereo_df2T_f32(
		3658	const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
		3659	float32_t * pSrc,
		3660	float32_t * pDst,
		3661	uint32_t blockSize);
		3662
5	mjames	3663
2	mjames	3664	/**
		3665	* @brief Processing function for the 64-bit floating-point transposed direct form II Biquad cascade filter.
5	mjames	3666	* @param[in] S points to an instance of the filter data structure.
		3667	* @param[in] pSrc points to the block of input data.
		3668	* @param[out] pDst points to the block of output data
		3669	* @param[in] blockSize number of samples to process.
2	mjames	3670	*/
		3671	void arm_biquad_cascade_df2T_f64(
		3672	const arm_biquad_cascade_df2T_instance_f64 * S,
		3673	float64_t * pSrc,
		3674	float64_t * pDst,
		3675	uint32_t blockSize);
		3676
		3677
		3678	/**
		3679	* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
5	mjames	3680	* @param[in,out] S points to an instance of the filter data structure.
		3681	* @param[in] numStages number of 2nd order stages in the filter.
		3682	* @param[in] pCoeffs points to the filter coefficients.
		3683	* @param[in] pState points to the state buffer.
2	mjames	3684	*/
		3685	void arm_biquad_cascade_df2T_init_f32(
		3686	arm_biquad_cascade_df2T_instance_f32 * S,
		3687	uint8_t numStages,
		3688	float32_t * pCoeffs,
		3689	float32_t * pState);
		3690
		3691
		3692	/**
		3693	* @brief Initialization function for the stereo (2-channel) floating-point transposed direct form II Biquad cascade filter.
5	mjames	3694	* @param[in,out] S points to an instance of the filter data structure.
		3695	* @param[in] numStages number of 2nd order stages in the filter.
		3696	* @param[in] pCoeffs points to the filter coefficients.
		3697	* @param[in] pState points to the state buffer.
2	mjames	3698	*/
		3699	void arm_biquad_cascade_stereo_df2T_init_f32(
		3700	arm_biquad_cascade_stereo_df2T_instance_f32 * S,
		3701	uint8_t numStages,
		3702	float32_t * pCoeffs,
		3703	float32_t * pState);
		3704
		3705
		3706	/**
		3707	* @brief Initialization function for the 64-bit floating-point transposed direct form II Biquad cascade filter.
5	mjames	3708	* @param[in,out] S points to an instance of the filter data structure.
		3709	* @param[in] numStages number of 2nd order stages in the filter.
		3710	* @param[in] pCoeffs points to the filter coefficients.
		3711	* @param[in] pState points to the state buffer.
2	mjames	3712	*/
		3713	void arm_biquad_cascade_df2T_init_f64(
		3714	arm_biquad_cascade_df2T_instance_f64 * S,
		3715	uint8_t numStages,
		3716	float64_t * pCoeffs,
		3717	float64_t * pState);
		3718
		3719
		3720	/**
		3721	* @brief Instance structure for the Q15 FIR lattice filter.
		3722	*/
		3723	typedef struct
		3724	{
5	mjames	3725	uint16_t numStages; /**< number of filter stages. */
		3726	q15_t *pState; /**< points to the state variable array. The array is of length numStages. */
		3727	q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
2	mjames	3728	} arm_fir_lattice_instance_q15;
		3729
		3730	/**
		3731	* @brief Instance structure for the Q31 FIR lattice filter.
		3732	*/
		3733	typedef struct
		3734	{
5	mjames	3735	uint16_t numStages; /**< number of filter stages. */
		3736	q31_t *pState; /**< points to the state variable array. The array is of length numStages. */
		3737	q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
2	mjames	3738	} arm_fir_lattice_instance_q31;
		3739
		3740	/**
		3741	* @brief Instance structure for the floating-point FIR lattice filter.
		3742	*/
		3743	typedef struct
		3744	{
		3745	uint16_t numStages; /**< number of filter stages. */
		3746	float32_t *pState; /**< points to the state variable array. The array is of length numStages. */
		3747	float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */
		3748	} arm_fir_lattice_instance_f32;
		3749
5	mjames	3750
2	mjames	3751	/**
		3752	* @brief Initialization function for the Q15 FIR lattice filter.
5	mjames	3753	* @param[in] S points to an instance of the Q15 FIR lattice structure.
2	mjames	3754	* @param[in] numStages number of filter stages.
5	mjames	3755	* @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages.
		3756	* @param[in] pState points to the state buffer. The array is of length numStages.
2	mjames	3757	*/
		3758	void arm_fir_lattice_init_q15(
		3759	arm_fir_lattice_instance_q15 * S,
		3760	uint16_t numStages,
		3761	q15_t * pCoeffs,
		3762	q15_t * pState);
		3763
		3764
		3765	/**
		3766	* @brief Processing function for the Q15 FIR lattice filter.
5	mjames	3767	* @param[in] S points to an instance of the Q15 FIR lattice structure.
		3768	* @param[in] pSrc points to the block of input data.
		3769	* @param[out] pDst points to the block of output data.
		3770	* @param[in] blockSize number of samples to process.
2	mjames	3771	*/
		3772	void arm_fir_lattice_q15(
		3773	const arm_fir_lattice_instance_q15 * S,
		3774	q15_t * pSrc,
		3775	q15_t * pDst,
		3776	uint32_t blockSize);
		3777
5	mjames	3778
2	mjames	3779	/**
		3780	* @brief Initialization function for the Q31 FIR lattice filter.
5	mjames	3781	* @param[in] S points to an instance of the Q31 FIR lattice structure.
2	mjames	3782	* @param[in] numStages number of filter stages.
5	mjames	3783	* @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages.
		3784	* @param[in] pState points to the state buffer. The array is of length numStages.
2	mjames	3785	*/
		3786	void arm_fir_lattice_init_q31(
		3787	arm_fir_lattice_instance_q31 * S,
		3788	uint16_t numStages,
		3789	q31_t * pCoeffs,
		3790	q31_t * pState);
		3791
		3792
		3793	/**
		3794	* @brief Processing function for the Q31 FIR lattice filter.
5	mjames	3795	* @param[in] S points to an instance of the Q31 FIR lattice structure.
		3796	* @param[in] pSrc points to the block of input data.
		3797	* @param[out] pDst points to the block of output data
		3798	* @param[in] blockSize number of samples to process.
2	mjames	3799	*/
		3800	void arm_fir_lattice_q31(
		3801	const arm_fir_lattice_instance_q31 * S,
		3802	q31_t * pSrc,
		3803	q31_t * pDst,
		3804	uint32_t blockSize);
		3805
5	mjames	3806
2	mjames	3807	/**
		3808	* @brief Initialization function for the floating-point FIR lattice filter.
5	mjames	3809	* @param[in] S points to an instance of the floating-point FIR lattice structure.
2	mjames	3810	* @param[in] numStages number of filter stages.
5	mjames	3811	* @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages.
		3812	* @param[in] pState points to the state buffer. The array is of length numStages.
2	mjames	3813	*/
		3814	void arm_fir_lattice_init_f32(
		3815	arm_fir_lattice_instance_f32 * S,
		3816	uint16_t numStages,
		3817	float32_t * pCoeffs,
		3818	float32_t * pState);
		3819
5	mjames	3820
2	mjames	3821	/**
		3822	* @brief Processing function for the floating-point FIR lattice filter.
5	mjames	3823	* @param[in] S points to an instance of the floating-point FIR lattice structure.
		3824	* @param[in] pSrc points to the block of input data.
		3825	* @param[out] pDst points to the block of output data
		3826	* @param[in] blockSize number of samples to process.
2	mjames	3827	*/
		3828	void arm_fir_lattice_f32(
		3829	const arm_fir_lattice_instance_f32 * S,
		3830	float32_t * pSrc,
		3831	float32_t * pDst,
		3832	uint32_t blockSize);
		3833
5	mjames	3834
2	mjames	3835	/**
		3836	* @brief Instance structure for the Q15 IIR lattice filter.
		3837	*/
		3838	typedef struct
		3839	{
5	mjames	3840	uint16_t numStages; /**< number of stages in the filter. */
		3841	q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
		3842	q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
		3843	q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
2	mjames	3844	} arm_iir_lattice_instance_q15;
		3845
		3846	/**
		3847	* @brief Instance structure for the Q31 IIR lattice filter.
		3848	*/
		3849	typedef struct
		3850	{
5	mjames	3851	uint16_t numStages; /**< number of stages in the filter. */
		3852	q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
		3853	q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
		3854	q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
2	mjames	3855	} arm_iir_lattice_instance_q31;
		3856
		3857	/**
		3858	* @brief Instance structure for the floating-point IIR lattice filter.
		3859	*/
		3860	typedef struct
		3861	{
5	mjames	3862	uint16_t numStages; /**< number of stages in the filter. */
		3863	float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */
		3864	float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */
		3865	float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */
2	mjames	3866	} arm_iir_lattice_instance_f32;
		3867
5	mjames	3868
2	mjames	3869	/**
		3870	* @brief Processing function for the floating-point IIR lattice filter.
5	mjames	3871	* @param[in] S points to an instance of the floating-point IIR lattice structure.
		3872	* @param[in] pSrc points to the block of input data.
		3873	* @param[out] pDst points to the block of output data.
		3874	* @param[in] blockSize number of samples to process.
2	mjames	3875	*/
		3876	void arm_iir_lattice_f32(
		3877	const arm_iir_lattice_instance_f32 * S,
		3878	float32_t * pSrc,
		3879	float32_t * pDst,
		3880	uint32_t blockSize);
		3881
5	mjames	3882
2	mjames	3883	/**
		3884	* @brief Initialization function for the floating-point IIR lattice filter.
5	mjames	3885	* @param[in] S points to an instance of the floating-point IIR lattice structure.
		3886	* @param[in] numStages number of stages in the filter.
		3887	* @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages.
		3888	* @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1.
		3889	* @param[in] pState points to the state buffer. The array is of length numStages+blockSize.
		3890	* @param[in] blockSize number of samples to process.
2	mjames	3891	*/
		3892	void arm_iir_lattice_init_f32(
		3893	arm_iir_lattice_instance_f32 * S,
		3894	uint16_t numStages,
		3895	float32_t * pkCoeffs,
		3896	float32_t * pvCoeffs,
		3897	float32_t * pState,
		3898	uint32_t blockSize);
		3899
		3900
		3901	/**
		3902	* @brief Processing function for the Q31 IIR lattice filter.
5	mjames	3903	* @param[in] S points to an instance of the Q31 IIR lattice structure.
		3904	* @param[in] pSrc points to the block of input data.
		3905	* @param[out] pDst points to the block of output data.
		3906	* @param[in] blockSize number of samples to process.
2	mjames	3907	*/
		3908	void arm_iir_lattice_q31(
		3909	const arm_iir_lattice_instance_q31 * S,
		3910	q31_t * pSrc,
		3911	q31_t * pDst,
		3912	uint32_t blockSize);
		3913
		3914
		3915	/**
		3916	* @brief Initialization function for the Q31 IIR lattice filter.
5	mjames	3917	* @param[in] S points to an instance of the Q31 IIR lattice structure.
		3918	* @param[in] numStages number of stages in the filter.
		3919	* @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages.
		3920	* @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1.
		3921	* @param[in] pState points to the state buffer. The array is of length numStages+blockSize.
		3922	* @param[in] blockSize number of samples to process.
2	mjames	3923	*/
		3924	void arm_iir_lattice_init_q31(
		3925	arm_iir_lattice_instance_q31 * S,
		3926	uint16_t numStages,
		3927	q31_t * pkCoeffs,
		3928	q31_t * pvCoeffs,
		3929	q31_t * pState,
		3930	uint32_t blockSize);
		3931
		3932
		3933	/**
		3934	* @brief Processing function for the Q15 IIR lattice filter.
5	mjames	3935	* @param[in] S points to an instance of the Q15 IIR lattice structure.
		3936	* @param[in] pSrc points to the block of input data.
		3937	* @param[out] pDst points to the block of output data.
		3938	* @param[in] blockSize number of samples to process.
2	mjames	3939	*/
		3940	void arm_iir_lattice_q15(
		3941	const arm_iir_lattice_instance_q15 * S,
		3942	q15_t * pSrc,
		3943	q15_t * pDst,
		3944	uint32_t blockSize);
		3945
		3946
		3947	/**
		3948	* @brief Initialization function for the Q15 IIR lattice filter.
5	mjames	3949	* @param[in] S points to an instance of the fixed-point Q15 IIR lattice structure.
2	mjames	3950	* @param[in] numStages number of stages in the filter.
5	mjames	3951	* @param[in] pkCoeffs points to reflection coefficient buffer. The array is of length numStages.
		3952	* @param[in] pvCoeffs points to ladder coefficient buffer. The array is of length numStages+1.
		3953	* @param[in] pState points to state buffer. The array is of length numStages+blockSize.
		3954	* @param[in] blockSize number of samples to process per call.
2	mjames	3955	*/
		3956	void arm_iir_lattice_init_q15(
		3957	arm_iir_lattice_instance_q15 * S,
		3958	uint16_t numStages,
		3959	q15_t * pkCoeffs,
		3960	q15_t * pvCoeffs,
		3961	q15_t * pState,
		3962	uint32_t blockSize);
		3963
5	mjames	3964
2	mjames	3965	/**
		3966	* @brief Instance structure for the floating-point LMS filter.
		3967	*/
		3968	typedef struct
		3969	{
		3970	uint16_t numTaps; /**< number of coefficients in the filter. */
		3971	float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
		3972	float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		3973	float32_t mu; /**< step size that controls filter coefficient updates. */
		3974	} arm_lms_instance_f32;
		3975
5	mjames	3976
2	mjames	3977	/**
		3978	* @brief Processing function for floating-point LMS filter.
5	mjames	3979	* @param[in] S points to an instance of the floating-point LMS filter structure.
		3980	* @param[in] pSrc points to the block of input data.
		3981	* @param[in] pRef points to the block of reference data.
		3982	* @param[out] pOut points to the block of output data.
		3983	* @param[out] pErr points to the block of error data.
		3984	* @param[in] blockSize number of samples to process.
2	mjames	3985	*/
		3986	void arm_lms_f32(
		3987	const arm_lms_instance_f32 * S,
		3988	float32_t * pSrc,
		3989	float32_t * pRef,
		3990	float32_t * pOut,
		3991	float32_t * pErr,
		3992	uint32_t blockSize);
		3993
5	mjames	3994
2	mjames	3995	/**
		3996	* @brief Initialization function for floating-point LMS filter.
5	mjames	3997	* @param[in] S points to an instance of the floating-point LMS filter structure.
		3998	* @param[in] numTaps number of filter coefficients.
		3999	* @param[in] pCoeffs points to the coefficient buffer.
		4000	* @param[in] pState points to state buffer.
		4001	* @param[in] mu step size that controls filter coefficient updates.
		4002	* @param[in] blockSize number of samples to process.
2	mjames	4003	*/
		4004	void arm_lms_init_f32(
		4005	arm_lms_instance_f32 * S,
		4006	uint16_t numTaps,
		4007	float32_t * pCoeffs,
		4008	float32_t * pState,
		4009	float32_t mu,
		4010	uint32_t blockSize);
		4011
5	mjames	4012
2	mjames	4013	/**
		4014	* @brief Instance structure for the Q15 LMS filter.
		4015	*/
		4016	typedef struct
		4017	{
		4018	uint16_t numTaps; /**< number of coefficients in the filter. */
		4019	q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
		4020	q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4021	q15_t mu; /**< step size that controls filter coefficient updates. */
		4022	uint32_t postShift; /**< bit shift applied to coefficients. */
		4023	} arm_lms_instance_q15;
		4024
		4025
		4026	/**
		4027	* @brief Initialization function for the Q15 LMS filter.
5	mjames	4028	* @param[in] S points to an instance of the Q15 LMS filter structure.
		4029	* @param[in] numTaps number of filter coefficients.
		4030	* @param[in] pCoeffs points to the coefficient buffer.
		4031	* @param[in] pState points to the state buffer.
		4032	* @param[in] mu step size that controls filter coefficient updates.
		4033	* @param[in] blockSize number of samples to process.
		4034	* @param[in] postShift bit shift applied to coefficients.
2	mjames	4035	*/
		4036	void arm_lms_init_q15(
		4037	arm_lms_instance_q15 * S,
		4038	uint16_t numTaps,
		4039	q15_t * pCoeffs,
		4040	q15_t * pState,
		4041	q15_t mu,
		4042	uint32_t blockSize,
		4043	uint32_t postShift);
		4044
5	mjames	4045
2	mjames	4046	/**
		4047	* @brief Processing function for Q15 LMS filter.
5	mjames	4048	* @param[in] S points to an instance of the Q15 LMS filter structure.
		4049	* @param[in] pSrc points to the block of input data.
		4050	* @param[in] pRef points to the block of reference data.
		4051	* @param[out] pOut points to the block of output data.
		4052	* @param[out] pErr points to the block of error data.
		4053	* @param[in] blockSize number of samples to process.
2	mjames	4054	*/
		4055	void arm_lms_q15(
		4056	const arm_lms_instance_q15 * S,
		4057	q15_t * pSrc,
		4058	q15_t * pRef,
		4059	q15_t * pOut,
		4060	q15_t * pErr,
		4061	uint32_t blockSize);
		4062
		4063
		4064	/**
		4065	* @brief Instance structure for the Q31 LMS filter.
		4066	*/
		4067	typedef struct
		4068	{
		4069	uint16_t numTaps; /**< number of coefficients in the filter. */
		4070	q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
		4071	q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4072	q31_t mu; /**< step size that controls filter coefficient updates. */
		4073	uint32_t postShift; /**< bit shift applied to coefficients. */
		4074	} arm_lms_instance_q31;
		4075
5	mjames	4076
2	mjames	4077	/**
		4078	* @brief Processing function for Q31 LMS filter.
5	mjames	4079	* @param[in] S points to an instance of the Q31 LMS filter structure.
		4080	* @param[in] pSrc points to the block of input data.
		4081	* @param[in] pRef points to the block of reference data.
		4082	* @param[out] pOut points to the block of output data.
		4083	* @param[out] pErr points to the block of error data.
		4084	* @param[in] blockSize number of samples to process.
2	mjames	4085	*/
		4086	void arm_lms_q31(
		4087	const arm_lms_instance_q31 * S,
		4088	q31_t * pSrc,
		4089	q31_t * pRef,
		4090	q31_t * pOut,
		4091	q31_t * pErr,
		4092	uint32_t blockSize);
		4093
5	mjames	4094
2	mjames	4095	/**
		4096	* @brief Initialization function for Q31 LMS filter.
5	mjames	4097	* @param[in] S points to an instance of the Q31 LMS filter structure.
		4098	* @param[in] numTaps number of filter coefficients.
		4099	* @param[in] pCoeffs points to coefficient buffer.
		4100	* @param[in] pState points to state buffer.
		4101	* @param[in] mu step size that controls filter coefficient updates.
		4102	* @param[in] blockSize number of samples to process.
		4103	* @param[in] postShift bit shift applied to coefficients.
2	mjames	4104	*/
		4105	void arm_lms_init_q31(
		4106	arm_lms_instance_q31 * S,
		4107	uint16_t numTaps,
		4108	q31_t * pCoeffs,
		4109	q31_t * pState,
		4110	q31_t mu,
		4111	uint32_t blockSize,
		4112	uint32_t postShift);
		4113
5	mjames	4114
2	mjames	4115	/**
		4116	* @brief Instance structure for the floating-point normalized LMS filter.
		4117	*/
		4118	typedef struct
		4119	{
		4120	uint16_t numTaps; /**< number of coefficients in the filter. */
		4121	float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
		4122	float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
5	mjames	4123	float32_t mu; /**< step size that controls filter coefficient updates. */
		4124	float32_t energy; /**< saves previous frame energy. */
		4125	float32_t x0; /**< saves previous input sample. */
2	mjames	4126	} arm_lms_norm_instance_f32;
		4127
5	mjames	4128
2	mjames	4129	/**
		4130	* @brief Processing function for floating-point normalized LMS filter.
5	mjames	4131	* @param[in] S points to an instance of the floating-point normalized LMS filter structure.
		4132	* @param[in] pSrc points to the block of input data.
		4133	* @param[in] pRef points to the block of reference data.
		4134	* @param[out] pOut points to the block of output data.
		4135	* @param[out] pErr points to the block of error data.
		4136	* @param[in] blockSize number of samples to process.
2	mjames	4137	*/
		4138	void arm_lms_norm_f32(
		4139	arm_lms_norm_instance_f32 * S,
		4140	float32_t * pSrc,
		4141	float32_t * pRef,
		4142	float32_t * pOut,
		4143	float32_t * pErr,
		4144	uint32_t blockSize);
		4145
5	mjames	4146
2	mjames	4147	/**
		4148	* @brief Initialization function for floating-point normalized LMS filter.
5	mjames	4149	* @param[in] S points to an instance of the floating-point normalized LMS filter structure.
		4150	* @param[in] numTaps number of filter coefficients.
		4151	* @param[in] pCoeffs points to coefficient buffer.
		4152	* @param[in] pState points to state buffer.
		4153	* @param[in] mu step size that controls filter coefficient updates.
		4154	* @param[in] blockSize number of samples to process.
2	mjames	4155	*/
		4156	void arm_lms_norm_init_f32(
		4157	arm_lms_norm_instance_f32 * S,
		4158	uint16_t numTaps,
		4159	float32_t * pCoeffs,
		4160	float32_t * pState,
		4161	float32_t mu,
		4162	uint32_t blockSize);
		4163
		4164
		4165	/**
		4166	* @brief Instance structure for the Q31 normalized LMS filter.
		4167	*/
		4168	typedef struct
		4169	{
		4170	uint16_t numTaps; /**< number of coefficients in the filter. */
		4171	q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
		4172	q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4173	q31_t mu; /**< step size that controls filter coefficient updates. */
		4174	uint8_t postShift; /**< bit shift applied to coefficients. */
		4175	q31_t *recipTable; /**< points to the reciprocal initial value table. */
		4176	q31_t energy; /**< saves previous frame energy. */
		4177	q31_t x0; /**< saves previous input sample. */
		4178	} arm_lms_norm_instance_q31;
		4179
5	mjames	4180
2	mjames	4181	/**
		4182	* @brief Processing function for Q31 normalized LMS filter.
5	mjames	4183	* @param[in] S points to an instance of the Q31 normalized LMS filter structure.
		4184	* @param[in] pSrc points to the block of input data.
		4185	* @param[in] pRef points to the block of reference data.
		4186	* @param[out] pOut points to the block of output data.
		4187	* @param[out] pErr points to the block of error data.
		4188	* @param[in] blockSize number of samples to process.
2	mjames	4189	*/
		4190	void arm_lms_norm_q31(
		4191	arm_lms_norm_instance_q31 * S,
		4192	q31_t * pSrc,
		4193	q31_t * pRef,
		4194	q31_t * pOut,
		4195	q31_t * pErr,
		4196	uint32_t blockSize);
		4197
5	mjames	4198
2	mjames	4199	/**
		4200	* @brief Initialization function for Q31 normalized LMS filter.
5	mjames	4201	* @param[in] S points to an instance of the Q31 normalized LMS filter structure.
		4202	* @param[in] numTaps number of filter coefficients.
		4203	* @param[in] pCoeffs points to coefficient buffer.
		4204	* @param[in] pState points to state buffer.
		4205	* @param[in] mu step size that controls filter coefficient updates.
		4206	* @param[in] blockSize number of samples to process.
		4207	* @param[in] postShift bit shift applied to coefficients.
2	mjames	4208	*/
		4209	void arm_lms_norm_init_q31(
		4210	arm_lms_norm_instance_q31 * S,
		4211	uint16_t numTaps,
		4212	q31_t * pCoeffs,
		4213	q31_t * pState,
		4214	q31_t mu,
		4215	uint32_t blockSize,
		4216	uint8_t postShift);
		4217
5	mjames	4218
2	mjames	4219	/**
		4220	* @brief Instance structure for the Q15 normalized LMS filter.
		4221	*/
		4222	typedef struct
		4223	{
5	mjames	4224	uint16_t numTaps; /**< Number of coefficients in the filter. */
2	mjames	4225	q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
		4226	q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
5	mjames	4227	q15_t mu; /**< step size that controls filter coefficient updates. */
		4228	uint8_t postShift; /**< bit shift applied to coefficients. */
		4229	q15_t *recipTable; /**< Points to the reciprocal initial value table. */
		4230	q15_t energy; /**< saves previous frame energy. */
		4231	q15_t x0; /**< saves previous input sample. */
2	mjames	4232	} arm_lms_norm_instance_q15;
		4233
5	mjames	4234
2	mjames	4235	/**
		4236	* @brief Processing function for Q15 normalized LMS filter.
5	mjames	4237	* @param[in] S points to an instance of the Q15 normalized LMS filter structure.
		4238	* @param[in] pSrc points to the block of input data.
		4239	* @param[in] pRef points to the block of reference data.
		4240	* @param[out] pOut points to the block of output data.
		4241	* @param[out] pErr points to the block of error data.
		4242	* @param[in] blockSize number of samples to process.
2	mjames	4243	*/
		4244	void arm_lms_norm_q15(
		4245	arm_lms_norm_instance_q15 * S,
		4246	q15_t * pSrc,
		4247	q15_t * pRef,
		4248	q15_t * pOut,
		4249	q15_t * pErr,
		4250	uint32_t blockSize);
		4251
		4252
		4253	/**
		4254	* @brief Initialization function for Q15 normalized LMS filter.
5	mjames	4255	* @param[in] S points to an instance of the Q15 normalized LMS filter structure.
		4256	* @param[in] numTaps number of filter coefficients.
		4257	* @param[in] pCoeffs points to coefficient buffer.
		4258	* @param[in] pState points to state buffer.
		4259	* @param[in] mu step size that controls filter coefficient updates.
		4260	* @param[in] blockSize number of samples to process.
		4261	* @param[in] postShift bit shift applied to coefficients.
2	mjames	4262	*/
		4263	void arm_lms_norm_init_q15(
		4264	arm_lms_norm_instance_q15 * S,
		4265	uint16_t numTaps,
		4266	q15_t * pCoeffs,
		4267	q15_t * pState,
		4268	q15_t mu,
		4269	uint32_t blockSize,
		4270	uint8_t postShift);
		4271
5	mjames	4272
2	mjames	4273	/**
		4274	* @brief Correlation of floating-point sequences.
5	mjames	4275	* @param[in] pSrcA points to the first input sequence.
		4276	* @param[in] srcALen length of the first input sequence.
		4277	* @param[in] pSrcB points to the second input sequence.
		4278	* @param[in] srcBLen length of the second input sequence.
		4279	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
2	mjames	4280	*/
		4281	void arm_correlate_f32(
		4282	float32_t * pSrcA,
		4283	uint32_t srcALen,
		4284	float32_t * pSrcB,
		4285	uint32_t srcBLen,
		4286	float32_t * pDst);
		4287
		4288
		4289	/**
		4290	* @brief Correlation of Q15 sequences
5	mjames	4291	* @param[in] pSrcA points to the first input sequence.
		4292	* @param[in] srcALen length of the first input sequence.
		4293	* @param[in] pSrcB points to the second input sequence.
		4294	* @param[in] srcBLen length of the second input sequence.
		4295	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
		4296	* @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
2	mjames	4297	*/
		4298	void arm_correlate_opt_q15(
		4299	q15_t * pSrcA,
		4300	uint32_t srcALen,
		4301	q15_t * pSrcB,
		4302	uint32_t srcBLen,
		4303	q15_t * pDst,
		4304	q15_t * pScratch);
		4305
		4306
		4307	/**
		4308	* @brief Correlation of Q15 sequences.
5	mjames	4309	* @param[in] pSrcA points to the first input sequence.
		4310	* @param[in] srcALen length of the first input sequence.
		4311	* @param[in] pSrcB points to the second input sequence.
		4312	* @param[in] srcBLen length of the second input sequence.
		4313	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
2	mjames	4314	*/
		4315
		4316	void arm_correlate_q15(
		4317	q15_t * pSrcA,
		4318	uint32_t srcALen,
		4319	q15_t * pSrcB,
		4320	uint32_t srcBLen,
		4321	q15_t * pDst);
		4322
5	mjames	4323
2	mjames	4324	/**
		4325	* @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
5	mjames	4326	* @param[in] pSrcA points to the first input sequence.
		4327	* @param[in] srcALen length of the first input sequence.
		4328	* @param[in] pSrcB points to the second input sequence.
		4329	* @param[in] srcBLen length of the second input sequence.
		4330	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
2	mjames	4331	*/
		4332
		4333	void arm_correlate_fast_q15(
5	mjames	4334	q15_t * pSrcA,
		4335	uint32_t srcALen,
		4336	q15_t * pSrcB,
		4337	uint32_t srcBLen,
		4338	q15_t * pDst);
2	mjames	4339
		4340
		4341	/**
		4342	* @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
5	mjames	4343	* @param[in] pSrcA points to the first input sequence.
		4344	* @param[in] srcALen length of the first input sequence.
		4345	* @param[in] pSrcB points to the second input sequence.
		4346	* @param[in] srcBLen length of the second input sequence.
		4347	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
		4348	* @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
2	mjames	4349	*/
		4350	void arm_correlate_fast_opt_q15(
		4351	q15_t * pSrcA,
		4352	uint32_t srcALen,
		4353	q15_t * pSrcB,
		4354	uint32_t srcBLen,
		4355	q15_t * pDst,
		4356	q15_t * pScratch);
		4357
5	mjames	4358
2	mjames	4359	/**
		4360	* @brief Correlation of Q31 sequences.
5	mjames	4361	* @param[in] pSrcA points to the first input sequence.
		4362	* @param[in] srcALen length of the first input sequence.
		4363	* @param[in] pSrcB points to the second input sequence.
		4364	* @param[in] srcBLen length of the second input sequence.
		4365	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
2	mjames	4366	*/
		4367	void arm_correlate_q31(
		4368	q31_t * pSrcA,
		4369	uint32_t srcALen,
		4370	q31_t * pSrcB,
		4371	uint32_t srcBLen,
		4372	q31_t * pDst);
		4373
5	mjames	4374
2	mjames	4375	/**
		4376	* @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
5	mjames	4377	* @param[in] pSrcA points to the first input sequence.
		4378	* @param[in] srcALen length of the first input sequence.
		4379	* @param[in] pSrcB points to the second input sequence.
		4380	* @param[in] srcBLen length of the second input sequence.
		4381	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
2	mjames	4382	*/
		4383	void arm_correlate_fast_q31(
		4384	q31_t * pSrcA,
		4385	uint32_t srcALen,
		4386	q31_t * pSrcB,
		4387	uint32_t srcBLen,
		4388	q31_t * pDst);
		4389
		4390
		4391	/**
		4392	* @brief Correlation of Q7 sequences.
5	mjames	4393	* @param[in] pSrcA points to the first input sequence.
		4394	* @param[in] srcALen length of the first input sequence.
		4395	* @param[in] pSrcB points to the second input sequence.
		4396	* @param[in] srcBLen length of the second input sequence.
		4397	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
		4398	* @param[in] pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
		4399	* @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
2	mjames	4400	*/
		4401	void arm_correlate_opt_q7(
		4402	q7_t * pSrcA,
		4403	uint32_t srcALen,
		4404	q7_t * pSrcB,
		4405	uint32_t srcBLen,
		4406	q7_t * pDst,
		4407	q15_t * pScratch1,
		4408	q15_t * pScratch2);
		4409
		4410
		4411	/**
		4412	* @brief Correlation of Q7 sequences.
5	mjames	4413	* @param[in] pSrcA points to the first input sequence.
		4414	* @param[in] srcALen length of the first input sequence.
		4415	* @param[in] pSrcB points to the second input sequence.
		4416	* @param[in] srcBLen length of the second input sequence.
		4417	* @param[out] pDst points to the block of output data. Length 2 * max(srcALen, srcBLen) - 1.
2	mjames	4418	*/
		4419	void arm_correlate_q7(
		4420	q7_t * pSrcA,
		4421	uint32_t srcALen,
		4422	q7_t * pSrcB,
		4423	uint32_t srcBLen,
		4424	q7_t * pDst);
		4425
		4426
		4427	/**
		4428	* @brief Instance structure for the floating-point sparse FIR filter.
		4429	*/
		4430	typedef struct
		4431	{
		4432	uint16_t numTaps; /**< number of coefficients in the filter. */
		4433	uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
		4434	float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
		4435	float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4436	uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
		4437	int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
		4438	} arm_fir_sparse_instance_f32;
		4439
		4440	/**
		4441	* @brief Instance structure for the Q31 sparse FIR filter.
		4442	*/
		4443	typedef struct
		4444	{
		4445	uint16_t numTaps; /**< number of coefficients in the filter. */
		4446	uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
		4447	q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
		4448	q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4449	uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
		4450	int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
		4451	} arm_fir_sparse_instance_q31;
		4452
		4453	/**
		4454	* @brief Instance structure for the Q15 sparse FIR filter.
		4455	*/
		4456	typedef struct
		4457	{
		4458	uint16_t numTaps; /**< number of coefficients in the filter. */
		4459	uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
		4460	q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
		4461	q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4462	uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
		4463	int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
		4464	} arm_fir_sparse_instance_q15;
		4465
		4466	/**
		4467	* @brief Instance structure for the Q7 sparse FIR filter.
		4468	*/
		4469	typedef struct
		4470	{
		4471	uint16_t numTaps; /**< number of coefficients in the filter. */
		4472	uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */
		4473	q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
		4474	q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */
		4475	uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */
		4476	int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */
		4477	} arm_fir_sparse_instance_q7;
		4478
5	mjames	4479
2	mjames	4480	/**
		4481	* @brief Processing function for the floating-point sparse FIR filter.
5	mjames	4482	* @param[in] S points to an instance of the floating-point sparse FIR structure.
		4483	* @param[in] pSrc points to the block of input data.
		4484	* @param[out] pDst points to the block of output data
		4485	* @param[in] pScratchIn points to a temporary buffer of size blockSize.
2	mjames	4486	* @param[in] blockSize number of input samples to process per call.
		4487	*/
		4488	void arm_fir_sparse_f32(
		4489	arm_fir_sparse_instance_f32 * S,
		4490	float32_t * pSrc,
		4491	float32_t * pDst,
		4492	float32_t * pScratchIn,
		4493	uint32_t blockSize);
		4494
5	mjames	4495
2	mjames	4496	/**
		4497	* @brief Initialization function for the floating-point sparse FIR filter.
5	mjames	4498	* @param[in,out] S points to an instance of the floating-point sparse FIR structure.
2	mjames	4499	* @param[in] numTaps number of nonzero coefficients in the filter.
5	mjames	4500	* @param[in] pCoeffs points to the array of filter coefficients.
		4501	* @param[in] pState points to the state buffer.
		4502	* @param[in] pTapDelay points to the array of offset times.
2	mjames	4503	* @param[in] maxDelay maximum offset time supported.
		4504	* @param[in] blockSize number of samples that will be processed per block.
		4505	*/
		4506	void arm_fir_sparse_init_f32(
		4507	arm_fir_sparse_instance_f32 * S,
		4508	uint16_t numTaps,
		4509	float32_t * pCoeffs,
		4510	float32_t * pState,
		4511	int32_t * pTapDelay,
		4512	uint16_t maxDelay,
		4513	uint32_t blockSize);
		4514
5	mjames	4515
2	mjames	4516	/**
		4517	* @brief Processing function for the Q31 sparse FIR filter.
5	mjames	4518	* @param[in] S points to an instance of the Q31 sparse FIR structure.
		4519	* @param[in] pSrc points to the block of input data.
		4520	* @param[out] pDst points to the block of output data
		4521	* @param[in] pScratchIn points to a temporary buffer of size blockSize.
2	mjames	4522	* @param[in] blockSize number of input samples to process per call.
		4523	*/
		4524	void arm_fir_sparse_q31(
		4525	arm_fir_sparse_instance_q31 * S,
		4526	q31_t * pSrc,
		4527	q31_t * pDst,
		4528	q31_t * pScratchIn,
		4529	uint32_t blockSize);
		4530
5	mjames	4531
2	mjames	4532	/**
		4533	* @brief Initialization function for the Q31 sparse FIR filter.
5	mjames	4534	* @param[in,out] S points to an instance of the Q31 sparse FIR structure.
2	mjames	4535	* @param[in] numTaps number of nonzero coefficients in the filter.
5	mjames	4536	* @param[in] pCoeffs points to the array of filter coefficients.
		4537	* @param[in] pState points to the state buffer.
		4538	* @param[in] pTapDelay points to the array of offset times.
2	mjames	4539	* @param[in] maxDelay maximum offset time supported.
		4540	* @param[in] blockSize number of samples that will be processed per block.
		4541	*/
		4542	void arm_fir_sparse_init_q31(
		4543	arm_fir_sparse_instance_q31 * S,
		4544	uint16_t numTaps,
		4545	q31_t * pCoeffs,
		4546	q31_t * pState,
		4547	int32_t * pTapDelay,
		4548	uint16_t maxDelay,
		4549	uint32_t blockSize);
		4550
5	mjames	4551
2	mjames	4552	/**
		4553	* @brief Processing function for the Q15 sparse FIR filter.
5	mjames	4554	* @param[in] S points to an instance of the Q15 sparse FIR structure.
		4555	* @param[in] pSrc points to the block of input data.
		4556	* @param[out] pDst points to the block of output data
		4557	* @param[in] pScratchIn points to a temporary buffer of size blockSize.
		4558	* @param[in] pScratchOut points to a temporary buffer of size blockSize.
2	mjames	4559	* @param[in] blockSize number of input samples to process per call.
		4560	*/
		4561	void arm_fir_sparse_q15(
		4562	arm_fir_sparse_instance_q15 * S,
		4563	q15_t * pSrc,
		4564	q15_t * pDst,
		4565	q15_t * pScratchIn,
		4566	q31_t * pScratchOut,
		4567	uint32_t blockSize);
		4568
		4569
		4570	/**
		4571	* @brief Initialization function for the Q15 sparse FIR filter.
5	mjames	4572	* @param[in,out] S points to an instance of the Q15 sparse FIR structure.
2	mjames	4573	* @param[in] numTaps number of nonzero coefficients in the filter.
5	mjames	4574	* @param[in] pCoeffs points to the array of filter coefficients.
		4575	* @param[in] pState points to the state buffer.
		4576	* @param[in] pTapDelay points to the array of offset times.
2	mjames	4577	* @param[in] maxDelay maximum offset time supported.
		4578	* @param[in] blockSize number of samples that will be processed per block.
		4579	*/
		4580	void arm_fir_sparse_init_q15(
		4581	arm_fir_sparse_instance_q15 * S,
		4582	uint16_t numTaps,
		4583	q15_t * pCoeffs,
		4584	q15_t * pState,
		4585	int32_t * pTapDelay,
		4586	uint16_t maxDelay,
		4587	uint32_t blockSize);
		4588
5	mjames	4589
2	mjames	4590	/**
		4591	* @brief Processing function for the Q7 sparse FIR filter.
5	mjames	4592	* @param[in] S points to an instance of the Q7 sparse FIR structure.
		4593	* @param[in] pSrc points to the block of input data.
		4594	* @param[out] pDst points to the block of output data
		4595	* @param[in] pScratchIn points to a temporary buffer of size blockSize.
		4596	* @param[in] pScratchOut points to a temporary buffer of size blockSize.
2	mjames	4597	* @param[in] blockSize number of input samples to process per call.
		4598	*/
		4599	void arm_fir_sparse_q7(
		4600	arm_fir_sparse_instance_q7 * S,
		4601	q7_t * pSrc,
		4602	q7_t * pDst,
		4603	q7_t * pScratchIn,
		4604	q31_t * pScratchOut,
		4605	uint32_t blockSize);
		4606
5	mjames	4607
2	mjames	4608	/**
		4609	* @brief Initialization function for the Q7 sparse FIR filter.
5	mjames	4610	* @param[in,out] S points to an instance of the Q7 sparse FIR structure.
2	mjames	4611	* @param[in] numTaps number of nonzero coefficients in the filter.
5	mjames	4612	* @param[in] pCoeffs points to the array of filter coefficients.
		4613	* @param[in] pState points to the state buffer.
		4614	* @param[in] pTapDelay points to the array of offset times.
2	mjames	4615	* @param[in] maxDelay maximum offset time supported.
		4616	* @param[in] blockSize number of samples that will be processed per block.
		4617	*/
		4618	void arm_fir_sparse_init_q7(
		4619	arm_fir_sparse_instance_q7 * S,
		4620	uint16_t numTaps,
		4621	q7_t * pCoeffs,
		4622	q7_t * pState,
		4623	int32_t * pTapDelay,
		4624	uint16_t maxDelay,
		4625	uint32_t blockSize);
		4626
		4627
5	mjames	4628	/**
2	mjames	4629	* @brief Floating-point sin_cos function.
5	mjames	4630	* @param[in] theta input value in degrees
		4631	* @param[out] pSinVal points to the processed sine output.
		4632	* @param[out] pCosVal points to the processed cos output.
2	mjames	4633	*/
		4634	void arm_sin_cos_f32(
		4635	float32_t theta,
		4636	float32_t * pSinVal,
5	mjames	4637	float32_t * pCosVal);
2	mjames	4638
5	mjames	4639
		4640	/**
2	mjames	4641	* @brief Q31 sin_cos function.
		4642	* @param[in] theta scaled input value in degrees
5	mjames	4643	* @param[out] pSinVal points to the processed sine output.
		4644	* @param[out] pCosVal points to the processed cosine output.
2	mjames	4645	*/
		4646	void arm_sin_cos_q31(
		4647	q31_t theta,
		4648	q31_t * pSinVal,
		4649	q31_t * pCosVal);
		4650
		4651
		4652	/**
		4653	* @brief Floating-point complex conjugate.
5	mjames	4654	* @param[in] pSrc points to the input vector
		4655	* @param[out] pDst points to the output vector
		4656	* @param[in] numSamples number of complex samples in each vector
2	mjames	4657	*/
		4658	void arm_cmplx_conj_f32(
		4659	float32_t * pSrc,
		4660	float32_t * pDst,
		4661	uint32_t numSamples);
		4662
		4663	/**
		4664	* @brief Q31 complex conjugate.
5	mjames	4665	* @param[in] pSrc points to the input vector
		4666	* @param[out] pDst points to the output vector
		4667	* @param[in] numSamples number of complex samples in each vector
2	mjames	4668	*/
		4669	void arm_cmplx_conj_q31(
		4670	q31_t * pSrc,
		4671	q31_t * pDst,
		4672	uint32_t numSamples);
		4673
5	mjames	4674
2	mjames	4675	/**
		4676	* @brief Q15 complex conjugate.
5	mjames	4677	* @param[in] pSrc points to the input vector
		4678	* @param[out] pDst points to the output vector
		4679	* @param[in] numSamples number of complex samples in each vector
2	mjames	4680	*/
		4681	void arm_cmplx_conj_q15(
		4682	q15_t * pSrc,
		4683	q15_t * pDst,
		4684	uint32_t numSamples);
		4685
		4686
		4687	/**
		4688	* @brief Floating-point complex magnitude squared
5	mjames	4689	* @param[in] pSrc points to the complex input vector
		4690	* @param[out] pDst points to the real output vector
		4691	* @param[in] numSamples number of complex samples in the input vector
2	mjames	4692	*/
		4693	void arm_cmplx_mag_squared_f32(
		4694	float32_t * pSrc,
		4695	float32_t * pDst,
		4696	uint32_t numSamples);
		4697
5	mjames	4698
2	mjames	4699	/**
		4700	* @brief Q31 complex magnitude squared
5	mjames	4701	* @param[in] pSrc points to the complex input vector
		4702	* @param[out] pDst points to the real output vector
		4703	* @param[in] numSamples number of complex samples in the input vector
2	mjames	4704	*/
		4705	void arm_cmplx_mag_squared_q31(
		4706	q31_t * pSrc,
		4707	q31_t * pDst,
		4708	uint32_t numSamples);
		4709
5	mjames	4710
2	mjames	4711	/**
		4712	* @brief Q15 complex magnitude squared
5	mjames	4713	* @param[in] pSrc points to the complex input vector
		4714	* @param[out] pDst points to the real output vector
		4715	* @param[in] numSamples number of complex samples in the input vector
2	mjames	4716	*/
		4717	void arm_cmplx_mag_squared_q15(
		4718	q15_t * pSrc,
		4719	q15_t * pDst,
		4720	uint32_t numSamples);
		4721
		4722
		4723	/**
		4724	* @ingroup groupController
		4725	*/
		4726
		4727	/**
		4728	* @defgroup PID PID Motor Control
		4729	*
		4730	* A Proportional Integral Derivative (PID) controller is a generic feedback control
		4731	* loop mechanism widely used in industrial control systems.
		4732	* A PID controller is the most commonly used type of feedback controller.
		4733	*
		4734	* This set of functions implements (PID) controllers
		4735	* for Q15, Q31, and floating-point data types. The functions operate on a single sample
		4736	* of data and each call to the function returns a single processed value.
		4737	* <code>S</code> points to an instance of the PID control data structure. <code>in</code>
		4738	* is the input sample value. The functions return the output value.
		4739	*
		4740	* \par Algorithm:
		4741	* <pre>
		4742	* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
		4743	* A0 = Kp + Ki + Kd
		4744	* A1 = (-Kp ) - (2 * Kd )
		4745	* A2 = Kd </pre>
		4746	*
		4747	* \par
		4748	* where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
		4749	*
		4750	* \par
		4751	* \image html PID.gif "Proportional Integral Derivative Controller"
		4752	*
		4753	* \par
		4754	* The PID controller calculates an "error" value as the difference between
		4755	* the measured output and the reference input.
		4756	* The controller attempts to minimize the error by adjusting the process control inputs.
		4757	* The proportional value determines the reaction to the current error,
		4758	* the integral value determines the reaction based on the sum of recent errors,
		4759	* and the derivative value determines the reaction based on the rate at which the error has been changing.
		4760	*
		4761	* \par Instance Structure
		4762	* The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
		4763	* A separate instance structure must be defined for each PID Controller.
		4764	* There are separate instance structure declarations for each of the 3 supported data types.
		4765	*
		4766	* \par Reset Functions
		4767	* There is also an associated reset function for each data type which clears the state array.
		4768	*
		4769	* \par Initialization Functions
		4770	* There is also an associated initialization function for each data type.
		4771	* The initialization function performs the following operations:
		4772	* - Initializes the Gains A0, A1, A2 from Kp, Ki, Kd gains.
		4773	* - Zeros out the values in the state buffer.
		4774	*
		4775	* \par
		4776	* Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
		4777	*
		4778	* \par Fixed-Point Behavior
		4779	* Care must be taken when using the fixed-point versions of the PID Controller functions.
		4780	* In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
		4781	* Refer to the function specific documentation below for usage guidelines.
		4782	*/
		4783
		4784	/**
		4785	* @addtogroup PID
		4786	* @{
		4787	*/
		4788
		4789	/**
		4790	* @brief Process function for the floating-point PID Control: computes one output sample from one input sample.
5	mjames	4791	* @param[in,out] S points to an instance of the floating-point PID Control structure; its gains A0, A1, A2 are read and its state[0..2] history is rewritten on every call.
		4792	* @param[in] in input sample to process
2	mjames	4793	* @return out processed output sample y[n].
		4794	*/
		4795	static __INLINE float32_t arm_pid_f32(
		4796	arm_pid_instance_f32 * S,
		4797	float32_t in)
		4798	{
		4799	float32_t out;
		4800
		4801	/* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2], with state[0] = x[n-1], state[1] = x[n-2], state[2] = y[n-1] */
		4802	out = (S->A0 * in) +
		4803	(S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
		4804
		4805	/* Update state: age x[n-1] into the x[n-2] slot first, then store the new input and the new output for the next call */
		4806	S->state[1] = S->state[0];
		4807	S->state[0] = in;
		4808	S->state[2] = out;
		4809
		4810	/* return the freshly computed output sample to the application */
		4811	return (out);
		4812
		4813	}
		4814
		4815	/**
		4816	* @brief Process function for the Q31 PID Control: computes one output sample from one input sample.
5	mjames	4817	* @param[in,out] S points to an instance of the Q31 PID Control structure; its gains A0, A1, A2 are read and its state[0..2] history is rewritten on every call.
		4818	* @param[in] in input sample to process
2	mjames	4819	* @return out processed output sample y[n].
		4820	*
		4821	* <b>Scaling and Overflow Behavior:</b>
		4822	* \par
		4823	* The function is implemented using an internal 64-bit accumulator.
		4824	* The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
		4825	* Thus, if the accumulator result overflows it wraps around rather than clipping.
		4826	* In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
		4827	* After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. NOTE(review): the body below narrows with a plain (q31_t) cast and adds y[n-1] with a plain +=; no explicit saturation step is visible here - confirm.
		4828	*/
		4829	static __INLINE q31_t arm_pid_q31(
		4830	arm_pid_instance_q31 * S,
		4831	q31_t in)
		4832	{
		4833	q63_t acc;
		4834	q31_t out;
		4835
		4836	/* acc = A0 * x[n]; the q63_t cast widens the product to 64 bits before multiplying */
		4837	acc = (q63_t) S->A0 * in;
		4838
		4839	/* acc += A1 * x[n-1], where state[0] holds x[n-1] */
		4840	acc += (q63_t) S->A1 * S->state[0];
		4841
		4842	/* acc += A2 * x[n-2], where state[1] holds x[n-2] */
		4843	acc += (q63_t) S->A2 * S->state[1];
		4844
		4845	/* convert output to 1.31 format to add y[n-1]: shift the accumulator right by 31 bits and narrow to q31_t */
		4846	out = (q31_t) (acc >> 31u);
		4847
		4848	/* out += y[n-1], where state[2] holds the previous output */
		4849	out += S->state[2];
		4850
		4851	/* Update state: age x[n-1] into the x[n-2] slot first, then store the new input and the new output for the next call */
		4852	S->state[1] = S->state[0];
		4853	S->state[0] = in;
		4854	S->state[2] = out;
		4855
		4856	/* return the freshly computed output sample to the application */
		4857	return (out);
		4858	}
		4859
5	mjames	4860
2	mjames	4861	/**
		4862	* @brief Process function for the Q15 PID Control.
5	mjames	4863	* @param[in,out] S points to an instance of the Q15 PID Control structure
		4864	* @param[in] in input sample to process
2	mjames	4865	* @return out processed output sample.
		4866	*
		4867	* <b>Scaling and Overflow Behavior:</b>
		4868	* \par
		4869	* The function is implemented using a 64-bit internal accumulator.
		4870	* Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
		4871	* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
		4872	* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
		4873	* After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
		4874	* Lastly, the accumulator is saturated to yield a result in 1.15 format.
		4875	*/
		4876	static __INLINE q15_t arm_pid_q15(
		4877	arm_pid_instance_q15 * S,
		4878	q15_t in)
		4879	{
		4880	q63_t acc;
		4881	q15_t out;
		4882
		4883	#ifndef ARM_MATH_CM0_FAMILY
		4884	__SIMD32_TYPE *vstate;
		4885
		4886	/* Implementation of PID controller */
		4887
		4888	/* acc = A0 * x[n] */
5	mjames	4889	acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in);
2	mjames	4890
		4891	/* acc += A1 * x[n-1] + A2 * x[n-2] */
		4892	vstate = __SIMD32_CONST(S->state);
5	mjames	4893	acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)*vstate, (uint64_t)acc);
2	mjames	4894	#else
		4895	/* acc = A0 * x[n] */
		4896	acc = ((q31_t) S->A0) * in;
		4897
		4898	/* acc += A1 * x[n-1] + A2 * x[n-2] */
		4899	acc += (q31_t) S->A1 * S->state[0];
		4900	acc += (q31_t) S->A2 * S->state[1];
		4901	#endif
		4902
		4903	/* acc += y[n-1] */
		4904	acc += (q31_t) S->state[2] << 15;
		4905
		4906	/* saturate the output */
		4907	out = (q15_t) (__SSAT((acc >> 15), 16));
		4908
		4909	/* Update state */
		4910	S->state[1] = S->state[0];
		4911	S->state[0] = in;
		4912	S->state[2] = out;
		4913
		4914	/* return to application */
		4915	return (out);
		4916	}
		4917
		4918	/**
		4919	* @} end of PID group
		4920	*/
		4921
		4922
		4923	/**
		4924	* @brief Floating-point matrix inverse.
5	mjames	4925	* @param[in] src points to the instance of the input floating-point matrix structure.
		4926	* @param[out] dst points to the instance of the output floating-point matrix structure.
2	mjames	4927	* @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
		4928	* If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
		4929	*/
		4930	arm_status arm_mat_inverse_f32(
		4931	const arm_matrix_instance_f32 * src,
		4932	arm_matrix_instance_f32 * dst);
		4933
		4934
		4935	/**
		4936	* @brief Floating-point matrix inverse.
5	mjames	4937	* @param[in] src points to the instance of the input floating-point matrix structure.
		4938	* @param[out] dst points to the instance of the output floating-point matrix structure.
2	mjames	4939	* @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
		4940	* If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
		4941	*/
		4942	arm_status arm_mat_inverse_f64(
		4943	const arm_matrix_instance_f64 * src,
		4944	arm_matrix_instance_f64 * dst);
		4945
		4946
		4947
		4948	/**
		4949	* @ingroup groupController
		4950	*/
		4951
		4952	/**
		4953	* @defgroup clarke Vector Clarke Transform
		4954	* Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
		4955	* Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
		4956	* in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
		4957	* When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
		4958	* \image html clarke.gif Stator current space vector and its components in (a,b).
		4959	* and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
		4960	* can be calculated using only <code>Ia</code> and <code>Ib</code>.
		4961	*
		4962	* The function operates on a single sample of data and each call to the function returns the processed output.
		4963	* The library provides separate functions for Q31 and floating-point data types.
		4964	* \par Algorithm
		4965	* \image html clarkeFormula.gif
		4966	* where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
		4967	* <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
		4968	* \par Fixed-Point Behavior
		4969	* Care must be taken when using the Q31 version of the Clarke transform.
		4970	* In particular, the overflow and saturation behavior of the accumulator used must be considered.
		4971	* Refer to the function specific documentation below for usage guidelines.
		4972	*/
		4973
		4974	/**
		4975	* @addtogroup clarke
		4976	* @{
		4977	*/
		4978
		4979	/**
		4980	*
		4981	* @brief Floating-point Clarke transform
5	mjames	4982	* @param[in] Ia input three-phase coordinate <code>a</code>
		4983	* @param[in] Ib input three-phase coordinate <code>b</code>
		4984	* @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
		4985	* @param[out] pIbeta points to output two-phase orthogonal vector axis beta
2	mjames	4986	*/
		4987	static __INLINE void arm_clarke_f32(
		4988	float32_t Ia,
		4989	float32_t Ib,
		4990	float32_t * pIalpha,
		4991	float32_t * pIbeta)
		4992	{
		4993	/* Calculate pIalpha using the equation, pIalpha = Ia */
		4994	*pIalpha = Ia;
		4995
		4996	/* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
5	mjames	4997	*pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
2	mjames	4998	}
		4999
5	mjames	5000
2	mjames	5001	/**
		5002	* @brief Clarke transform for Q31 version
5	mjames	5003	* @param[in] Ia input three-phase coordinate <code>a</code>
		5004	* @param[in] Ib input three-phase coordinate <code>b</code>
		5005	* @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
		5006	* @param[out] pIbeta points to output two-phase orthogonal vector axis beta
2	mjames	5007	*
		5008	* <b>Scaling and Overflow Behavior:</b>
		5009	* \par
		5010	* The function is implemented using an internal 32-bit accumulator.
		5011	* The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
		5012	* There is saturation on the addition, hence there is no risk of overflow.
		5013	*/
		5014	static __INLINE void arm_clarke_q31(
		5015	q31_t Ia,
		5016	q31_t Ib,
		5017	q31_t * pIalpha,
		5018	q31_t * pIbeta)
		5019	{
		5020	q31_t product1, product2; /* Temporary variables used to store intermediate results */
		5021
		5022	/* Calculating pIalpha from Ia by equation pIalpha = Ia */
		5023	*pIalpha = Ia;
		5024
		5025	/* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
		5026	product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
		5027
		5028	/* Intermediate product is calculated by (2/sqrt(3) * Ib) */
		5029	product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
		5030
		5031	/* pIbeta is calculated by adding the intermediate products */
		5032	*pIbeta = __QADD(product1, product2);
		5033	}
		5034
		5035	/**
		5036	* @} end of clarke group
		5037	*/
		5038
		5039	/**
		5040	* @brief Converts the elements of the Q7 vector to Q31 vector.
5	mjames	5041	* @param[in] pSrc input pointer
		5042	* @param[out] pDst output pointer
		5043	* @param[in] blockSize number of samples to process
2	mjames	5044	*/
		5045	void arm_q7_to_q31(
		5046	q7_t * pSrc,
		5047	q31_t * pDst,
		5048	uint32_t blockSize);
		5049
		5050
		5051
		5052	/**
		5053	* @ingroup groupController
		5054	*/
		5055
		5056	/**
		5057	* @defgroup inv_clarke Vector Inverse Clarke Transform
		5058	* Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
		5059	*
		5060	* The function operates on a single sample of data and each call to the function returns the processed output.
		5061	* The library provides separate functions for Q31 and floating-point data types.
		5062	* \par Algorithm
		5063	* \image html clarkeInvFormula.gif
		5064	* where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
		5065	* <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
		5066	* \par Fixed-Point Behavior
		5067	* Care must be taken when using the Q31 version of the Clarke transform.
		5068	* In particular, the overflow and saturation behavior of the accumulator used must be considered.
		5069	* Refer to the function specific documentation below for usage guidelines.
		5070	*/
		5071
		5072	/**
		5073	* @addtogroup inv_clarke
		5074	* @{
		5075	*/
		5076
		5077	/**
		5078	* @brief Floating-point Inverse Clarke transform
5	mjames	5079	* @param[in] Ialpha input two-phase orthogonal vector axis alpha
		5080	* @param[in] Ibeta input two-phase orthogonal vector axis beta
		5081	* @param[out] pIa points to output three-phase coordinate <code>a</code>
		5082	* @param[out] pIb points to output three-phase coordinate <code>b</code>
2	mjames	5083	*/
		5084	static __INLINE void arm_inv_clarke_f32(
		5085	float32_t Ialpha,
		5086	float32_t Ibeta,
		5087	float32_t * pIa,
		5088	float32_t * pIb)
		5089	{
		5090	/* Calculating pIa from Ialpha by equation pIa = Ialpha */
		5091	*pIa = Ialpha;
		5092
		5093	/* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5	mjames	5094	*pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
2	mjames	5095	}
		5096
5	mjames	5097
2	mjames	5098	/**
		5099	* @brief Inverse Clarke transform for Q31 version
5	mjames	5100	* @param[in] Ialpha input two-phase orthogonal vector axis alpha
		5101	* @param[in] Ibeta input two-phase orthogonal vector axis beta
		5102	* @param[out] pIa points to output three-phase coordinate <code>a</code>
		5103	* @param[out] pIb points to output three-phase coordinate <code>b</code>
2	mjames	5104	*
		5105	* <b>Scaling and Overflow Behavior:</b>
		5106	* \par
		5107	* The function is implemented using an internal 32-bit accumulator.
		5108	* The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
		5109	* There is saturation on the subtraction, hence there is no risk of overflow.
		5110	*/
		5111	static __INLINE void arm_inv_clarke_q31(
		5112	q31_t Ialpha,
		5113	q31_t Ibeta,
		5114	q31_t * pIa,
		5115	q31_t * pIb)
		5116	{
		5117	q31_t product1, product2; /* Temporary variables used to store intermediate results */
		5118
		5119	/* Calculating pIa from Ialpha by equation pIa = Ialpha */
		5120	*pIa = Ialpha;
		5121
		5122	/* Intermediate product is calculated by (1/(2sqrt(3)) Ia) */
		5123	product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
		5124
		5125	/* Intermediate product is calculated by (1/sqrt(3) * pIb) */
		5126	product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
		5127
		5128	/* pIb is calculated by subtracting the products */
		5129	*pIb = __QSUB(product2, product1);
		5130	}
		5131
		5132	/**
		5133	* @} end of inv_clarke group
		5134	*/
		5135
		5136	/**
		5137	* @brief Converts the elements of the Q7 vector to Q15 vector.
5	mjames	5138	* @param[in] pSrc input pointer
		5139	* @param[out] pDst output pointer
		5140	* @param[in] blockSize number of samples to process
2	mjames	5141	*/
		5142	void arm_q7_to_q15(
		5143	q7_t * pSrc,
		5144	q15_t * pDst,
		5145	uint32_t blockSize);
		5146
		5147
		5148
		5149	/**
		5150	* @ingroup groupController
		5151	*/
		5152
		5153	/**
		5154	* @defgroup park Vector Park Transform
		5155	*
		5156	* Forward Park transform converts the input two-coordinate vector to flux and torque components.
		5157	* The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
		5158	* from the stationary to the moving reference frame and control the spatial relationship between
		5159	* the stator vector current and rotor flux vector.
		5160	* If we consider the d axis aligned with the rotor flux, the diagram below shows the
		5161	* current vector and the relationship from the two reference frames:
		5162	* \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
		5163	*
		5164	* The function operates on a single sample of data and each call to the function returns the processed output.
		5165	* The library provides separate functions for Q31 and floating-point data types.
		5166	* \par Algorithm
		5167	* \image html parkFormula.gif
		5168	* where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
		5169	* <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
		5170	* cosine and sine values of theta (rotor flux position).
		5171	* \par Fixed-Point Behavior
		5172	* Care must be taken when using the Q31 version of the Park transform.
		5173	* In particular, the overflow and saturation behavior of the accumulator used must be considered.
		5174	* Refer to the function specific documentation below for usage guidelines.
		5175	*/
		5176
		5177	/**
		5178	* @addtogroup park
		5179	* @{
		5180	*/
		5181
		5182	/**
		5183	* @brief Floating-point Park transform
5	mjames	5184	* @param[in] Ialpha input two-phase vector coordinate alpha
		5185	* @param[in] Ibeta input two-phase vector coordinate beta
		5186	* @param[out] pId points to output rotor reference frame d
		5187	* @param[out] pIq points to output rotor reference frame q
		5188	* @param[in] sinVal sine value of rotation angle theta
		5189	* @param[in] cosVal cosine value of rotation angle theta
2	mjames	5190	*
		5191	* The function implements the forward Park transform.
		5192	*
		5193	*/
		5194	static __INLINE void arm_park_f32(
		5195	float32_t Ialpha,
		5196	float32_t Ibeta,
		5197	float32_t * pId,
		5198	float32_t * pIq,
		5199	float32_t sinVal,
		5200	float32_t cosVal)
		5201	{
		5202	/* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
		5203	*pId = Ialpha * cosVal + Ibeta * sinVal;
		5204
		5205	/* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
		5206	*pIq = -Ialpha * sinVal + Ibeta * cosVal;
		5207	}
		5208
5	mjames	5209
2	mjames	5210	/**
		5211	* @brief Park transform for Q31 version
5	mjames	5212	* @param[in] Ialpha input two-phase vector coordinate alpha
		5213	* @param[in] Ibeta input two-phase vector coordinate beta
		5214	* @param[out] pId points to output rotor reference frame d
		5215	* @param[out] pIq points to output rotor reference frame q
		5216	* @param[in] sinVal sine value of rotation angle theta
		5217	* @param[in] cosVal cosine value of rotation angle theta
2	mjames	5218	*
		5219	* <b>Scaling and Overflow Behavior:</b>
		5220	* \par
		5221	* The function is implemented using an internal 32-bit accumulator.
		5222	* The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
		5223	* There is saturation on the addition and subtraction, hence there is no risk of overflow.
		5224	*/
		5225	static __INLINE void arm_park_q31(
		5226	q31_t Ialpha,
		5227	q31_t Ibeta,
		5228	q31_t * pId,
		5229	q31_t * pIq,
		5230	q31_t sinVal,
		5231	q31_t cosVal)
		5232	{
		5233	q31_t product1, product2; /* Temporary variables used to store intermediate results */
		5234	q31_t product3, product4; /* Temporary variables used to store intermediate results */
		5235
		5236	/* Intermediate product is calculated by (Ialpha * cosVal) */
		5237	product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
		5238
		5239	/* Intermediate product is calculated by (Ibeta * sinVal) */
		5240	product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
		5241
		5242
		5243	/* Intermediate product is calculated by (Ialpha * sinVal) */
		5244	product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
		5245
		5246	/* Intermediate product is calculated by (Ibeta * cosVal) */
		5247	product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
		5248
		5249	/* Calculate pId by adding the two intermediate products 1 and 2 */
		5250	*pId = __QADD(product1, product2);
		5251
		5252	/* Calculate pIq by subtracting the two intermediate products 3 from 4 */
		5253	*pIq = __QSUB(product4, product3);
		5254	}
		5255
		5256	/**
		5257	* @} end of park group
		5258	*/
		5259
		5260	/**
		5261	* @brief Converts the elements of the Q7 vector to floating-point vector.
5	mjames	5262	* @param[in] pSrc is input pointer
		5263	* @param[out] pDst is output pointer
		5264	* @param[in] blockSize is the number of samples to process
2	mjames	5265	*/
		5266	void arm_q7_to_float(
		5267	q7_t * pSrc,
		5268	float32_t * pDst,
		5269	uint32_t blockSize);
		5270
		5271
		5272	/**
		5273	* @ingroup groupController
		5274	*/
		5275
		5276	/**
		5277	* @defgroup inv_park Vector Inverse Park transform
		5278	* Inverse Park transform converts the input flux and torque components to two-coordinate vector.
		5279	*
		5280	* The function operates on a single sample of data and each call to the function returns the processed output.
		5281	* The library provides separate functions for Q31 and floating-point data types.
		5282	* \par Algorithm
		5283	* \image html parkInvFormula.gif
		5284	* where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
		5285	* <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
		5286	* cosine and sine values of theta (rotor flux position).
		5287	* \par Fixed-Point Behavior
		5288	* Care must be taken when using the Q31 version of the Park transform.
		5289	* In particular, the overflow and saturation behavior of the accumulator used must be considered.
		5290	* Refer to the function specific documentation below for usage guidelines.
		5291	*/
		5292
		5293	/**
		5294	* @addtogroup inv_park
		5295	* @{
		5296	*/
		5297
		5298	/**
		5299	* @brief Floating-point Inverse Park transform
5	mjames	5300	* @param[in] Id input coordinate of rotor reference frame d
		5301	* @param[in] Iq input coordinate of rotor reference frame q
		5302	* @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
		5303	* @param[out] pIbeta points to output two-phase orthogonal vector axis beta
		5304	* @param[in] sinVal sine value of rotation angle theta
		5305	* @param[in] cosVal cosine value of rotation angle theta
2	mjames	5306	*/
		5307	static __INLINE void arm_inv_park_f32(
		5308	float32_t Id,
		5309	float32_t Iq,
		5310	float32_t * pIalpha,
		5311	float32_t * pIbeta,
		5312	float32_t sinVal,
		5313	float32_t cosVal)
		5314	{
		5315	/* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
		5316	*pIalpha = Id * cosVal - Iq * sinVal;
		5317
		5318	/* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
		5319	*pIbeta = Id * sinVal + Iq * cosVal;
		5320	}
		5321
		5322
		5323	/**
5	mjames	5324	* @brief Inverse Park transform for Q31 version
		5325	* @param[in] Id input coordinate of rotor reference frame d
		5326	* @param[in] Iq input coordinate of rotor reference frame q
		5327	* @param[out] pIalpha points to output two-phase orthogonal vector axis alpha
		5328	* @param[out] pIbeta points to output two-phase orthogonal vector axis beta
		5329	* @param[in] sinVal sine value of rotation angle theta
		5330	* @param[in] cosVal cosine value of rotation angle theta
2	mjames	5331	*
		5332	* <b>Scaling and Overflow Behavior:</b>
		5333	* \par
		5334	* The function is implemented using an internal 32-bit accumulator.
		5335	* The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
		5336	* There is saturation on the addition, hence there is no risk of overflow.
		5337	*/
		5338	static __INLINE void arm_inv_park_q31(
		5339	q31_t Id,
		5340	q31_t Iq,
		5341	q31_t * pIalpha,
		5342	q31_t * pIbeta,
		5343	q31_t sinVal,
		5344	q31_t cosVal)
		5345	{
		5346	q31_t product1, product2; /* Temporary variables used to store intermediate results */
		5347	q31_t product3, product4; /* Temporary variables used to store intermediate results */
		5348
		5349	/* Intermediate product is calculated by (Id * cosVal) */
		5350	product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
		5351
		5352	/* Intermediate product is calculated by (Iq * sinVal) */
		5353	product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
		5354
		5355
		5356	/* Intermediate product is calculated by (Id * sinVal) */
		5357	product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
		5358
		5359	/* Intermediate product is calculated by (Iq * cosVal) */
		5360	product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
		5361
		5362	/* Calculate pIalpha by using the two intermediate products 1 and 2 */
		5363	*pIalpha = __QSUB(product1, product2);
		5364
		5365	/* Calculate pIbeta by using the two intermediate products 3 and 4 */
		5366	*pIbeta = __QADD(product4, product3);
		5367	}
		5368
		5369	/**
		5370	* @} end of Inverse park group
		5371	*/
		5372
		5373
		5374	/**
		5375	* @brief Converts the elements of the Q31 vector to floating-point vector.
5	mjames	5376	* @param[in] pSrc is input pointer
		5377	* @param[out] pDst is output pointer
		5378	* @param[in] blockSize is the number of samples to process
2	mjames	5379	*/
		5380	void arm_q31_to_float(
		5381	q31_t * pSrc,
		5382	float32_t * pDst,
		5383	uint32_t blockSize);
		5384
		5385	/**
		5386	* @ingroup groupInterpolation
		5387	*/
		5388
		5389	/**
		5390	* @defgroup LinearInterpolate Linear Interpolation
		5391	*
		5392	* Linear interpolation is a method of curve fitting using linear polynomials.
		5393	* Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
		5394	*
		5395	* \par
		5396	* \image html LinearInterp.gif "Linear interpolation"
		5397	*
		5398	* \par
		5399	* A Linear Interpolate function calculates an output value(y), for the input(x)
		5400	* using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
		5401	*
		5402	* \par Algorithm:
		5403	* <pre>
		5404	* y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
		5405	* where x0, x1 are nearest values of input x
		5406	* y0, y1 are nearest values to output y
		5407	* </pre>
		5408	*
		5409	* \par
		5410	* This set of functions implements Linear interpolation process
		5411	* for Q7, Q15, Q31, and floating-point data types. The functions operate on a single
		5412	* sample of data and each call to the function returns a single processed value.
		5413	* <code>S</code> points to an instance of the Linear Interpolate function data structure.
		5414	* <code>x</code> is the input sample value. The functions returns the output value.
		5415	*
		5416	* \par
		5417	* if x is outside of the table boundary, Linear interpolation returns first value of the table
		5418	* if x is below input range and returns last value of table if x is above range.
		5419	*/
		5420
		5421	/**
		5422	* @addtogroup LinearInterpolate
		5423	* @{
		5424	*/
		5425
		5426	/**
		5427	* @brief Process function for the floating-point Linear Interpolation Function.
5	mjames	5428	* @param[in,out] S is an instance of the floating-point Linear Interpolation structure
		5429	* @param[in] x input sample to process
2	mjames	5430	* @return y processed output sample.
		5431	*
		5432	*/
		5433	static __INLINE float32_t arm_linear_interp_f32(
		5434	arm_linear_interp_instance_f32 * S,
		5435	float32_t x)
		5436	{
		5437	float32_t y;
		5438	float32_t x0, x1; /* Nearest input values */
		5439	float32_t y0, y1; /* Nearest output values */
		5440	float32_t xSpacing = S->xSpacing; /* spacing between input values */
		5441	int32_t i; /* Index variable */
		5442	float32_t *pYData = S->pYData; /* pointer to output table */
		5443
		5444	/* Calculation of index */
		5445	i = (int32_t) ((x - S->x1) / xSpacing);
		5446
		5447	if(i < 0)
		5448	{
		5449	/* Iniatilize output for below specified range as least output value of table */
		5450	y = pYData[0];
		5451	}
		5452	else if((uint32_t)i >= S->nValues)
		5453	{
		5454	/* Iniatilize output for above specified range as last output value of table */
		5455	y = pYData[S->nValues - 1];
		5456	}
		5457	else
		5458	{
		5459	/* Calculation of nearest input values */
5	mjames	5460	x0 = S->x1 + i * xSpacing;
2	mjames	5461	x1 = S->x1 + (i + 1) * xSpacing;
		5462
		5463	/* Read of nearest output values */
		5464	y0 = pYData[i];
		5465	y1 = pYData[i + 1];
		5466
		5467	/* Calculation of output */
		5468	y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
		5469
		5470	}
		5471
		5472	/* returns output value */
		5473	return (y);
		5474	}
		5475
5	mjames	5476
2	mjames	5477	/**
		5478	*
		5479	* @brief Process function for the Q31 Linear Interpolation Function.
5	mjames	5480	* @param[in] pYData pointer to Q31 Linear Interpolation table
		5481	* @param[in] x input sample to process
		5482	* @param[in] nValues number of table values
2	mjames	5483	* @return y processed output sample.
		5484	*
		5485	* \par
		5486	* Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
		5487	* This function can support maximum of table size 2^12.
		5488	*
		5489	*/
		5490	static __INLINE q31_t arm_linear_interp_q31(
		5491	q31_t * pYData,
		5492	q31_t x,
		5493	uint32_t nValues)
		5494	{
		5495	q31_t y; /* output */
		5496	q31_t y0, y1; /* Nearest output values */
		5497	q31_t fract; /* fractional part */
		5498	int32_t index; /* Index to read nearest output values */
		5499
		5500	/* Input is in 12.20 format */
		5501	/* 12 bits for the table index */
		5502	/* Index value calculation */
5	mjames	5503	index = ((x & (q31_t)0xFFF00000) >> 20);
2	mjames	5504
		5505	if(index >= (int32_t)(nValues - 1))
		5506	{
		5507	return (pYData[nValues - 1]);
		5508	}
		5509	else if(index < 0)
		5510	{
		5511	return (pYData[0]);
		5512	}
		5513	else
		5514	{
		5515	/* 20 bits for the fractional part */
		5516	/* shift left by 11 to keep fract in 1.31 format */
		5517	fract = (x & 0x000FFFFF) << 11;
		5518
		5519	/* Read two nearest output values from the index in 1.31(q31) format */
		5520	y0 = pYData[index];
5	mjames	5521	y1 = pYData[index + 1];
2	mjames	5522
		5523	/* Calculation of y0 * (1-fract) and y is in 2.30 format */
		5524	y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
		5525
		5526	/* Calculation of y0 * (1-fract) + y1 * fract and y is in 2.30 format */
		5527	y += ((q31_t) (((q63_t) y1 * fract) >> 32));
		5528
		5529	/* Convert y to 1.31 format */
		5530	return (y << 1u);
		5531	}
		5532	}
		5533
5	mjames	5534
2	mjames	5535	/**
		5536	*
		5537	* @brief Process function for the Q15 Linear Interpolation Function.
5	mjames	5538	* @param[in] pYData pointer to Q15 Linear Interpolation table
		5539	* @param[in] x input sample to process
		5540	* @param[in] nValues number of table values
2	mjames	5541	* @return y processed output sample.
		5542	*
		5543	* \par
		5544	* Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
		5545	* This function can support maximum of table size 2^12.
		5546	*
		5547	*/
		5548	static __INLINE q15_t arm_linear_interp_q15(
		5549	q15_t * pYData,
		5550	q31_t x,
		5551	uint32_t nValues)
		5552	{
		5553	q63_t y; /* output */
		5554	q15_t y0, y1; /* Nearest output values */
		5555	q31_t fract; /* fractional part */
		5556	int32_t index; /* Index to read nearest output values */
		5557
		5558	/* Input is in 12.20 format */
		5559	/* 12 bits for the table index */
		5560	/* Index value calculation */
5	mjames	5561	index = ((x & (int32_t)0xFFF00000) >> 20);
2	mjames	5562
		5563	if(index >= (int32_t)(nValues - 1))
		5564	{
		5565	return (pYData[nValues - 1]);
		5566	}
		5567	else if(index < 0)
		5568	{
		5569	return (pYData[0]);
		5570	}
		5571	else
		5572	{
		5573	/* 20 bits for the fractional part */
		5574	/* fract is in 12.20 format */
		5575	fract = (x & 0x000FFFFF);
		5576
		5577	/* Read two nearest output values from the index */
		5578	y0 = pYData[index];
5	mjames	5579	y1 = pYData[index + 1];
2	mjames	5580
		5581	/* Calculation of y0 * (1-fract) and y is in 13.35 format */
		5582	y = ((q63_t) y0 * (0xFFFFF - fract));
		5583
		5584	/* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
		5585	y += ((q63_t) y1 * (fract));
		5586
		5587	/* convert y to 1.15 format */
5	mjames	5588	return (q15_t) (y >> 20);
2	mjames	5589	}
5	mjames	5590	}
2	mjames	5591
		5592
		5593	/**
		5594	*
		5595	* @brief Process function for the Q7 Linear Interpolation Function.
5	mjames	5596	* @param[in] pYData pointer to Q7 Linear Interpolation table
		5597	* @param[in] x input sample to process
		5598	* @param[in] nValues number of table values
2	mjames	5599	* @return y processed output sample.
		5600	*
		5601	* \par
		5602	* Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
		5603	* This function can support maximum of table size 2^12.
		5604	*/
		5605	static __INLINE q7_t arm_linear_interp_q7(
		5606	q7_t * pYData,
		5607	q31_t x,
		5608	uint32_t nValues)
		5609	{
		5610	q31_t y; /* output */
		5611	q7_t y0, y1; /* Nearest output values */
		5612	q31_t fract; /* fractional part */
		5613	uint32_t index; /* Index to read nearest output values */
		5614
		5615	/* Input is in 12.20 format */
		5616	/* 12 bits for the table index */
		5617	/* Index value calculation */
		5618	if (x < 0)
		5619	{
		5620	return (pYData[0]);
		5621	}
		5622	index = (x >> 20) & 0xfff;
		5623
		5624	if(index >= (nValues - 1))
		5625	{
		5626	return (pYData[nValues - 1]);
		5627	}
		5628	else
		5629	{
		5630	/* 20 bits for the fractional part */
		5631	/* fract is in 12.20 format */
		5632	fract = (x & 0x000FFFFF);
		5633
		5634	/* Read two nearest output values from the index and are in 1.7(q7) format */
		5635	y0 = pYData[index];
5	mjames	5636	y1 = pYData[index + 1];
2	mjames	5637
		5638	/* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
		5639	y = ((y0 * (0xFFFFF - fract)));
		5640
		5641	/* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
		5642	y += (y1 * fract);
		5643
		5644	/* convert y to 1.7(q7) format */
5	mjames	5645	return (q7_t) (y >> 20);
		5646	}
		5647	}
2	mjames	5648
		5649	/**
		5650	* @} end of LinearInterpolate group
		5651	*/
		5652
		5653	/**
		5654	* @brief Fast approximation to the trigonometric sine function for floating-point data.
5	mjames	5655	* @param[in] x input value in radians.
2	mjames	5656	* @return sin(x).
		5657	*/
		5658	float32_t arm_sin_f32(
		5659	float32_t x);
		5660
5	mjames	5661
2	mjames	5662	/**
		5663	* @brief Fast approximation to the trigonometric sine function for Q31 data.
5	mjames	5664	* @param[in] x Scaled input value in radians.
2	mjames	5665	* @return sin(x).
		5666	*/
		5667	q31_t arm_sin_q31(
		5668	q31_t x);
		5669
5	mjames	5670
2	mjames	5671	/**
		5672	* @brief Fast approximation to the trigonometric sine function for Q15 data.
5	mjames	5673	* @param[in] x Scaled input value in radians.
2	mjames	5674	* @return sin(x).
		5675	*/
		5676	q15_t arm_sin_q15(
		5677	q15_t x);
		5678
5	mjames	5679
2	mjames	5680	/**
		5681	* @brief Fast approximation to the trigonometric cosine function for floating-point data.
5	mjames	5682	* @param[in] x input value in radians.
2	mjames	5683	* @return cos(x).
		5684	*/
		5685	float32_t arm_cos_f32(
		5686	float32_t x);
		5687
5	mjames	5688
2	mjames	5689	/**
		5690	* @brief Fast approximation to the trigonometric cosine function for Q31 data.
5	mjames	5691	* @param[in] x Scaled input value in radians.
2	mjames	5692	* @return cos(x).
		5693	*/
		5694	q31_t arm_cos_q31(
		5695	q31_t x);
		5696
5	mjames	5697
2	mjames	5698	/**
		5699	* @brief Fast approximation to the trigonometric cosine function for Q15 data.
5	mjames	5700	* @param[in] x Scaled input value in radians.
2	mjames	5701	* @return cos(x).
		5702	*/
		5703	q15_t arm_cos_q15(
		5704	q15_t x);
		5705
		5706
		5707	/**
		5708	* @ingroup groupFastMath
		5709	*/
		5710
		5711
		5712	/**
		5713	* @defgroup SQRT Square Root
		5714	*
		5715	* Computes the square root of a number.
		5716	* There are separate functions for Q15, Q31, and floating-point data types.
		5717	* The square root function is computed using the Newton-Raphson algorithm.
		5718	* This is an iterative algorithm of the form:
		5719	* <pre>
		5720	* x1 = x0 - f(x0)/f'(x0)
		5721	* </pre>
		5722	* where <code>x1</code> is the current estimate,
		5723	* <code>x0</code> is the previous estimate, and
		5724	* <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
		5725	* For the square root function, the algorithm reduces to:
		5726	* <pre>
		5727	* x0 = in/2 [initial guess]
		5728	* x1 = 1/2 * ( x0 + in / x0) [each iteration]
		5729	* </pre>
		5730	*/
		5731
		5732
		5733	/**
		5734	* @addtogroup SQRT
		5735	* @{
		5736	*/
		5737
		5738	/**
		5739	* @brief Floating-point square root function.
5	mjames	5740	* @param[in] in input value.
		5741	* @param[out] pOut square root of input value.
2	mjames	5742	* @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
		5743	* <code>in</code> is negative value and returns zero output for negative values.
		5744	*/
		5745	static __INLINE arm_status arm_sqrt_f32(
		5746	float32_t in,
		5747	float32_t * pOut)
		5748	{
		5749	if(in >= 0.0f)
		5750	{
		5751
5	mjames	5752	#if (__FPU_USED == 1) && defined ( __CC_ARM )
2	mjames	5753	*pOut = __sqrtf(in);
5	mjames	5754	#elif (__FPU_USED == 1) && (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050))
		5755	*pOut = __builtin_sqrtf(in);
		5756	#elif (__FPU_USED == 1) && defined(__GNUC__)
		5757	*pOut = __builtin_sqrtf(in);
		5758	#elif (__FPU_USED == 1) && defined ( __ICCARM__ ) && (__VER__ >= 6040000)
		5759	__ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
2	mjames	5760	#else
		5761	*pOut = sqrtf(in);
		5762	#endif
		5763
		5764	return (ARM_MATH_SUCCESS);
		5765	}
		5766	else
		5767	{
		5768	*pOut = 0.0f;
		5769	return (ARM_MATH_ARGUMENT_ERROR);
		5770	}
		5771	}
		5772
		5773
		5774	/**
		5775	* @brief Q31 square root function.
5	mjames	5776	* @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
		5777	* @param[out] pOut square root of input value.
2	mjames	5778	* @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
		5779	* <code>in</code> is negative value and returns zero output for negative values.
		5780	*/
		5781	arm_status arm_sqrt_q31(
		5782	q31_t in,
		5783	q31_t * pOut);
		5784
5	mjames	5785
2	mjames	5786	/**
		5787	* @brief Q15 square root function.
5	mjames	5788	* @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
		5789	* @param[out] pOut square root of input value.
2	mjames	5790	* @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
		5791	* <code>in</code> is negative value and returns zero output for negative values.
		5792	*/
		5793	arm_status arm_sqrt_q15(
		5794	q15_t in,
		5795	q15_t * pOut);
		5796
		5797	/**
		5798	* @} end of SQRT group
		5799	*/
		5800
		5801
		5802	/**
		5803	* @brief floating-point Circular write function.
		5804	*/
		5805	static __INLINE void arm_circularWrite_f32(
		5806	int32_t * circBuffer,
		5807	int32_t L,
		5808	uint16_t * writeOffset,
		5809	int32_t bufferInc,
		5810	const int32_t * src,
		5811	int32_t srcInc,
		5812	uint32_t blockSize)
		5813	{
		5814	uint32_t i = 0u;
		5815	int32_t wOffset;
		5816
		5817	/* Copy the value of Index pointer that points
		5818	* to the current location where the input samples to be copied */
		5819	wOffset = *writeOffset;
		5820
		5821	/* Loop over the blockSize */
		5822	i = blockSize;
		5823
		5824	while(i > 0u)
		5825	{
		5826	/* copy the input sample to the circular buffer */
		5827	circBuffer[wOffset] = *src;
		5828
		5829	/* Update the input pointer */
		5830	src += srcInc;
		5831
		5832	/* Circularly update wOffset. Watch out for positive and negative value */
		5833	wOffset += bufferInc;
		5834	if(wOffset >= L)
		5835	wOffset -= L;
		5836
		5837	/* Decrement the loop counter */
		5838	i--;
		5839	}
		5840
		5841	/* Update the index pointer */
5	mjames	5842	*writeOffset = (uint16_t)wOffset;
2	mjames	5843	}
		5844
		5845
		5846
		5847	/**
		5848	* @brief floating-point Circular Read function.
		5849	*/
		5850	static __INLINE void arm_circularRead_f32(
		5851	int32_t * circBuffer,
		5852	int32_t L,
		5853	int32_t * readOffset,
		5854	int32_t bufferInc,
		5855	int32_t * dst,
		5856	int32_t * dst_base,
		5857	int32_t dst_length,
		5858	int32_t dstInc,
		5859	uint32_t blockSize)
		5860	{
		5861	uint32_t i = 0u;
		5862	int32_t rOffset, dst_end;
		5863
		5864	/* Copy the value of Index pointer that points
		5865	* to the current location from where the input samples to be read */
		5866	rOffset = *readOffset;
		5867	dst_end = (int32_t) (dst_base + dst_length);
		5868
		5869	/* Loop over the blockSize */
		5870	i = blockSize;
		5871
		5872	while(i > 0u)
		5873	{
		5874	/* copy the sample from the circular buffer to the destination buffer */
		5875	*dst = circBuffer[rOffset];
		5876
		5877	/* Update the input pointer */
		5878	dst += dstInc;
		5879
		5880	if(dst == (int32_t *) dst_end)
		5881	{
		5882	dst = dst_base;
		5883	}
		5884
		5885	/* Circularly update rOffset. Watch out for positive and negative value */
		5886	rOffset += bufferInc;
		5887
		5888	if(rOffset >= L)
		5889	{
		5890	rOffset -= L;
		5891	}
		5892
		5893	/* Decrement the loop counter */
		5894	i--;
		5895	}
		5896
		5897	/* Update the index pointer */
		5898	*readOffset = rOffset;
		5899	}
		5900
5	mjames	5901
2	mjames	5902	/**
		5903	* @brief Q15 Circular write function.
		5904	*/
		5905	static __INLINE void arm_circularWrite_q15(
		5906	q15_t * circBuffer,
		5907	int32_t L,
		5908	uint16_t * writeOffset,
		5909	int32_t bufferInc,
		5910	const q15_t * src,
		5911	int32_t srcInc,
		5912	uint32_t blockSize)
		5913	{
		5914	uint32_t i = 0u;
		5915	int32_t wOffset;
		5916
		5917	/* Copy the value of Index pointer that points
		5918	* to the current location where the input samples to be copied */
		5919	wOffset = *writeOffset;
		5920
		5921	/* Loop over the blockSize */
		5922	i = blockSize;
		5923
		5924	while(i > 0u)
		5925	{
		5926	/* copy the input sample to the circular buffer */
		5927	circBuffer[wOffset] = *src;
		5928
		5929	/* Update the input pointer */
		5930	src += srcInc;
		5931
		5932	/* Circularly update wOffset. Watch out for positive and negative value */
		5933	wOffset += bufferInc;
		5934	if(wOffset >= L)
		5935	wOffset -= L;
		5936
		5937	/* Decrement the loop counter */
		5938	i--;
		5939	}
		5940
		5941	/* Update the index pointer */
5	mjames	5942	*writeOffset = (uint16_t)wOffset;
2	mjames	5943	}
		5944
		5945
		5946	/**
		5947	* @brief Q15 Circular Read function.
		5948	*/
		5949	static __INLINE void arm_circularRead_q15(
		5950	q15_t * circBuffer,
		5951	int32_t L,
		5952	int32_t * readOffset,
		5953	int32_t bufferInc,
		5954	q15_t * dst,
		5955	q15_t * dst_base,
		5956	int32_t dst_length,
		5957	int32_t dstInc,
		5958	uint32_t blockSize)
		5959	{
		5960	uint32_t i = 0;
		5961	int32_t rOffset, dst_end;
		5962
		5963	/* Copy the value of Index pointer that points
		5964	* to the current location from where the input samples to be read */
		5965	rOffset = *readOffset;
		5966
		5967	dst_end = (int32_t) (dst_base + dst_length);
		5968
		5969	/* Loop over the blockSize */
		5970	i = blockSize;
		5971
		5972	while(i > 0u)
		5973	{
		5974	/* copy the sample from the circular buffer to the destination buffer */
		5975	*dst = circBuffer[rOffset];
		5976
		5977	/* Update the input pointer */
		5978	dst += dstInc;
		5979
		5980	if(dst == (q15_t *) dst_end)
		5981	{
		5982	dst = dst_base;
		5983	}
		5984
		5985	/* Circularly update wOffset. Watch out for positive and negative value */
		5986	rOffset += bufferInc;
		5987
		5988	if(rOffset >= L)
		5989	{
		5990	rOffset -= L;
		5991	}
		5992
		5993	/* Decrement the loop counter */
		5994	i--;
		5995	}
		5996
		5997	/* Update the index pointer */
		5998	*readOffset = rOffset;
		5999	}
		6000
		6001
		6002	/**
		6003	* @brief Q7 Circular write function.
		6004	*/
		6005	static __INLINE void arm_circularWrite_q7(
		6006	q7_t * circBuffer,
		6007	int32_t L,
		6008	uint16_t * writeOffset,
		6009	int32_t bufferInc,
		6010	const q7_t * src,
		6011	int32_t srcInc,
		6012	uint32_t blockSize)
		6013	{
		6014	uint32_t i = 0u;
		6015	int32_t wOffset;
		6016
		6017	/* Copy the value of Index pointer that points
		6018	* to the current location where the input samples to be copied */
		6019	wOffset = *writeOffset;
		6020
		6021	/* Loop over the blockSize */
		6022	i = blockSize;
		6023
		6024	while(i > 0u)
		6025	{
		6026	/* copy the input sample to the circular buffer */
		6027	circBuffer[wOffset] = *src;
		6028
		6029	/* Update the input pointer */
		6030	src += srcInc;
		6031
		6032	/* Circularly update wOffset. Watch out for positive and negative value */
		6033	wOffset += bufferInc;
		6034	if(wOffset >= L)
		6035	wOffset -= L;
		6036
		6037	/* Decrement the loop counter */
		6038	i--;
		6039	}
		6040
		6041	/* Update the index pointer */
5	mjames	6042	*writeOffset = (uint16_t)wOffset;
2	mjames	6043	}
		6044
		6045
		6046	/**
		6047	* @brief Q7 Circular Read function.
		6048	*/
		6049	static __INLINE void arm_circularRead_q7(
		6050	q7_t * circBuffer,
		6051	int32_t L,
		6052	int32_t * readOffset,
		6053	int32_t bufferInc,
		6054	q7_t * dst,
		6055	q7_t * dst_base,
		6056	int32_t dst_length,
		6057	int32_t dstInc,
		6058	uint32_t blockSize)
		6059	{
		6060	uint32_t i = 0;
		6061	int32_t rOffset, dst_end;
		6062
		6063	/* Copy the value of Index pointer that points
		6064	* to the current location from where the input samples to be read */
		6065	rOffset = *readOffset;
		6066
		6067	dst_end = (int32_t) (dst_base + dst_length);
		6068
		6069	/* Loop over the blockSize */
		6070	i = blockSize;
		6071
		6072	while(i > 0u)
		6073	{
		6074	/* copy the sample from the circular buffer to the destination buffer */
		6075	*dst = circBuffer[rOffset];
		6076
		6077	/* Update the input pointer */
		6078	dst += dstInc;
		6079
		6080	if(dst == (q7_t *) dst_end)
		6081	{
		6082	dst = dst_base;
		6083	}
		6084
		6085	/* Circularly update rOffset. Watch out for positive and negative value */
		6086	rOffset += bufferInc;
		6087
		6088	if(rOffset >= L)
		6089	{
		6090	rOffset -= L;
		6091	}
		6092
		6093	/* Decrement the loop counter */
		6094	i--;
		6095	}
		6096
		6097	/* Update the index pointer */
		6098	*readOffset = rOffset;
		6099	}
		6100
		6101
		6102	/**
		6103	* @brief Sum of the squares of the elements of a Q31 vector.
5	mjames	6104	* @param[in] pSrc is input pointer
		6105	* @param[in] blockSize is the number of samples to process
		6106	* @param[out] pResult is output value.
2	mjames	6107	*/
		6108	void arm_power_q31(
		6109	q31_t * pSrc,
		6110	uint32_t blockSize,
		6111	q63_t * pResult);
		6112
5	mjames	6113
2	mjames	6114	/**
		6115	* @brief Sum of the squares of the elements of a floating-point vector.
5	mjames	6116	* @param[in] pSrc is input pointer
		6117	* @param[in] blockSize is the number of samples to process
		6118	* @param[out] pResult is output value.
2	mjames	6119	*/
		6120	void arm_power_f32(
		6121	float32_t * pSrc,
		6122	uint32_t blockSize,
		6123	float32_t * pResult);
		6124
5	mjames	6125
2	mjames	6126	/**
		6127	* @brief Sum of the squares of the elements of a Q15 vector.
5	mjames	6128	* @param[in] pSrc is input pointer
		6129	* @param[in] blockSize is the number of samples to process
		6130	* @param[out] pResult is output value.
2	mjames	6131	*/
		6132	void arm_power_q15(
		6133	q15_t * pSrc,
		6134	uint32_t blockSize,
		6135	q63_t * pResult);
		6136
5	mjames	6137
2	mjames	6138	/**
		6139	* @brief Sum of the squares of the elements of a Q7 vector.
5	mjames	6140	* @param[in] pSrc is input pointer
		6141	* @param[in] blockSize is the number of samples to process
		6142	* @param[out] pResult is output value.
2	mjames	6143	*/
		6144	void arm_power_q7(
		6145	q7_t * pSrc,
		6146	uint32_t blockSize,
		6147	q31_t * pResult);
		6148
5	mjames	6149
2	mjames	6150	/**
		6151	* @brief Mean value of a Q7 vector.
5	mjames	6152	* @param[in] pSrc is input pointer
		6153	* @param[in] blockSize is the number of samples to process
		6154	* @param[out] pResult is output value.
2	mjames	6155	*/
		6156	void arm_mean_q7(
		6157	q7_t * pSrc,
		6158	uint32_t blockSize,
		6159	q7_t * pResult);
		6160
5	mjames	6161
2	mjames	6162	/**
		6163	* @brief Mean value of a Q15 vector.
5	mjames	6164	* @param[in] pSrc is input pointer
		6165	* @param[in] blockSize is the number of samples to process
		6166	* @param[out] pResult is output value.
2	mjames	6167	*/
		6168	void arm_mean_q15(
		6169	q15_t * pSrc,
		6170	uint32_t blockSize,
		6171	q15_t * pResult);
		6172
5	mjames	6173
2	mjames	6174	/**
		6175	* @brief Mean value of a Q31 vector.
5	mjames	6176	* @param[in] pSrc is input pointer
		6177	* @param[in] blockSize is the number of samples to process
		6178	* @param[out] pResult is output value.
2	mjames	6179	*/
		6180	void arm_mean_q31(
		6181	q31_t * pSrc,
		6182	uint32_t blockSize,
		6183	q31_t * pResult);
		6184
5	mjames	6185
2	mjames	6186	/**
		6187	* @brief Mean value of a floating-point vector.
5	mjames	6188	* @param[in] pSrc is input pointer
		6189	* @param[in] blockSize is the number of samples to process
		6190	* @param[out] pResult is output value.
2	mjames	6191	*/
		6192	void arm_mean_f32(
		6193	float32_t * pSrc,
		6194	uint32_t blockSize,
		6195	float32_t * pResult);
		6196
5	mjames	6197
2	mjames	6198	/**
		6199	* @brief Variance of the elements of a floating-point vector.
5	mjames	6200	* @param[in] pSrc is input pointer
		6201	* @param[in] blockSize is the number of samples to process
		6202	* @param[out] pResult is output value.
2	mjames	6203	*/
		6204	void arm_var_f32(
		6205	float32_t * pSrc,
		6206	uint32_t blockSize,
		6207	float32_t * pResult);
		6208
5	mjames	6209
2	mjames	6210	/**
		6211	* @brief Variance of the elements of a Q31 vector.
5	mjames	6212	* @param[in] pSrc is input pointer
		6213	* @param[in] blockSize is the number of samples to process
		6214	* @param[out] pResult is output value.
2	mjames	6215	*/
		6216	void arm_var_q31(
		6217	q31_t * pSrc,
		6218	uint32_t blockSize,
		6219	q31_t * pResult);
		6220
5	mjames	6221
2	mjames	6222	/**
		6223	* @brief Variance of the elements of a Q15 vector.
5	mjames	6224	* @param[in] pSrc is input pointer
		6225	* @param[in] blockSize is the number of samples to process
		6226	* @param[out] pResult is output value.
2	mjames	6227	*/
		6228	void arm_var_q15(
		6229	q15_t * pSrc,
		6230	uint32_t blockSize,
		6231	q15_t * pResult);
		6232
5	mjames	6233
2	mjames	6234	/**
		6235	* @brief Root Mean Square of the elements of a floating-point vector.
5	mjames	6236	* @param[in] pSrc is input pointer
		6237	* @param[in] blockSize is the number of samples to process
		6238	* @param[out] pResult is output value.
2	mjames	6239	*/
		6240	void arm_rms_f32(
		6241	float32_t * pSrc,
		6242	uint32_t blockSize,
		6243	float32_t * pResult);
		6244
5	mjames	6245
2	mjames	6246	/**
		6247	* @brief Root Mean Square of the elements of a Q31 vector.
5	mjames	6248	* @param[in] pSrc is input pointer
		6249	* @param[in] blockSize is the number of samples to process
		6250	* @param[out] pResult is output value.
2	mjames	6251	*/
		6252	void arm_rms_q31(
		6253	q31_t * pSrc,
		6254	uint32_t blockSize,
		6255	q31_t * pResult);
		6256
5	mjames	6257
2	mjames	6258	/**
		6259	* @brief Root Mean Square of the elements of a Q15 vector.
5	mjames	6260	* @param[in] pSrc is input pointer
		6261	* @param[in] blockSize is the number of samples to process
		6262	* @param[out] pResult is output value.
2	mjames	6263	*/
		6264	void arm_rms_q15(
		6265	q15_t * pSrc,
		6266	uint32_t blockSize,
		6267	q15_t * pResult);
		6268
5	mjames	6269
2	mjames	6270	/**
		6271	* @brief Standard deviation of the elements of a floating-point vector.
5	mjames	6272	* @param[in] pSrc is input pointer
		6273	* @param[in] blockSize is the number of samples to process
		6274	* @param[out] pResult is output value.
2	mjames	6275	*/
		6276	void arm_std_f32(
		6277	float32_t * pSrc,
		6278	uint32_t blockSize,
		6279	float32_t * pResult);
		6280
5	mjames	6281
2	mjames	6282	/**
		6283	* @brief Standard deviation of the elements of a Q31 vector.
5	mjames	6284	* @param[in] pSrc is input pointer
		6285	* @param[in] blockSize is the number of samples to process
		6286	* @param[out] pResult is output value.
2	mjames	6287	*/
		6288	void arm_std_q31(
		6289	q31_t * pSrc,
		6290	uint32_t blockSize,
		6291	q31_t * pResult);
		6292
5	mjames	6293
2	mjames	6294	/**
		6295	* @brief Standard deviation of the elements of a Q15 vector.
5	mjames	6296	* @param[in] pSrc is input pointer
		6297	* @param[in] blockSize is the number of samples to process
		6298	* @param[out] pResult is output value.
2	mjames	6299	*/
		6300	void arm_std_q15(
		6301	q15_t * pSrc,
		6302	uint32_t blockSize,
		6303	q15_t * pResult);
		6304
5	mjames	6305
2	mjames	6306	/**
		6307	* @brief Floating-point complex magnitude
5	mjames	6308	* @param[in] pSrc points to the complex input vector
		6309	* @param[out] pDst points to the real output vector
		6310	* @param[in] numSamples number of complex samples in the input vector
2	mjames	6311	*/
		6312	void arm_cmplx_mag_f32(
		6313	float32_t * pSrc,
		6314	float32_t * pDst,
		6315	uint32_t numSamples);
		6316
5	mjames	6317
2	mjames	6318	/**
		6319	* @brief Q31 complex magnitude
5	mjames	6320	* @param[in] pSrc points to the complex input vector
		6321	* @param[out] pDst points to the real output vector
		6322	* @param[in] numSamples number of complex samples in the input vector
2	mjames	6323	*/
		6324	void arm_cmplx_mag_q31(
		6325	q31_t * pSrc,
		6326	q31_t * pDst,
		6327	uint32_t numSamples);
		6328
5	mjames	6329
2	mjames	6330	/**
		6331	* @brief Q15 complex magnitude
5	mjames	6332	* @param[in] pSrc points to the complex input vector
		6333	* @param[out] pDst points to the real output vector
		6334	* @param[in] numSamples number of complex samples in the input vector
2	mjames	6335	*/
		6336	void arm_cmplx_mag_q15(
		6337	q15_t * pSrc,
		6338	q15_t * pDst,
		6339	uint32_t numSamples);
		6340
5	mjames	6341
2	mjames	6342	/**
		6343	* @brief Q15 complex dot product
5	mjames	6344	* @param[in] pSrcA points to the first input vector
		6345	* @param[in] pSrcB points to the second input vector
		6346	* @param[in] numSamples number of complex samples in each vector
		6347	* @param[out] realResult real part of the result returned here
		6348	* @param[out] imagResult imaginary part of the result returned here
2	mjames	6349	*/
		6350	void arm_cmplx_dot_prod_q15(
		6351	q15_t * pSrcA,
		6352	q15_t * pSrcB,
		6353	uint32_t numSamples,
		6354	q31_t * realResult,
		6355	q31_t * imagResult);
		6356
5	mjames	6357
2	mjames	6358	/**
		6359	* @brief Q31 complex dot product
5	mjames	6360	* @param[in] pSrcA points to the first input vector
		6361	* @param[in] pSrcB points to the second input vector
		6362	* @param[in] numSamples number of complex samples in each vector
		6363	* @param[out] realResult real part of the result returned here
		6364	* @param[out] imagResult imaginary part of the result returned here
2	mjames	6365	*/
		6366	void arm_cmplx_dot_prod_q31(
		6367	q31_t * pSrcA,
		6368	q31_t * pSrcB,
		6369	uint32_t numSamples,
		6370	q63_t * realResult,
		6371	q63_t * imagResult);
		6372
5	mjames	6373
2	mjames	6374	/**
		6375	* @brief Floating-point complex dot product
5	mjames	6376	* @param[in] pSrcA points to the first input vector
		6377	* @param[in] pSrcB points to the second input vector
		6378	* @param[in] numSamples number of complex samples in each vector
		6379	* @param[out] realResult real part of the result returned here
		6380	* @param[out] imagResult imaginary part of the result returned here
2	mjames	6381	*/
		6382	void arm_cmplx_dot_prod_f32(
		6383	float32_t * pSrcA,
		6384	float32_t * pSrcB,
		6385	uint32_t numSamples,
		6386	float32_t * realResult,
		6387	float32_t * imagResult);
		6388
5	mjames	6389
2	mjames	6390	/**
		6391	* @brief Q15 complex-by-real multiplication
5	mjames	6392	* @param[in] pSrcCmplx points to the complex input vector
		6393	* @param[in] pSrcReal points to the real input vector
		6394	* @param[out] pCmplxDst points to the complex output vector
		6395	* @param[in] numSamples number of samples in each vector
2	mjames	6396	*/
		6397	void arm_cmplx_mult_real_q15(
		6398	q15_t * pSrcCmplx,
		6399	q15_t * pSrcReal,
		6400	q15_t * pCmplxDst,
		6401	uint32_t numSamples);
		6402
5	mjames	6403
2	mjames	6404	/**
		6405	* @brief Q31 complex-by-real multiplication
5	mjames	6406	* @param[in] pSrcCmplx points to the complex input vector
		6407	* @param[in] pSrcReal points to the real input vector
		6408	* @param[out] pCmplxDst points to the complex output vector
		6409	* @param[in] numSamples number of samples in each vector
2	mjames	6410	*/
		6411	void arm_cmplx_mult_real_q31(
		6412	q31_t * pSrcCmplx,
		6413	q31_t * pSrcReal,
		6414	q31_t * pCmplxDst,
		6415	uint32_t numSamples);
		6416
5	mjames	6417
2	mjames	6418	/**
		6419	* @brief Floating-point complex-by-real multiplication
5	mjames	6420	* @param[in] pSrcCmplx points to the complex input vector
		6421	* @param[in] pSrcReal points to the real input vector
		6422	* @param[out] pCmplxDst points to the complex output vector
		6423	* @param[in] numSamples number of samples in each vector
2	mjames	6424	*/
		6425	void arm_cmplx_mult_real_f32(
		6426	float32_t * pSrcCmplx,
		6427	float32_t * pSrcReal,
		6428	float32_t * pCmplxDst,
		6429	uint32_t numSamples);
		6430
5	mjames	6431
2	mjames	6432	/**
		6433	* @brief Minimum value of a Q7 vector.
5	mjames	6434	* @param[in] pSrc is input pointer
		6435	* @param[in] blockSize is the number of samples to process
		6436	* @param[out] result is output pointer
		6437	* @param[out] index is the array index of the minimum value in the input buffer.
2	mjames	6438	*/
		6439	void arm_min_q7(
		6440	q7_t * pSrc,
		6441	uint32_t blockSize,
		6442	q7_t * result,
		6443	uint32_t * index);
		6444
5	mjames	6445
2	mjames	6446	/**
		6447	* @brief Minimum value of a Q15 vector.
5	mjames	6448	* @param[in] pSrc is input pointer
		6449	* @param[in] blockSize is the number of samples to process
		6450	* @param[out] pResult is output pointer
		6451	* @param[out] pIndex is the array index of the minimum value in the input buffer.
2	mjames	6452	*/
		6453	void arm_min_q15(
		6454	q15_t * pSrc,
		6455	uint32_t blockSize,
		6456	q15_t * pResult,
		6457	uint32_t * pIndex);
		6458
5	mjames	6459
2	mjames	6460	/**
		6461	* @brief Minimum value of a Q31 vector.
5	mjames	6462	* @param[in] pSrc is input pointer
		6463	* @param[in] blockSize is the number of samples to process
		6464	* @param[out] pResult is output pointer
		6465	* @param[out] pIndex is the array index of the minimum value in the input buffer.
2	mjames	6466	*/
		6467	void arm_min_q31(
		6468	q31_t * pSrc,
		6469	uint32_t blockSize,
		6470	q31_t * pResult,
		6471	uint32_t * pIndex);
		6472
5	mjames	6473
2	mjames	6474	/**
		6475	* @brief Minimum value of a floating-point vector.
5	mjames	6476	* @param[in] pSrc is input pointer
		6477	* @param[in] blockSize is the number of samples to process
		6478	* @param[out] pResult is output pointer
		6479	* @param[out] pIndex is the array index of the minimum value in the input buffer.
2	mjames	6480	*/
		6481	void arm_min_f32(
		6482	float32_t * pSrc,
		6483	uint32_t blockSize,
		6484	float32_t * pResult,
		6485	uint32_t * pIndex);
		6486
5	mjames	6487
2	mjames	6488	/**
		6489	* @brief Maximum value of a Q7 vector.
5	mjames	6490	* @param[in] pSrc points to the input buffer
		6491	* @param[in] blockSize length of the input vector
		6492	* @param[out] pResult maximum value returned here
		6493	* @param[out] pIndex index of maximum value returned here
2	mjames	6494	*/
		6495	void arm_max_q7(
		6496	q7_t * pSrc,
		6497	uint32_t blockSize,
		6498	q7_t * pResult,
		6499	uint32_t * pIndex);
		6500
5	mjames	6501
2	mjames	6502	/**
		6503	* @brief Maximum value of a Q15 vector.
5	mjames	6504	* @param[in] pSrc points to the input buffer
		6505	* @param[in] blockSize length of the input vector
		6506	* @param[out] pResult maximum value returned here
		6507	* @param[out] pIndex index of maximum value returned here
2	mjames	6508	*/
		6509	void arm_max_q15(
		6510	q15_t * pSrc,
		6511	uint32_t blockSize,
		6512	q15_t * pResult,
		6513	uint32_t * pIndex);
		6514
5	mjames	6515
2	mjames	6516	/**
		6517	* @brief Maximum value of a Q31 vector.
5	mjames	6518	* @param[in] pSrc points to the input buffer
		6519	* @param[in] blockSize length of the input vector
		6520	* @param[out] pResult maximum value returned here
		6521	* @param[out] pIndex index of maximum value returned here
2	mjames	6522	*/
		6523	void arm_max_q31(
		6524	q31_t * pSrc,
		6525	uint32_t blockSize,
		6526	q31_t * pResult,
		6527	uint32_t * pIndex);
		6528
5	mjames	6529
2	mjames	6530	/**
		6531	* @brief Maximum value of a floating-point vector.
5	mjames	6532	* @param[in] pSrc points to the input buffer
		6533	* @param[in] blockSize length of the input vector
		6534	* @param[out] pResult maximum value returned here
		6535	* @param[out] pIndex index of maximum value returned here
2	mjames	6536	*/
		6537	void arm_max_f32(
		6538	float32_t * pSrc,
		6539	uint32_t blockSize,
		6540	float32_t * pResult,
		6541	uint32_t * pIndex);
		6542
5	mjames	6543
2	mjames	6544	/**
		6545	* @brief Q15 complex-by-complex multiplication
5	mjames	6546	* @param[in] pSrcA points to the first input vector
		6547	* @param[in] pSrcB points to the second input vector
		6548	* @param[out] pDst points to the output vector
		6549	* @param[in] numSamples number of complex samples in each vector
2	mjames	6550	*/
		6551	void arm_cmplx_mult_cmplx_q15(
		6552	q15_t * pSrcA,
		6553	q15_t * pSrcB,
		6554	q15_t * pDst,
		6555	uint32_t numSamples);
		6556
5	mjames	6557
2	mjames	6558	/**
		6559	* @brief Q31 complex-by-complex multiplication
5	mjames	6560	* @param[in] pSrcA points to the first input vector
		6561	* @param[in] pSrcB points to the second input vector
		6562	* @param[out] pDst points to the output vector
		6563	* @param[in] numSamples number of complex samples in each vector
2	mjames	6564	*/
		6565	void arm_cmplx_mult_cmplx_q31(
		6566	q31_t * pSrcA,
		6567	q31_t * pSrcB,
		6568	q31_t * pDst,
		6569	uint32_t numSamples);
		6570
5	mjames	6571
2	mjames	6572	/**
		6573	* @brief Floating-point complex-by-complex multiplication
5	mjames	6574	* @param[in] pSrcA points to the first input vector
		6575	* @param[in] pSrcB points to the second input vector
		6576	* @param[out] pDst points to the output vector
		6577	* @param[in] numSamples number of complex samples in each vector
2	mjames	6578	*/
		6579	void arm_cmplx_mult_cmplx_f32(
		6580	float32_t * pSrcA,
		6581	float32_t * pSrcB,
		6582	float32_t * pDst,
		6583	uint32_t numSamples);
		6584
5	mjames	6585
2	mjames	6586	/**
		6587	* @brief Converts the elements of the floating-point vector to Q31 vector.
5	mjames	6588	* @param[in] pSrc points to the floating-point input vector
		6589	* @param[out] pDst points to the Q31 output vector
		6590	* @param[in] blockSize length of the input vector
2	mjames	6591	*/
		6592	void arm_float_to_q31(
		6593	float32_t * pSrc,
		6594	q31_t * pDst,
		6595	uint32_t blockSize);
		6596
5	mjames	6597
2	mjames	6598	/**
		6599	* @brief Converts the elements of the floating-point vector to Q15 vector.
5	mjames	6600	* @param[in] pSrc points to the floating-point input vector
		6601	* @param[out] pDst points to the Q15 output vector
		6602	* @param[in] blockSize length of the input vector
2	mjames	6603	*/
		6604	void arm_float_to_q15(
		6605	float32_t * pSrc,
		6606	q15_t * pDst,
		6607	uint32_t blockSize);
		6608
5	mjames	6609
2	mjames	6610	/**
		6611	* @brief Converts the elements of the floating-point vector to Q7 vector.
5	mjames	6612	* @param[in] pSrc points to the floating-point input vector
		6613	* @param[out] pDst points to the Q7 output vector
		6614	* @param[in] blockSize length of the input vector
2	mjames	6615	*/
		6616	void arm_float_to_q7(
		6617	float32_t * pSrc,
		6618	q7_t * pDst,
		6619	uint32_t blockSize);
		6620
		6621
		6622	/**
		6623	* @brief Converts the elements of the Q31 vector to Q15 vector.
5	mjames	6624	* @param[in] pSrc is input pointer
		6625	* @param[out] pDst is output pointer
		6626	* @param[in] blockSize is the number of samples to process
2	mjames	6627	*/
		6628	void arm_q31_to_q15(
		6629	q31_t * pSrc,
		6630	q15_t * pDst,
		6631	uint32_t blockSize);
		6632
5	mjames	6633
2	mjames	6634	/**
		6635	* @brief Converts the elements of the Q31 vector to Q7 vector.
5	mjames	6636	* @param[in] pSrc is input pointer
		6637	* @param[out] pDst is output pointer
		6638	* @param[in] blockSize is the number of samples to process
2	mjames	6639	*/
		6640	void arm_q31_to_q7(
		6641	q31_t * pSrc,
		6642	q7_t * pDst,
		6643	uint32_t blockSize);
		6644
5	mjames	6645
2	mjames	6646	/**
		6647	* @brief Converts the elements of the Q15 vector to floating-point vector.
5	mjames	6648	* @param[in] pSrc is input pointer
		6649	* @param[out] pDst is output pointer
		6650	* @param[in] blockSize is the number of samples to process
2	mjames	6651	*/
		6652	void arm_q15_to_float(
		6653	q15_t * pSrc,
		6654	float32_t * pDst,
		6655	uint32_t blockSize);
		6656
		6657
		6658	/**
		6659	* @brief Converts the elements of the Q15 vector to Q31 vector.
5	mjames	6660	* @param[in] pSrc is input pointer
		6661	* @param[out] pDst is output pointer
		6662	* @param[in] blockSize is the number of samples to process
2	mjames	6663	*/
		6664	void arm_q15_to_q31(
		6665	q15_t * pSrc,
		6666	q31_t * pDst,
		6667	uint32_t blockSize);
		6668
		6669
		6670	/**
		6671	* @brief Converts the elements of the Q15 vector to Q7 vector.
5	mjames	6672	* @param[in] pSrc is input pointer
		6673	* @param[out] pDst is output pointer
		6674	* @param[in] blockSize is the number of samples to process
2	mjames	6675	*/
		6676	void arm_q15_to_q7(
		6677	q15_t * pSrc,
		6678	q7_t * pDst,
		6679	uint32_t blockSize);
		6680
		6681
		6682	/**
		6683	* @ingroup groupInterpolation
		6684	*/
		6685
		6686	/**
		6687	* @defgroup BilinearInterpolate Bilinear Interpolation
		6688	*
		6689	* Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
		6690	* The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
		6691	* determines values between the grid points.
		6692	* Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
		6693	* Bilinear interpolation is often used in image processing to rescale images.
		6694	* The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
		6695	*
		6696	* <b>Algorithm</b>
		6697	* \par
		6698	* The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
		6699	* For floating-point, the instance structure is defined as:
		6700	* <pre>
		6701	* typedef struct
		6702	* {
		6703	* uint16_t numRows;
		6704	* uint16_t numCols;
		6705	* float32_t *pData;
		6706	* } arm_bilinear_interp_instance_f32;
		6707	* </pre>
		6708	*
		6709	* \par
		6710	* where <code>numRows</code> specifies the number of rows in the table;
		6711	* <code>numCols</code> specifies the number of columns in the table;
		6712	* and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
		6713	* The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
		6714	* That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
		6715	*
		6716	* \par
		6717	* Let <code>(x, y)</code> specify the desired interpolation point. Then define:
		6718	* <pre>
		6719	* XF = floor(x)
		6720	* YF = floor(y)
		6721	* </pre>
		6722	* \par
		6723	* The interpolated output point is computed as:
		6724	* <pre>
		6725	* f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
		6726	* + f(XF+1, YF) * (x-XF)*(1-(y-YF))
		6727	* + f(XF, YF+1) * (1-(x-XF))*(y-YF)
		6728	* + f(XF+1, YF+1) * (x-XF)*(y-YF)
		6729	* </pre>
		6730	* Note that the coordinates (x, y) contain integer and fractional components.
		6731	* The integer components specify which portion of the table to use while the
		6732	* fractional components control the interpolation processor.
		6733	*
		6734	* \par
		6735	* If (x, y) lies outside the table boundary, bilinear interpolation returns zero.
		6736	*/
		6737
		6738	/**
		6739	* @addtogroup BilinearInterpolate
		6740	* @{
		6741	*/
		6742
5	mjames	6743
2	mjames	6744	/**
		6745	*
		6746	* @brief Floating-point bilinear interpolation.
5	mjames	6747	* @param[in,out] S points to an instance of the interpolation structure.
		6748	* @param[in] X interpolation coordinate.
		6749	* @param[in] Y interpolation coordinate.
2	mjames	6750	* @return out interpolated value.
		6751	*/
		6752	static __INLINE float32_t arm_bilinear_interp_f32(
		6753	const arm_bilinear_interp_instance_f32 * S,
		6754	float32_t X,
		6755	float32_t Y)
		6756	{
		6757	float32_t out;
		6758	float32_t f00, f01, f10, f11;
		6759	float32_t *pData = S->pData;
		6760	int32_t xIndex, yIndex, index;
		6761	float32_t xdiff, ydiff;
		6762	float32_t b1, b2, b3, b4;
		6763
		6764	xIndex = (int32_t) X;
		6765	yIndex = (int32_t) Y;
		6766
		6767	/* Care taken for table outside boundary */
		6768	/* Returns zero output when values are outside table boundary */
5	mjames	6769	if(xIndex < 0 \|\| xIndex > (S->numRows - 1) \|\| yIndex < 0 \|\| yIndex > (S->numCols - 1))
2	mjames	6770	{
		6771	return (0);
		6772	}
		6773
		6774	/* Calculation of index for two nearest points in X-direction */
		6775	index = (xIndex - 1) + (yIndex - 1) * S->numCols;
		6776
		6777
		6778	/* Read two nearest points in X-direction */
		6779	f00 = pData[index];
		6780	f01 = pData[index + 1];
		6781
		6782	/* Calculation of index for two nearest points in Y-direction */
		6783	index = (xIndex - 1) + (yIndex) * S->numCols;
		6784
		6785
		6786	/* Read two nearest points in Y-direction */
		6787	f10 = pData[index];
		6788	f11 = pData[index + 1];
		6789
		6790	/* Calculation of intermediate values */
		6791	b1 = f00;
		6792	b2 = f01 - f00;
		6793	b3 = f10 - f00;
		6794	b4 = f00 - f01 - f10 + f11;
		6795
		6796	/* Calculation of fractional part in X */
		6797	xdiff = X - xIndex;
		6798
		6799	/* Calculation of fractional part in Y */
		6800	ydiff = Y - yIndex;
		6801
		6802	/* Calculation of bi-linear interpolated output */
		6803	out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
		6804
		6805	/* return to application */
		6806	return (out);
		6807	}
		6808
5	mjames	6809
2	mjames	6810	/**
		6811	*
		6812	* @brief Q31 bilinear interpolation.
5	mjames	6813	* @param[in,out] S points to an instance of the interpolation structure.
		6814	* @param[in] X interpolation coordinate in 12.20 format.
		6815	* @param[in] Y interpolation coordinate in 12.20 format.
2	mjames	6816	* @return out interpolated value.
		6817	*/
		6818	static __INLINE q31_t arm_bilinear_interp_q31(
		6819	arm_bilinear_interp_instance_q31 * S,
		6820	q31_t X,
		6821	q31_t Y)
		6822	{
		6823	q31_t out; /* Temporary output */
		6824	q31_t acc = 0; /* output */
		6825	q31_t xfract, yfract; /* X, Y fractional parts */
		6826	q31_t x1, x2, y1, y2; /* Nearest output values */
		6827	int32_t rI, cI; /* Row and column indices */
		6828	q31_t pYData = S->pData; / pointer to output table values */
		6829	uint32_t nCols = S->numCols; /* num of rows */
		6830
		6831	/* Input is in 12.20 format */
		6832	/* 12 bits for the table index */
		6833	/* Index value calculation */
5	mjames	6834	rI = ((X & (q31_t)0xFFF00000) >> 20);
2	mjames	6835
		6836	/* Input is in 12.20 format */
		6837	/* 12 bits for the table index */
		6838	/* Index value calculation */
5	mjames	6839	cI = ((Y & (q31_t)0xFFF00000) >> 20);
2	mjames	6840
		6841	/* Care taken for table outside boundary */
		6842	/* Returns zero output when values are outside table boundary */
		6843	if(rI < 0 \|\| rI > (S->numRows - 1) \|\| cI < 0 \|\| cI > (S->numCols - 1))
		6844	{
		6845	return (0);
		6846	}
		6847
		6848	/* 20 bits for the fractional part */
		6849	/* shift left xfract by 11 to keep 1.31 format */
		6850	xfract = (X & 0x000FFFFF) << 11u;
		6851
		6852	/* Read two nearest output values from the index */
5	mjames	6853	x1 = pYData[(rI) + (int32_t)nCols * (cI) ];
		6854	x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
2	mjames	6855
		6856	/* 20 bits for the fractional part */
		6857	/* shift left yfract by 11 to keep 1.31 format */
		6858	yfract = (Y & 0x000FFFFF) << 11u;
		6859
		6860	/* Read two nearest output values from the index */
5	mjames	6861	y1 = pYData[(rI) + (int32_t)nCols * (cI + 1) ];
		6862	y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
2	mjames	6863
		6864	/* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
5	mjames	6865	out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
2	mjames	6866	acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
		6867
		6868	/* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */
		6869	out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
		6870	acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
		6871
		6872	/* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */
		6873	out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
		6874	acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
		6875
		6876	/* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */
		6877	out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
		6878	acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
		6879
		6880	/* Convert acc to 1.31(q31) format */
5	mjames	6881	return ((q31_t)(acc << 2));
2	mjames	6882	}
		6883
5	mjames	6884
2	mjames	6885	/**
		6886	* @brief Q15 bilinear interpolation.
5	mjames	6887	* @param[in,out] S points to an instance of the interpolation structure.
		6888	* @param[in] X interpolation coordinate in 12.20 format.
		6889	* @param[in] Y interpolation coordinate in 12.20 format.
2	mjames	6890	* @return out interpolated value.
		6891	*/
		6892	static __INLINE q15_t arm_bilinear_interp_q15(
		6893	arm_bilinear_interp_instance_q15 * S,
		6894	q31_t X,
		6895	q31_t Y)
		6896	{
		6897	q63_t acc = 0; /* output */
		6898	q31_t out; /* Temporary output */
		6899	q15_t x1, x2, y1, y2; /* Nearest output values */
		6900	q31_t xfract, yfract; /* X, Y fractional parts */
		6901	int32_t rI, cI; /* Row and column indices */
		6902	q15_t pYData = S->pData; / pointer to output table values */
		6903	uint32_t nCols = S->numCols; /* num of rows */
		6904
		6905	/* Input is in 12.20 format */
		6906	/* 12 bits for the table index */
		6907	/* Index value calculation */
5	mjames	6908	rI = ((X & (q31_t)0xFFF00000) >> 20);
2	mjames	6909
		6910	/* Input is in 12.20 format */
		6911	/* 12 bits for the table index */
		6912	/* Index value calculation */
5	mjames	6913	cI = ((Y & (q31_t)0xFFF00000) >> 20);
2	mjames	6914
		6915	/* Care taken for table outside boundary */
		6916	/* Returns zero output when values are outside table boundary */
		6917	if(rI < 0 \|\| rI > (S->numRows - 1) \|\| cI < 0 \|\| cI > (S->numCols - 1))
		6918	{
		6919	return (0);
		6920	}
		6921
		6922	/* 20 bits for the fractional part */
		6923	/* xfract should be in 12.20 format */
		6924	xfract = (X & 0x000FFFFF);
		6925
		6926	/* Read two nearest output values from the index */
5	mjames	6927	x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ];
		6928	x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
2	mjames	6929
		6930	/* 20 bits for the fractional part */
		6931	/* yfract should be in 12.20 format */
		6932	yfract = (Y & 0x000FFFFF);
		6933
		6934	/* Read two nearest output values from the index */
5	mjames	6935	y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ];
		6936	y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
2	mjames	6937
		6938	/* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
		6939
		6940	/* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
		6941	/* convert 13.35 to 13.31 by right shifting and out is in 1.31 */
		6942	out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
		6943	acc = ((q63_t) out * (0xFFFFF - yfract));
		6944
		6945	/* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */
		6946	out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
		6947	acc += ((q63_t) out * (xfract));
		6948
		6949	/* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */
		6950	out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
		6951	acc += ((q63_t) out * (yfract));
		6952
		6953	/* y2 * (xfract) * (yfract) in 1.51 and adding to acc */
		6954	out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
		6955	acc += ((q63_t) out * (yfract));
		6956
		6957	/* acc is in 13.51 format and down shift acc by 36 times */
		6958	/* Convert out to 1.15 format */
5	mjames	6959	return ((q15_t)(acc >> 36));
2	mjames	6960	}
		6961
5	mjames	6962
2	mjames	6963	/**
		6964	* @brief Q7 bilinear interpolation.
5	mjames	6965	* @param[in,out] S points to an instance of the interpolation structure.
		6966	* @param[in] X interpolation coordinate in 12.20 format.
		6967	* @param[in] Y interpolation coordinate in 12.20 format.
2	mjames	6968	* @return out interpolated value.
		6969	*/
		6970	static __INLINE q7_t arm_bilinear_interp_q7(
		6971	arm_bilinear_interp_instance_q7 * S,
		6972	q31_t X,
		6973	q31_t Y)
		6974	{
		6975	q63_t acc = 0; /* output */
		6976	q31_t out; /* Temporary output */
		6977	q31_t xfract, yfract; /* X, Y fractional parts */
		6978	q7_t x1, x2, y1, y2; /* Nearest output values */
		6979	int32_t rI, cI; /* Row and column indices */
		6980	q7_t pYData = S->pData; / pointer to output table values */
		6981	uint32_t nCols = S->numCols; /* num of rows */
		6982
		6983	/* Input is in 12.20 format */
		6984	/* 12 bits for the table index */
		6985	/* Index value calculation */
5	mjames	6986	rI = ((X & (q31_t)0xFFF00000) >> 20);
2	mjames	6987
		6988	/* Input is in 12.20 format */
		6989	/* 12 bits for the table index */
		6990	/* Index value calculation */
5	mjames	6991	cI = ((Y & (q31_t)0xFFF00000) >> 20);
2	mjames	6992
		6993	/* Care taken for table outside boundary */
		6994	/* Returns zero output when values are outside table boundary */
		6995	if(rI < 0 \|\| rI > (S->numRows - 1) \|\| cI < 0 \|\| cI > (S->numCols - 1))
		6996	{
		6997	return (0);
		6998	}
		6999
		7000	/* 20 bits for the fractional part */
		7001	/* xfract should be in 12.20 format */
5	mjames	7002	xfract = (X & (q31_t)0x000FFFFF);
2	mjames	7003
		7004	/* Read two nearest output values from the index */
5	mjames	7005	x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ];
		7006	x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
2	mjames	7007
		7008	/* 20 bits for the fractional part */
		7009	/* yfract should be in 12.20 format */
5	mjames	7010	yfract = (Y & (q31_t)0x000FFFFF);
2	mjames	7011
		7012	/* Read two nearest output values from the index */
5	mjames	7013	y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ];
		7014	y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
2	mjames	7015
		7016	/* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
		7017	out = ((x1 * (0xFFFFF - xfract)));
		7018	acc = (((q63_t) out * (0xFFFFF - yfract)));
		7019
		7020	/* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */
		7021	out = ((x2 * (0xFFFFF - yfract)));
		7022	acc += (((q63_t) out * (xfract)));
		7023
		7024	/* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */
		7025	out = ((y1 * (0xFFFFF - xfract)));
		7026	acc += (((q63_t) out * (yfract)));
		7027
		7028	/* y2 * (xfract) * (yfract) in 2.22 and adding to acc */
		7029	out = ((y2 * (yfract)));
		7030	acc += (((q63_t) out * (xfract)));
		7031
		7032	/* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
5	mjames	7033	return ((q7_t)(acc >> 40));
2	mjames	7034	}
		7035
		7036	/**
		7037	* @} end of BilinearInterpolate group
		7038	*/
		7039
5	mjames	7040
		7041	/* SMMLAR */
2	mjames	7042	#define multAcc_32x32_keep32_R(a, x, y) \
		7043	a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32)
		7044
5	mjames	7045	/* SMMLSR */
2	mjames	7046	#define multSub_32x32_keep32_R(a, x, y) \
		7047	a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32)
		7048
5	mjames	7049	/* SMMULR */
2	mjames	7050	#define mult_32x32_keep32_R(a, x, y) \
		7051	a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32)
		7052
5	mjames	7053	/* SMMLA */
2	mjames	7054	#define multAcc_32x32_keep32(a, x, y) \
		7055	a += (q31_t) (((q63_t) x * y) >> 32)
		7056
5	mjames	7057	/* SMMLS */
2	mjames	7058	#define multSub_32x32_keep32(a, x, y) \
		7059	a -= (q31_t) (((q63_t) x * y) >> 32)
		7060
5	mjames	7061	/* SMMUL */
2	mjames	7062	#define mult_32x32_keep32(a, x, y) \
		7063	a = (q31_t) (((q63_t) x * y ) >> 32)
		7064
		7065
/*
 * Per-compiler definitions of the optimization-control markers:
 *   LOW_OPTIMIZATION_ENTER / LOW_OPTIMIZATION_EXIT
 *   IAR_ONLY_LOW_OPTIMIZATION_ENTER / IAR_ONLY_LOW_OPTIMIZATION_EXIT
 * The ENTER marker is placed directly above a function definition and the
 * EXIT marker directly after it.  Where a toolchain needs no action the
 * marker expands to nothing.
 */
#if defined ( __CC_ARM )
  /* Enter low optimization region - place directly above function definition */
  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
    #define LOW_OPTIMIZATION_ENTER \
       _Pragma ("push")         \
       _Pragma ("O1")
  #else
    #define LOW_OPTIMIZATION_ENTER
  #endif

  /* Exit low optimization region - place directly after end of function definition */
  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
    #define LOW_OPTIMIZATION_EXIT \
       _Pragma ("pop")
  #else
    #define LOW_OPTIMIZATION_EXIT
  #endif

  /* Enter low optimization region - place directly above function definition */
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

  /* Exit low optimization region - place directly after end of function definition */
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)
  /* Function attribute: it applies to the definition that follows, so no EXIT action is needed */
  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__)
  /* Enter low optimization region - place directly above function definition */
  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
    #define LOW_OPTIMIZATION_ENTER \
       _Pragma ("optimize=low")
  #else
    #define LOW_OPTIMIZATION_ENTER
  #endif

  /* Exit low optimization region - place directly after end of function definition */
  #define LOW_OPTIMIZATION_EXIT

  /* Enter low optimization region - place directly above function definition */
  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
       _Pragma ("optimize=low")
  #else
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #endif

  /* Exit low optimization region - place directly after end of function definition */
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__CSMC__)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__TASKING__)
  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#endif
2	mjames	7138
		7139
5	mjames	7140	#ifdef __cplusplus
		7141	}
2	mjames	7142	#endif
		7143
		7144
5	mjames	7145	#if defined ( __GNUC__ )
		7146	#pragma GCC diagnostic pop
2	mjames	7147	#endif
		7148
		7149	#endif /* _ARM_MATH_H */
		7150
		7151	/**
		7152	*
		7153	* End of file.
		7154	*/

Subversion Repositories DashDisplay

(root)/trunk/Drivers/CMSIS/Include/arm_math.h – Rev 5