Home

Resume

Blog

Teikitu


/* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
/*  »Project«   Teikitu Gaming System (TgS) (∂)
    »File«      TgS (WIN) Common - Math API [Matrix] [M] [F] [34].c
    »Author«    Andrew Aye (EMail: mailto:andrew.aye@gmail.com, Web: http://www.andrewaye.com)
    »Version«   4.51 / »GUID« A9981407-3EC9-42AF-8B6F-8BE6DD919615                                                                                                        */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/*  Copyright: © 2002-2017, Andrew Aye.  All Rights Reserved.
    This software is free for non-commercial use.  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
    following conditions are met:
    Redistribution of source code must retain this copyright notice, this list of conditions and the following disclaimers.
    Redistribution in binary form must reproduce this copyright notice, this list of conditions and the following disclaimers in the documentation and other materials
    provided with the distribution.
    The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
    The intellectual property rights of the algorithms used reside with Andrew Aye.
    You may not use this software, in whole or in part, in support of any commercial product without the express written consent of the author.
    There is no warranty or other guarantee of fitness of this software for any purpose. It is provided solely "as is".                                                   */
/* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
#if defined(TGS_COMMON_MATH_API_MATRIX_SPECIALIZATION_INL)

/* == Common ============================================================================================================================================================ */

/* -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. */
/*  Public Functions                                                                                                                                                      */
/* -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. */

/* ---- M_CAT_F32_34_IMPL ----------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/*  Concatenate two 3-row matrices: every row of the result is the sum of the three rows of pxM1, each scaled by the matching lane (0, 1, 2) of the corresponding row
    of pxM0, plus lane 3 of that pxM0 row passed through the KTgV_000F mask (presumably a mask that keeps only lane 3 - confirm against its definition).

    ptmRet  Receives the three result rows.
    pxM0    Left-hand matrix; its lanes supply the per-row scale factors.
    pxM1    Right-hand matrix; its rows are the vectors being combined.

    Every input row is copied into a local before the first store to ptmRet. */
TgVOID M_CAT_F32_34_IMPL( PCU_TgMAT_F32_34 ptmRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    const __m128                        mLhs0 = pxM0->m_avRow[0].m_mData;
    const __m128                        mLhs1 = pxM0->m_avRow[1].m_mData;
    const __m128                        mLhs2 = pxM0->m_avRow[2].m_mData;

    const __m128                        mRhs0 = pxM1->m_avRow[0].m_mData;
    const __m128                        mRhs1 = pxM1->m_avRow[1].m_mData;
    const __m128                        mRhs2 = pxM1->m_avRow[2].m_mData;

    const __m128                        mLane3Mask = KTgV_000F.m_f32_v04.m_mData;

    /* Immediates 0x00 / 0x55 / 0xAA / 0xFF replicate lane 0 / 1 / 2 / 3 of the source across all four lanes. */

    /* Result row 0: (lane0 * rhs0 + lane1 * rhs1) + (lane2 * rhs2 + masked lane3). */
    const __m128                        mRow0_Lo = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( mLhs0, mLhs0, 0x00 ), mRhs0 ),
                                                               _mm_mul_ps( _mm_shuffle_ps( mLhs0, mLhs0, 0x55 ), mRhs1 ) );
    const __m128                        mRow0_Hi = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( mLhs0, mLhs0, 0xAA ), mRhs2 ),
                                                               _mm_and_ps( _mm_shuffle_ps( mLhs0, mLhs0, 0xFF ), mLane3Mask ) );

    /* Result row 1. */
    const __m128                        mRow1_Lo = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( mLhs1, mLhs1, 0x00 ), mRhs0 ),
                                                               _mm_mul_ps( _mm_shuffle_ps( mLhs1, mLhs1, 0x55 ), mRhs1 ) );
    const __m128                        mRow1_Hi = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( mLhs1, mLhs1, 0xAA ), mRhs2 ),
                                                               _mm_and_ps( _mm_shuffle_ps( mLhs1, mLhs1, 0xFF ), mLane3Mask ) );

    /* Result row 2. */
    const __m128                        mRow2_Lo = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( mLhs2, mLhs2, 0x00 ), mRhs0 ),
                                                               _mm_mul_ps( _mm_shuffle_ps( mLhs2, mLhs2, 0x55 ), mRhs1 ) );
    const __m128                        mRow2_Hi = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( mLhs2, mLhs2, 0xAA ), mRhs2 ),
                                                               _mm_and_ps( _mm_shuffle_ps( mLhs2, mLhs2, 0xFF ), mLane3Mask ) );

    ptmRet->m_avRow[0].m_mData = _mm_add_ps( mRow0_Lo, mRow0_Hi );
    ptmRet->m_avRow[1].m_mData = _mm_add_ps( mRow1_Lo, mRow1_Hi );
    ptmRet->m_avRow[2].m_mData = _mm_add_ps( mRow2_Lo, mRow2_Hi );
}


/* ---- M_INV_DET_F32_34_IMPL ------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/*  Writes to ptmRet the inverse of the transform held in the three rows of pxM1, using a determinant supplied by the caller.

    ptmRet  Receives the three result rows.
    tvDet   Determinant to divide by.  Only the lanes passed by KTgV_FFF0 are used; the value is not tested for zero before the divisions.
    pxM1    Matrix to invert.  All three rows are loaded before anything is stored to ptmRet.

    Lane notes below assume _MM_PERM( a, b, c, d ) selects lanes a, b of the first operand and lanes c, d of the second operand for result lanes 0..3.  The macro is
    defined elsewhere; this reading is consistent with the lane comments on mi31..mi36 - confirm against its definition.  KTgV_FFF0 is presumably a mask that keeps
    lanes 0-2 and clears lane 3, and KTgV_UNIT_W_F32_04 presumably (0, 0, 0, 1) - confirm.  rN[k] denotes lane k of input row N. */
TgVOID M_INV_DET_F32_34_IMPL( PCU_TgMAT_F32_34 ptmRet, C_TgVEC_M_F32_04 tvDet, CPCU_TgMAT_F32_34 pxM1 )
{
    /*  NOTE(review): the previous comment here described building the four corner matrices of a 4x4 and not exploiting the known row of zeros.  The code below does
        not do that: it works directly on the three stored rows, building lanes 0-2 of each result row from 2x2 differences of products and then deriving lane 3 from
        lane 3 of the input rows. */

    const __m128                        miR0 = pxM1->m_avRow[0].m_mData;
    const __m128                        miR1 = pxM1->m_avRow[1].m_mData;
    const __m128                        miR2 = pxM1->m_avRow[2].m_mData;

    /* Divisor: the masked determinant plus the unit-W constant (presumably (det, det, det, 1), so that lane 3 is divided by one rather than by zero). */
    const __m128                        mi00 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, tvDet );
    const __m128                        miDet = _mm_add_ps( KTgV_UNIT_W_F32_04.m_mData, mi00 );

    /*  Result row 0, lanes 0-2, built from lanes 1 and 2 of the input rows:
        mi09 = ( r1[1]*r2[2] - r1[2]*r2[1],  r0[2]*r2[1] - r0[1]*r2[2],  r0[1]*r1[2] - r0[2]*r1[1],  r1[2]*r1[2] - r1[2]*r1[2] )
        mi10 = mi09 / miDet with lane 3 masked off. */
    const __m128                        mi01 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 1, 2, 1, 2 ) );
    const __m128                        mi02 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 1, 2, 1, 2 ) );
    const __m128                        mi03 = _mm_shuffle_ps( mi01, mi01, _MM_PERM( 2, 1, 0, 3 ) );
    const __m128                        mi04 = _mm_shuffle_ps( mi02, mi01, _MM_PERM( 1, 0, 3, 3 ) );
    const __m128                        mi05 = _mm_shuffle_ps( mi01, mi01, _MM_PERM( 3, 0, 1, 3 ) );
    const __m128                        mi06 = _mm_shuffle_ps( mi02, mi01, _MM_PERM( 0, 1, 2, 3 ) );
    const __m128                        mi07 = _mm_mul_ps( mi03, mi04 );
    const __m128                        mi08 = _mm_mul_ps( mi05, mi06 );
    const __m128                        mi09 = _mm_sub_ps( mi07, mi08 );
    const __m128                        mi10 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_div_ps( mi09, miDet ) );

    /*  Result row 1, lanes 0-2, built from lanes 0 and 2 of the input rows (the two products are swapped relative to the row 0 group, which flips the sign):
        mi19 = ( r1[2]*r2[0] - r1[0]*r2[2],  r0[0]*r2[2] - r0[2]*r2[0],  r0[2]*r1[0] - r0[0]*r1[2],  r1[2]*r1[2] - r1[2]*r1[2] )
        mi20 = mi19 / miDet with lane 3 masked off. */
    const __m128                        mi11 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 0, 2, 0, 2 ) );
    const __m128                        mi12 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 0, 2, 0, 2 ) );
    const __m128                        mi13 = _mm_shuffle_ps( mi11, mi11, _MM_PERM( 3, 0, 1, 3 ) );
    const __m128                        mi14 = _mm_shuffle_ps( mi12, mi11, _MM_PERM( 0, 1, 2, 3 ) );
    const __m128                        mi15 = _mm_shuffle_ps( mi11, mi11, _MM_PERM( 2, 1, 0, 3 ) );
    const __m128                        mi16 = _mm_shuffle_ps( mi12, mi11, _MM_PERM( 1, 0, 3, 3 ) );
    const __m128                        mi17 = _mm_mul_ps( mi13, mi14 );
    const __m128                        mi18 = _mm_mul_ps( mi15, mi16 );
    const __m128                        mi19 = _mm_sub_ps( mi17, mi18 );
    const __m128                        mi20 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_div_ps( mi19, miDet ) );

    /*  Result row 2, lanes 0-2, built from lanes 0 and 1 of the input rows (same shuffle pattern as the row 0 group):
        mi29 = ( r1[0]*r2[1] - r1[1]*r2[0],  r0[1]*r2[0] - r0[0]*r2[1],  r0[0]*r1[1] - r0[1]*r1[0],  r1[1]*r1[1] - r1[1]*r1[1] )
        mi30 = mi29 / miDet with lane 3 masked off. */
    const __m128                        mi21 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 0, 1, 0, 1 ) );
    const __m128                        mi22 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 0, 1, 0, 1 ) );
    const __m128                        mi23 = _mm_shuffle_ps( mi21, mi21, _MM_PERM( 2, 1, 0, 3 ) );
    const __m128                        mi24 = _mm_shuffle_ps( mi22, mi21, _MM_PERM( 1, 0, 3, 3 ) );
    const __m128                        mi25 = _mm_shuffle_ps( mi21, mi21, _MM_PERM( 3, 0, 1, 3 ) );
    const __m128                        mi26 = _mm_shuffle_ps( mi22, mi21, _MM_PERM( 0, 1, 2, 3 ) );
    const __m128                        mi27 = _mm_mul_ps( mi23, mi24 );
    const __m128                        mi28 = _mm_mul_ps( mi25, mi26 );
    const __m128                        mi29 = _mm_sub_ps( mi27, mi28 );
    const __m128                        mi30 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_div_ps( mi29, miDet ) );

    /* Regroup the three result rows (mi10, mi20, mi30) by lane; the pairs in the trailing comments are (result row, lane). */
    const __m128                        mi31 = _mm_shuffle_ps( mi10, mi20, _MM_PERM( 0, 1, 0, 1 ) ); /* 00, 01, 10, 11 */
    const __m128                        mi32 = _mm_shuffle_ps( mi10, mi20, _MM_PERM( 2, 3, 2, 3 ) ); /* 02, 03, 12, 13 */
    const __m128                        mi34 = _mm_shuffle_ps( mi31, mi30, _MM_PERM( 0, 2, 0, 0 ) ); /* 00, 10, 20, 20 */
    const __m128                        mi35 = _mm_shuffle_ps( mi31, mi30, _MM_PERM( 1, 3, 1, 1 ) ); /* 01, 11, 21, 21 */
    const __m128                        mi36 = _mm_shuffle_ps( mi32, mi30, _MM_PERM( 0, 2, 2, 2 ) ); /* 02, 12, 22, 22 */

    /* Lane 3 of each input row, replicated across all four lanes. */
    const __m128                        mi37 = _mm_shuffle_ps( miR0, miR0, _MM_PERM( 3, 3, 3, 3 ) );
    const __m128                        mi38 = _mm_shuffle_ps( miR1, miR1, _MM_PERM( 3, 3, 3, 3 ) );
    const __m128                        mi39 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 3, 3, 3, 3 ) );

    /* Scale each regrouped vector by the lane-3 value of the matching input row. */
    const __m128                        mi40 = _mm_mul_ps( mi37, mi34 );
    const __m128                        mi41 = _mm_mul_ps( mi38, mi35 );
    const __m128                        mi42 = _mm_mul_ps( mi39, mi36 );

    /* mi45 = -( mi40 + mi41 + mi42 ), evaluated as ( 0 - mi40 ) - ( mi41 + mi42 ), then masked with KTgV_FFF0.  Lane k of mi45 is the new lane-3 value of result row k. */
    const __m128                        mi43 = _mm_sub_ps( KTgV_ZERO_F32_04.m_mData, mi40 );
    const __m128                        mi44 = _mm_add_ps( mi41, mi42 );
    const __m128                        mi45 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_sub_ps( mi43, mi44 ) );

    /* Move lane 0 / 1 / 2 of mi45 into lane 3; the other three lanes are filled from lane 3 of mi45 (cleared by the mask above, presumably to zero). */
    const __m128                        mi46 = _mm_shuffle_ps( mi45, mi45, _MM_PERM( 3, 3, 3, 0 ) );
    const __m128                        mi47 = _mm_shuffle_ps( mi45, mi45, _MM_PERM( 3, 3, 3, 1 ) );
    const __m128                        mi48 = _mm_shuffle_ps( mi45, mi45, _MM_PERM( 3, 3, 3, 2 ) );

    /* Combine: lanes 0-2 come from mi10 / mi20 / mi30 (whose lane 3 was masked off), lane 3 from mi46 / mi47 / mi48. */
    ptmRet->m_avRow[0].m_mData = _mm_add_ps( mi46, mi10 );
    ptmRet->m_avRow[1].m_mData = _mm_add_ps( mi47, mi20 );
    ptmRet->m_avRow[2].m_mData = _mm_add_ps( mi48, mi30 );
}


/* ---- M_DET_F32_34_IMPL ----------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/*  Computes a single scalar from the three rows of pxM1 and returns it replicated in all four lanes.

    The scalar is the sum over lanes 0-2 of row0 * ( P(row1) * Q(row2) - Q(row1) * P(row2) ), where P and Q are the lane permutations _MM_PERM( 1, 2, 0, 3 ) and
    _MM_PERM( 2, 0, 1, 3 ).  Assuming _MM_PERM lists the source lane for result lanes 0..3 (macro defined elsewhere - confirm), this is row0 . ( row1 x row2 ), i.e.
    the determinant of the 3x3 formed by lanes 0-2 of the rows.  Lane 3 of the rows does not contribute to the result.

    pxM1    Matrix whose determinant is wanted. */
TgVEC_M_F32_04 M_DET_F32_34_IMPL( CPCU_TgMAT_F32_34 pxM1 )
{
    const __m128                        mRow0 = pxM1->m_avRow[0].m_mData;
    const __m128                        mRow1 = pxM1->m_avRow[1].m_mData;
    const __m128                        mRow2 = pxM1->m_avRow[2].m_mData;

    /* The two lane rotations of rows 1 and 2 (suffix = the _MM_PERM arguments used). */
    const __m128                        mRow1_1203 = _mm_shuffle_ps( mRow1, mRow1, _MM_PERM( 1, 2, 0, 3 ) );
    const __m128                        mRow2_2013 = _mm_shuffle_ps( mRow2, mRow2, _MM_PERM( 2, 0, 1, 3 ) );
    const __m128                        mRow1_2013 = _mm_shuffle_ps( mRow1, mRow1, _MM_PERM( 2, 0, 1, 3 ) );
    const __m128                        mRow2_1203 = _mm_shuffle_ps( mRow2, mRow2, _MM_PERM( 1, 2, 0, 3 ) );

    /* Per-lane difference of products, then weight each lane by row 0. */
    const __m128                        mCross = _mm_sub_ps( _mm_mul_ps( mRow1_1203, mRow2_2013 ), _mm_mul_ps( mRow1_2013, mRow2_1203 ) );
    const __m128                        mTerms = _mm_mul_ps( mRow0, mCross );

    /* Horizontal sum of lanes 0, 1 and 2 into lane 0: lane2 + ( lane0 + lane1 ). */
    const __m128                        mTerm1 = _mm_shuffle_ps( mTerms, mTerms, _MM_PERM( 1, 1, 1, 1 ) );
    const __m128                        mTerm2 = _mm_shuffle_ps( mTerms, mTerms, _MM_PERM( 2, 2, 2, 2 ) );
    const __m128                        mSum01 = _mm_add_ss( mTerms, mTerm1 );
    const __m128                        mSum = _mm_add_ss( mTerm2, mSum01 );

    /* Replicate lane 0 (the sum) across the whole vector. */
    return _mm_shuffle_ps( mSum, mSum, 0x00 );
}


#endif