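/* Home | Resume | Blog | Teikitu  (appears to be web-page navigation text captured with this listing; not part of the source) */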


/* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
/*  »Project«   Teikitu Gaming System (TgS) (∂)
    »File«      TgS (CLANG-X86) Common - Math API [Matrix] [M] [F] [34].inl
    »Author«    Andrew Aye (EMail: mailto:andrew.aye@gmail.com, Web: http://www.andrewaye.com)
    »Version«   4.51 / »GUID« A9981407-3EC9-42AF-8B6F-8BE6DD919615                                                                                                        */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/*  Copyright: © 2002-2017, Andrew Aye.  All Rights Reserved.
    This software is free for non-commercial use.  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
    following conditions are met:
    Redistribution of source code must retain this copyright notice, this list of conditions and the following disclaimers.
    Redistribution in binary form must reproduce this copyright notice, this list of conditions and the following disclaimers in the documentation and other materials
    provided with the distribution.
    The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
    The intellectual property rights of the algorithms used reside with Andrew Aye.
    You may not use this software, in whole or in part, in support of any commercial product without the express written consent of the author.
    There is no warranty or other guarantee of fitness of this software for any purpose. It is provided solely "as is".                                                   */
/* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
#if !defined(TGS_COMMON_MATH_API_MATRIX_M_F_34_INL)
#define TGS_COMMON_MATH_API_MATRIX_M_F_34_INL
#pragma once


/* == Common ============================================================================================================================================================ */

/* ---- M(M_CLR) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Clears the matrix at pxM0 by setting every byte of the pointed-to object to zero with memset.
   The byte count is taken from the pointed-to object (assumes PCU_TgMAT_F32_34 points at a TgMAT_F32_34). */
TgINLINE TgVOID M_CLR_F32_34( PCU_TgMAT_F32_34 pxM0 )
{
    memset( pxM0, 0, sizeof( *pxM0 ) );
}


/* ---- M(M_MAX) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_MAX_F32_04 and stores the
   result in the same row of pxRet.  Presumably a per-element maximum - see M_MAX_F32_04. */
TgINLINE TgVOID M_MAX_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_MAX_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- M(M_MIN) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_MIN_F32_04 and stores the
   result in the same row of pxRet.  Presumably a per-element minimum - see M_MIN_F32_04. */
TgINLINE TgVOID M_MIN_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_MIN_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- M(M_AND) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_AND_F32_04 and stores the
   result in the same row of pxRet.  Presumably a bitwise AND of the row data - see M_AND_F32_04. */
TgINLINE TgVOID M_AND_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_AND_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- M(M_OR) --------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_OR_F32_04 and stores the
   result in the same row of pxRet.  Presumably a bitwise OR of the row data - see M_OR_F32_04. */
TgINLINE TgVOID M_OR_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_OR_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- M(M_XOR) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_XOR_F32_04 and stores the
   result in the same row of pxRet.  Presumably a bitwise XOR of the row data - see M_XOR_F32_04. */
TgINLINE TgVOID M_XOR_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_XOR_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_ADD) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_ADD_F32_04 and stores the
   result in the same row of pxRet.  Presumably a per-element sum (pxM0 + pxM1) - see M_ADD_F32_04. */
TgINLINE TgVOID M_ADD_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_ADD_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_MUL) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 and pxM1 to M_MUL_F32_04 and stores the
   result in the same row of pxRet.  This is a row-by-row operation, not a matrix product; presumably a
   per-element multiply - see M_MUL_F32_04. */
TgINLINE TgVOID M_MUL_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_MUL_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_MUL_SM) ----------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows of pxM1, calls M_MUL_F32_04 with vS as the first operand and the row as the
   second, storing the result in the same row of pxRet.  The same vS is used unchanged for every row. */
TgINLINE TgVOID M_MUL_SM_F32_34( PCU_TgMAT_F32_34 pxRet, C_TgVEC_M_F32_04 vS, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_MUL_F32_04( vS, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_MUL_MS) ----------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows of pxM0, calls M_MUL_F32_04 with the row as the first operand and vS as the
   second, storing the result in the same row of pxRet.  The same vS is used unchanged for every row. */
TgINLINE TgVOID M_MUL_MS_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vS )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_MUL_F32_04( pxM0->m_avRow[iRow].m_mData, vS );
    }
}


/* ---- V(M_DIV) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 (first operand) and pxM1 (second operand)
   to M_DIV_F32_04 and stores the result in the same row of pxRet.  Presumably a per-element pxM0 / pxM1 -
   see M_DIV_F32_04.  No check for zero divisors is made here. */
TgINLINE TgVOID M_DIV_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_DIV_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_DIV_SM) ----------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows of pxM1, calls M_DIV_F32_04 with vS as the first operand and the row as the
   second, storing the result in the same row of pxRet.  Presumably vS / row per element - see M_DIV_F32_04.
   No check for zero divisors is made here. */
TgINLINE TgVOID M_DIV_SM_F32_34( PCU_TgMAT_F32_34 pxRet, C_TgVEC_M_F32_04 vS, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_DIV_F32_04( vS, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_DIV_MS) ----------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows of pxM0, calls M_DIV_F32_04 with the row as the first operand and vS as the
   second, storing the result in the same row of pxRet.  Presumably row / vS per element - see M_DIV_F32_04.
   No check for zero divisors is made here. */
TgINLINE TgVOID M_DIV_MS_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vS )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_DIV_F32_04( pxM0->m_avRow[iRow].m_mData, vS );
    }
}


/* ---- V(M_SUB) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows, passes the matching rows of pxM0 (first operand) and pxM1 (second operand)
   to M_SUB_F32_04 and stores the result in the same row of pxRet.  Presumably a per-element pxM0 - pxM1 -
   see M_SUB_F32_04. */
TgINLINE TgVOID M_SUB_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_SUB_F32_04( pxM0->m_avRow[iRow].m_mData, pxM1->m_avRow[iRow].m_mData );
    }
}


/* ---- V(M_NEG) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* For each of the three rows of pxM0, calls M_NEG_F32_04 on the row and stores the result in the same row
   of pxRet.  Presumably a per-element negation - see M_NEG_F32_04. */
TgINLINE TgVOID M_NEG_F32_34( PCU_TgMAT_F32_34 pxRet, CPCU_TgMAT_F32_34 pxM0 )
{
    int                                 iRow;

    /* Rows are processed in ascending order: 0, 1, 2. */
    for (iRow = 0; iRow < 3; ++iRow)
    {
        pxRet->m_avRow[iRow].m_mData = M_NEG_F32_04( pxM0->m_avRow[iRow].m_mData );
    }
}


/* ---- M(M_CLI) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Overwrites the three rows of pxM0 with the constants KTgV_UNIT_X_F32_04, KTgV_UNIT_Y_F32_04 and
   KTgV_UNIT_Z_F32_04 (rows 0, 1 and 2 respectively).  Presumably this yields the identity transform -
   confirm against the definitions of the unit-vector constants. */
TgINLINE TgVOID M_CLI_F32_34( PCU_TgMAT_F32_34 pxM0 )
{
    const __m128                        mAxisX = KTgV_UNIT_X_F32_04.m_mData;
    const __m128                        mAxisY = KTgV_UNIT_Y_F32_04.m_mData;
    const __m128                        mAxisZ = KTgV_UNIT_Z_F32_04.m_mData;

    pxM0->m_avRow[0].m_mData = mAxisX;
    pxM0->m_avRow[1].m_mData = mAxisY;
    pxM0->m_avRow[2].m_mData = mAxisZ;
}


/* ---- M(M_SET_T) ------------------------------------------------------------------------------------------------------------------------------------------------------ */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Rebuilds each of the three rows of pxM0 as the sum of two parts: the existing row ANDed with the
   KTgV_FFF0 mask, and a vector shuffled together from zero and the KTgV_FFF0-masked vS0 (a different
   _MM_PERM selector is used for each row).
   NOTE(review): presumably this keeps the first three elements of every row and places the x/y/z of vS0
   in the fourth element of rows 0/1/2 (i.e. sets the translation) - confirm against the definitions of
   KTgV_FFF0 and _MM_PERM. */
TgINLINE TgVOID M_SET_T_F32_34( PCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vS0 )
{
    const __m128                        mMask = KTgV_FFF0.m_f32_v04.m_mData;
    const __m128                        mZero = KTgV_ZERO_F32_04.m_mData;

    /* Every input is masked up front, before any row of the matrix is written back. */
    const __m128                        mKeep0 = _mm_and_ps( mMask, pxM0->m_avRow[0].m_mData );
    const __m128                        mKeep1 = _mm_and_ps( mMask, pxM0->m_avRow[1].m_mData );
    const __m128                        mKeep2 = _mm_and_ps( mMask, pxM0->m_avRow[2].m_mData );
    const __m128                        mSrc = _mm_and_ps( mMask, vS0 );

    /* Low half of each shuffle result comes from mZero, high half from the masked source vector. */
    pxM0->m_avRow[0].m_mData = _mm_add_ps( mKeep0, _mm_shuffle_ps( mZero, mSrc, _MM_PERM( 0, 0, 3, 0 ) ) );
    pxM0->m_avRow[1].m_mData = _mm_add_ps( mKeep1, _mm_shuffle_ps( mZero, mSrc, _MM_PERM( 0, 0, 3, 1 ) ) );
    pxM0->m_avRow[2].m_mData = _mm_add_ps( mKeep2, _mm_shuffle_ps( mZero, mSrc, _MM_PERM( 0, 0, 3, 2 ) ) );
}


/* ---- M(M_INIT_T) ----------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Writes all three rows of pxM0: row N becomes the unit-vector constant for that row (KTgV_UNIT_X/Y/Z)
   plus a vector shuffled together from zero and the KTgV_FFF0-masked vS0 (a different _MM_PERM selector
   is used for each row).  The previous contents of pxM0 are not read.
   NOTE(review): presumably the result is an identity rotation with the x/y/z of vS0 as the translation
   in the fourth element of rows 0/1/2 - confirm against the definitions of KTgV_FFF0 and _MM_PERM. */
TgINLINE TgVOID M_INIT_T_F32_34( PCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vS0 )
{
    const __m128                        mZero = KTgV_ZERO_F32_04.m_mData;
    const __m128                        mSrc = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, vS0 );

    /* Low half of each shuffle result comes from mZero, high half from the masked source vector. */
    const __m128                        mTail0 = _mm_shuffle_ps( mZero, mSrc, _MM_PERM( 0, 0, 3, 0 ) );
    const __m128                        mTail1 = _mm_shuffle_ps( mZero, mSrc, _MM_PERM( 0, 0, 3, 1 ) );
    const __m128                        mTail2 = _mm_shuffle_ps( mZero, mSrc, _MM_PERM( 0, 0, 3, 2 ) );

    pxM0->m_avRow[0].m_mData = _mm_add_ps( KTgV_UNIT_X_F32_04.m_mData, mTail0 );
    pxM0->m_avRow[1].m_mData = _mm_add_ps( KTgV_UNIT_Y_F32_04.m_mData, mTail1 );
    pxM0->m_avRow[2].m_mData = _mm_add_ps( KTgV_UNIT_Z_F32_04.m_mData, mTail2 );
}


/* ---- V(M_CAT) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Out-of-line worker for M_CAT_F32_34; its definition is not in this file. */
TgEXTN TgVOID M_CAT_F32_34_IMPL( PCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1, CPCU_TgMAT_F32_34 pxM2 );
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Inline entry point that forwards its three arguments, unchanged and in the same order, to
   M_CAT_F32_34_IMPL.  pxM0 is the writable argument; pxM1 and pxM2 are read-only.
   Presumably pxM0 receives the concatenation (product) of pxM1 and pxM2 - confirm in the implementation. */
TgINLINE TgVOID M_CAT_F32_34( PCU_TgMAT_F32_34 pxM0, CPCU_TgMAT_F32_34 pxM1, CPCU_TgMAT_F32_34 pxM2 )
{
    M_CAT_F32_34_IMPL( pxM0, pxM1, pxM2 );
}


/* ---- V(M_INV_DET) ---------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Out-of-line worker for M_INV_DET_F32_34; its definition is not in this file. */
TgEXTN TgVOID M_INV_DET_F32_34_IMPL( PCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 v0, CPCU_TgMAT_F32_34 pxM1 );
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Inline entry point that forwards its three arguments, unchanged and in the same order, to
   M_INV_DET_F32_34_IMPL.  pxM0 is the writable argument; v0 and pxM1 are inputs.
   Presumably pxM0 receives the inverse of pxM1 computed from the caller-supplied determinant v0 -
   confirm in the implementation. */
TgINLINE TgVOID M_INV_DET_F32_34( PCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 v0, CPCU_TgMAT_F32_34 pxM1 )
{
    M_INV_DET_F32_34_IMPL( pxM0, v0, pxM1 );
}


/* ---- V(M_INV) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Obtains a value from M_DET_F32_34( pxM1 ) and passes it, together with ptmRet and pxM1, to
   M_INV_DET_F32_34.  Presumably this writes the inverse of pxM1 into ptmRet.
   NOTE(review): M_DET_F32_34 is defined further down in this file; presumably a prototype is already
   visible from an including header - confirm. */
TgINLINE TgVOID M_INV_F32_34( PCU_TgMAT_F32_34 ptmRet, CPCU_TgMAT_F32_34 pxM1 )
{
    const TgVEC_M_F32_04                vDet = M_DET_F32_34( pxM1 );

    M_INV_DET_F32_34( ptmRet, vDet, pxM1 );
}


/* ---- V(M_DET) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Out-of-line worker for M_DET_F32_34; its definition is not in this file. */
TgEXTN TgVEC_M_F32_04 M_DET_F32_34_IMPL( CPCU_TgMAT_F32_34 pxM0 );
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Inline entry point that forwards pxM0 to M_DET_F32_34_IMPL and returns its result unchanged.
   Presumably the determinant of pxM0, delivered as a vector value - confirm in the implementation. */
TgINLINE TgVEC_M_F32_04 M_DET_F32_34( CPCU_TgMAT_F32_34 pxM0 )
{
    const TgVEC_M_F32_04                vResult = M_DET_F32_34_IMPL( pxM0 );

    return vResult;
}


/* ---- V(M_TX) --------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Transforms the four-element vector vX0 by the matrix pxM0, using KTgV_UNIT_W_F32_04 as a fourth row
   (presumably (0,0,0,1)).  The four rows are rearranged into four column vectors; each column is scaled
   by the matching broadcast element of vX0 and the four products are summed as (X + Y) + (Z + W).
   The original author noted that a faster formulation probably exists. */
TgINLINE TgVEC_M_F32_04 M_TX_F32_34( CPCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vX0 )
{
    const __m128                        mRow0 = pxM0->m_avRow[0].m_mData;
    const __m128                        mRow1 = pxM0->m_avRow[1].m_mData;
    const __m128                        mRow2 = pxM0->m_avRow[2].m_mData;
    const __m128                        mRow3 = KTgV_UNIT_W_F32_04.m_mData;

    /* Interleave the row pairs, then recombine halves so that mColN holds element N of rows 0..3. */
    const __m128                        mLo01 = _mm_unpacklo_ps( mRow0, mRow1 );
    const __m128                        mLo23 = _mm_unpacklo_ps( mRow2, mRow3 );
    const __m128                        mHi01 = _mm_unpackhi_ps( mRow0, mRow1 );
    const __m128                        mHi23 = _mm_unpackhi_ps( mRow2, mRow3 );
    const __m128                        mCol0 = _mm_movelh_ps( mLo01, mLo23 );
    const __m128                        mCol1 = _mm_movehl_ps( mLo23, mLo01 );
    const __m128                        mCol2 = _mm_movelh_ps( mHi01, mHi23 );
    const __m128                        mCol3 = _mm_movehl_ps( mHi23, mHi01 );

    /* Broadcast each element of vX0 across all lanes and scale the matching column. */
    const __m128                        mTermX = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0x00 ), mCol0 );
    const __m128                        mTermY = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0x55 ), mCol1 );
    const __m128                        mTermZ = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0xAA ), mCol2 );
    const __m128                        mTermW = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0xFF ), mCol3 );

    /* The pairing of the additions is kept fixed so the floating-point result does not change. */
    const __m128                        mSumXY = _mm_add_ps( mTermX, mTermY );
    const __m128                        mSumZW = _mm_add_ps( mTermZ, mTermW );

    return _mm_add_ps( mSumXY, mSumZW );
}


/* ---- M(M_TX_P) ------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Transforms vX0 by the matrix pxM0 as in a full four-element transform, except that the fourth column
   is added unscaled: the fourth element of vX0 is never read, which is equivalent to treating it as 1
   (presumably the "point" variant, so translation applies).  KTgV_UNIT_W_F32_04 serves as the fourth row.
   The sum is formed as (X + Y) + (Z + column3).
   The original author noted that a faster formulation probably exists. */
TgINLINE TgVEC_M_F32_04 M_TX_P_F32_34( CPCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vX0 )
{
    const __m128                        mRow0 = pxM0->m_avRow[0].m_mData;
    const __m128                        mRow1 = pxM0->m_avRow[1].m_mData;
    const __m128                        mRow2 = pxM0->m_avRow[2].m_mData;
    const __m128                        mRow3 = KTgV_UNIT_W_F32_04.m_mData;

    /* Interleave the row pairs, then recombine halves so that mColN holds element N of rows 0..3. */
    const __m128                        mLo01 = _mm_unpacklo_ps( mRow0, mRow1 );
    const __m128                        mLo23 = _mm_unpacklo_ps( mRow2, mRow3 );
    const __m128                        mHi01 = _mm_unpackhi_ps( mRow0, mRow1 );
    const __m128                        mHi23 = _mm_unpackhi_ps( mRow2, mRow3 );
    const __m128                        mCol0 = _mm_movelh_ps( mLo01, mLo23 );
    const __m128                        mCol1 = _mm_movehl_ps( mLo23, mLo01 );
    const __m128                        mCol2 = _mm_movelh_ps( mHi01, mHi23 );
    const __m128                        mCol3 = _mm_movehl_ps( mHi23, mHi01 );

    /* Broadcast the first three elements of vX0 and scale the matching columns. */
    const __m128                        mTermX = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0x00 ), mCol0 );
    const __m128                        mTermY = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0x55 ), mCol1 );
    const __m128                        mTermZ = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0xAA ), mCol2 );

    /* The pairing of the additions is kept fixed so the floating-point result does not change. */
    const __m128                        mSumXY = _mm_add_ps( mTermX, mTermY );
    const __m128                        mSumZT = _mm_add_ps( mTermZ, mCol3 );

    return _mm_add_ps( mSumXY, mSumZT );
}


/* ---- M(M_TX_V) ------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Transforms vX0 by the first three columns of the matrix pxM0 only: the fourth column is not formed and
   the fourth element of vX0 is never read, which is equivalent to treating it as 0 (presumably the
   "direction vector" variant, so translation does not apply).  KTgV_UNIT_W_F32_04 serves as the fourth
   row.  The sum is formed as (X + Y) + (Z + zero).
   The original author noted that a faster formulation probably exists. */
TgINLINE TgVEC_M_F32_04 M_TX_V_F32_34( CPCU_TgMAT_F32_34 pxM0, C_TgVEC_M_F32_04 vX0 )
{
    const __m128                        mRow0 = pxM0->m_avRow[0].m_mData;
    const __m128                        mRow1 = pxM0->m_avRow[1].m_mData;
    const __m128                        mRow2 = pxM0->m_avRow[2].m_mData;
    const __m128                        mRow3 = KTgV_UNIT_W_F32_04.m_mData;

    /* Interleave the row pairs, then recombine halves so that mColN holds element N of rows 0..3. */
    const __m128                        mLo01 = _mm_unpacklo_ps( mRow0, mRow1 );
    const __m128                        mLo23 = _mm_unpacklo_ps( mRow2, mRow3 );
    const __m128                        mHi01 = _mm_unpackhi_ps( mRow0, mRow1 );
    const __m128                        mHi23 = _mm_unpackhi_ps( mRow2, mRow3 );
    const __m128                        mCol0 = _mm_movelh_ps( mLo01, mLo23 );
    const __m128                        mCol1 = _mm_movehl_ps( mLo23, mLo01 );
    const __m128                        mCol2 = _mm_movelh_ps( mHi01, mHi23 );

    /* Broadcast the first three elements of vX0 and scale the matching columns. */
    const __m128                        mTermX = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0x00 ), mCol0 );
    const __m128                        mTermY = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0x55 ), mCol1 );
    const __m128                        mTermZ = _mm_mul_ps( _mm_shuffle_ps( vX0, vX0, 0xAA ), mCol2 );

    /* The explicit add of zero is retained so the arithmetic matches the established result exactly. */
    const __m128                        mSumXY = _mm_add_ps( mTermX, mTermY );
    const __m128                        mSumZ0 = _mm_add_ps( mTermZ, KTgV_ZERO_F32_04.m_mData );

    return _mm_add_ps( mSumXY, mSumZ0 );
}


/* ---- V(M_GET_COL_0) -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Returns column 0 of pxM0 as a four-element vector: element 0 of rows 0, 1 and 2, followed by
   element 0 of KTgV_UNIT_W_F32_04. */
TgINLINE TgVEC_M_F32_04 M_Get_Col_0_F32_34( CPCU_TgMAT_F32_34 pxM0 )
{
    /* Gather elements 0 and 1 of each row pair (0x44), then pick the even slots (0x88). */
    const __m128                        mPair01 = _mm_shuffle_ps( pxM0->m_avRow[0].m_mData, pxM0->m_avRow[1].m_mData, 0x44 );
    const __m128                        mPair2W = _mm_shuffle_ps( pxM0->m_avRow[2].m_mData, KTgV_UNIT_W_F32_04.m_mData, 0x44 );

    return _mm_shuffle_ps( mPair01, mPair2W, 0x88 );
}


/* ---- V(M_GET_COL_1) -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Returns column 1 of pxM0 as a four-element vector: element 1 of rows 0, 1 and 2, followed by
   element 1 of KTgV_UNIT_W_F32_04. */
TgINLINE TgVEC_M_F32_04 M_Get_Col_1_F32_34( CPCU_TgMAT_F32_34 pxM0 )
{
    /* Gather elements 0 and 1 of each row pair (0x44), then pick the odd slots (0xDD). */
    const __m128                        mPair01 = _mm_shuffle_ps( pxM0->m_avRow[0].m_mData, pxM0->m_avRow[1].m_mData, 0x44 );
    const __m128                        mPair2W = _mm_shuffle_ps( pxM0->m_avRow[2].m_mData, KTgV_UNIT_W_F32_04.m_mData, 0x44 );

    return _mm_shuffle_ps( mPair01, mPair2W, 0xDD );
}


/* ---- V(M_GET_COL_2) -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Returns column 2 of pxM0 as a four-element vector: element 2 of rows 0, 1 and 2, followed by
   element 2 of KTgV_UNIT_W_F32_04. */
TgINLINE TgVEC_M_F32_04 M_Get_Col_2_F32_34( CPCU_TgMAT_F32_34 pxM0 )
{
    /* Gather elements 2 and 3 of each row pair (0xEE), then pick the even slots (0x88). */
    const __m128                        mPair01 = _mm_shuffle_ps( pxM0->m_avRow[0].m_mData, pxM0->m_avRow[1].m_mData, 0xEE );
    const __m128                        mPair2W = _mm_shuffle_ps( pxM0->m_avRow[2].m_mData, KTgV_UNIT_W_F32_04.m_mData, 0xEE );

    return _mm_shuffle_ps( mPair01, mPair2W, 0x88 );
}


/* ---- V(M_GET_COL_3) -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Returns column 3 of pxM0 as a four-element vector: element 3 of rows 0, 1 and 2, followed by
   element 3 of KTgV_UNIT_W_F32_04. */
TgINLINE TgVEC_M_F32_04 M_Get_Col_3_F32_34( CPCU_TgMAT_F32_34 pxM0 )
{
    /* Gather elements 2 and 3 of each row pair (0xEE), then pick the odd slots (0xDD). */
    const __m128                        mPair01 = _mm_shuffle_ps( pxM0->m_avRow[0].m_mData, pxM0->m_avRow[1].m_mData, 0xEE );
    const __m128                        mPair2W = _mm_shuffle_ps( pxM0->m_avRow[2].m_mData, KTgV_UNIT_W_F32_04.m_mData, 0xEE );

    return _mm_shuffle_ps( mPair01, mPair2W, 0xDD );
}


/* ---- M(M_TR) --------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* Writes the transpose of pxM1 into the four-row matrix pxM0.  KTgV_UNIT_W_F32_04 is used as a fourth
   source row, so output row N holds element N of source rows 0, 1 and 2 followed by element N of
   KTgV_UNIT_W_F32_04.  All source rows are read before any output row is written. */
TgINLINE TgVOID M_TR_F32_34( PCU_TgMAT_F32_44 pxM0, CPCU_TgMAT_F32_34 pxM1 )
{
    const __m128                        mSrc0 = pxM1->m_avRow[0].m_mData;
    const __m128                        mSrc1 = pxM1->m_avRow[1].m_mData;
    const __m128                        mSrc2 = pxM1->m_avRow[2].m_mData;
    const __m128                        mSrc3 = KTgV_UNIT_W_F32_04.m_mData;

    /* 0x44 gathers elements 0,1 of both operands; 0xEE gathers elements 2,3 of both operands. */
    const __m128                        mLo01 = _mm_shuffle_ps( mSrc0, mSrc1, 0x44 );
    const __m128                        mHi01 = _mm_shuffle_ps( mSrc0, mSrc1, 0xEE );
    const __m128                        mLo23 = _mm_shuffle_ps( mSrc2, mSrc3, 0x44 );
    const __m128                        mHi23 = _mm_shuffle_ps( mSrc2, mSrc3, 0xEE );

    /* 0x88 picks the even slots of each gathered pair, 0xDD the odd slots. */
    pxM0->m_avRow[0].m_mData = _mm_shuffle_ps( mLo01, mLo23, 0x88 );
    pxM0->m_avRow[1].m_mData = _mm_shuffle_ps( mLo01, mLo23, 0xDD );
    pxM0->m_avRow[2].m_mData = _mm_shuffle_ps( mHi01, mHi23, 0x88 );
    pxM0->m_avRow[3].m_mData = _mm_shuffle_ps( mHi01, mHi23, 0xDD );
}


/* ====================================================================================================================================================================== */
#endif