/* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
/*  »Project«   Teikitu Gaming System (TgS) (∂)
    »File«      TgS (CLANG-X86) Common - Math API [Vector] [M] [F32].inl
    »Author«    Andrew Aye (EMail: mailto:andrew.aye@gmail.com, Web: http://www.andrewaye.com)
    »Version«   4.51 / »GUID« A9981407-3EC9-42AF-8B6F-8BE6DD919615                                                                                                        */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
/*  Copyright: © 2002-2017, Andrew Aye.  All Rights Reserved.
    This software is free for non-commercial use.  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
    following conditions are met:
    Redistribution of source code must retain this copyright notice, this list of conditions and the following disclaimers.
    Redistribution in binary form must reproduce this copyright notice, this list of conditions and the following disclaimers in the documentation and other materials
    provided with the distribution.
    The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
    The intellectual property rights of the algorithms used reside with Andrew Aye.
    You may not use this software, in whole or in part, in support of any commercial product without the express written consent of the author.
    There is no warranty or other guarantee of fitness of this software for any purpose. It is provided solely "as is".                                                   */
/* =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
#if !defined(TGS_COMMON_MATH_API_VECTOR_M_F32_INL)
#define TGS_COMMON_MATH_API_VECTOR_M_F32_INL
#pragma once


/* == Common ============================================================================================================================================================ */

/* ---- MS_SETU_F32_04 -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
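/*  Unaligned load of four packed 32-bit floats from puiVal. */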
TgINLINE TgVEC_M_F32_04 MS_SETU_F32_04( CPCU_TgFLOAT32 puiVal )
{
    return (_mm_loadu_ps( puiVal ));
}


/* ---- MS_SETA_F32_04 -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
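/*  Aligned load of four packed 32-bit floats; puiVal must be 16-byte aligned. */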
TgINLINE TgVEC_M_F32_04 MS_SETA_F32_04( CPCU_TgFLOAT32 puiVal )
{
    return (_mm_load_ps( puiVal ));
}


/* ---- MS_SET1_F32_04 -------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
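/*  Broadcasts the scalar iVal to all four lanes. */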
TgINLINE TgVEC_M_F32_04 MS_SET1_F32_04( C_TgFLOAT32 iVal )
{
    return (_mm_set1_ps( iVal ));
}


/* ---- M_SET4_F32_04 --------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
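/*  Builds a vector with fX in lane 0 through fW in lane 3; _mm_set_ps takes its arguments from the highest lane down, hence the reversed argument order. */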
TgINLINE TgVEC_M_F32_04 M_SET4_F32_04( C_TgFLOAT32 fX, C_TgFLOAT32 fY, C_TgFLOAT32 fZ, C_TgFLOAT32 fW )
{
    return (_mm_set_ps( fW, fZ, fY, fX ));
}


/* ---- M_PERM_F32_04 --------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
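/*  Generic permute evaluated per lane: byte i of the low 32 bits of tuvMask controls output lane i; bits 1:0 select the source lane and KTgPERM_PARAM_SELECT_BIT selects
    between tvLeft and tvRight. */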
TgINLINE TgVEC_M_F32_04 M_PERM_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight, C_TgVEC_M_F32_04 tuvMask )
{
    __m128                              vX = { 0 };
    TgUINT32                            uiIndex;

    for (uiIndex = 0; uiIndex < 4; ++uiIndex)
    {
        C_TgUINT08                          byMask = ((CP_TgUINT08)(&tuvMask))[uiIndex];
        C_TgFLOAT32                         f0 = tvLeft[byMask & 0x3];
        C_TgFLOAT32                         f1 = tvRight[byMask & 0x3];

        vX[uiIndex] = 0 == (byMask & KTgPERM_PARAM_SELECT_BIT) ? f0 : f1;
    }

    return (vX);
}


/* ---- M_SEL_F32_04 ---------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
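/*  Bit-wise select: bits set in tuvMask take the corresponding bit from tvRight, clear bits take it from tvLeft. */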
TgINLINE TgVEC_M_F32_04 M_SEL_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight, C_TgVEC_M_F32_04 tuvMask )
{
    return (_mm_or_ps( _mm_and_ps( tvLeft, _mm_xor_ps( KTgV_FFFF.m_f32_v04.m_mData, tuvMask ) ), _mm_and_ps( tvRight, tuvMask ) ));
}


/* ---- M_AND_F32_04 ---------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_AND_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_and_ps( tvLeft, tvRight ));
}


/* ---- M_OR_F32_04 ----------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_OR_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_or_ps( tvLeft, tvRight ));
}


/* ---- M_XOR_F32_04 ---------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_XOR_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_xor_ps( tvLeft, tvRight ));
}


/* ---- M_MAX_F32_04 ---------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_MAX_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_max_ps( tvLeft, tvRight ));
}


/* ---- M_MIN_F32_04 ---------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_MIN_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_min_ps( tvLeft, tvRight ));
}


/* ---- M_CMP_EQ_F32_04 ------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_CMP_EQ_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_cmpeq_ps( tvLeft, tvRight ));
}


/* ---- M_CMP_NE_F32_04 ------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_CMP_NE_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_cmpneq_ps( tvLeft, tvRight ));
}


/* ---- M_CMP_GE_F32_04 ------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_CMP_GE_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_cmpge_ps( tvLeft, tvRight ));
}


/* ---- M_CMP_GT_F32_04 ------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_CMP_GT_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_cmpgt_ps( tvLeft, tvRight ));
}


/* ---- M_CMP_LE_F32_04 ------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_CMP_LE_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_cmple_ps( tvLeft, tvRight ));
}


/* ---- M_CMP_LT_F32_04 ------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_CMP_LT_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_cmplt_ps( tvLeft, tvRight ));
}


/* ---- V(M_ADD) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_ADD_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_add_ps( tvLeft, tvRight ));
}


/* ---- V(M_MAD) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
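/*  Multiply-add: returns (tvM0 * tvM1) + tvA0 per lane, computed as a separate multiply and add. */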
TgINLINE TgVEC_M_F32_04 M_MAD_F32_04( C_TgVEC_M_F32_04 tvM0, C_TgVEC_M_F32_04 tvM1, C_TgVEC_M_F32_04 tvA0 )
{
    return (_mm_add_ps( _mm_mul_ps( tvM0, tvM1 ), tvA0 ));
}


/* ---- V(M_NMS) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
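/*  Negative multiply-subtract: returns tvA0 - (tvM0 * tvM1) per lane. */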
TgINLINE TgVEC_M_F32_04 M_NMS_F32_04( C_TgVEC_M_F32_04 tvM0, C_TgVEC_M_F32_04 tvM1, C_TgVEC_M_F32_04 tvA0 )
{
    return (_mm_sub_ps( tvA0, _mm_mul_ps( tvM0, tvM1 ) ));
}


/* ---- V(M_DOT) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
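/*  Four-component dot product; the shuffle and add sequence sums the lane products and broadcasts the scalar result to all four lanes of the return value. */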
TgINLINE TgVEC_M_F32_04 M_DOT_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    const __m128                        vX1 = _mm_mul_ps( tvLeft, tvRight );
    const __m128                        vX2 = _mm_shuffle_ps( tvRight, vX1, _MM_SHUFFLE( 1, 0, 0, 0 ) );
    const __m128                        vX3 = _mm_add_ps( vX2, vX1 );
    const __m128                        vX4 = _mm_shuffle_ps( vX1, vX3, _MM_SHUFFLE( 0, 3, 0, 0 ) );
    const __m128                        vX5 = _mm_add_ps( vX4, vX3 );

    return (_mm_shuffle_ps( vX5, vX5, _MM_SHUFFLE( 2, 2, 2, 2 ) ));
}


/* ---- V(M_MUL) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_MUL_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_mul_ps( tvLeft, tvRight ));
}


/* ---- V(M_DIV) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_DIV_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_div_ps( tvLeft, tvRight ));
}


/* ---- V(M_SUB) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_SUB_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvRight )
{
    return (_mm_sub_ps( tvLeft, tvRight ));
}


/* ---- V(M_LSQ) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
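/*  Squared length: dot product of the vector with itself, broadcast to all four lanes. */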
TgINLINE TgVEC_M_F32_04 M_LSQ_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    return (M_DOT_F32_04( tvLeft, tvLeft ));
}


/* ---- V(M_LEN) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
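/*  Four-component Euclidean length: the squared lanes are summed with shuffles and adds, square-rooted, and the result is broadcast to all four lanes. */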
TgINLINE TgVEC_M_F32_04 M_LEN_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    const __m128                        vX1 = _mm_mul_ps( tvLeft, tvLeft );
    const __m128                        vX2 = _mm_shuffle_ps( vX1, vX1, _MM_SHUFFLE( 3, 2, 3, 2 ) );
    const __m128                        vX3 = _mm_add_ps( vX1, vX2 );
    const __m128                        vX4 = _mm_shuffle_ps( vX3, vX3, _MM_SHUFFLE( 1, 0, 0, 0 ) );
    const __m128                        vX5 = _mm_shuffle_ps( vX2, vX4, _MM_SHUFFLE( 3, 3, 0, 0 ) );
    const __m128                        vX6 = _mm_add_ps( vX4, vX5 );
    const __m128                        vX7 = _mm_shuffle_ps( vX6, vX6, _MM_SHUFFLE( 2, 2, 2, 2 ) );
    const __m128                        vX8 = __builtin_ia32_sqrtps( vX7 );

    return (vX8);
}


/* ---- V(M_NEG) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
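/*  Negates each lane by toggling its sign bit (XOR with the KTgV_SgnM sign-bit mask). */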
TgINLINE TgVEC_M_F32_04 M_NEG_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    return (_mm_xor_ps( KTgV_SgnM.m_f32_v04.m_mData, tvLeft ));
}


/* ---- V(M_SQRT) ------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_SQRT_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    return (__builtin_ia32_sqrtps( tvLeft ));
}


/* ---- V(M_RSQRT) ------------------------------------------------------------------------------------------------------------------------------------------------------ */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
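/*  Reciprocal square root computed as 1.0 / sqrt(x) per lane, trading speed for full precision over the _mm_rsqrt_ps approximation. */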
TgINLINE TgVEC_M_F32_04 M_RSQRT_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    const __m128                        vX1 = __builtin_ia32_sqrtps( tvLeft );
    const __m128                        vX2 = _mm_set1_ps( 1.0F );
    const __m128                        vX3 = _mm_div_ps( vX2, vX1 );

    return (vX3);
}


/* ---- V(M_NORM) ------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
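/*  Normalizes the four-component vector; lanes are forced to zero when the squared length overflows to infinity. */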
TgINLINE TgVEC_M_F32_04 M_NORM_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    const __m128                        vX1 = _mm_mul_ps( tvLeft, tvLeft );
    const __m128                        vX2 = _mm_shuffle_ps( vX1, vX1, _MM_SHUFFLE( 3, 2, 3, 2 ) );
    const __m128                        vX3 = _mm_add_ps( vX1, vX2 );
    const __m128                        vX4 = _mm_shuffle_ps( vX3, vX3, _MM_SHUFFLE( 1, 0, 0, 0 ) );
    const __m128                        vX5 = _mm_shuffle_ps( vX2, vX4, _MM_SHUFFLE( 3, 3, 0, 0 ) );
    const __m128                        vX6 = _mm_add_ps( vX4, vX5 );
    const __m128                        vX7 = _mm_shuffle_ps( vX6, vX6, _MM_SHUFFLE( 2, 2, 2, 2 ) );
    const __m128                        vX8 = __builtin_ia32_sqrtps( vX7 );
    const __m128                        vX9 = _mm_cmpneq_ps( vX7, KTgV_INF.m_f32_v04.m_mData );
    const __m128                        vXA = _mm_div_ps( tvLeft, vX8 );
    const __m128                        vXB = _mm_and_ps( vXA, vX9 );

    return (vXB);
}


/* ---- V(M_NORM_LEN) --------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
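/*  Normalizes the four-component vector and writes the length (broadcast to all four lanes) through ptvRet_Length; lanes are forced to zero when the squared length
    overflows to infinity. */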
TgINLINE TgVEC_M_F32_04 M_NORM_LEN_F32_04( PCU_TgVEC_M_F32_04 ptvRet_Length, C_TgVEC_M_F32_04 tvLeft )
{
    const __m128                        vX1 = _mm_mul_ps( tvLeft, tvLeft );
    const __m128                        vX2 = _mm_shuffle_ps( vX1, vX1, _MM_SHUFFLE( 3, 2, 3, 2 ) );
    const __m128                        vX3 = _mm_add_ps( vX1, vX2 );
    const __m128                        vX4 = _mm_shuffle_ps( vX3, vX3, _MM_SHUFFLE( 1, 0, 0, 0 ) );
    const __m128                        vX5 = _mm_shuffle_ps( vX2, vX4, _MM_SHUFFLE( 3, 3, 0, 0 ) );
    const __m128                        vX6 = _mm_add_ps( vX4, vX5 );
    const __m128                        vX7 = _mm_shuffle_ps( vX6, vX6, _MM_SHUFFLE( 2, 2, 2, 2 ) );
    const __m128                        vX8 = __builtin_ia32_sqrtps( vX7 );
    const __m128                        vX9 = _mm_cmpneq_ps( vX7, KTgV_INF.m_f32_v04.m_mData );
    const __m128                        vXA = _mm_div_ps( tvLeft, vX8 );
    const __m128                        vXB = _mm_and_ps( vXA, vX9 );

    *ptvRet_Length = vX8;

    return (vXB);
}


/* ---- V(M_FLR) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
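/*  Per-lane floor using the scalar tgPM_FLOOR_F32 routine. */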
TgINLINE TgVEC_M_F32_04 M_FLR_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    __m128                              vX1;

    vX1[0] = tgPM_FLOOR_F32( tvLeft[0] );
    vX1[1] = tgPM_FLOOR_F32( tvLeft[1] );
    vX1[2] = tgPM_FLOOR_F32( tvLeft[2] );
    vX1[3] = tgPM_FLOOR_F32( tvLeft[3] );

    return (vX1);
}


/* ---- V(M_CEL) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
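/*  Per-lane ceiling using the scalar tgPM_CEIL_F32 routine. */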
TgINLINE TgVEC_M_F32_04 M_CEL_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    __m128                              vX1;

    vX1[0] = tgPM_CEIL_F32( tvLeft[0] );
    vX1[1] = tgPM_CEIL_F32( tvLeft[1] );
    vX1[2] = tgPM_CEIL_F32( tvLeft[2] );
    vX1[3] = tgPM_CEIL_F32( tvLeft[3] );

    return (vX1);
}


/* ---- V(M_CLP) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
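/*  Clamps each lane of vLeft to the range [vMin, vMax]. */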
TgINLINE TgVEC_M_F32_04 M_CLP_F32_04( C_TgVEC_M_F32_04 vLeft, C_TgVEC_M_F32_04 vMin, C_TgVEC_M_F32_04 vMax )
{
    return (_mm_max_ps(vMin, _mm_min_ps(vMax, vLeft)));
}


/* ---- V(M_SAT) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
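/*  Saturate: clamps each lane to the range [0, 1]. */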
TgINLINE TgVEC_M_F32_04 M_SAT_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    return (M_CLP_F32_04( tvLeft, KTgV_ZERO_F32_04.m_mData, KTgV_ONE_F32_04.m_mData ));
}


/* ---- V(M_NaN) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
TgINLINE TgVEC_M_F32_04 M_NaN_F32_04( C_TgVEC_M_F32_04 tvLeft )
{
    return (M_CMP_EQ_F32_04( M_AND_F32_04( tvLeft, KTgV_NaN.m_f32_v04.m_mData ), KTgV_NaN.m_f32_v04.m_mData ));
}


/* ---- V(M_BND) -------------------------------------------------------------------------------------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
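/*  Bound test: each result lane is all-ones when the corresponding lane of tvLeft lies within [-tvBound, +tvBound], and zero otherwise. */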
TgINLINE TgVEC_M_F32_04 M_BND_F32_04( C_TgVEC_M_F32_04 tvLeft, C_TgVEC_M_F32_04 tvBound )
{
    const __m128                        vX1 = _mm_sub_ps( KTgV_ZERO_F32_04.m_mData, tvBound );
    const __m128                        vX2 = _mm_cmpgt_ps( tvLeft, tvBound );
    const __m128                        vX3 = _mm_cmplt_ps( tvLeft, vX1 );

    return (M_XOR_F32_04( (_mm_or_ps( vX2, vX3 )), KTgV_FFFF.m_f32_v04.m_mData ));
}


/* ====================================================================================================================================================================== */
#endif