#ifndef MAT_USER_FUNCTIONS_H_
#define MAT_USER_FUNCTIONS_H_

namespace mat
{
#ifndef __CUDA_ARCH__
// Host-side stand-ins, defined only when __CUDA_ARCH__ is not set, so that the
// templates below can call min, max, norm3d and rnorm3d by the same unqualified
// names in every compilation pass.
//
// Every argument and every expansion is parenthesised so the macros compose
// like function calls inside larger expressions (e.g. `x / rnorm3d(a, b, c)`
// divides by the reciprocal norm instead of dividing by 1.0 and then by the norm).
//
// NOTE: being preprocessor macros, these are not confined to namespace mat and
// arguments are substituted textually. No #include is visible in this header, so
// the includer is expected to provide std::min/std::max (<algorithm>) and
// sqrt/pow (<cmath>) before these expand.
#define min(a, b) std::min((a), (b))
#define max(a, b) std::max((a), (b))
#define norm3d(a, b, c) (sqrt(pow((a), 2) + pow((b), 2) + pow((c), 2)))
#define rnorm3d(a, b, c) (1.0 / sqrt(pow((a), 2) + pow((b), 2) + pow((c), 2)))
#endif

// Fallback value of pi for toolchains whose math header does not define M_PI.
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

// Absolute threshold on the largest 2x2 sub-determinant magnitude; at or below
// it calc_eigen_vectors takes its "nearly singular" fallback path.
#define EIGEN_VECTOR_TOL 1.0e-20

//-------------------------------------------------------------------------------------------------
//
// Load curve
//
//-------------------------------------------------------------------------------------------------

// Piecewise-linear evaluation of a tabulated curve.
//
//   curve_data : per-curve records of 10 entries each; entry [id * 10 + 1] is the
//                index of the curve's first point and entry [id * 10 + 2] is one
//                past the index of its last point.
//   curve_val  : interleaved point table; point k is stored as x at [2 * k] and
//                f at [2 * k + 1].
//   id         : selects the record in curve_data.
//   x          : abscissa at which the curve is evaluated.
//
// Returns the value interpolated on the segment that ends at the first point
// whose abscissa is >= x. Outside the tabulated range the nearest end segment is
// extrapolated linearly (on both sides). A curve with exactly one point returns
// that point's value; a curve with no points returns 0.0.
//
// The segment is always taken from inside [first, last], so x at or below the
// first abscissa no longer reads the table entry that precedes the curve.
template <typename T1, typename T2>
inline __host__ __device__ double load_curve(T1* curve_data, T2* curve_val, int id, double x)
{
    const int first = static_cast<int>(curve_data[id * 10 + 1]);
    const int last = static_cast<int>(curve_data[id * 10 + 2]) - 1;

    // Fewer than two points: there is no segment to interpolate on.
    if (last < first) {
        return 0.0;
    }
    if (last == first) {
        return curve_val[first * 2 + 1];
    }

    // Locate the first point with abscissa >= x (upper == last + 1 if there is none).
    int upper = first;
    for (; upper <= last; ++upper) {
        if (curve_val[upper * 2] >= x) break;
    }

    // Clamp so that both ends of the segment (upper - 1, upper) belong to this curve.
    if (upper > last) upper = last;
    if (upper <= first) upper = first + 1;
    const int lower = upper - 1;

    const T2 x0 = curve_val[lower * 2 + 0];
    const T2 f0 = curve_val[lower * 2 + 1];
    const T2 x1 = curve_val[upper * 2 + 0];
    const T2 f1 = curve_val[upper * 2 + 1];

    // A zero-width segment would divide by zero; take the upper point's value instead.
    const T2 xi = (x0 == x1) ? 1.0 : (x - x0) / (x1 - x0);

    return (1.0 - xi) * f0 + xi * f1;
}

//-------------------------------------------------------------------------------------------------
//
// Effective strain rate
//
//-------------------------------------------------------------------------------------------------

// Scalar measure of a six-component strain-rate array:
//   sqrt( 2/3 * (e0^2 + e1^2 + e2^2) + 2 * (e3^2 + e4^2 + e5^2) )
// where ek = strain_rate[k].
//
// NOTE(review): presumably components 0-2 are the normal terms and 3-5 the shear
// terms, in the (11, 22, 33, 12, 23, 31) order used by calc_eigen_values; whether
// the factor 2 assumes tensor or engineering shear is not visible here - confirm
// against the callers.
template <typename T>
inline __host__ __device__ T mat_effective_strain_rate(T* strain_rate)
{
    const T& e0 = strain_rate[0];
    const T& e1 = strain_rate[1];
    const T& e2 = strain_rate[2];
    const T& e3 = strain_rate[3];
    const T& e4 = strain_rate[4];
    const T& e5 = strain_rate[5];

    return sqrt(2.0 / 3.0 * (pow(e0, 2) + pow(e1, 2) + pow(e2, 2)) + 2.0 * (pow(e3, 2) + pow(e4, 2) + pow(e5, 2)));
}

//-------------------------------------------------------------------------------------------------
//
// Effective stress (von Mises)
//
//-------------------------------------------------------------------------------------------------

// Scalar effective stress of a six-component stress array:
//   sqrt( 1.5 * (s0^2 + s1^2 + s2^2) + 3 * (s3^2 + s4^2 + s5^2) )
// where sk = stress[k].
//
// NOTE(review): no mean-stress term is removed here, so for the result to be the
// von Mises stress the caller presumably passes a deviatoric stress - confirm.
template <typename T>
inline __host__ __device__ T mat_effective_stress(T* stress)
{
    const T& s0 = stress[0];
    const T& s1 = stress[1];
    const T& s2 = stress[2];
    const T& s3 = stress[3];
    const T& s4 = stress[4];
    const T& s5 = stress[5];

    return sqrt(1.5 * (pow(s0, 2) + pow(s1, 2) + pow(s2, 2)) + 3.0 * (pow(s3, 2) + pow(s4, 2) + pow(s5, 2)));
}

//-------------------------------------------------------------------------------------------------
//
// von Mises yield criterion 1
//
//-------------------------------------------------------------------------------------------------

// Yield check, step 1.
//
// Stores mat_effective_stress(stress) in *sig_eff and compares it with the yield
// stress sigy0, using a relative margin of 1e-5:
//   - above the margin: *deps = (*sig_eff - sigy0) / shear / 3 and *yield = 1;
//   - otherwise:        *yield = 0 and *deps is left untouched.
template <typename T>
inline __host__ __device__ void mat_yield_von_mises_1(T shear, T sigy0, T* sig_eff, T* stress, T* deps, int* yield)
{
    const T trial = mat_effective_stress(stress);
    *sig_eff = trial;

    // Not (sufficiently) above the yield stress: report no yielding and stop.
    if (!(trial > 1.00001 * sigy0)) {
        *yield = 0;
        return;
    }

    const T excess = trial - sigy0;
    *deps = excess / shear / 3.0;
    *yield = 1;
}

//-------------------------------------------------------------------------------------------------
//
// von Mises yield criterion 2
//
//-------------------------------------------------------------------------------------------------

// Yield update, step 2: scales the stress back after a yield excess was found.
//
//   shear   : not used by this function (kept for interface compatibility).
//   sigy0   : yield stress the excess is measured from.
//   sigy1   : second yield stress; only the difference sigy1 - sigy0 enters.
//   sig_eff : in: current effective stress; out: scaled by the same factor as stress.
//   stress  : six components, each multiplied by the scaling factor.
//   deps    : in: strain increment that is scaled into *depsp.
//   epsp    : in/out: incremented by *depsp.
//   depsp   : out: xi * (*deps).
//
// With dsig = *sig_eff - sigy0, the fraction xi = dsig / max(dsig / 2, sigy1 - sigy0 + dsig)
// and the scaling factor is (*sig_eff - xi * dsig) / *sig_eff.
//
// No guards are applied: the caller must ensure *sig_eff != 0 and a non-zero denominator.
template <typename T>
inline __host__ __device__ void mat_yield_von_mises_2(T shear, T sigy0, T sigy1, T* sig_eff, T* stress,
                                                      T* deps, T* epsp, T* depsp)
{
    const T dsig = *sig_eff - sigy0;

    // Both candidates are brought to type T before they are compared, so the
    // selection does not depend on a mixed double/T max overload being available
    // (the host std::max cannot deduce one type from (double, float)).
    const T half_excess = static_cast<T>(0.5 * dsig);
    const T hardening = sigy1 - sigy0 + dsig;
    const T denom = (half_excess < hardening) ? hardening : half_excess;

    const T xi = dsig / denom;
    const T fac = (*sig_eff - xi * dsig) / *sig_eff;

    *depsp = xi * (*deps);
    *epsp += *depsp;

    for (int i = 0; i < 6; i++) {
        stress[i] *= fac;
    }

    *sig_eff *= fac;
}

//-------------------------------------------------------------------------------------------------
//
// Cross product
//
//-------------------------------------------------------------------------------------------------

// Cross product c = a x b for vectors passed component by component.
// The inputs are taken by value, so the output pointers may refer to any storage.
template <typename T>
inline __host__ __device__ void cross_product(T a1, T a2, T a3, T b1, T b2, T b3, T* c1, T* c2, T* c3)
{
    const T x = a2 * b3 - a3 * b2;
    const T y = a3 * b1 - a1 * b3;
    const T z = a1 * b2 - a2 * b1;

    *c1 = x;
    *c2 = y;
    *c3 = z;
}

// Cross product c = a x b for three-component arrays; forwards to the
// component-wise overload. The components of a and b are read before anything is
// written to c.
template <typename T>
inline __host__ __device__ void cross_product(T* a, T* b, T* c)
{
    T* const cx = c;
    T* const cy = c + 1;
    T* const cz = c + 2;

    cross_product(a[0], a[1], a[2], b[0], b[1], b[2], cx, cy, cz);
}

//-------------------------------------------------------------------------------------------------
//
// Eigen values
//
//-------------------------------------------------------------------------------------------------

// Eigenvalues of the symmetric 3x3 tensor
//   | s11 s12 s31 |
//   | s12 s22 s23 |
//   | s31 s23 s33 |
// from the trigonometric solution of its characteristic cubic
//   lambda^3 + c2 * lambda^2 + c1 * lambda + c0 = 0.
//
// Output order: *evalue1 is the largest, *evalue2 the smallest and *evalue3 the
// intermediate value. When q is not below -1e-50 (the three roots coincide to
// working precision) all three outputs equal -c2 / 3, the mean of the diagonal.
template <typename T>
inline __host__ __device__ void calc_eigen_values(T s11, T s22, T s33, T s12, T s23, T s31,
                                                  T* evalue1, T* evalue2, T* evalue3)
{
    const T half_sqrt3 = sqrt(3.0) / 2.0;

    // Squares of the off-diagonal components
    const T s12_sq = pow(s12, 2);
    const T s23_sq = pow(s23, 2);
    const T s31_sq = pow(s31, 2);

    // Coefficients of the characteristic equation: constant, linear, quadratic
    const T c0 = -s11 * s22 * s33 + s11 * s23_sq + s12_sq * s33 + s31_sq * s22 - 2.0 * s12 * s23 * s31;
    const T c1 = -s12_sq - s23_sq - s31_sq + s22 * s33 + s11 * s22 + s11 * s33;
    const T c2 = -(s11 + s22 + s33);

    // q is clamped to be non-positive so that sqrt(-q) below stays real
    const T q = min(0.0, c1 / 3.0 - pow(c2, 2) / 9.0);
    const T r = c1 * c2 / 6.0 - c0 / 2.0 - pow(c2, 3) / 27.0;
    const T shift = -c2 / 3.0;

    T cos_part = 0.0;
    T sin_part = 0.0;

    if (q < -1.0e-50) {
        // Clamp the acos argument to [-1, 1] against round-off
        const T ratio = max(-1.0, min(1.0, r / sqrt(-pow(q, 3))));
        const T theta = acos(ratio);
        const T amplitude = 2.0 * sqrt(-q);
        cos_part = amplitude * cos(theta / 3.0);
        sin_part = amplitude * sin(theta / 3.0);
    }

    *evalue1 = cos_part + shift;
    *evalue2 = -cos_part / 2.0 - half_sqrt3 * sin_part + shift;
    *evalue3 = -cos_part / 2.0 + half_sqrt3 * sin_part + shift;
}

// Array form: s holds the six tensor components in the order
// (s11, s22, s33, s12, s23, s31); the three eigenvalues are written to eval[0..2]
// in the order produced by the component-wise overload.
template <typename T>
inline __host__ __device__ void calc_eigen_values(T* s, T* eval)
{
    T* const out1 = eval;
    T* const out2 = eval + 1;
    T* const out3 = eval + 2;

    calc_eigen_values<T>(s[0], s[1], s[2], s[3], s[4], s[5], out1, out2, out3);
}

//-------------------------------------------------------------------------------------------------
//
// Eigen vectors
//
//-------------------------------------------------------------------------------------------------

// Eigenvectors of the symmetric tensor (s11, s22, s33, s12, s23, s31) for
// eigenvalues supplied by the caller.
//
// Outputs: (evec1k, evec2k, evec3k) receive the three components of vector k.
//   - vector 1 is built for evalue1;
//   - vector 2 is built for evalue2 and then made orthogonal to vector 1;
//   - vector 3 is the cross product of vectors 1 and 2.
// evalue3 is only consulted by the all-equal test at the top.
//
// Each of the first two vectors is found by fixing one component to 1 and
// solving the remaining 2x2 system of (S - evalue * I) v = 0; the component is
// chosen by the largest 2x2 sub-determinant magnitude. If that magnitude does
// not exceed EIGEN_VECTOR_TOL a fallback direction is used instead.
template <typename T>
inline __host__ __device__ void calc_eigen_vectors(T s11, T s22, T s33, T s12, T s23, T s31,
                                                   T evalue1, T evalue2, T evalue3,
                                                   T* evec11, T* evec21, T* evec31,
                                                   T* evec12, T* evec22, T* evec32,
                                                   T* evec13, T* evec23, T* evec33)
{
    // All three eigenvalues exactly equal: any basis works, return the identity.
    if (evalue1 == evalue2 && evalue1 == evalue3) {
        *evec11 = *evec22 = *evec33 = 1.0;
        *evec21 = *evec31 = *evec12 = *evec32 = *evec13 = *evec23 = 0.0;
        return;
    }

    // Which component (1, 2 or 3) was fixed to 1 for the most recent vector built
    // from sub-determinants; 0 until one has been chosen.
    int j = 0;

    // Off-diagonal terms are unaffected by the eigenvalue shift.
    T a12 = s12;
    T a23 = s23;
    T a31 = s31;

    T evector11 = 0.0;
    T evector21 = 0.0;
    T evector31 = 0.0;
    T evector12 = 0.0;
    T evector22 = 0.0;
    T evector32 = 0.0;
    T evector13 = 0.0;
    T evector23 = 0.0;
    T evector33 = 0.0;

    // Find the first two eigenvectors (i == 1 uses evalue1, i == 2 uses evalue2)
    for (int i = 1; i < 3; i++) {
        T v1 = 0.0;
        T v2 = 0.0;
        T v3 = 0.0;

        // Diagonal of the shifted matrix S - evalue * I
        T evalue = (i == 1) ? evalue1 : evalue2;
        T a11 = s11 - evalue;
        T a22 = s22 - evalue;
        T a33 = s33 - evalue;

        // Compute sub determinants (principal 2x2 minors of the shifted matrix)
        T dd1 = a22 * a33 - pow(a23, 2);
        T dd2 = a11 * a33 - pow(a31, 2);
        T dd3 = a11 * a22 - pow(a12, 2);

        T ad1 = fabs(dd1);
        T ad2 = fabs(dd2);
        T ad3 = fabs(dd3);

        // For the second vector, exclude the component that was fixed for the
        // first one so that a different minor is selected.
        if (i == 2) {
            if (j == 1)
                ad1 = 0.0;
            else if (j == 2)
                ad2 = 0.0;
            else if (j == 3)
                ad3 = 0.0;
        }

        T dmax = max(ad1, max(ad2, ad3));

        // Choose non-zero component based on sub determinants: the selected
        // component is set to 1 and the other two follow from the two remaining
        // rows of the shifted system.
        if (dmax > EIGEN_VECTOR_TOL) {
            if (ad1 == dmax) {
                j = 1;
                v1 = 1.0;
                v2 = (a23 * a31 - a33 * a12) / dd1;
                v3 = (a23 * a12 - a22 * a31) / dd1;
            } else if (ad2 == dmax) {
                j = 2;
                v1 = (a31 * a23 - a33 * a12) / dd2;
                v2 = 1.0;
                v3 = (a31 * a12 - a11 * a23) / dd2;
            } else {
                j = 3;
                v1 = (a12 * a23 - a22 * a31) / dd3;
                v2 = (a12 * a31 - a11 * a23) / dd3;
                v3 = 1.0;
            }
        }
        // Nearly singular: no usable minor, fall back to a fixed construction
        else {
            if (i == 1) {
                // First vector defaults to the first coordinate axis
                j = 1;
                v1 = 1.0;
                v2 = 0.0;
                v3 = 0.0;
            } else {
                // Second vector: build a direction perpendicular to the first
                // vector from two of its components, picked by whether its
                // first component has the largest magnitude.
                T abs_w1 = fabs(evector11);
                T abs_w2 = fabs(evector21);
                T abs_w3 = fabs(evector31);
                T vmax = max(abs_w1, max(abs_w2, abs_w3));

                if (abs_w1 == vmax) {
                    v1 = -evector21;
                    v2 = evector11;
                    v3 = 0.0;
                } else {
                    v1 = 0.0;
                    v2 = evector31;
                    v3 = -evector21;
                }
            }
        }

        // Make vector 2 orthogonal to vector 1 (vector 1 is already normalised
        // at this point, so subtracting its projection is sufficient)
        if (i == 2) {
            T ddot = evector11 * v1 + evector21 * v2 + evector31 * v3;
            v1 -= ddot * evector11;
            v2 -= ddot * evector21;
            v3 -= ddot * evector31;
        }

        // Normalize eigenvector. Despite its name, vabs holds the reciprocal
        // length (the host rnorm3d macro expands to 1.0 / sqrt(...)).
        // NOTE(review): there is no guard for v being the zero vector here, e.g.
        // when the projection above removes all of v - confirm this cannot occur.
        T vabs = rnorm3d(v1, v2, v3);

        if (i == 1) {
            evector11 = v1 * vabs;
            evector21 = v2 * vabs;
            evector31 = v3 * vabs;
        } else {
            evector12 = v1 * vabs;
            evector22 = v2 * vabs;
            evector32 = v3 * vabs;
        }
    }

    // Cross product gives us the third vector
    cross_product(evector11, evector21, evector31, evector12, evector22, evector32, &evector13, &evector23, &evector33);

    // Store the eigen vectors
    *evec11 = evector11;
    *evec21 = evector21;
    *evec31 = evector31;
    *evec12 = evector12;
    *evec22 = evector22;
    *evec32 = evector32;
    *evec13 = evector13;
    *evec23 = evector23;
    *evec33 = evector33;
}

// Array form: s holds (s11, s22, s33, s12, s23, s31) and eval the three
// eigenvalues. The eigenvectors are written to evec one after another:
// evec[0..2] is the first vector, evec[3..5] the second and evec[6..8] the third.
template <typename T>
inline __host__ __device__ void calc_eigen_vectors(T* s, T* eval, T* evec)
{
    T* const first = evec;
    T* const second = evec + 3;
    T* const third = evec + 6;

    calc_eigen_vectors<T>(s[0], s[1], s[2], s[3], s[4], s[5], eval[0], eval[1], eval[2],
                          first, first + 1, first + 2,
                          second, second + 1, second + 2,
                          third, third + 1, third + 2);
}

//-------------------------------------------------------------------------------------------------
//
// Right Cauchy - Green deformation tensor
//
//-------------------------------------------------------------------------------------------------

// Symmetric product of a 3x3 matrix with itself, stored as six components.
//
// mat is read as three consecutive triples g0 = mat[0..2], g1 = mat[3..5] and
// g2 = mat[6..8]; the outputs are their dot products:
//   c[0] = g0.g0, c[1] = g1.g1, c[2] = g2.g2, c[3] = g0.g1, c[4] = g1.g2, c[5] = g2.g0.
//
// NOTE(review): whether this equals F^T F or F F^T depends on whether mat is
// stored by columns or by rows, which is not visible here - confirm with callers.
template <typename T>
inline __host__ __device__ void cauchy_green_tensor(T* mat, T* c)
{
    T* const g0 = mat;
    T* const g1 = mat + 3;
    T* const g2 = mat + 6;

    // Squared lengths of the three triples
    c[0] = pow(g0[0], 2) + pow(g0[1], 2) + pow(g0[2], 2);
    c[1] = pow(g1[0], 2) + pow(g1[1], 2) + pow(g1[2], 2);
    c[2] = pow(g2[0], 2) + pow(g2[1], 2) + pow(g2[2], 2);

    // Mixed dot products
    c[3] = g0[0] * g1[0] + g0[1] * g1[1] + g0[2] * g1[2];
    c[4] = g1[0] * g2[0] + g1[1] * g2[1] + g1[2] * g2[2];
    c[5] = g2[0] * g0[0] + g2[1] * g0[1] + g2[2] * g0[2];
}

//-------------------------------------------------------------------------------------------------
//
// Transform tensor
//
//-------------------------------------------------------------------------------------------------

// Transforms a symmetric tensor: b = R * S * R^T, with
//       | r11 r12 r13 |
//   R = | r21 r22 r23 |
//       | r31 r32 r33 |
// and S, b stored as six components in the order (11, 22, 33, 12, 23, 31).
//
// b[2] is not computed from the third row of R; it is obtained from the trace,
// b[2] = s[0] + s[1] + s[2] - b[0] - b[1], which equals the (3,3) component only
// when the transformation preserves the trace (R orthogonal).
//
// The input components are copied before any output is written, so the function
// may be called in place (b == s).
template <typename T>
inline __host__ __device__ void transform_tensor(T* s, T r11, T r21, T r31, T r12, T r22, T r32, T r13, T r23, T r33, T* b)
{
    // Snapshot of the input tensor (guards against b aliasing s)
    const T s0 = s[0];
    const T s1 = s[1];
    const T s2 = s[2];
    const T s3 = s[3];
    const T s4 = s[4];
    const T s5 = s[5];

    // Diagonal terms
    const T b0 = pow(r11, 2) * s0 + pow(r12, 2) * s1 + pow(r13, 2) * s2 +
                 2.0f * (r11 * r12 * s3 + r11 * r13 * s5 + r12 * r13 * s4);
    const T b1 = pow(r21, 2) * s0 + pow(r22, 2) * s1 + pow(r23, 2) * s2 +
                 2.0f * (r21 * r22 * s3 + r21 * r23 * s5 + r22 * r23 * s4);
    const T b2 = s0 + s1 + s2 - b0 - b1;

    // Off-diagonal terms: row p of R times S times row q of R
    const T b3 = r11 * (r21 * s0 + r22 * s3 + r23 * s5) + r12 * (r21 * s3 + r22 * s1 + r23 * s4) +
                 r13 * (r21 * s5 + r22 * s4 + r23 * s2);
    const T b4 = r21 * (r31 * s0 + r32 * s3 + r33 * s5) + r22 * (r31 * s3 + r32 * s1 + r33 * s4) +
                 r23 * (r31 * s5 + r32 * s4 + r33 * s2);
    const T b5 = r31 * (r11 * s0 + r12 * s3 + r13 * s5) + r32 * (r11 * s3 + r12 * s1 + r13 * s4) +
                 r33 * (r11 * s5 + r12 * s4 + r13 * s2);

    b[0] = b0;
    b[1] = b1;
    b[2] = b2;
    b[3] = b3;
    b[4] = b4;
    b[5] = b5;
}

// Array form of transform_tensor. r holds nine matrix entries; with
// transpose == 0 they are forwarded in storage order as
// (r11, r21, r31, r12, r22, r32, r13, r23, r33), otherwise the transposed
// matrix is forwarded by swapping the off-diagonal pairs.
template <typename T, int transpose>
inline __host__ __device__ void transform_tensor(T* s, T* r, T* b)
{
    if (transpose != 0) {
        // Use the transpose of the stored matrix
        transform_tensor<T>(s, r[0], r[3], r[6], r[1], r[4], r[7], r[2], r[5], r[8], b);
        return;
    }

    transform_tensor<T>(s, r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], b);
}

//-------------------------------------------------------------------------------------------------
//
// Invert a 3x3 matrix, returns the determinant
//
//-------------------------------------------------------------------------------------------------

// Inverts the 3x3 matrix stored in mat[0..8] by the cofactor (adjugate) method.
//
// Returns the determinant. When the determinant is exactly zero nothing is
// written to mat_inv; otherwise mat_inv[i] receives adjugate[i] / determinant,
// using the same element ordering as mat.
//
// All entries of mat are read before mat_inv is written.
template <typename T>
inline __host__ __device__ double invert_3x3(T* mat, T* mat_inv)
{
    // Adjugate entries, already arranged in the layout of the inverse
    double cof[9];
    cof[0] = mat[4] * mat[8] - mat[7] * mat[5];
    cof[1] = mat[7] * mat[2] - mat[1] * mat[8];
    cof[2] = mat[1] * mat[5] - mat[4] * mat[2];
    cof[3] = mat[5] * mat[6] - mat[8] * mat[3];
    cof[4] = mat[8] * mat[0] - mat[2] * mat[6];
    cof[5] = mat[2] * mat[3] - mat[5] * mat[0];
    cof[6] = mat[3] * mat[7] - mat[6] * mat[4];
    cof[7] = mat[6] * mat[1] - mat[0] * mat[7];
    cof[8] = mat[0] * mat[4] - mat[3] * mat[1];

    // Expansion of the determinant along entries 0, 3 and 6
    const double det = mat[0] * cof[0] + mat[3] * cof[1] + mat[6] * cof[2];

    // Singular matrix: report it through the return value and leave mat_inv alone
    if (det == 0) {
        return det;
    }

    const double det_inv = 1.0 / det;
    for (int k = 0; k != 9; ++k) {
        mat_inv[k] = cof[k] * det_inv;
    }

    return det;
}

//-------------------------------------------------------------------------------------------------
//
// Check if two vectors are pointing in the same direction within a specified tolerance
//
//-------------------------------------------------------------------------------------------------

// Tells whether the angle between two 3-component vectors is at most
// `tolerance`, given in degrees.
//
// Returns false when either vector is shorter than 1e-10, since no direction can
// be assigned to it. Vectors pointing in opposite directions are 180 degrees
// apart and therefore only count as aligned for tolerance >= 180.
template <typename T>
inline __host__ __device__ bool vectors_aligned(T* vec1, T* vec2, T tolerance)
{
    // Lengths of both vectors
    const T len1 = norm3d(vec1[0], vec1[1], vec1[2]);
    const T len2 = norm3d(vec2[0], vec2[1], vec2[2]);

    // A (near) zero vector would make the division below meaningless
    const bool degenerate = len1 < 1e-10 || len2 < 1e-10;
    if (degenerate) {
        return false;
    }

    // Cosine of the enclosed angle from the normalised dot product
    const T dot = vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2];
    T cosine = dot / (len1 * len2);

    // Round-off can push the cosine slightly outside [-1, 1]; clamp before acos
    cosine = (cosine > 1.0) ? 1.0 : ((cosine < -1.0) ? -1.0 : cosine);

    // Enclosed angle converted from radians to degrees
    const T degrees = acos(cosine) * 180.0 / M_PI;

    return degrees <= tolerance;
}
} // namespace mat

#endif // MAT_USER_FUNCTIONS_H_
