/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::f_fmla;

// Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ), generated by Sollya
// with:
// > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|],
//                 [k/8, (k + 1)/8]);
// for k = 0..31 (inclusive, one row per interval, 32 rows in total).
//
// Each row stores its eight coefficients as raw `f64` bit patterns; `erff_gen`
// decodes them with `f64::from_bits`. Within a row the entries are ordered from
// the constant term up to the x^14 term, i.e. entry `i` multiplies x^(2 * i).
// `erff_gen` selects the row with `k = trunc(8 * |x|)`, which stays below 32
// because inputs with |x| >= 4 are handled before the table is consulted.
static COEFFS: [[u64; 8]; 32] = [
    [
        0x3ff20dd750429b6d,
        0xbfd812746b037753,
        0x3fbce2f219e8596a,
        0xbf9b82cdacb78fda,
        0x3f756479297dfda5,
        0xbf48b3ac5455ef02,
        0xbf7126fcac367e3b,
        0x3fb2d0bdb3ba4984,
    ],
    [
        0x3ff20dd750429b6d,
        0xbfd812746b0379a8,
        0x3fbce2f21a03cf2a,
        0xbf9b82ce30de083e,
        0x3f7565bcad3eb60f,
        0xbf4c02c66f659256,
        0x3f1f92f673385229,
        0xbeedef402648ae90,
    ],
    [
        0x3ff20dd750429b34,
        0xbfd812746b032dce,
        0x3fbce2f219d84aae,
        0xbf9b82ce22dcf139,
        0x3f7565b9efcd4af1,
        0xbf4c021f1af414bc,
        0x3f1f7c6d177eff82,
        0xbeec9e4410dcf865,
    ],
    [
        0x3ff20dd750426eab,
        0xbfd812746ae592c7,
        0x3fbce2f211525f14,
        0xbf9b82ccc125e63f,
        0x3f756596f261cfd3,
        0xbf4bfde1ff8eeecf,
        0x3f1f31a9d15dc5d8,
        0xbeea5a4362844b3c,
    ],
    [
        0x3ff20dd75039c705,
        0xbfd812746777e74d,
        0x3fbce2f17af98a1b,
        0xbf9b82be4b817cbe,
        0x3f7564bec2e2962e,
        0xbf4bee86f9da3558,
        0x3f1e9443689dc0cc,
        0xbee79c0f230805d8,
    ],
    [
        0x3ff20dd74f811211,
        0xbfd81274371a3e8f,
        0x3fbce2ec038262e5,
        0xbf9b8265b82c5e1f,
        0x3f75615a2e239267,
        0xbf4bc63ae023dceb,
        0x3f1d87c2102f7e06,
        0xbee49584bea41d62,
    ],
    [
        0x3ff20dd746d063e3,
        0xbfd812729a8a950f,
        0x3fbce2cb0a2df232,
        0xbf9b80eca1f51278,
        0x3f75572e26c46815,
        0xbf4b715e5638b65e,
        0x3f1bfbb195484968,
        0xbee177a565c15c52,
    ],
    [
        0x3ff20dd701b44486,
        0xbfd812691145f237,
        0x3fbce23a06b8cfd9,
        0xbf9b7c1dc7245288,
        0x3f753e92f7f397dd,
        0xbf4ad97cc4acf0b2,
        0x3f19f028b2b09b71,
        0xbedcdc4da08da8c1,
    ],
    [
        0x3ff20dd5715ac332,
        0xbfd8123e680bd0eb,
        0x3fbce0457aded691,
        0xbf9b6f52d52bed40,
        0x3f750c291b84414c,
        0xbf49ea246b1ad4a9,
        0x3f177654674e0ca0,
        0xbed737c11a1bcebb,
    ],
    [
        0x3ff20dce6593e114,
        0xbfd811a59c02eadc,
        0x3fbcdab53c7cd7d5,
        0xbf9b526d2e321eed,
        0x3f74b1d32cd8b994,
        0xbf48963143ec0a1e,
        0x3f14ad5700e4db91,
        0xbed231e100e43ef2,
    ],
    [
        0x3ff20db48bfd5a62,
        0xbfd80fdd84f9e308,
        0x3fbccd340d462983,
        0xbf9b196a29287680,
        0x3f74210c2c13a0f7,
        0xbf46dbdfb4ff71ae,
        0x3f11bca2d17fbd71,
        0xbecbca36f90c7cf5,
    ],
    [
        0x3ff20d64b2f8f508,
        0xbfd80b4d4f19fa8b,
        0x3fbcb088197262e3,
        0xbf9ab51fd02e5b99,
        0x3f734e1e5e81a632,
        0xbf44c66377b502ce,
        0x3f0d9ad25066213c,
        0xbec4b0df7dd0cfa1,
    ],
    [
        0x3ff20c8fc1243576,
        0xbfd8010cb2009e27,
        0x3fbc7a47e9299315,
        0xbf9a155be5683654,
        0x3f7233502694997b,
        0xbf426c94b7d81300,
        0x3f08094f1de25fb9,
        0xbebe0e3d776c6eef,
    ],
    [
        0x3ff20a9bd1611bc1,
        0xbfd7ec7fbce83f90,
        0x3fbc1d757d7317b7,
        0xbf992c160cd589f0,
        0x3f70d307269cc5c2,
        0xbf3fda5b0d2d1879,
        0x3f02fdd7b3b14a7f,
        0xbeb54eed4a26af5a,
    ],
    [
        0x3ff20682834f943d,
        0xbfd7c73f747bf5a9,
        0x3fbb8c2db4a9ffd1,
        0xbf97f0e4ffe989ec,
        0x3f6e7061eae4166e,
        0xbf3ad36e873fff2d,
        0x3efd39222396128e,
        0xbead83dacec5ea6b,
    ],
    [
        0x3ff1feb8d12676d7,
        0xbfd7898347284afe,
        0x3fbaba3466b34451,
        0xbf9663adc573e2f9,
        0x3f6ae99fb17c3e08,
        0xbf3602f950ad5535,
        0x3ef5e9717490609d,
        0xbea3fca107bbc8d5,
    ],
    [
        0x3ff1f12fe3c536fa,
        0xbfd72b1d1f22e6d3,
        0x3fb99fc0eed4a896,
        0xbf948db0a87bd8c6,
        0x3f673e368895aa61,
        0xbf319b35d5301fc8,
        0x3ef007987e4bb033,
        0xbe9a7edcd4c2dc70,
    ],
    [
        0x3ff1db7b0df84d5d,
        0xbfd6a4e4a41cde02,
        0x3fb83bbded16455d,
        0xbf92809b3b36977e,
        0x3f639c08bab44679,
        0xbf2b7b45a70ed119,
        0x3ee6e99b36410e7b,
        0xbe913619bb7ebc0c,
    ],
    [
        0x3ff1bb1c85c4a527,
        0xbfd5f23b99a249a3,
        0x3fb694c91fa0d12c,
        0xbf9053e1ce11c72d,
        0x3f602bf72c50ea78,
        0xbf24f478fb56cb02,
        0x3ee005f80ecbe213,
        0xbe85f2446bde7f5b,
    ],
    [
        0x3ff18dec3bd51f9d,
        0xbfd5123f58346186,
        0x3fb4b8a1ca536ab4,
        0xbf8c4243015cc723,
        0x3f5a1a8a01d351ef,
        0xbf1f466b34f1d86b,
        0x3ed5f835eea0bf6a,
        0xbe7b83165b939234,
    ],
    [
        0x3ff152804c3369f4,
        0xbfd4084cd4afd4bc,
        0x3fb2ba2e836e47aa,
        0xbf8800f2dfc6904b,
        0x3f54a6daf0669c59,
        0xbf16e326ab872317,
        0x3ecd9761a6a755a5,
        0xbe70fca33f9dd4b5,
    ],
    [
        0x3ff1087ad68356aa,
        0xbfd2dbb044707459,
        0x3fb0aea8ceaa0384,
        0xbf840b516d52b3d2,
        0x3f500c9e05f01d22,
        0xbf1076afb0dc0ff7,
        0x3ec39fadec400657,
        0xbe64b5761352e7e3,
    ],
    [
        0x3ff0b0a7a8ba4a22,
        0xbfd196990d22d4a1,
        0x3fad5551e6ac0c4d,
        0xbf807cce1770bd1a,
        0x3f4890347b8848bf,
        0xbf0757ec96750b6a,
        0x3eb9b258a1e06bce,
        0xbe58fc6d22da7572,
    ],
    [
        0x3ff04ce2be70fb47,
        0xbfd0449e4b0b9cac,
        0x3fa97f7424f4b0e7,
        0xbf7ac825439c42f4,
        0x3f428f5f65426dfb,
        0xbf005b699a90f90f,
        0x3eb0a888eecf4593,
        0xbe4deace2b32bb31,
    ],
    [
        0x3fefbf9fb0e11cc8,
        0xbfcde2640856545a,
        0x3fa5f5b1f47f8510,
        0xbf7588bc71eb41b9,
        0x3f3bc6a0a772f56d,
        0xbef6b9fad1f1657a,
        0x3ea573204ba66504,
        0xbe41d38065c94e44,
    ],
    [
        0x3feed8f18c99e031,
        0xbfcb4cb6acd903b4,
        0x3fa2c7f3dddd6fc1,
        0xbf713052067df4e0,
        0x3f34a5027444082f,
        0xbeef672bab0e2554,
        0x3e9b83c756348cc9,
        0xbe3534f1a1079499,
    ],
    [
        0x3fedebd33044166d,
        0xbfc8d7cd9053f7d8,
        0x3f9ff9957fb3d6e7,
        0xbf6b50be55de0f36,
        0x3f2e92c8ec53a628,
        0xbee5a4b88d508007,
        0x3e91a27737559e26,
        0xbe2942ae62cb2c14,
    ],
    [
        0x3fecfdbf0386f3bd,
        0xbfc68e33d93b0dc4,
        0x3f9b2683d58f53de,
        0xbf65a9174e70d26f,
        0x3f269ddd326d49cd,
        0xbeddd8f397a8219c,
        0x3e86a755016ad4dd,
        0xbe1e366e0139187d,
    ],
    [
        0x3fec132adb8d7464,
        0xbfc475a899f61b46,
        0x3f970a431397a77c,
        0xbf612e3d35beeee2,
        0x3f20c16b05738333,
        0xbed4a47f873e144e,
        0x3e7d3d494c698c02,
        0xbe12302c59547fe5,
    ],
    [
        0x3feb2f5fd05555e7,
        0xbfc28feefbe03ec7,
        0x3f93923acbb3a676,
        0xbf5b4ff793cd6358,
        0x3f18ea0eb8c913bc,
        0xbeccb31ec2baceb1,
        0x3e730011e7e80c04,
        0xbe0617710635cb1d,
    ],
    [
        0x3fea54853cd9593e,
        0xbfc0dbdbaea4dc8e,
        0x3f90a93e2c20a0fd,
        0xbf55c969ff401ea8,
        0x3f129e0cc64fe627,
        0xbec4160d8e9d3c2a,
        0x3e68e7b67594624a,
        0xbdfb1cf2c975b09b,
    ],
    [
        0x3fe983ceece09ff8,
        0xbfbeacc78f7a2d00,
        0x3f8c74418410655f,
        0xbf51756a050e441e,
        0x3f0bff3650f7f548,
        0xbebc56c0217d3ada,
        0x3e607b4918d0b489,
        0xbdf0d4be8c1c50f8,
    ],
];

/// Fused multiply-add provider for the `erff` kernel.
///
/// `erff_gen` is generic over this trait, so the same polynomial evaluation
/// can be instantiated with different `fma` implementations.
trait ErffBackend {
    /// Combines `x`, `y` and `z`; `erff_gen` calls this wherever it needs
    /// `x * y + z` while evaluating the polynomial.
    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
}

/// Backend that forwards `fma` to `crate::common::f_fmla`.
///
/// This is the backend used when no runtime-detected FMA path is selected.
// NOTE(review): `f_fmla` is defined outside this file; presumably it evaluates
// `x * y + z` (fused where the target supports it) — confirm in `crate::common`.
struct GenErffBackend {}

impl ErffBackend for GenErffBackend {
    /// Delegates to `f_fmla(x, y, z)`.
    #[inline(always)]
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f_fmla(x, y, z)
    }
}

/// Backend that calls `f64::mul_add` directly.
///
/// Only compiled on x86/x86_64, where it is instantiated from
/// `erff_fma_impl`, a function built with the `avx` and `fma` target features
/// enabled.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
struct FmaErffBackend {}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
impl ErffBackend for FmaErffBackend {
    /// Computes `x * y + z` with `f64::mul_add`.
    #[inline(always)]
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f64::mul_add(x, y, z)
    }
}

/// Shared `erff` kernel, generic over the fused multiply-add provider `B`.
///
/// Inputs with `|x| >= 4` (infinities and NaN included) are resolved up front.
/// Everything else is evaluated in `f64` as `x * P_k(x^2)`, where `P_k` is the
/// row of `COEFFS` picked by `k = trunc(8 * |x|)`, and the product is rounded
/// back to `f32`.
#[inline(always)]
fn erff_gen<B: ErffBackend>(x: f32, backend: B) -> f32 {
    // Bit pattern of 4.0f32: from here on the result is +-1 up to rounding.
    const SATURATION_BITS: u32 = 0x4080_0000;
    // Bit pattern of +infinity; anything above it (sign cleared) is a NaN.
    const INF_BITS: u32 = 0x7f80_0000;
    // Adding 3 to the biased exponent field scales a normal float by 8.
    const TIMES_EIGHT: u32 = 3 << 23;

    let magnitude_bits = x.to_bits() & 0x7fff_ffffu32;

    if magnitude_bits >= SATURATION_BITS {
        // Both tables are indexed by the sign bit: [non-negative, negative].
        static SIGNED_ONE: [f32; 2] = [1.0, -1.0];
        // -2^-25 and +2^-25: a nudge from +-1 back towards zero.
        static NUDGE: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)];

        if magnitude_bits > INF_BITS {
            // NaN: hand the input back unchanged.
            return x;
        }

        let side = usize::from(x.is_sign_negative());
        return if magnitude_bits == INF_BITS {
            SIGNED_ONE[side]
        } else {
            SIGNED_ONE[side] + NUDGE[side]
        };
    }

    // Polynomial approximation:
    //   erf(x) ~ x * (k0 + k1 * x^2 + k2 * x^4 + ... + k7 * x^14)
    let wide = x as f64;
    let sq = wide * wide;
    let quad = sq * sq;
    let oct = quad * quad;

    // SAFETY: `magnitude_bits` is below the bit pattern of 4.0, so after the
    // exponent bump the value is a finite, non-negative float below 32.0 and
    // its truncation fits in `usize`.
    let row = unsafe {
        f32::from_bits(magnitude_bits.wrapping_add(TIMES_EIGHT)).to_int_unchecked::<usize>()
    };
    let [k0, k1, k2, k3, k4, k5, k6, k7] = COEFFS[row].map(f64::from_bits);

    // Estrin-style evaluation: pair neighbouring terms in x^2, merge the pairs
    // with x^4, then merge the two halves with x^8.
    let pair_01 = backend.fma(sq, k1, k0);
    let pair_23 = backend.fma(sq, k3, k2);
    let pair_45 = backend.fma(sq, k5, k4);
    let pair_67 = backend.fma(sq, k7, k6);

    let lower_half = backend.fma(quad, pair_23, pair_01);
    let upper_half = backend.fma(quad, pair_67, pair_45);

    let poly = backend.fma(oct, upper_half, lower_half);
    (wide * poly) as f32
}

/// `erff` kernel instantiated with [`FmaErffBackend`] and compiled with the
/// `avx` and `fma` target features enabled.
///
/// # Safety
///
/// The caller must ensure the running CPU supports both `avx` and `fma`;
/// `f_erff` checks this with `is_x86_feature_detected!` before selecting this
/// function.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn erff_fma_impl(x: f32) -> f32 {
    erff_gen(x, FmaErffBackend {})
}

/// Computes the error function `erf(x)` for an `f32` argument.
///
/// Max ulp 0.5
///
/// On x86/x86_64 the first call probes the CPU once and caches either the
/// AVX+FMA kernel or the portable one; every other target always runs the
/// portable kernel.
#[inline]
pub fn f_erff(x: f32) -> f32 {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;

        type Kernel = unsafe fn(f32) -> f32;

        // Portable fallback used when AVX/FMA are not both available.
        fn portable_erff(x: f32) -> f32 {
            erff_gen(x, GenErffBackend {})
        }

        static KERNEL: OnceLock<Kernel> = OnceLock::new();

        let kernel = *KERNEL.get_or_init(|| -> Kernel {
            let has_fma = std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma");
            if has_fma {
                erff_fma_impl
            } else {
                portable_erff
            }
        });
        // SAFETY: the cached pointer is either the safe portable kernel or
        // `erff_fma_impl`, which is only stored after both required CPU
        // features were detected at runtime.
        unsafe { kernel(x) }
    }
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        crate::err::erff::erff_gen(x, GenErffBackend {})
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_erff` at zero, two finite points, both infinities and NaN.
    #[test]
    fn f_erff_test() {
        // (input, expected) pairs compared with `==`.
        let exact_cases: [(f32, f32); 5] = [
            (0.0, 0.0),
            (1.0, 0.8427008),
            (0.5, 0.5204999),
            (f32::INFINITY, 1.0),
            (f32::NEG_INFINITY, -1.0),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(f_erff(input), expected);
        }

        // NaN never compares equal, so it gets its own check.
        assert!(f_erff(f32::NAN).is_nan());
    }
}
