Where can I find the world's fastest atof implementation?

前端 未结 6 658
南笙
南笙 2021-02-01 05:59

I'm looking for an extremely fast atof() implementation on IA32 optimized for US-en locale, ASCII, and non-scientific notation. The Windows multithreaded CRT falls down miserably…

6条回答
  •  后悔当初
    2021-02-01 06:54

    I've implemented something you may find useful. Compared with atof it's about 5x faster, and about 10x faster if used with __forceinline. Another nice thing is that it seems to have exactly the same arithmetic as the CRT implementation. Of course it has some cons too:

    • it supports only single precision float,
    • and doesn't scan any special values like #INF, etc...
    __forceinline bool float_scan(const wchar_t* wcs, float* val)
    {
    int hdr=0;
    while (wcs[hdr]==L' ')
        hdr++;
    
    int cur=hdr;
    
    bool negative=false;
    bool has_sign=false;
    
    if (wcs[cur]==L'+' || wcs[cur]==L'-')
    {
        if (wcs[cur]==L'-')
            negative=true;
        has_sign=true;
        cur++;
    }
    else
        has_sign=false;
    
    int quot_digs=0;
    int frac_digs=0;
    
    bool full=false;
    
    wchar_t period=0;
    int binexp=0;
    int decexp=0;
    unsigned long value=0;
    
    while (wcs[cur]>=L'0' && wcs[cur]<=L'9')
    {
        if (!full)
        {
            if (value>=0x19999999 && wcs[cur]-L'0'>5 || value>0x19999999)
            {
                full=true;
                decexp++;
            }
            else
                value=value*10+wcs[cur]-L'0';
        }
        else
            decexp++;
    
        quot_digs++;
        cur++;
    }
    
    if (wcs[cur]==L'.' || wcs[cur]==L',')
    {
        period=wcs[cur];
        cur++;
    
        while (wcs[cur]>=L'0' && wcs[cur]<=L'9')
        {
            if (!full)
            {
                if (value>=0x19999999 && wcs[cur]-L'0'>5 || value>0x19999999)
                    full=true;
                else
                {
                    decexp--;
                    value=value*10+wcs[cur]-L'0';
                }
            }
    
            frac_digs++;
            cur++;
        }
    }
    
    if (!quot_digs && !frac_digs)
        return false;
    
    wchar_t exp_char=0;
    
    int decexp2=0; // explicit exponent
    bool exp_negative=false;
    bool has_expsign=false;
    int exp_digs=0;
    
    // even if value is 0, we still need to eat exponent chars
    if (wcs[cur]==L'e' || wcs[cur]==L'E')
    {
        exp_char=wcs[cur];
        cur++;
    
        if (wcs[cur]==L'+' || wcs[cur]==L'-')
        {
            has_expsign=true;
            if (wcs[cur]=='-')
                exp_negative=true;
            cur++;
        }
    
        while (wcs[cur]>=L'0' && wcs[cur]<=L'9')
        {
            if (decexp2>=0x19999999)
                return false;
            decexp2=10*decexp2+wcs[cur]-L'0';
            exp_digs++;
            cur++;
        }
    
        if (exp_negative)
            decexp-=decexp2;
        else
            decexp+=decexp2;
    }
    
    // end of wcs scan, cur contains value's tail
    
    if (value)
    {
        while (value<=0x19999999)
        {
            decexp--;
            value=value*10;
        }
    
        if (decexp)
        {
            // ensure 1bit space for mul by something lower than 2.0
            if (value&0x80000000)
            {
                value>>=1;
                binexp++;
            }
    
            if (decexp>308 || decexp<-307)
                return false;
    
            // convert exp from 10 to 2 (using FPU)
            int E;
            double v=pow(10.0,decexp);
            double m=frexp(v,&E);
            m=2.0*m;
            E--;
            value=(unsigned long)floor(value*m);
    
            binexp+=E;
        }
    
        binexp+=23; // rebase exponent to 23bits of mantisa
    
    
        // so the value is: +/- VALUE * pow(2,BINEXP);
        // (normalize manthisa to 24bits, update exponent)
        while (value&0xFE000000)
        {
            value>>=1;
            binexp++;
        }
        if (value&0x01000000)
        {
            if (value&1)
                value++;
            value>>=1;
            binexp++;
            if (value&0x01000000)
            {
                value>>=1;
                binexp++;
            }
        }
    
        while (!(value&0x00800000))
        {
            value<<=1;
            binexp--;
        }
    
        if (binexp<-127)
        {
            // underflow
            value=0;
            binexp=-127;
        }
        else
        if (binexp>128)
            return false;
    
        //exclude "implicit 1"
        value&=0x007FFFFF;
    
        // encode exponent
        unsigned long exponent=(binexp+127)<<23;
        value |= exponent;
    }
    
    // encode sign
    unsigned long sign=negative<<31;
    value |= sign;
    
    if (val)
    {
        *(unsigned long*)val=value;
    }
    
    return true;
    }
    

提交回复
热议问题