Does PHP not have a function for XML-safe entity decoding? Is there no xml_entity_decode?

后端 未结 6 1826
旧时难觅i
旧时难觅i 2020-12-17 02:20

THE PROBLEM: I need an XML file "fully encoded" in UTF-8; that is, with no entities representing symbols and all symbols encoded in UTF-8, except the only 3 ones that are

6条回答
  •  时光说笑
    2020-12-17 02:47

        /**
         * Decode HTML entities in a string, including entities that were
         * written without their terminating semicolon.
         *
         * Named entities lacking a ';' are looked up in the HTML translation
         * table and replaced by their character; numeric references lacking a
         * ';' get one appended and are then handed to html_entity_decode().
         * The whole pass is repeated until the string stops changing, so
         * multiply-encoded input is unwound completely.
         *
         * NOTE: the translation table is kept in a static variable and is built
         * only once, using the flags and charset of the first call that needs it.
         *
         * @param   string  $str      Input string
         * @param   string  $charset  Character set; falls back to $this->charset when NULL
         * @return  string  The decoded string
         */
        public function entity_decode($str, $charset = NULL)
    {
        // Without an ampersand there cannot be any entity to decode.
        if (strpos($str, '&') === FALSE)
        {
            return $str;
        }

        static $_entities;

        isset($charset) OR $charset = $this->charset;
        $flag = is_php('5.4')
            ? ENT_COMPAT | ENT_HTML5
            : ENT_COMPAT;

        do
        {
            // Snapshot used by the loop condition to detect a pass that changed nothing.
            $str_compare = $str;

            // Decode standard entities, avoiding false positives:
            // only names NOT followed by another letter or by ';' are matched here,
            // properly terminated entities are left to html_entity_decode() below.
            if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
            {
                if ( ! isset($_entities))
                {
                    // character => lowercased entity (e.g. '<' => '&lt;')
                    $_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

                    // If we're not on PHP 5.4+, add the possibly dangerous HTML 5
                    // entities to the array manually. Values are lowercase and end
                    // with ';' because the lookup below searches for strtolower(match).';'
                    if ($flag === ENT_COMPAT)
                    {
                        $_entities[':'] = '&colon;';
                        $_entities['('] = '&lpar;';
                        $_entities[')'] = '&rpar;';
                        $_entities["\n"] = '&newline;';
                        $_entities["\t"] = '&tab;';
                    }
                }

                $replace = array();

                // array_unique() preserves the original keys, so the result may have
                // gaps; iterate over the values instead of indexing 0..count-1.
                foreach (array_unique(array_map('strtolower', $matches[0])) as $match)
                {
                    if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
                    {
                        $replace[$match] = $char;
                    }
                }

                // Keys are lowercased, hence the case-insensitive replacement.
                $str = str_ireplace(array_keys($replace), array_values($replace), $str);
            }

            // Decode numeric & UTF16 two byte entities.
            // The pattern appends the missing ';' to hexadecimal (&#x..) and decimal
            // (&#..) references; the '&#' prefix is required for both alternatives so
            // that ordinary numbers in the text are left untouched.
            $str = html_entity_decode(
                preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
                $flag,
                $charset
            );
        }
        while ($str_compare !== $str);

        return $str;
    }
    

提交回复
热议问题