PHP 删除非法UTF-8字符
// Sanitise $some_string in place: every byte sequence matched below is
// replaced with a single '?'. Input and output are both $some_string.
//
// The patterns are intentionally byte-oriented (no "u" modifier): with "u",
// PCRE refuses to process a subject that is not valid UTF-8, which is exactly
// the kind of input this code exists to clean up. They are single-quoted so
// the \xHH escapes reach PCRE unchanged.

// Pass 1: reject stray control bytes, misplaced continuation bytes, overlong
// 2-byte sequences, and every code point at or above U+10000.
$invalid_utf8_pattern =
    // ASCII control bytes and DEL; tab (0x09), LF (0x0A) and CR (0x0D) are kept.
    // \x10 is redundant here (it lies inside \x0E-\x19).
    // NOTE(review): the range stops at 0x19, so 0x1A-0x1F pass through;
    // possibly 0x1F was intended. Confirm before widening it.
    '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]'
    // An ASCII byte followed by continuation bytes. The ASCII byte is part of
    // the match, so it is replaced together with the stray bytes.
    // Continuation bytes at the very start of the input are not matched.
    . '|[\x00-\x7F][\x80-\xBF]+'
    // Lead bytes C0/C1 (overlong 2-byte forms) and F0-FF (4-byte sequences,
    // i.e. U+10000 and above, plus bytes that are never valid leads), along
    // with any continuation bytes that follow them.
    . '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'
    // 2-byte lead with no continuation byte, or with two or more.
    . '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'
    // 3-byte lead followed by exactly one, fewer than two, or three or more
    // continuation bytes.
    . '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';

$some_string = preg_replace($invalid_utf8_pattern, '?', $some_string);

// Pass 2: reject overlong 3-byte sequences (E0 80..9F xx) and UTF-16
// surrogates encoded as UTF-8 (ED A0..BF xx). These have the right length,
// so pass 1 cannot catch them.
$overlong_or_surrogate_pattern =
    '/\xE0[\x80-\x9F][\x80-\xBF]'
    . '|\xED[\xA0-\xBF][\x80-\xBF]/S';

$some_string = preg_replace($overlong_or_surrogate_pattern, '?', $some_string);