# Regex-source fragments describing UTF-8-shaped byte sequences.  They are
# kept as plain strings so they can be interpolated into the compiled
# pattern at the end of this group.
# NOTE(review): presumably matched against raw byte strings rather than
# decoded character strings -- confirm against the callers.
my $ascii_char = '[\x00-\x7F]';    # one 7-bit character
my $cont_byte  = '[\x80-\xBF]';    # one continuation byte

# Multi-byte forms: a lead byte taken from the listed range, followed by a
# fixed number of continuation bytes.
# NOTE(review): this is a structural check only.  The lead-byte ranges also
# admit 0xC0/0xC1 (overlong two-byte forms), 0xF5-0xF7, and the 5-byte form
# led by 0xF8-0xFB -- confirm that this leniency is intended.
my $utf8_2 = join q{}, '[\xC0-\xDF]', $cont_byte;
my $utf8_3 = join q{}, '[\xE0-\xEF]', $cont_byte, '{2}';
my $utf8_4 = join q{}, '[\xF0-\xF7]', $cont_byte, '{3}';
my $utf8_5 = join q{}, '[\xF8-\xFB]', $cont_byte, '{4}';

# Splits a string into a well-formed head and the remainder:
#   $1 - either a run of one or more ASCII characters, or exactly one
#        multi-byte sequence, taken from the very start of the string;
#   $2 - everything that follows (/s lets '.' cross newlines, so the rest of
#        the string is captured in full; it may be empty).
# The match fails when the string is empty or does not begin with one of the
# forms above (for example a lone continuation byte).
my $nibble_good_chars
    = qr{ ^( $ascii_char+ |$utf8_2|$utf8_3|$utf8_4|$utf8_5 )(.*) $ }sx;