一个Java基于codePoint的emoji判断方法
该方法参考自一篇 CSDN 博客:《java判断是否是emoji字符(史上最全)》。
经过简单封装如下:
/**
 * Static helpers that detect or strip emoji (and other symbol-like characters)
 * from a string, working on Unicode code points rather than on UTF-16 chars.
 *
 * <p>NOTE(review): the filtered ranges are deliberately broad and also cover
 * whole non-emoji blocks such as Greek (U+0370-U+03FF), Arabic (U+0600-U+06FF),
 * Telugu (U+0C00-U+0C7F) and Latin Extended Additional (U+1E00-U+1EFF), so text
 * in those scripts is removed as well. Confirm this is acceptable for callers.
 */
public final class EmojiFilter {

    /**
     * Inclusive {first, last} code point ranges treated as "emoji".
     *
     * Reference: "java判断是否是emoji字符(史上最全)", CSDN blog,
     * https://blog.csdn.net/congqingbin/article/details/97144558
     */
    private static final int[][] FILTERED_RANGES = {
        {0x0300, 0x03FF},
        {0x0600, 0x06FF},
        {0x0C00, 0x0C7F},
        {0x1DC0, 0x1DFF},
        {0x1E00, 0x1EFF},
        {0x2000, 0x209F},
        {0x20D0, 0x214F},
        {0x2190, 0x23FF},
        {0x2460, 0x25FF},
        {0x2600, 0x27EF},
        {0x2900, 0x29FF},
        {0x2B00, 0x2BFF},
        {0x2C60, 0x2C7F},
        {0x2E00, 0x2E7F},
        {0xA490, 0xA4CF},
        {0xE000, 0xF8FF},
        {0xFE00, 0xFE0F},
        {0xFE30, 0xFE4F},
        {0x1F000, 0x1F02F},
        {0x1F0A0, 0x1F0FF},
        {0x1F100, 0x1F64F},
        {0x1F680, 0x1F6FF},
        {0x1F910, 0x1F96B},
        {0x1F980, 0x1F9E0},
    };

    private EmojiFilter() {
    }

    /**
     * Removes emoji and the other filtered (non-text) code points from a string.
     *
     * <p>If you only need to know whether a string contains such a character,
     * prefer {@link #hasEmoji(String)}: it stops at the first match instead of
     * scanning the whole input.
     *
     * @param source the text to clean; may be {@code null}
     * @return {@code source} itself when it is {@code null}, empty or consists
     *         only of whitespace; otherwise a new string without the filtered
     *         code points and without unpaired surrogate chars
     */
    public static String filterEmoji(String source) {
        if (!hasText(source)) {
            return source;
        }
        int len = source.length();
        StringBuilder sb = new StringBuilder(len);
        // Advance by whole code points so a surrogate pair is consumed once and
        // the index can never run past the end of the string.
        for (int i = 0; i < len; ) {
            int codePoint = source.codePointAt(i);
            if (!isUnpairedSurrogate(codePoint) && !isEmojiCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Tells whether the string contains at least one filtered code point.
     *
     * @param source the text to inspect; may be {@code null}
     * @return {@code true} if any code point falls in a filtered range;
     *         {@code false} otherwise, including for {@code null}, empty or
     *         whitespace-only input
     */
    public static boolean hasEmoji(String source) {
        if (!hasText(source)) {
            return false;
        }
        int len = source.length();
        for (int i = 0; i < len; ) {
            int codePoint = source.codePointAt(i);
            if (isEmojiCharacter(codePoint)) {
                return true;
            }
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Returns {@code true} when the string is non-null, non-empty and holds at
     * least one char that {@link Character#isWhitespace(char)} rejects.
     */
    private static boolean hasText(String text) {
        if (text == null || text.isEmpty()) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * {@code String.codePointAt} only yields a value in the surrogate range when
     * the char at that index is not part of a valid high/low pair.
     */
    private static boolean isUnpairedSurrogate(int codePoint) {
        return codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE;
    }

    /** Checks the code point against every range in {@link #FILTERED_RANGES}. */
    private static boolean isEmojiCharacter(int codePoint) {
        for (int[] range : FILTERED_RANGES) {
            if (range[0] <= codePoint && codePoint <= range[1]) {
                return true;
            }
        }
        return false;
    }
}
额外的参考: