近期做朋友圈功能的时候,需要支持emoji表情评论存储。有两种方式:1.修改MySQL数据库字符集;2.用Java过滤emoji表情,进行转码,之后再存入数据库;
1.修改MySQL数据库字符集
mysql的utf8编码的一个字符最多3个字节,但是一个emoji表情为4个字节,所以utf8不支持存储emoji表情。但是utf8的超集utf8mb4一个字符最多能有4字节,所以能支持emoji表情的存储。所以把数据库编码改成utf8mb4;
修改字符集,参考:http://blog.csdn.net/u013145194/article/details/51527389
2.用Java过滤emoji表情,进行转码,之后再存入数据库
import org.apache.commons.lang.StringUtils;
/**
 * Converts between text containing emoji and an ASCII-escaped form in which
 * every "special" UTF-16 unit is written as a backslash, the letter {@code u}
 * and four lowercase hex digits.
 *
 * <p>NOTE(review): {@link #emoji2Unicode(String)} does not escape backslashes,
 * so input that already contains a literal backslash-u-hex sequence will be
 * decoded by {@link #unicode2Emoji(String)} on the way back. Confirm that this
 * is acceptable for the stored data.
 */
public class EmojiUtils {

    /** Length of one escape sequence: backslash, 'u', and four hex digits. */
    private static final int ESCAPE_LENGTH = 6;

    /**
     * Decodes backslash-u escapes (as produced by {@link #emoji2Unicode(String)})
     * back into the characters they represent.
     *
     * <p>An escape is a backslash, followed by {@code u} or {@code U}, followed by
     * exactly four hex digits. Anything that does not match this shape, including
     * a truncated escape at the end of the input or "digits" that carry a sign,
     * is copied to the output unchanged.
     *
     * @param src the escaped text; may be {@code null}
     * @return the decoded text, or an empty string when {@code src} is
     *         {@code null} or empty
     */
    public static String unicode2Emoji(String src) {
        if (src == null || src.isEmpty()) {
            return "";
        }
        final int length = src.length();
        StringBuilder decoded = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char current = src.charAt(i);
            // Only attempt to decode when a complete six-character escape fits.
            if (current == '\\' && i + ESCAPE_LENGTH <= length) {
                char marker = src.charAt(i + 1);
                if (marker == 'u' || marker == 'U') {
                    int value = parseHex4(src, i + 2);
                    if (value >= 0) {
                        decoded.append((char) value);
                        i += ESCAPE_LENGTH;
                        continue;
                    }
                }
            }
            decoded.append(current);
            i++;
        }
        return decoded.toString();
    }

    /**
     * Replaces every UTF-16 unit for which {@link #isEmojiCharacter(int)} is true
     * with a backslash-u escape of four lowercase hex digits; all other characters
     * are copied unchanged.
     *
     * <p>Characters outside the Basic Multilingual Plane are stored as two
     * surrogate units, and the surrogate range is part of
     * {@link #isEmojiCharacter(int)}, so such characters come out as two
     * consecutive escapes.
     *
     * @param src the text to escape; may be {@code null}
     * @return the escaped text, or an empty string when {@code src} is
     *         {@code null} or empty
     */
    public static String emoji2Unicode(String src) {
        if (src == null || src.isEmpty()) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(src.length());
        for (int i = 0; i < src.length(); i++) {
            char unit = src.charAt(i);
            // Testing the unit itself is enough: both halves of a surrogate pair
            // fall into the 0xD800-0xDFFF range checked by isEmojiCharacter.
            if (isEmojiCharacter(unit)) {
                appendEscape(escaped, unit);
            } else {
                escaped.append(unit);
            }
        }
        return escaped.toString();
    }

    /**
     * Tells whether a code point belongs to one of the ranges this class escapes.
     *
     * <p>Besides pictographic symbols, the ranges deliberately include invisible
     * and format characters, surrogates, private-use characters, variation
     * selectors and every supplementary-plane code point.
     *
     * @param codePoint the Unicode code point (or single UTF-16 unit) to test
     * @return {@code true} if the value falls into an escaped range
     */
    public static boolean isEmojiCharacter(int codePoint) {
        return (codePoint >= 0x2600 && codePoint <= 0x27BF) // Miscellaneous Symbols and Dingbats
                || codePoint == 0x303D // part alternation mark
                || codePoint == 0x2049 // exclamation question mark
                || codePoint == 0x203C // double exclamation mark
                /* Selected parts of the General Punctuation area */
                || (codePoint >= 0x2000 && codePoint <= 0x200F)
                || (codePoint >= 0x2028 && codePoint <= 0x202F)
                || codePoint == 0x205F
                || (codePoint >= 0x2065 && codePoint <= 0x206F)
                || (codePoint >= 0x2100 && codePoint <= 0x214F) // Letterlike Symbols
                || (codePoint >= 0x2300 && codePoint <= 0x23FF) // Miscellaneous Technical
                || (codePoint >= 0x2B00 && codePoint <= 0x2BFF) // Miscellaneous Symbols and Arrows
                || (codePoint >= 0x2900 && codePoint <= 0x297F) // Supplemental Arrows-B
                || (codePoint >= 0x3200 && codePoint <= 0x32FF) // Enclosed CJK Letters and Months
                || (codePoint >= 0xD800 && codePoint <= 0xDFFF) // high and low surrogates
                || (codePoint >= 0xE000 && codePoint <= 0xF8FF) // Private Use Area
                || (codePoint >= 0xFE00 && codePoint <= 0xFE0F) // variation selectors
                || codePoint >= 0x10000; // supplementary planes: do not fit in one char, escape all
    }

    /**
     * Reads four hex digits starting at {@code start}.
     *
     * <p>Unlike {@code Integer.parseInt}, a leading sign is not accepted, so the
     * result is always in the range of a single {@code char}.
     *
     * @return the parsed value (0 to 0xFFFF), or -1 if any of the four characters
     *         is not a hex digit
     */
    private static int parseHex4(String src, int start) {
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            int digit = Character.digit(src.charAt(k), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /** Appends one escape for {@code unit}, always using four lowercase hex digits. */
    private static void appendEscape(StringBuilder out, char unit) {
        out.append("\\u");
        for (int shift = 12; shift >= 0; shift -= 4) {
            out.append(Character.forDigit((unit >> shift) & 0xF, 16));
        }
    }
}