Is it easy to extract emoji from natural-language text? Yes, it is!
Code
// Maven dependency: https://mvnrepository.com/artifact/org.nlp4j/nlp4j-core
// import nlp4j.Document;
// import nlp4j.impl.DefaultDocument;
// import nlp4j.util.DocumentUtil;
// Sample input: a single emoji character.
final String input = "😊";

// Wrap the input in a document under the "text" attribute.
final Document document = new DefaultDocument();
document.putAttribute("text", input);

// Point the annotator at the "text" attribute and run it on the document.
final EmojiAnnotator emojiAnnotator = new EmojiAnnotator();
emojiAnnotator.setProperty("target", "text");
emojiAnnotator.annotate(document);

// Render the annotated document as JSON and write it to standard error.
final String json = DocumentUtil.toJsonPrettyString(document);
System.err.println(json);
Result
{
"text": "😊",
"keywords": [
{
"facet": "emoji",
"lex": "SMILING_FACE_WITH_SMILING_EYES",
"str": "SMILING_FACE_WITH_SMILING_EYES",
"begin": 0,
"end": 1,
"@classname": "nlp4j.impl.DefaultKeyword"
},
{
"facet": "emojiblock",
"lex": "EMOTICONS",
"str": "EMOTICONS",
"begin": 0,
"end": 1,
"@classname": "nlp4j.impl.DefaultKeyword"
},
{
"facet": "emojichar",
"lex": "😊",
"str": "😊",
"begin": 0,
"end": 1,
"@classname": "nlp4j.impl.DefaultKeyword"
}
]
}