public class FileTokenizer extends Tokenizer
Modifier and Type | Field and Description |
---|---|
protected java.lang.String | mode_ |
tokenVectorMap
Constructor and Description |
---|
FileTokenizer() |
FileTokenizer(java.lang.String config) |
Modifier and Type | Method and Description |
---|---|
void | addTokenizers(java.util.List<java.lang.String> tokenizerNames) |
protected java.util.List<Token> | readFile(java.lang.String filename) Splits a text document into tokens. |
void | setMode(java.lang.String mode) |
java.util.List<Token> | tokenize(java.lang.String filename) Splits the document into tokens. |
void | tokenizeFile(java.lang.String fileName) |
getTokenVectorMap, iterator, printTokens, tokenize, toString
public FileTokenizer(java.lang.String config)
public FileTokenizer()
public void tokenizeFile(java.lang.String fileName)
public java.util.List<Token> tokenize(java.lang.String filename)
Tokenizer
protected java.util.List<Token> readFile(java.lang.String filename)
filename - the string filename of the document to split into tokens
public void addTokenizers(java.util.List<java.lang.String> tokenizerNames)
public void setMode(java.lang.String mode)