public class TokenFilter
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
protected java.util.HashSet<java.lang.String> |
excludedTokenClasses |
protected java.util.HashSet<java.lang.Integer> |
excludedTokenTypes |
protected java.util.HashSet<java.lang.String> |
includedTokenClasses |
protected java.util.HashSet<java.lang.Integer> |
includedTokenTypes |
static java.lang.String |
PARAM_EXCLUDEDTOKENCLASSES
Configuration parameter for list of token classes to exclude from lookups
|
static java.lang.String |
PARAM_EXCLUDEDTOKENTYPES
Configuration parameter for list of token types to exclude from lookups
|
static java.lang.String |
PARAM_INCLUDEDTOKENCLASSES
Configuration parameter for list of token classes to include in lookups
|
static java.lang.String |
PARAM_INCLUDEDTOKENTYPES
Configuration parameter for list of token types to include in lookups
|
static java.lang.String |
PARAM_STOPWORDS |
static java.lang.String |
PARAM_TOKENANNOTATION
Configuration parameter giving type of tokens
|
Constructor and Description |
---|
TokenFilter(java.lang.String tokenAnnotationName,
java.lang.String tokenTypeFeatureName,
java.lang.String tokenClassFeatureName,
Logger logger) |
Modifier and Type | Method and Description |
---|---|
boolean |
checkTokenClass(org.apache.uima.cas.text.AnnotationFS token) |
boolean |
checkTokenClass(DictionaryToken token) |
boolean |
checkTokenType(org.apache.uima.cas.text.AnnotationFS token) |
boolean |
checkTokenType(DictionaryToken token) |
java.lang.String |
getTokenAnnotationName() |
org.apache.uima.cas.Feature |
getTokenClassFeature() |
java.lang.String |
getTokenClassFeatureName() |
org.apache.uima.cas.Feature |
getTokenTypeFeature() |
java.lang.String |
getTokenTypeFeatureName() |
void |
initConfig(org.apache.uima.analysis_engine.annotator.AnnotatorContext annotatorContext) |
static java.util.Set<java.lang.String> |
initializeStopWordList(java.lang.String[] stopWordsStrings) |
void |
initTypes(org.apache.uima.cas.TypeSystem typeSystem) |
void |
initTypes(org.apache.uima.cas.TypeSystem typeSystem,
boolean requireFeatureExistence) |
boolean |
isOK_Token(org.apache.uima.cas.text.AnnotationFS token,
TokenNormalizer tokenNormalizer) |
boolean |
isOK_Token(DictionaryToken token,
TokenNormalizer tokenNormalizer) |
static boolean |
isStopWord(java.util.Set<java.lang.String> stopWords,
java.lang.String tokenText) |
boolean |
isStopWord(java.lang.String tokenText) |
void |
setTokenAnnotationName(java.lang.String tokenAnnotationName) |
void |
setTokenClassFeature(org.apache.uima.cas.Feature tokenClassFeature) |
void |
setTokenClassFeatureName(java.lang.String tokenClassFeatureName) |
void |
setTokenTypeFeature(org.apache.uima.cas.Feature tokenTypeFeature) |
void |
setTokenTypeFeatureName(java.lang.String tokenTypeFeatureName) |
public static final java.lang.String PARAM_INCLUDEDTOKENCLASSES
protected java.util.HashSet<java.lang.String> includedTokenClasses
public static final java.lang.String PARAM_EXCLUDEDTOKENCLASSES
protected java.util.HashSet<java.lang.String> excludedTokenClasses
public static final java.lang.String PARAM_INCLUDEDTOKENTYPES
protected java.util.HashSet<java.lang.Integer> includedTokenTypes
public static final java.lang.String PARAM_EXCLUDEDTOKENTYPES
protected java.util.HashSet<java.lang.Integer> excludedTokenTypes
public static final java.lang.String PARAM_STOPWORDS
public static final java.lang.String PARAM_TOKENANNOTATION
public TokenFilter(java.lang.String tokenAnnotationName, java.lang.String tokenTypeFeatureName, java.lang.String tokenClassFeatureName, Logger logger)
public java.lang.String getTokenClassFeatureName()
public void setTokenClassFeatureName(java.lang.String tokenClassFeatureName)
public org.apache.uima.cas.Feature getTokenClassFeature()
public void setTokenClassFeature(org.apache.uima.cas.Feature tokenClassFeature)
public java.lang.String getTokenTypeFeatureName()
public void setTokenTypeFeatureName(java.lang.String tokenTypeFeatureName)
public org.apache.uima.cas.Feature getTokenTypeFeature()
public void setTokenTypeFeature(org.apache.uima.cas.Feature tokenTypeFeature)
public java.lang.String getTokenAnnotationName()
public void setTokenAnnotationName(java.lang.String tokenAnnotationName)
public void initConfig(org.apache.uima.analysis_engine.annotator.AnnotatorContext annotatorContext) throws org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException
org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException
public static java.util.Set<java.lang.String> initializeStopWordList(java.lang.String[] stopWordsStrings) throws org.apache.uima.analysis_engine.annotator.AnnotatorContextException
org.apache.uima.analysis_engine.annotator.AnnotatorContextException
public boolean checkTokenClass(org.apache.uima.cas.text.AnnotationFS token)
token
- tokenClass to look up
public boolean checkTokenClass(DictionaryToken token)
public static boolean isStopWord(java.util.Set<java.lang.String> stopWords, java.lang.String tokenText)
public boolean isStopWord(java.lang.String tokenText)
public boolean checkTokenType(org.apache.uima.cas.text.AnnotationFS token)
token
-
public boolean checkTokenType(DictionaryToken token)
public void initTypes(org.apache.uima.cas.TypeSystem typeSystem) throws UnknownTypeException
UnknownTypeException
public void initTypes(org.apache.uima.cas.TypeSystem typeSystem, boolean requireFeatureExistence) throws UnknownTypeException
typeSystem
-
requireFeatureExistence
- if true, then any tokenType and/or tokenClass features of the tokenAnnotation that are
specified must exist. This is to allow for the situation where these features
might not exist during dictionary loading, but are needed at annotator runtime.
UnknownTypeException
public boolean isOK_Token(org.apache.uima.cas.text.AnnotationFS token, TokenNormalizer tokenNormalizer)
public boolean isOK_Token(DictionaryToken token, TokenNormalizer tokenNormalizer)
Copyright © 2013. All Rights Reserved.