Getter/setter methods for koRpus objects
taggedText(obj, add.desc = FALSE, doc_id = FALSE)
## S4 method for signature 'kRp.text'
taggedText(obj, add.desc = FALSE, doc_id = FALSE)
taggedText(obj) <- value
## S4 replacement method for signature 'kRp.text'
taggedText(obj) <- value
doc_id(obj, ...)
## S4 method for signature 'kRp.text'
doc_id(obj, has_id = NULL)
hasFeature(obj, feature = NULL, ...)
## S4 method for signature 'kRp.text'
hasFeature(obj, feature = NULL)
hasFeature(obj, feature) <- value
## S4 replacement method for signature 'kRp.text'
hasFeature(obj, feature) <- value
feature(obj, feature, ...)
## S4 method for signature 'kRp.text'
feature(obj, feature, doc_id = NULL)
feature(obj, feature) <- value
## S4 replacement method for signature 'kRp.text'
feature(obj, feature) <- value
corpusReadability(obj, ...)
## S4 method for signature 'kRp.text'
corpusReadability(obj, doc_id = NULL)
corpusReadability(obj) <- value
## S4 replacement method for signature 'kRp.text'
corpusReadability(obj) <- value
corpusHyphen(obj, ...)
## S4 method for signature 'kRp.text'
corpusHyphen(obj, doc_id = NULL)
corpusHyphen(obj) <- value
## S4 replacement method for signature 'kRp.text'
corpusHyphen(obj) <- value
corpusLexDiv(obj, ...)
## S4 method for signature 'kRp.text'
corpusLexDiv(obj, doc_id = NULL)
corpusLexDiv(obj) <- value
## S4 replacement method for signature 'kRp.text'
corpusLexDiv(obj) <- value
corpusFreq(obj, ...)
## S4 method for signature 'kRp.text'
corpusFreq(obj)
corpusFreq(obj) <- value
## S4 replacement method for signature 'kRp.text'
corpusFreq(obj) <- value
corpusCorpFreq(obj, ...)
## S4 method for signature 'kRp.text'
corpusCorpFreq(obj)
corpusCorpFreq(obj) <- value
## S4 replacement method for signature 'kRp.text'
corpusCorpFreq(obj) <- value
corpusStopwords(obj, ...)
## S4 method for signature 'kRp.text'
corpusStopwords(obj)
corpusStopwords(obj) <- value
## S4 replacement method for signature 'kRp.text'
corpusStopwords(obj) <- value
## S4 method for signature 'kRp.text,ANY,ANY,ANY'
x[i, j, ..., drop = TRUE]
## S4 replacement method for signature 'kRp.text,ANY,ANY,ANY'
x[i, j, ...] <- value
## S4 method for signature 'kRp.text'
x[[i, doc_id = NULL, ...]]
## S4 replacement method for signature 'kRp.text'
x[[i, doc_id = NULL, ...]] <- value
## S4 method for signature 'kRp.text'
describe(obj, doc_id = NULL, simplify = TRUE, ...)
## S4 replacement method for signature 'kRp.text'
describe(obj, doc_id = NULL, ...) <- value
## S4 method for signature 'kRp.text'
language(obj)
## S4 replacement method for signature 'kRp.text'
language(obj) <- value
diffText(obj, doc_id = NULL)
## S4 method for signature 'kRp.text'
diffText(obj, doc_id = NULL)
diffText(obj) <- value
## S4 replacement method for signature 'kRp.text'
diffText(obj) <- value
originalText(obj)
## S4 method for signature 'kRp.text'
originalText(obj)
is.taggedText(obj)
is.kRp.text(obj)
fixObject(obj, doc_id = NA)
## S4 method for signature 'kRp.text'
fixObject(obj, doc_id = NA)
tif_as_tokens_df(tokens)
## S4 method for signature 'kRp.text'
tif_as_tokens_df(tokens)
## S4 method for signature 'kRp.tagged'
fixObject(obj, doc_id = NA)
## S4 method for signature 'kRp.txt.freq'
fixObject(obj, doc_id = NA)
## S4 method for signature 'kRp.txt.trans'
fixObject(obj, doc_id = NA)
## S4 method for signature 'kRp.analysis'
fixObject(obj, doc_id = NA)
obj |
An arbitrary |
add.desc |
Logical,
determines whether the |
doc_id |
Logical (except for |
value |
The new value to replace the current with. |
... |
Additional arguments for the generics. |
has_id |
A character vector with |
feature |
Character string naming the feature to look for. The return value is logical if a single feature
name is given. If |
x |
An object of class |
i |
Defines the row selector ( |
j |
Defines the column selector. |
drop |
Logical,
whether the result should be coerced to the lowest possible dimension. See |
simplify |
Logical, if |
tokens |
An object of class |
taggedText() returns the tokens slot.
doc_id() returns a character vector of all doc_id values in the object.
describe() returns the desc slot.
language() returns the lang slot.
[ and [[ can be used as shortcuts to index the results of taggedText().
fixObject returns the same object upgraded to the object structure of this package version (e.g.,
new columns, changed names, etc.).
hasFeature() returns TRUE or FALSE,
depending on whether the requested feature is present or not.
feature() returns the list entry of the feat_list slot for the requested feature.
corpusReadability() returns the list of kRp.readability objects,
see readability.
corpusHyphen() returns the list of kRp.hyphen objects,
see hyphen.
corpusLexDiv() returns the list of kRp.TTR objects,
see lex.div.
corpusFreq() returns the frequency analysis data from the feat_list slot,
see freq.analysis.
corpusCorpFreq() returns the kRp.corp.freq object of the feat_list slot,
see for example read.corp.custom.
corpusStopwords() returns the number of stopwords found in each text (if analyzed) from the feat_list slot.
tif_as_tokens_df returns the tokens slot in a TIF[1] compliant format,
i.e., doc_id is not a factor but a character vector.
originalText() is similar to taggedText(),
but reverts any transformations back to the original text before returning the tokens slot.
Only works if the object has the feature diff, see examples.
diffText() returns the diff slot, if present.
[1] Text Interchange Formats (https://github.com/ropensci/tif)
# The example is only run when the English language package can be loaded.
# require() is used deliberately as the guard: it returns FALSE instead of
# raising an error, so the whole example is skipped when koRpus.lang.en is
# not available.
if(require("koRpus.lang.en", quietly = TRUE)){
  # build the path to a sample text below the installed koRpus package
  sample_file <- file.path(
    path.package("koRpus"), "examples", "corpus", "Reality_Winner.txt"
  )
  tokenized.obj <- tokenize(
    txt=sample_file,
    lang="en"
  )

  # getters for the individual parts of the object
  doc_id(tokenized.obj)
  describe(tokenized.obj)
  language(tokenized.obj)
  taggedText(tokenized.obj)

  # `[[` and `[` are shortcuts to index the results of taggedText()
  tokenized.obj[["token"]]
  tokenized.obj[1:3, "token"]

  # tokens in TIF compliant format (doc_id as character, not factor)
  tif_as_tokens_df(tokenized.obj)

  # example for originalText()
  tokenized.obj <- jumbleWords(tokenized.obj)
  # now compare the jumbled words to the original
  tokenized.obj[["token"]]
  originalText(tokenized.obj)[["token"]]
} else {}