Get N-Grams
Count n-grams, either of words, or of characters.
ngrams(.Object, ...)

## S4 method for signature 'partition'
ngrams(
  .Object,
  n = 2,
  p_attribute = "word",
  char = NULL,
  progress = FALSE,
  ...
)

## S4 method for signature 'character'
ngrams(
  .Object,
  n = 2,
  p_attribute = "word",
  char = NULL,
  progress = FALSE,
  ...
)

## S4 method for signature 'partition'
ngrams(
  .Object,
  n = 2,
  p_attribute = "word",
  char = NULL,
  progress = FALSE,
  ...
)

## S4 method for signature 'subcorpus'
ngrams(
  .Object,
  n = 2,
  p_attribute = "word",
  char = NULL,
  progress = FALSE,
  ...
)

## S4 method for signature 'character'
ngrams(
  .Object,
  n = 2,
  p_attribute = "word",
  char = NULL,
  progress = FALSE,
  ...
)

## S4 method for signature 'data.table'
ngrams(.Object, n = 2L, p_attribute = "word")

## S4 method for signature 'corpus'
ngrams(
  .Object,
  n = 2,
  p_attribute = "word",
  char = NULL,
  progress = FALSE,
  ...
)

## S4 method for signature 'partition_bundle'
ngrams(
  .Object,
  n = 2,
  char = NULL,
  p_attribute = "word",
  mc = FALSE,
  progress = FALSE,
  ...
)
.Object |
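object of class `partition`, `subcorpus`, `corpus`, `partition_bundle`, `character` or `data.table` (one method is defined for each of these classes) |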
... |
Further arguments. |
n |
number of tokens/characters that make up one n-gram (the "n" in n-gram; defaults to 2)
p_attribute |
the p-attribute(s) to use; more than one may be supplied, e.g. `c("word", "pos")`
char |
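If `NULL` (the default), n-grams of tokens are counted; otherwise n-grams of characters are counted (presumably restricted to the characters supplied in this vector; confirm against the package reference) |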
progress |
A `logical` value (default `FALSE`), presumably whether to show progress information while counting
mc |
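A `logical` value (default `FALSE`), accepted only by the `partition_bundle` method; presumably whether to use multicore processing (confirm against the package reference) |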
use("polmineR")
P <- partition("GERMAPARLMINI", date = "2009-10-27")
ngramObject <- ngrams(P, n = 2, p_attribute = "word", char = NULL)

# a more complex scenario: get most frequent ADJA/NN-combinations
ngramObject <- ngrams(P, n = 2, p_attribute = c("word", "pos"), char = NULL)
ngramObject2 <- subset(
  ngramObject,
  ngramObject[["1_pos"]] == "ADJA" & ngramObject[["2_pos"]] == "NN"
)
ngramObject2@stat[, "1_pos" := NULL][, "2_pos" := NULL]
ngramObject3 <- sort(ngramObject2, by = "count")
head(ngramObject3)

use("polmineR")
dt <- decode("REUTERS", p_attribute = "word", s_attribute = character(), to = "data.table")
y <- ngrams(dt, n = 3L, p_attribute = "word")
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.