R Markdown
bigram
# Bigram benchmark input: tokenise the inaugural corpus and strip English
# stopwords. padding = TRUE is passed so removed tokens leave a placeholder
# (per the quanteda docs), keeping words on either side of a stopword from
# being counted as adjacent.
toks2 <- tokens_remove(
  tokens(data_corpus_inaugural),
  stopwords("english"),
  padding = TRUE
)

# Time size-2 collocation scoring under both values of `path`; the two calls
# are otherwise identical.
# NOTE(review): the labels suggest path = 1 is the R code path and path = 2
# the compiled one -- confirm against textstat_collocations().
microbenchmark::microbenchmark(
  path_R = textstat_collocations(
    toks2, size = 2, tolower = FALSE, method = "all", path = 1
  ),
  path_C = textstat_collocations(
    toks2, size = 2, tolower = FALSE, method = "all", path = 2
  ),
  times = 2,          # each expression is evaluated twice
  unit = "relative"   # timings are reported relative to the fastest expression
)
## Unit: relative
## expr min lq mean median uq max neval
## path_R 2.725511 2.725511 2.665482 2.665482 2.609778 2.609778 2
## path_C 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 2
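compare minimum counts (trigrams, path = 2 for both runs; the row labelled path_R uses min_count = 2 and the row labelled path_C uses min_count = 1)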
# Minimum-count benchmark input: tokenise the State of the Union corpus from
# quantedaData and strip English stopwords, passing padding = TRUE as in the
# other benchmarks.
toks2 <- tokens_remove(
  tokens(quantedaData::data_corpus_SOTU),
  stopwords("english"),
  padding = TRUE
)

# Both expressions use path = 2 and differ ONLY in min_count. The labels
# path_R / path_C are carried over from the other benchmarks and do not
# describe the code path here:
#   path_R -> min_count = 2
#   path_C -> min_count = 1
microbenchmark::microbenchmark(
  path_R = textstat_collocations(
    toks2, size = 3, tolower = FALSE, method = "lambda",
    min_count = 2, path = 2
  ),
  path_C = textstat_collocations(
    toks2, size = 3, tolower = FALSE, method = "lambda",
    min_count = 1, path = 2
  ),
  times = 2,          # each expression is evaluated twice
  unit = "relative"   # timings are reported relative to the fastest expression
)
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE,
## noBreaks. = FALSE, : Warning: ipf algorithm did not converge for at least
## once
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE,
## noBreaks. = FALSE, : Warning: ipf algorithm did not converge for at least
## once
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE,
## noBreaks. = FALSE, : Warning: ipf algorithm did not converge for at least
## once
## Warning in evalq((function (..., call. = TRUE, immediate. = FALSE,
## noBreaks. = FALSE, : Warning: ipf algorithm did not converge for at least
## once
## Unit: relative
## expr min lq mean median uq max neval
## path_R 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 2
## path_C 11.93741 11.93741 11.98982 11.98982 12.04222 12.04222 2
trigram
# Trigram benchmark input: tokenise the inaugural corpus and strip English
# stopwords, passing padding = TRUE so removed tokens leave a placeholder
# (per the quanteda docs).
toks2 <- tokens_remove(
  tokens(data_corpus_inaugural),
  stopwords("english"),
  padding = TRUE
)

# Time size-3 collocation scoring under both values of `path`; the two calls
# are otherwise identical.
# NOTE(review): the labels suggest path = 1 is the R code path and path = 2
# the compiled one -- confirm against textstat_collocations().
microbenchmark::microbenchmark(
  path_R = textstat_collocations(
    toks2, size = 3, tolower = FALSE, method = "all", path = 1
  ),
  path_C = textstat_collocations(
    toks2, size = 3, tolower = FALSE, method = "all", path = 2
  ),
  times = 2,          # each expression is evaluated twice
  unit = "relative"   # timings are reported relative to the fastest expression
)
## Unit: relative
## expr min lq mean median uq max neval
## path_R 3.02222 3.02222 3.02029 3.02029 3.018381 3.018381 2
## path_C 1.00000 1.00000 1.00000 1.00000 1.000000 1.000000 2
4-grams
# 4-gram benchmark input: tokenise the inaugural corpus and strip English
# stopwords, passing padding = TRUE so removed tokens leave a placeholder
# (per the quanteda docs).
toks2 <- tokens_remove(
  tokens(data_corpus_inaugural),
  stopwords("english"),
  padding = TRUE
)

# Time size-4 collocation scoring under both values of `path`; the two calls
# are otherwise identical.
# NOTE(review): the labels suggest path = 1 is the R code path and path = 2
# the compiled one -- confirm against textstat_collocations().
microbenchmark::microbenchmark(
  path_R = textstat_collocations(
    toks2, size = 4, tolower = FALSE, method = "all", path = 1
  ),
  path_C = textstat_collocations(
    toks2, size = 4, tolower = FALSE, method = "all", path = 2
  ),
  times = 2,          # each expression is evaluated twice
  unit = "relative"   # timings are reported relative to the fastest expression
)
## Unit: relative
## expr min lq mean median uq max neval
## path_R 1.517191 1.517191 1.536343 1.536343 1.555169 1.555169 2
## path_C 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 2