Extensions of base R functions for tokens objects.
```r
# S3 method for tokens
unlist(x, recursive = FALSE, use.names = TRUE)

# S3 method for tokens
[(x, i, drop_docid = TRUE)

# S3 method for tokens
+(t1, t2)

# S3 method for tokens
c(...)
```
Argument | Description |
---|---|
x | a tokens object |
recursive | a required argument for unlist but inapplicable to tokens objects |
i | document names or indices for documents to extract. |
drop_docid | if `TRUE`, `docid` for documents are removed as the result of extraction. |
t1 | tokens one to be added |
t2 | tokens two to be added |
`unlist` returns a simple vector of characters from a tokens object.
`c(...)` and `+` return a tokens object whose documents have been added as a single sequence of documents.
```r
toks <- tokens(c(d1 = "one two three", d2 = "four five six", d3 = "seven eight"))
str(toks)
#> List of 3
#> $ d1: chr [1:3] "one" "two" "three"
#> $ d2: chr [1:3] "four" "five" "six"
#> $ d3: chr [1:2] "seven" "eight"
#> - attr(*, "types")= chr [1:8] "one" "two" "three" "four" ...
#> - attr(*, "padding")= logi FALSE
#> - attr(*, "class")= chr "tokens"
#> - attr(*, "docvars")='data.frame': 3 obs. of 3 variables:
#> ..$ docname_: chr [1:3] "d1" "d2" "d3"
#> ..$ docid_ : Factor w/ 3 levels "d1","d2","d3": 1 2 3
#> ..$ segid_ : int [1:3] 1 1 1
#> - attr(*, "meta")=List of 3
#> ..$ system:List of 5
#> .. ..$ package-version:Classes 'package_version', 'numeric_version' hidden list of 1
#> .. .. ..$ : int [1:3] 3 2 0
#> .. ..$ r-version :Classes 'R_system_version', 'package_version', 'numeric_version' hidden list of 1
#> .. .. ..$ : int [1:3] 4 1 2
#> .. ..$ system : Named chr [1:3] "Darwin" "x86_64" "kbenoit"
#> .. .. ..- attr(*, "names")= chr [1:3] "sysname" "machine" "user"
#> .. ..$ directory : chr "/Users/kbenoit/Dropbox (Personal)/GitHub/quanteda/quanteda/docs/reference"
#> .. ..$ created : Date[1:1], format: "2021-11-25"
#> ..$ object:List of 6
#> .. ..$ unit : chr "documents"
#> .. ..$ what : chr "word"
#> .. ..$ ngram : int 1
#> .. ..$ skip : int 0
#> .. ..$ concatenator: chr "_"
#> .. ..$ summary :List of 2
#> .. .. ..$ hash: chr(0)
#> .. .. ..$ data: NULL
#> ..$ user : list()
toks[c(1,3)]
#> Tokens consisting of 2 documents.
#> d1 :
#> [1] "one" "two" "three"
#>
#> d3 :
#> [1] "seven" "eight"
#>

# combining tokens
toks1 <- tokens(c(doc1 = "a b c d e", doc2 = "f g h"))
toks2 <- tokens(c(doc3 = "1 2 3"))
toks1 + toks2
#> Tokens consisting of 3 documents.
#> doc1 :
#> [1] "a" "b" "c" "d" "e"
#>
#> doc2 :
#> [1] "f" "g" "h"
#>
#> doc3 :
#> [1] "1" "2" "3"
#>
c(toks1, toks2)
#> Tokens consisting of 3 documents.
#> doc1 :
#> [1] "a" "b" "c" "d" "e"
#>
#> doc2 :
#> [1] "f" "g" "h"
#>
#> doc3 :
#> [1] "1" "2" "3"
#>
```