Skip to content

Commit 70d7d31

Browse files
committed
* `tidy_dtm`/`tidy_tdm` did not order unnamed matrices as expected (e.g.,
`{1, 2, ..., 10}` was ordered as `{1, 10, 2, ...}`). This has been corrected. Closes #16
1 parent 389432d commit 70d7d31

6 files changed

Lines changed: 67 additions & 6 deletions

File tree

DESCRIPTION

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
Package: textshape
2-
Date: 2018-01-27
2+
Date: 2018-02-11
33
Title: Tools for Reshaping Text
44
Version: 1.5.2
5-
Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", "cre")),
6-
person("Joran", "Elias", role = "ctb"), person("Matthew", "Flickinger", role = "ctb"),
7-
person('Paul', 'Foster', role = "ctb"))
5+
Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", "cre")), person("Joran",
6+
"Elias", role = "ctb"), person("Matthew", "Flickinger", role = "ctb"), person('Paul', 'Foster', role =
7+
"ctb"))
88
Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
99
Description: Tools that can be used to reshape and restructure text data.
1010
Depends: R (>= 3.4.0)
@@ -16,3 +16,36 @@ Roxygen: list(wrap = FALSE)
1616
RoxygenNote: 6.0.1
1717
URL: http://github.com/trinker/textshape
1818
BugReports: http://github.com/trinker/textshape/issues
19+
Collate:
20+
'bind_list.R'
21+
'bind_table.R'
22+
'bind_vector.R'
23+
'change_index.R'
24+
'cluster_matrix.R'
25+
'column_to_rownames.R'
26+
'combine.R'
27+
'duration.R'
28+
'flatten.R'
29+
'from_to.R'
30+
'mtabulate.R'
31+
'set_output.R'
32+
'split_index.R'
33+
'split_match.R'
34+
'split_match_regex_to_transcript.R'
35+
'split_portion.R'
36+
'split_run.R'
37+
'split_sentence.R'
38+
'split_sentence_token.R'
39+
'split_speaker.R'
40+
'split_token.R'
41+
'split_transcript.R'
42+
'split_word.R'
43+
'textshape-package.R'
44+
'tidy_colo_dtm.R'
45+
'utils.R'
46+
'tidy_dtm.R'
47+
'tidy_list.R'
48+
'tidy_matrix.R'
49+
'tidy_table.R'
50+
'tidy_vector.R'
51+
'unique_pairs.R'

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ MINOR FEATURES
3636

3737
IMPROVEMENTS
3838

39+
* `tidy_dtm`/`tidy_tdm` did not order unnamed matrices as expected (e.g.,
40+
`{1, 2, ..., 10}` was ordered as `{1, 10, 2, ...}`). This has been corrected.
41+
3942
CHANGES
4043

4144

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ textshape 1.5.1 -
3636

3737
**IMPROVEMENTS**
3838

39+
* `tidy_dtm`/`tidy_tdm` did not order unnamed matrices as expected (e.g.,
40+
`{1, 2, ..., 10}` was ordered as `{1, 10, 2, ...}`). This has been corrected.
41+
3942
**CHANGES**
4043

4144

R/tidy_dtm.R

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#' @param \ldots ignored.
1111
#' @return Returns a tidied data.frame.
1212
#' @rdname tidy_dtm
13+
#' @include utils.R
1314
#' @export
1415
#' @examples
1516
#' data(simple_dtm)
@@ -37,6 +38,7 @@
3738
#' group_by(time) %>%
3839
#' arrange(desc(n)) %>%
3940
#' slice(1:10) %>%
41+
#' ungroup() %>%
4042
#' mutate(
4143
#' term = factor(paste(term, time, sep = "__"),
4244
#' levels = rev(paste(term, time, sep = "__")))
@@ -50,13 +52,15 @@ tidy_dtm <- function(x, ...){
5052

5153
doc <- NULL
5254

55+
docfun <- function(docs) if (is_numeric_doc_names(x)) {as.integer(docs)} else {docs}
56+
5357
data.table::data.table(
5458
doc = x[['dimnames']][['Docs']][x[['i']]],
5559
term = x[['dimnames']][['Terms']][x[['j']]],
5660
n = x[['v']],
5761
i = x[['i']],
5862
j = x[['j']]
59-
)[order(doc), ]
63+
)[, doc := docfun(doc)][order(doc), ][]
6064
}
6165

6266

@@ -67,11 +71,13 @@ tidy_tdm <- function(x, ...){
6771

6872
doc <- NULL
6973

74+
docfun <- function(docs) if (is_numeric_doc_names(x)) {as.integer(docs)} else {docs}
75+
7076
data.table::data.table(
7177
doc = x[['dimnames']][['Docs']][x[['j']]],
7278
term = x[['dimnames']][['Terms']][x[['i']]],
7379
n = x[['v']],
7480
i = x[['j']],
7581
j = x[['i']]
76-
)[order(doc), ]
82+
)[, doc := docfun(doc)][order(doc), ][]
7783
}

R/utils.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,18 @@ sprintf_ish <- function(x, padding, type){
117117
# set_tibble(x)
118118
# }
119119

120+
is_numeric_doc_names <- function(x, ...){
121+
UseMethod('is_numeric_doc_names')
122+
}
123+
124+
125+
is_numeric_doc_names.TermDocumentMatrix <- function(x, ...){
126+
colnames_numeric <- suppressWarnings(as.integer(colnames(x)))
127+
!anyNA(colnames_numeric) && isTRUE(all.equal(stats::sd(diff(colnames_numeric)), 0))
128+
}
129+
130+
131+
is_numeric_doc_names.DocumentTermMatrix <- function(x, ...){
132+
rownames_numeric <- suppressWarnings(as.integer(rownames(x)))
133+
!anyNA(rownames_numeric) && isTRUE(all.equal(stats::sd(diff(rownames_numeric)), 0))
134+
}

man/tidy_dtm.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)