trinker
diff --git a/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS‎
Lines changed: 1 addition & 0 deletions b/‎NEWS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 1 addition & 0 deletions b/‎NEWS.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/unnest_text.R‎
Lines changed: 86 additions & 0 deletions b/‎R/unnest_text.R‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎README.Rmd‎
Lines changed: 7 additions & 1 deletion b/‎README.Rmd‎
Lines changed: 7 additions & 1 deletion
@@ -1,7 +1,6 @@
 Package: textshape
-Date: 2018-02-11
 Title: Tools for Reshaping Text
-Version: 1.5.2
+Version: 1.5.3
 Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", "cre")), person("Joran",
              "Elias", role = "ctb"), person("Matthew", "Flickinger", role = "ctb"), person('Paul', 'Foster', role =
              "ctb"))
@@ -49,3 +48,4 @@ Collate:
     'tidy_table.R'
     'tidy_vector.R'
     'unique_pairs.R'
+    'unnest_text.R'
@@ -65,5 +65,6 @@ export(tidy_table)
 export(tidy_tdm)
 export(tidy_vector)
 export(unique_pairs)
+export(unnest_text)
 importFrom(data.table,":=")
 importFrom(data.table,.N)
@@ -31,6 +31,7 @@ NEW FEATURES
   using the concatenated list/atomic vector names as the names of the single
   tiered list.
 
+* `unnest_text` added to locate and unnest nested text columns in a data.frame.
 
 MINOR FEATURES
 
 
@@ -31,6 +31,7 @@ textshape 1.5.1 -
   using the concatenated list/atomic vector names as the names of the single
   tiered list.
 
+* `unnest_text` added to locate and unnest nested text columns in a data.frame.
 
 **MINOR FEATURES**
 
 
@@ -0,0 +1,86 @@
+#' Unnest Nested Text Columns
+#' 
+#' Unnest a nested (list) text column in a data.frame so that every element of 
+#' each list entry gets its own row.  If \code{column} is not supplied the 
+#' function attempts to locate the nested column itself.
+#' 
+#' @param dataframe A dataframe object.
+#' @param column Name of the nested (list) column to unnest.  If missing, the 
+#' single non-atomic column of \code{dataframe} is used; an error is raised 
+#' when zero or more than one non-atomic column is found.
+#' @param integer.rownames logical.  If \code{TRUE} then the rownames are numbered
+#' 1 through number of rows, otherwise the original row name is retained 
+#' followed by a period and the element number from the list (e.g., 
+#' \code{"2.1"}, \code{"2.2"}).
+#' @param \ldots ignored.
+#' @return Returns an unnested data.frame.  Rows whose nested element has 
+#' length zero do not appear in the result.
+#' @export
+#' @examples
+#' dat <- DATA
+#' 
+#' ## Add a nested/list text column
+#' dat$split <- lapply(dat$state, function(x) {
+#'     unlist(strsplit(x, '(?<=[?!.])\\s+', perl = TRUE))
+#' })
+#' 
+#' unnest_text(dat)
+#' unnest_text(dat, integer.rownames = FALSE)
+#' 
+#' ## Add a second nested integer column
+#' dat$d <- lapply(dat$split, nchar)
+#' \dontrun{
+#' unnest_text(dat) # causes error, must supply column explicitly
+#' }
+#' unnest_text(dat, 'split')
+#' 
+#' ## As a data.table
+#' library(data.table)
+#' dt_dat <- data.table::as.data.table(data.table::copy(dat))
+#' unnest_text(dt_dat, 'split')
+#' \dontrun{
+#' unnest_text(dt_dat, 'd')
+#' }
+#' 
+#' \dontrun{
+#' ## As a tibble
+#' library(tibble)
+#' t_dat <- tibble::as_tibble(dat)
+#' unnest_text(t_dat, 'split')
+#' }
+unnest_text <- function(dataframe, column, integer.rownames = TRUE, ...){
+
+    if (missing(column)) {
+        ## A nested column is any column that is not an atomic vector
+        is_nested <- !vapply(as.data.frame(dataframe), is.atomic, logical(1))
+        column <- names(dataframe)[is_nested]
+        if (length(column) == 0) stop("There appears to be no nested columns.  Please supply `column` explicitly.")
+        if (length(column) > 1) stop("There appears to be multiple nested columns.  Please supply `column` explicitly.")  
+        message(sprintf('Nested column detected, unnesting: %s', column))
+    }
+
+    nested <- dataframe[[column]]
+    
+    ## `[[` returns NULL for an unknown name; fail here with a clear message 
+    ## rather than later inside `rep`
+    if (is.null(nested)) {
+        stop(sprintf('Column `%s` was not found in `dataframe`.', column), call. = FALSE)
+    }
+    
+    lens <- lengths(nested)
+    col <- unlist(nested, use.names = FALSE)
+    
+    ## `unlist` yields NULL when every element is empty; assigning NULL below
+    ## would delete the column instead of leaving it empty
+    if (is.null(col)) col <- character(0)
+
+    if (!is.character(col)) {
+        warning(sprintf(paste0('Unnesting: `%s`\nThis is not a character column.\n\n', 
+            'Perhaps you want to use `tidyr::unnest` instead?'), column), call. = FALSE)
+    }
+    
+    ## Captured before the rows are repeated so the names are not altered by
+    ## the de-duplication that row subsetting applies to repeated rows
+    orig_rnms <- rownames(dataframe)
+    
+    ## Swap the list column for an atomic placeholder so rows can be repeated
+    ## (sized by nrow so a zero row input is also accepted)
+    dataframe[[column]] <- rep(NA, nrow(dataframe))
+    
+    ## `drop = FALSE` keeps a single column data.frame from collapsing to a vector
+    dataframe <- dataframe[rep(seq_len(nrow(dataframe)), lens), , drop = FALSE]
+    
+    dataframe[[column]] <- col
+    if (isTRUE(integer.rownames)) {
+        rownames(dataframe) <- NULL
+    } else {
+        ## <original row name>.<element number within that row's list entry>
+        rownames(dataframe) <- paste0(rep(orig_rnms, lens), '.', sequence(lens))
+    }
+    
+    dataframe
+
+}
+
@@ -59,6 +59,7 @@ Most of the functions split, expand, or tidy a `vector`, `list`, `data.frame`, o
 | `from_to`        | `vector`, `data.frame`         | Prepare speaker data for a flow network                      |
 | `mtabulate`      | `vector`, `list`, `data.frame` | Dataframe/list version of `tabulate` to produce count matrix |
 | `flatten`        | `list`                         | Flatten nested, named list to single tier                    |
+| `unnest_text`    | `data.frame`                   | Unnest a nested text column                                  |
 | `split_index`    | `vector`, `list`, `data.frame` | Split at specified indices                                   |
 | `split_match`    | `vector`                       | Split vector at specified character/regex match              |
 | `split_portion`  | `vector`\*                     | Split data into portioned chunks                             |
@@ -237,10 +238,15 @@ The `tidy_colo_dtm` and `tidy_colo_tdm` functions convert a `DocumentTermMatrix`
 
 ```{r}
 my_dtm <- with(presidential_debates_2012, q_dtm(dialogue, paste(time, tot, sep = "_")))
+sw <- unique(c(
+    lexicon::sw_jockers, 
+    lexicon::sw_loughran_mcdonald_long, 
+    lexicon::sw_fry_1000
+))
 
 tidy_colo_dtm(my_dtm) %>%
     tbl_df() %>%
-    filter(!term_1 %in% c('i', lexicon::sw_onix) & !term_2 %in% lexicon::sw_onix) %>%
+    filter(!term_1 %in% c('i', sw) & !term_2 %in% sw) %>%
     filter(term_1 != term_2) %>%
     unique_pairs() %>%
     filter(n > 15) %>%