Read in DEP Complaints

# Print the working directory; the relative path below is resolved against it.
getwd()
## [1] "/Users/seanchew/Desktop/Bo_Assistantship/1_Scripts"
# Load the saved DEP complaints table from the intermediate-data folder.
DEP_Query <- read_rds(file = "../3_Intermediate/DEP_Data.rds")

Testing out ngram packages.

# Preview the first rows of the complaints table.
head(DEP_Query)
##   unique_key        created_date         closed_date agency
## 1   23013899 2012-04-06 04:25:00 2012-04-16 04:25:00    DEP
## 2   25627963 2013-05-29 05:54:00 2013-05-29 12:21:00    DEP
## 3   25627964 2013-05-29 07:34:00 2013-05-31 12:30:00    DEP
## 4   23014578 2012-04-06 05:11:00 2012-04-16 05:11:00    DEP
## 5   37475111 2017-10-19 09:18:00 2017-10-24 08:51:00    DEP
## 6   23015156 2012-04-06 09:54:00 2012-04-16 09:54:00    DEP
##                              agency_name complaint_type
## 1 Department of Environmental Protection          Noise
## 2 Department of Environmental Protection          Noise
## 3 Department of Environmental Protection          Noise
## 4 Department of Environmental Protection          Noise
## 5 Department of Environmental Protection   Water System
## 6 Department of Environmental Protection          Noise
##                            descriptor incident_zip             incident_address
## 1  Noise: Private Carting Noise (NQ1)        11411 117-01 SPRINGFIELD BOULEVARD
## 2            Noise, Barking Dog (NR5)        11233            744 HALSEY STREET
## 3 Noise: Construction Equipment (NC1)        10013            7 HARRISON STREET
## 4  Noise: Private Carting Noise (NQ1)        10038                 DUTCH STREET
## 5           Leak (Use Comments) (WA2)        11357             14-25 139 STREET
## 6  Noise: Private Carting Noise (NQ1)        11375                         <NA>
##             street_name cross_street_1 cross_street_2 address_type
## 1 SPRINGFIELD BOULEVARD    LINDEN BLVD         117 RD      ADDRESS
## 2         HALSEY STREET    PATCHEN AVE      RALPH AVE      ADDRESS
## 3       HARRISON STREET      STAPLE ST   GREENWICH ST      ADDRESS
## 4          DUTCH STREET    JOHN STREET  FULTON STREET    BLOCKFACE
## 5            139 STREET         14 AVE         15 AVE      ADDRESS
## 6                  <NA>           <NA>           <NA> INTERSECTION
##              city facility_type status
## 1 Cambria Heights           N/A Closed
## 2        BROOKLYN           N/A Closed
## 3        NEW YORK           N/A Closed
## 4        NEW YORK           N/A Closed
## 5      Whitestone           N/A Closed
## 6    Forest Hills           N/A Closed
##                                                                                                                                                                                                                                                                                                                   resolution_description
## 1                                                                                                                                          The status of this Service Request is currently not available online. Please call 311 for further assistance. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## 2                                                                                                                                           The Department of Environmental Protection determined that an inspection is not warranted to investigate this complaint at this time and sent a letter to the complainant and/or respondent.
## 3 The Department of Environmental Protection did not observe a violation of the New York City Air/Noise Code at the time of inspection and could not issue a notice of violation. If the problem still exists, please call 311 and file a new complaint. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## 4                                                                                                                                          The status of this Service Request is currently not available online. Please call 311 for further assistance. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## 5                                                                                                                                                                   The Department of Environmental Protection determined that this complaint is a duplicate of a previously filed complaint. The original complaint is being addressed.
## 6                                                                                                                                          The status of this Service Request is currently not available online. Please call 311 for further assistance. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
##   resolution_action_updated_date community_board        bbl   borough
## 1            2012-04-16 04:25:00       13 QUEENS 4127340008    QUEENS
## 2            2013-05-29 12:21:00     03 BROOKLYN 3016680033  BROOKLYN
## 3            2013-05-31 12:30:00    01 MANHATTAN 1001807512 MANHATTAN
## 4            2012-04-16 05:11:00    01 MANHATTAN       <NA> MANHATTAN
## 5            2017-10-24 08:51:00       07 QUEENS 4041070057    QUEENS
## 6            2012-04-16 09:54:00       06 QUEENS       <NA>    QUEENS
##   x_coordinate_state_plane y_coordinate_state_plane open_data_channel_type
## 1                  1055343                   192986                UNKNOWN
## 2                  1005228                   188750                  PHONE
## 3                   981619                   201118                  PHONE
## 4                   982189                   197759                UNKNOWN
## 5                  1031371                   225613                  PHONE
## 6                  1027799                   201983                UNKNOWN
##   park_facility_name park_borough           latitude          longitude
## 1        Unspecified       QUEENS  40.69609320344547 -73.74362116515917
## 2        Unspecified     BROOKLYN 40.684727057881254 -73.92436116842451
## 3        Unspecified    MANHATTAN 40.718698788216436 -74.00949123734148
## 4        Unspecified    MANHATTAN 40.709479286258514 -74.00743395461726
## 5        Unspecified       QUEENS  40.78580613055689 -73.82984126011773
## 6        Unspecified       QUEENS  40.72096620537131 -73.84289312884077
##    location.latitude location.longitude
## 1  40.69609320344547 -73.74362116515917
## 2 40.684727057881254 -73.92436116842451
## 3 40.718698788216436 -74.00949123734148
## 4 40.709479286258514 -74.00743395461726
## 5  40.78580613055689 -73.82984126011773
## 6  40.72096620537131 -73.84289312884077
##                                location.human_address intersection_street_1
## 1 {"address": "", "city": "", "state": "", "zip": ""}                  <NA>
## 2 {"address": "", "city": "", "state": "", "zip": ""}                  <NA>
## 3 {"address": "", "city": "", "state": "", "zip": ""}                  <NA>
## 4 {"address": "", "city": "", "state": "", "zip": ""}                  <NA>
## 5 {"address": "", "city": "", "state": "", "zip": ""}                  <NA>
## 6 {"address": "", "city": "", "state": "", "zip": ""}      QUEENS BOULEVARD
##   intersection_street_2 due_date location_type
## 1                  <NA>     <NA>          <NA>
## 2                  <NA>     <NA>          <NA>
## 3                  <NA>     <NA>          <NA>
## 4                  <NA>     <NA>          <NA>
## 5                  <NA>     <NA>          <NA>
## 6               71 ROAD     <NA>          <NA>
# Keep only the complaint type plus the text column each n-gram pass works on.
DEP_resolution <- select(DEP_Query, complaint_type, resolution_description)

DEP_descriptors <- select(DEP_Query, complaint_type, descriptor)

# Split every resolution description into 4-word n-grams and tally them,
# most frequent first.
resolution_ngrams <- count(
  unnest_tokens(
    DEP_resolution, ngram, resolution_description,
    token = "ngrams", n = 4
  ),
  ngram,
  sort = TRUE
)

# Same tally for the complaint descriptors.
descriptors_ngrams <- count(
  unnest_tokens(DEP_descriptors, ngram, descriptor, token = "ngrams", n = 4),
  ngram,
  sort = TRUE
)
# Bar chart of the 30 most frequent descriptor n-grams.
# Rows with NA are removed *before* truncating to 30 so that an NA n-gram
# (presumably produced for descriptors shorter than four words) cannot use up
# one of the 30 slots and leave the chart a bar short.
descriptors_ngrams %>%
  drop_na() %>%
  arrange(desc(n)) %>%
  head(30) %>%
  # reorder() sorts the bars by count instead of alphabetically.
  ggplot(aes(x = n, y = reorder(ngram, n))) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Number of Occurances",
    y = "Ngram"
  )

# Bar chart of the 30 most frequent resolution-description n-grams.
# Rows with NA are removed *before* truncating to 30 so that an NA n-gram
# (presumably produced for texts shorter than four words) cannot use up one of
# the 30 slots and leave the chart a bar short.
resolution_ngrams %>%
  drop_na() %>%
  arrange(desc(n)) %>%
  head(30) %>%
  # reorder() sorts the bars by count instead of alphabetically.
  ggplot(aes(x = n, y = reorder(ngram, n))) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Number of Occurances",
    y = "Ngram"
  )

# Complaint types that get their own per-type n-gram breakdown.
complaint_types <- c("Water System", "Air Quality", "Sewer")
# The full data set and the type vector bundled into a single list.
resolution_inputs <- list(DEP_Query, complaint_types)

#' Count 4-word n-grams in the resolution text of one complaint type
#'
#' @param x Complaint type to keep: a single value compared against the
#'   `complaint_type` column.
#' @param data Data frame with `complaint_type` and `resolution_description`
#'   columns. Defaults to the global `DEP_Query`, so existing one-argument
#'   calls behave as before.
#' @return A data frame with one row per distinct n-gram (`ngram`) and its
#'   count (`n`), sorted by descending count.
by_type_filter_resolution <- function(x, data = DEP_Query) {
  # A longer vector would be silently recycled by `==` and match wrong rows.
  stopifnot(length(x) == 1L)
  data %>%
    # `!!x` injects the argument's value, so a column that happens to be
    # named `x` can never shadow it inside filter().
    filter(complaint_type == !!x) %>%
    select(complaint_type, resolution_description) %>%
    unnest_tokens(ngram, resolution_description, token = "ngrams", n = 4) %>%
    count(ngram, sort = TRUE)
}
#' Count 4-word n-grams in the descriptor text of one complaint type
#'
#' @param x Complaint type to keep: a single value compared against the
#'   `complaint_type` column.
#' @param data Data frame with `complaint_type` and `descriptor` columns.
#'   Defaults to the global `DEP_Query`, so existing one-argument calls
#'   behave as before.
#' @return A data frame with one row per distinct n-gram (`ngram`) and its
#'   count (`n`), sorted by descending count.
by_type_filter_descriptor <- function(x, data = DEP_Query) {
  # A longer vector would be silently recycled by `==` and match wrong rows.
  stopifnot(length(x) == 1L)
  data %>%
    # `!!x` injects the argument's value, so a column that happens to be
    # named `x` can never shadow it inside filter().
    filter(complaint_type == !!x) %>%
    select(complaint_type, descriptor) %>%
    unnest_tokens(ngram, descriptor, token = "ngrams", n = 4) %>%
    count(ngram, sort = TRUE)
}

#' Horizontal bar chart of the most frequent n-grams
#'
#' @param x Table of n-gram counts with an `ngram` column and a count column
#'   `n`; it is coerced with `as.data.frame()`.
#' @param top Maximum number of n-grams to draw. Defaults to 30.
#' @return A ggplot object with one bar per n-gram, ordered by count.
ngram_plot <- function(x, top = 30) {
  x %>%
    as.data.frame() %>%
    # Drop NA rows and sort *before* truncating: otherwise an NA n-gram uses
    # up one of the `top` slots, and unsorted input would not yield the most
    # frequent n-grams.
    drop_na() %>%
    arrange(desc(n)) %>%
    head(top) %>%
    # reorder() sorts the bars by count instead of alphabetically.
    ggplot(aes(x = n, y = reorder(ngram, n))) +
    geom_col(show.legend = FALSE) +
    labs(
      x = "Number of Occurances",
      y = "Ngram"
    )
}

# Per-type n-gram counts: one table per entry of complaint_types, same order.
resolution_ngrams_by_cat <- map(complaint_types, by_type_filter_resolution)
descriptor_ngrams_by_cat <- map(complaint_types, by_type_filter_descriptor)

# resolution_ngram_plots<-map(resolution_ngrams_by_cat,ngram_plot)
descriptor_ngram_plots <- map(descriptor_ngrams_by_cat, ngram_plot)

# The descriptors after the first 11 are unimportant, it seems.
# head() keeps at most 11 plots; `[1:11]` padded the list with NULL entries
# whenever fewer than 11 complaint types were mapped.
descriptor_ngram_plots <- head(descriptor_ngram_plots, 11)

descriptor_ngram_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
# plotnames = imap(complaint_types[1:11], ~paste0("../4_Outputs/",., "_ngrams.png")) %>%
#     flatten()
# plotnames

# pwalk(list(descriptor_ngram_plots,plotnames), ggsave)
# walk2(plotnames, descriptor_ngram_plots, ~ggsave(filename = .x, plot = .y, 
                                             # height = 7, width = 7))