Read in DEP Complaints
# Print the current working directory; the relative path passed to read_rds()
# below is resolved against it.
getwd()
## [1] "/Users/seanchew/Desktop/Bo_Assistantship/1_Scripts"
# Load the DEP complaint records from the intermediate RDS file into DEP_Query.
DEP_Query <- read_rds("../3_Intermediate/DEP_Data.rds")
Testing out ngram packages.
# Preview the first rows of the complaint data.
head(DEP_Query)
## unique_key created_date closed_date agency
## 1 23013899 2012-04-06 04:25:00 2012-04-16 04:25:00 DEP
## 2 25627963 2013-05-29 05:54:00 2013-05-29 12:21:00 DEP
## 3 25627964 2013-05-29 07:34:00 2013-05-31 12:30:00 DEP
## 4 23014578 2012-04-06 05:11:00 2012-04-16 05:11:00 DEP
## 5 37475111 2017-10-19 09:18:00 2017-10-24 08:51:00 DEP
## 6 23015156 2012-04-06 09:54:00 2012-04-16 09:54:00 DEP
## agency_name complaint_type
## 1 Department of Environmental Protection Noise
## 2 Department of Environmental Protection Noise
## 3 Department of Environmental Protection Noise
## 4 Department of Environmental Protection Noise
## 5 Department of Environmental Protection Water System
## 6 Department of Environmental Protection Noise
## descriptor incident_zip incident_address
## 1 Noise: Private Carting Noise (NQ1) 11411 117-01 SPRINGFIELD BOULEVARD
## 2 Noise, Barking Dog (NR5) 11233 744 HALSEY STREET
## 3 Noise: Construction Equipment (NC1) 10013 7 HARRISON STREET
## 4 Noise: Private Carting Noise (NQ1) 10038 DUTCH STREET
## 5 Leak (Use Comments) (WA2) 11357 14-25 139 STREET
## 6 Noise: Private Carting Noise (NQ1) 11375 <NA>
## street_name cross_street_1 cross_street_2 address_type
## 1 SPRINGFIELD BOULEVARD LINDEN BLVD 117 RD ADDRESS
## 2 HALSEY STREET PATCHEN AVE RALPH AVE ADDRESS
## 3 HARRISON STREET STAPLE ST GREENWICH ST ADDRESS
## 4 DUTCH STREET JOHN STREET FULTON STREET BLOCKFACE
## 5 139 STREET 14 AVE 15 AVE ADDRESS
## 6 <NA> <NA> <NA> INTERSECTION
## city facility_type status
## 1 Cambria Heights N/A Closed
## 2 BROOKLYN N/A Closed
## 3 NEW YORK N/A Closed
## 4 NEW YORK N/A Closed
## 5 Whitestone N/A Closed
## 6 Forest Hills N/A Closed
## resolution_description
## 1 The status of this Service Request is currently not available online. Please call 311 for further assistance. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## 2 The Department of Environmental Protection determined that an inspection is not warranted to investigate this complaint at this time and sent a letter to the complainant and/or respondent.
## 3 The Department of Environmental Protection did not observe a violation of the New York City Air/Noise Code at the time of inspection and could not issue a notice of violation. If the problem still exists, please call 311 and file a new complaint. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## 4 The status of this Service Request is currently not available online. Please call 311 for further assistance. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## 5 The Department of Environmental Protection determined that this complaint is a duplicate of a previously filed complaint. The original complaint is being addressed.
## 6 The status of this Service Request is currently not available online. Please call 311 for further assistance. If you are outside of New York City, please call (212) NEW-YORK (212-639-9675).
## resolution_action_updated_date community_board bbl borough
## 1 2012-04-16 04:25:00 13 QUEENS 4127340008 QUEENS
## 2 2013-05-29 12:21:00 03 BROOKLYN 3016680033 BROOKLYN
## 3 2013-05-31 12:30:00 01 MANHATTAN 1001807512 MANHATTAN
## 4 2012-04-16 05:11:00 01 MANHATTAN <NA> MANHATTAN
## 5 2017-10-24 08:51:00 07 QUEENS 4041070057 QUEENS
## 6 2012-04-16 09:54:00 06 QUEENS <NA> QUEENS
## x_coordinate_state_plane y_coordinate_state_plane open_data_channel_type
## 1 1055343 192986 UNKNOWN
## 2 1005228 188750 PHONE
## 3 981619 201118 PHONE
## 4 982189 197759 UNKNOWN
## 5 1031371 225613 PHONE
## 6 1027799 201983 UNKNOWN
## park_facility_name park_borough latitude longitude
## 1 Unspecified QUEENS 40.69609320344547 -73.74362116515917
## 2 Unspecified BROOKLYN 40.684727057881254 -73.92436116842451
## 3 Unspecified MANHATTAN 40.718698788216436 -74.00949123734148
## 4 Unspecified MANHATTAN 40.709479286258514 -74.00743395461726
## 5 Unspecified QUEENS 40.78580613055689 -73.82984126011773
## 6 Unspecified QUEENS 40.72096620537131 -73.84289312884077
## location.latitude location.longitude
## 1 40.69609320344547 -73.74362116515917
## 2 40.684727057881254 -73.92436116842451
## 3 40.718698788216436 -74.00949123734148
## 4 40.709479286258514 -74.00743395461726
## 5 40.78580613055689 -73.82984126011773
## 6 40.72096620537131 -73.84289312884077
## location.human_address intersection_street_1
## 1 {"address": "", "city": "", "state": "", "zip": ""} <NA>
## 2 {"address": "", "city": "", "state": "", "zip": ""} <NA>
## 3 {"address": "", "city": "", "state": "", "zip": ""} <NA>
## 4 {"address": "", "city": "", "state": "", "zip": ""} <NA>
## 5 {"address": "", "city": "", "state": "", "zip": ""} <NA>
## 6 {"address": "", "city": "", "state": "", "zip": ""} QUEENS BOULEVARD
## intersection_street_2 due_date location_type
## 1 <NA> <NA> <NA>
## 2 <NA> <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 71 ROAD <NA> <NA>
# Keep only the complaint type together with one free-text column each.
DEP_resolution <- select(DEP_Query, complaint_type, resolution_description)
DEP_descriptors <- select(DEP_Query, complaint_type, descriptor)

# Split each text column into 4-word n-grams and tally them, most frequent
# first.
resolution_ngrams <- count(
  unnest_tokens(
    DEP_resolution, ngram, resolution_description,
    token = "ngrams", n = 4
  ),
  ngram,
  sort = TRUE
)
descriptors_ngrams <- count(
  unnest_tokens(
    DEP_descriptors, ngram, descriptor,
    token = "ngrams", n = 4
  ),
  ngram,
  sort = TRUE
)
# Bar chart of the 30 most frequent descriptor n-grams.
# Rows with a missing value are dropped *before* the top 30 are taken;
# dropping them afterwards (as before) could leave fewer than 30 bars whenever
# a missing n-gram ranked among the top 30.
descriptors_ngrams %>%
  drop_na() %>%
  arrange(desc(n)) %>%
  head(30) %>%
  ggplot(aes(x = n, y = reorder(ngram, n))) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Number of Occurrences",
    y = "Ngram"
  )

# Same chart for the resolution-description n-grams.
resolution_ngrams %>%
  drop_na() %>%
  arrange(desc(n)) %>%
  head(30) %>%
  ggplot(aes(x = n, y = reorder(ngram, n))) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Number of Occurrences",
    y = "Ngram"
  )

# Complaint types that get their own per-type n-gram breakdown.
complaint_types <- c("Water System", "Air Quality", "Sewer")
# NOTE(review): resolution_inputs is not referenced anywhere in the visible
# part of this script; kept in case later code relies on it.
resolution_inputs <- list(DEP_Query, complaint_types)
# Shared worker for the two by_type_filter_* functions.
#
# Keeps the rows of `data` whose complaint_type matches `type`, splits the
# column named by `text_col` into n-grams of `ngram_size` words, and returns
# the n-gram counts (columns `ngram` and `n`), most frequent first.
count_type_ngrams <- function(data, type, text_col, ngram_size = 4) {
  data %>%
    # .env$ pins `type` to the function argument, so a data column that
    # happens to share the name can never be picked up instead.
    filter(complaint_type %in% .env$type) %>%
    # Rename the chosen column so the tokenizer call below can use a fixed name.
    select(text = all_of(text_col)) %>%
    unnest_tokens(ngram, text, token = "ngrams", n = ngram_size) %>%
    count(ngram, sort = TRUE)
}

# Count resolution_description n-grams for one complaint type.
#
# x    Complaint type to keep (compared against complaint_type).
# data Data frame with complaint_type and resolution_description columns;
#      defaults to the global DEP_Query, as before.
# n    Number of words per n-gram; defaults to 4, as before.
#
# Returns a data frame with columns `ngram` and `n`, sorted by descending n.
by_type_filter_resolution <- function(x, data = DEP_Query, n = 4) {
  count_type_ngrams(data, x, "resolution_description", ngram_size = n)
}

# Count descriptor n-grams for one complaint type.
#
# x    Complaint type to keep (compared against complaint_type).
# data Data frame with complaint_type and descriptor columns; defaults to the
#      global DEP_Query, as before.
# n    Number of words per n-gram; defaults to 4, as before.
#
# Returns a data frame with columns `ngram` and `n`, sorted by descending n.
by_type_filter_descriptor <- function(x, data = DEP_Query, n = 4) {
  count_type_ngrams(data, x, "descriptor", ngram_size = n)
}
# Horizontal bar chart of the most frequent n-grams.
#
# x     Table of n-gram counts with columns `ngram` and `n` (for example the
#       output of the by_type_filter_* functions).
# top_n Maximum number of n-grams to show; defaults to 30, as before.
#
# Returns a ggplot object.
ngram_plot <- function(x, top_n = 30) {
  x %>%
    as.data.frame() %>%
    # Remove rows with missing values first so they cannot use up one of the
    # top_n slots; dropping them after head() could leave fewer bars.
    drop_na() %>%
    arrange(desc(n)) %>%
    head(top_n) %>%
    ggplot(aes(x = n, y = reorder(ngram, n))) +
    geom_col(show.legend = FALSE) +
    labs(
      x = "Number of Occurrences",
      y = "Ngram"
    )
}
# Per-complaint-type n-gram counts, one list element per entry of
# complaint_types.
resolution_ngrams_by_cat <- map(complaint_types, by_type_filter_resolution)
descriptor_ngrams_by_cat <- map(complaint_types, by_type_filter_descriptor)
# resolution_ngram_plots<-map(resolution_ngrams_by_cat,ngram_plot)
descriptor_ngram_plots <- map(descriptor_ngrams_by_cat, ngram_plot)
# The descriptors after the first 11 are unimportant, it seems
# head() keeps at most 11 plots without padding. Indexing with [1:11] filled a
# shorter list with NULL entries (the NULL slots in the recorded output below,
# which predates this change).
descriptor_ngram_plots <- head(descriptor_ngram_plots, 11)
descriptor_ngram_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
# plotnames = imap(complaint_types[1:11], ~paste0("../4_Outputs/",., "_ngrams.png")) %>%
# flatten()
# plotnames
# pwalk(list(descriptor_ngram_plots,plotnames), ggsave)
# walk2(plotnames, descriptor_ngram_plots, ~ggsave(filename = .x, plot = .y,
# height = 7, width = 7))