knitr::opts_chunk$set(
  error = FALSE,
  message = FALSE,
  warning = FALSE
)
library(tidyverse)
library(sf)
library(janitor)
library(lubridate)
library(tidycensus)

Part 1: Identifying Potential Illegal Basement Units

boroughs <- 
    read_sf("../Data/borough_boundaries") %>% 
    st_transform(crs = 2263)

tracts <- 
    read_sf("../Data/ct_2010") %>% 
    st_transform(crs = 2263) %>% 
    mutate(
      ct2010 = as.numeric(ct2010) / 100 #converting to MAPPLUTO format
    )

blocks <- 
    read_sf("../Data/cb_2010") %>% 
    st_transform(crs = 2263) %>% 
    mutate(
      ct2010 = as.numeric(ct2010) / 100 #converting to MAPPLUTO format
    )

boroughs %>% 
    ggplot() + 
    geom_sf() + 
    geom_sf(data = blocks, color = "darkred", size = 0.2) + 
    theme_void()

Locate Small Residential Buildings with Below-Grade Full Basements

mappluto <- 
    read_sf("../Data/mappluto") %>% 
    st_transform(crs = 2263) %>% 
    mutate(
      boro_name = 
        case_when(
          Borough == "MN" ~ "Manhattan", 
          Borough == "BK" ~ "Brooklyn", 
          Borough == "QN" ~ "Queens", 
          Borough == "BX" ~ "Bronx", 
          Borough == "SI" ~ "Staten Island"
        ), 
        ct2010 = as.numeric(CT2010), 
        cb2010 = CB2010
    )
  
mappluto_bsmts <- 
    mappluto %>% 
    drop_na(BsmtCode) %>% 
    mutate(BsmtCode = as.integer(BsmtCode)) %>% 
    filter(!(BsmtCode %in% c(0, 5)))

basements <- 
    mappluto_bsmts %>% 
    filter(UnitsRes > 0, UnitsRes < 4) %>% 
    mutate(
      BsmtAboveGrade = 
        ifelse(BsmtCode %in% c(1, 3), TRUE, FALSE), 
      BsmtFull = 
        ifelse(BsmtCode %in% c(1, 2), TRUE, FALSE)
    ) %>% 
    select(
      boro_name, 
      cb2010, 
      ct2010, 
      LotArea, 
      BldgArea, 
      ResArea, 
      UnitsRes, 
      BsmtCode, 
      BsmtAboveGrade, 
      BsmtFull, 
      geometry
    ) 

basements %>% 
  count(BsmtAboveGrade, BsmtFull)

Basements that are above grade can be considered more likely to be legal residences, as these units are more easily capable of complying with DOB regulations for basement units (eg. light, air flow, drainage). Basements that are below grade and full size (ie. at least 75% of the floor area of the first floor) can be assumed to be the most likely candidates for illegal basement units. These units are large enough to provide ample space for a residential unit, but are more prone to flooding due to sitting below street level.

For the purposes of this project, we will assume that buildings with full, below grade basements (FALSE-TRUE in the above table) are candidates for illegal units. This is a rough assumption that likely oversimplifies the reality, but given the lack of clear data, this model will allow us to narrow down our study somewhat in hopes of identifying the most at-risk data. Given this assumption, there are 456,804 buildings across the five boroughs that could potentially contain illegal basement apartments.

basements_illegal <- 
    basements %>% 
    filter(!BsmtAboveGrade, BsmtFull) %>% 
    st_as_sf()

basements_illegal %>% 
  ggplot() + 
  geom_sf(data = boroughs) + 
  geom_sf(size = 0.05) + 
  theme_void() + 
  labs(
    title = "1-3 unit residential buildings with full below-grade basements", 
    caption = "Source: NYC MapPLUTO"
  )

Aggregate Potential Illegal Basement Units

lots_block <- 
    mappluto %>% 
    st_drop_geometry() %>% 
    group_by(boro_name, ct2010, cb2010) %>% 
    count(name = "total_lots")

lots_tract <- 
    mappluto %>% 
    st_drop_geometry() %>% 
    group_by(boro_name, ct2010) %>% 
    count(name = "total_lots")

bsmts_blocks <- 
    blocks %>% 
    left_join(
      basements_illegal %>% 
      st_drop_geometry() %>% 
      group_by(boro_name, ct2010, cb2010) %>% 
      count(name = "bsmts_illegal"), 
      by = c("boro_name", "ct2010", "cb2010")) %>% 
    mutate(
      bsmts_illegal = replace_na(bsmts_illegal, 0)
    ) %>% 
    left_join(lots_block, by = c("boro_name", "ct2010", "cb2010")) %>% 
    mutate(prop_bsmts = bsmts_illegal / total_lots)

bsmts_tracts <- 
    tracts %>% 
    left_join(
      basements_illegal %>% 
      st_drop_geometry() %>% 
      group_by(boro_name, ct2010) %>% 
      count(name = "bsmts_illegal"), 
      by = c("boro_name", "ct2010")) %>% 
    mutate(
      bsmts_illegal = replace_na(bsmts_illegal, 0)
    ) %>% 
    left_join(lots_tract, by = c("boro_name", "ct2010")) %>% 
    mutate(prop_bsmts = bsmts_illegal / total_lots)
bsmts_blocks %>% 
  ggplot(aes(fill = bsmts_illegal)) + 
  geom_sf(size = 0) + 
  scale_fill_viridis_c() + 
  theme_void() + 
  labs(
    title = "Buildings with possible illegal basement units by Census block", 
    subtitle = 
      "Includes 1-3 unit residential buildings w/ full, below-grade basements", 
    caption = "Source: NYC MapPLUTO", 
    fill = "# of buildings"
  )

bsmts_blocks %>% 
  ggplot(aes(fill = prop_bsmts)) + 
  geom_sf(size = 0) + 
  scale_fill_viridis_c(labels = scales::label_percent()) + 
  theme_void() + 
  labs(
    title = "Buildings with possible illegal basement units by Census block", 
    subtitle = 
      "Includes 1-3 unit residential buildings w/ full, below-grade basements", 
    caption = "Source: NYC MapPLUTO", 
    fill = "% of buildings"
  )

bsmts_tracts %>% 
  ggplot(aes(fill = bsmts_illegal)) + 
  geom_sf(size = 0) + 
  scale_fill_viridis_c() + 
  theme_void() + 
  labs(
    title = "Buildings with possible illegal basement units by Census tract", 
    subtitle = 
      "Includes 1-3 unit residential buildings w/ full, below-grade basements", 
    caption = "Source: NYC MapPLUTO", 
    fill = "% of buildings"
  )

bsmts_tracts %>% 
  ggplot(aes(fill = prop_bsmts)) + 
  geom_sf(size = 0) + 
  scale_fill_viridis_c(labels = scales::label_percent()) + 
  theme_void() + 
  labs(
    title = "Buildings with possible illegal basement units by Census tract", 
    subtitle = 
      "Includes 1-3 unit residential buildings w/ full, below-grade basements", 
    caption = "Source: NYC MapPLUTO", 
    fill = "% of buildings"
  )

Part 2: Estimate Distribution of Illegal Basement Units

Identify New Households by Census Estimates and DOB Certificates of Occupancy

In this section, we used data from the NYC Department of Buildings (DOB) and the US Census Bureau to estimate the number of new, ‘unaccounted for’ residential units in each Census tract between 2012 and 2019. This was calculated by subtracting the number of new Certificates of Occupancy issued by DOB from the number of new household units in each tract as reported by the American Community Survey. If a tract has more new households than new units to house them, the difference is represented above as units that are ‘unaccounted for.’ The methodology for this process was adopted from a 2008 study from the Pratt Center (updated in 2021) that sought to quantify the number of illegal basement units across NYC.

Once again, we recognize that this methodology is a rough estimate at best. There are plausible scenarios in which an area legitimately acquires more new households than new housing units, including scenarios in which multiple households live together in one house or unit or in which existing units were originally vacant. Likewise, not all illegal housing units are sure to be caught by this process, especially in communities with high vacancy rates. Data errors may further skew results one way or the other. Nonetheless, we are working with limited data; this methodology can serve as a rough heuristic for spatially locating where illegal basement units may be concentrated in New York City.

It should also be noted that this process only accounts for new illegal housing units that were occupied for the first time between 2012 and 2019. Many such units were presumably occupied before 2012 or after 2019; however, these are not included in this estimate. Rather than present a definitive representation of where these units are in the city, we treat these results as an estimate of the spatial distribution of such units.

co <- 
  read_csv("../Data/DOB_Certificate_Of_Occupancy.csv") %>% 
  clean_names() %>% 
  mutate(
    date = mdy(c_o_issue_date), 
    year = year(date), 
    pr_dwelling_unit = as.integer(replace_na(pr_dwelling_unit, 0)), 
    ex_dwelling_unit = as.integer(replace_na(ex_dwelling_unit, 0)), 
    new_dwelling_unit = pr_dwelling_unit - ex_dwelling_unit
  ) %>% 
  filter(
    application_status_raw == "Issued", 
    year < 2020
  ) %>% 
  group_by(bbl, location) %>% 
  arrange(desc(date)) %>% 
  slice_head()

co_sf <- 
  co %>% 
  left_join(mappluto, by = c("bbl" = "BBL")) %>% 
  drop_na(CT2010) %>% 
  st_as_sf()

new_co_block <- 
  co_sf %>% 
  st_drop_geometry() %>% 
  group_by(boro_name, ct2010, cb2010) %>% 
  count(name = "new_co", wt = new_dwelling_unit)

new_co_tract <- 
  co_sf %>% 
  st_drop_geometry() %>% 
  group_by(boro_name, ct2010) %>% 
  count(name = "new_co", wt = new_dwelling_unit)
vars <- c(total_hh_2020 = "B10063_001E")

census_api_key("4878f9292ac0c11cddc6fabc8cd8530c4d0e12f0")

hh_2019_tract <- 
  get_acs(
    state = 36, 
    county = c("061", "047", "081", "005", "085"), 
    geography = "tract", 
    variables = vars, 
    year = 2019
  ) %>% 
  transmute(
    cnty = str_sub(GEOID, 3, 5),
    tract = as.integer(str_sub(GEOID, 6, 12)), 
    hh_2019 = estimate
  )

hh_2012_tract <- 
  get_acs(
    state = 36, 
    county = c("061", "047", "081", "005", "085"), 
    geography = "tract", 
    variables = vars, 
    year = 2012
  ) %>% 
  transmute(
    cnty = str_sub(GEOID, 3, 5),
    tract = as.integer(str_sub(GEOID, 6, 12)), 
    hh_2012 = estimate
  )

hh_tract <- 
  hh_2012_tract %>% 
  left_join(hh_2019_tract, by = c("cnty", "tract")) %>% 
  transmute(
    new_hh = hh_2019 - hh_2012, 
    new_hh = ifelse(new_hh < 0, 0, new_hh), 
    boro_name = 
        case_when(
          cnty == "061" ~ "Manhattan", 
          cnty == "047" ~ "Brooklyn", 
          cnty == "081" ~ "Queens", 
          cnty == "005" ~ "Bronx", 
          cnty == "085" ~ "Staten Island"
        ), 
        ct2010 = tract / 100
  )

Estimate and Visualize Distribution of Unaccounted For Households

missing_hh <- 
  tracts %>% 
  left_join(new_co_tract, by = c("boro_name", "ct2010")) %>% 
  left_join(hh_tract, by = c("boro_name", "ct2010")) %>% 
  mutate(
    unknown_new_hh = new_hh - new_co, 
    unknown_new_hh = ifelse(unknown_new_hh < 0, 0, unknown_new_hh)
  )

missing_hh %>% 
  mutate(
    unknown_hh_cat = cut(unknown_new_hh, c(0, 1, 100, 200, 300, 400, 500, 2000))
  ) %>% 
  ggplot() + 
  geom_sf(data = boroughs) + 
  geom_sf(aes(fill = unknown_hh_cat), size = 0.1) + 
  scale_fill_viridis_d(
    labels = 
      c("0", "1-99", "100-199", "200-299", "300-399", "400-499", "500+")
  ) + 
  theme_void() + 
  labs(
    title = "Unaccounted for new households, 2012-2019 by Census tract", 
    subtitle = "Number of new households minus new DOB Certificates of Occupancy",
    fill = "# of units", 
    caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
  )

# Read flood data for storm surge

flood <- read_sf("../Data/moderate_floodmap/moderate_floodmap.shp") %>% 
  st_transform(crs = 2263) 

flood_small <- flood %>% 
  st_zm(flood)

flood_nuisance <- flood_small %>% 
  filter(Flooding_C == 1)

flood_nuisance %>%
  ggplot(aes(fill = Flooding_C)) +
  geom_sf(size = 0) +
  scale_fill_viridis_c() +
  theme_void() +
  labs(
    title = "1 - Nuisance Flooding",
    subtitle =
      "greater or equal to 4 in. and less than 1 ft.",
    caption = "Source: NYCDEP",
    fill = "Flooding_C")

flood_deep <- flood_small %>% 
  filter(Flooding_C == 2)

flood_deep %>%
  ggplot() +
  geom_sf(data = boroughs, fill = "white") +
  geom_sf(size = 0.01,aes(fill = Flooding_C)) +
  scale_fill_viridis_c() +
  theme_void() +
  labs(
    title = "2 - Deep and Contiguous Flooding",
    subtitle = "1ft and Greater",
    caption = "Source: NYCDEP",
    fill = "Flooding_C")

flood_2080 <- flood_small %>% 
  filter(Flooding_C == 3)

flood_2080 %>%
  ggplot(aes(fill = Flooding_C)) +
  geom_sf(size = 0) +
  scale_fill_viridis_c() +
  theme_void() +
  labs(
    title = "3 - Future High Tides 2080",
    subtitle =
      "sourced from on DCP Flood Hazard Mapper High Tide 2080s High Estimate of 58 inches sea level rise.",
    caption = "Source: NYCDEP",
    fill = "Flooding_C")

Take the intersect of basement apartments and flooding scenarios - we will use the worst case scenario

in order to perform our analysis (worst flooding)

At_Risk_Deep <- basements_illegal %>% 
    st_intersection(flood_deep) 

At_Risk_CT <- At_Risk_Deep %>% 
  group_by(ct2010,boro_name) %>% 
  count() %>% 
  st_drop_geometry() %>% 
  mutate(ct_boro = paste0(boro_name,ct2010)) 

tracts_fix <- tracts %>% 
  mutate(ct_boro = paste0(boro_name,ct2010))

At_Risk_Deep_CT <- tracts_fix %>% 
  left_join(At_Risk_CT, by = "ct_boro") %>%
  rename("boro_name" = "boro_name.x" ) %>% 
  mutate(county_name = 
           case_when(
             boro_name ==  "Manhattan" ~ "New York County", 
             boro_name == "Brooklyn" ~ "Kings County", 
             boro_name == "Queens" ~ "Queens County", 
             boro_name == "Bronx" ~ "Bronx County", 
             boro_name == "Staten Island" ~ "Richmond County"),
         name_x = paste0("Census Tract ",ctlabel,", ",county_name,", New York"))

basement_count <- bsmts_tracts %>% 
  mutate(ct_boro = paste0(boro_name,ct2010)) %>% 
  left_join(At_Risk_Deep_CT %>% st_drop_geometry() , by = "ct_boro") %>% 
  mutate(proportion_risk = n/total_lots)



at_risk_ct<-At_Risk_Deep_CT %>% 
  ggplot() + 
  geom_sf(data = boroughs) + 
  scale_fill_viridis_c() +
  geom_sf(aes(fill = n), size = 0.1) + 
  theme_void() + 
  labs(
    title = "Unaccounted for new households at risk of stormwater flooding",
    fill = "# of units", 
    caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
  )

Calculate other indicators: minority status, english fluency, immigrant status, and income

v2019 <- load_variables(2019, "acs5", cache = TRUE)

demographics <- v2019 %>% 
  filter(grepl("B02001",name))

white <- c(white= "B02001_002")
total <- c(total = "B02001_001")
income <- c(income ="B19113_001")
# language_bad <- c(language_bad = "B16004_045")
# language_not_well <- c(language_not_well = "B16004_044")
native_born <- c(native_born = "B05002_002")

White_2019 <- 
  get_acs(
    state = 36, 
    county = c("061", "047", "081", "005", "085"), 
    geography = "tract", 
    variables = white, 
    year = 2019
  )

Total_2019 <- 
  get_acs(
    state = 36, 
    county = c("061", "047", "081", "005", "085"), 
    geography = "tract", 
    variables = total, 
    year = 2019
  )

Minority_2019 <- White_2019 %>% 
  left_join(Total_2019, by = "GEOID") %>% 
  clean_names() %>% 
  mutate(minority = (estimate_y - estimate_x)/estimate_y)

Income_2019 <- 
  get_acs(
    state = 36, 
    county = c("061", "047", "081", "005", "085"), 
    geography = "tract", 
    variables = income, 
    year = 2019
  ) %>% 
  clean_names() %>% 
  filter(!is.na(estimate)) %>% 
  mutate(perc_in = estimate/median(estimate))



native_2019 <- 
  get_acs(
    state = 36, 
    county = c("061", "047", "081", "005", "085"), 
    geography = "tract", 
    variables = native_born, 
    year = 2019
  ) 

immigrants_2019 <- native_2019 %>% 
  left_join(Total_2019, by = "GEOID") %>%
  clean_names() %>% 
  mutate(immigrants = estimate_y - estimate_x,
         perc_im = immigrants/estimate_y)

consolidated <- immigrants_2019 %>% #left_join(language_fluency_2019, by = "geoid") %>% 
  left_join(Income_2019, by = "geoid") %>% 
  left_join(Minority_2019, by = "geoid") %>% #left_join(missing_hh_join, by = "") %>%  
  clean_names()%>% 
  select(c("geoid","name_x_x","perc_im","perc_in","minority")) %>% 
    rename(name_x = name_x_x)

#Join back to the census tract level so that we can see what is most vulnerable:

At_Risk_All <- At_Risk_Deep_CT %>% 
  left_join(consolidated, by = "name_x") 

immigrants <- At_Risk_All %>% 
  ggplot() + 
  geom_sf(data = boroughs) + 
  geom_sf(aes(fill = perc_im), size = 0.1) + 
  theme_void() + 
  scale_fill_viridis_c() +
  labs(
    title = "Immigrants across\ncensus tracts",
    fill = "% of total pop census tract", 
    caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
  )

income <- At_Risk_All %>% 
  ggplot() + 
  geom_sf(data = boroughs) + 
  geom_sf(aes(fill = perc_in), size = 0.1) + 
  theme_void() + 
  scale_fill_viridis_c() +
  labs(
    title = "Income by CT",
    fill = "Normalized to Median of NYC", 
    caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
  )

minority <- At_Risk_All %>% 
  ggplot() + 
  geom_sf(data = boroughs) + 
  geom_sf(aes(fill = minority), size = 0.1) + 
  theme_void() + 
  scale_fill_viridis_c() +
  labs(
    title = "Minority by Census Tract",
    fill = "Non-White Percent\nof Census Population", 
    caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
  )

# At_Risk_All %>% 
#   ggplot() + 
#   geom_sf(data = boroughs) + 
#   geom_sf(aes(fill = perc_fluen), size = 0.1) + 
#   theme_void() + 
#   scale_fill_viridis_c() +
#   labs(
#     title = "Language Fluency",
#     fill = "Normalized to average\nnumber of tracts with data", 
#     caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
#   )

ggsave("immigrant.png",immigrants)
ggsave("income.png",income)
ggsave("minority.png",minority)

Reclassifying so that we can take all factors into account.

MCDA <- At_Risk_All %>% 
  mutate(
    immigrant_vul = ntile(perc_im,6),#language_vul = ntile(perc_fluen,6),
    income_vul = ntile(perc_in,6),
    income_vul = case_when(
      income_vul == 1 ~ 6,
      income_vul == 2 ~ 5,
      income_vul == 3 ~ 4,
      income_vul == 4 ~ 3,
      income_vul == 5 ~ 2,
      income_vul == 6 ~ 1),
    minority_vul = ntile(minority,6),
    n_apts = ntile(n,6),
    Final_Score = ifelse(!is.na(n),
                          immigrant_vul + income_vul + minority_vul  + n_apts,
                          NA))

MCDA %>% 
  ggplot() + 
  geom_sf(data = boroughs, lwd = .1) + 
  geom_sf(aes(fill = Final_Score), size = 0.05) + 
  theme_void() + 
  scale_fill_viridis_c(na.value = "grey") +
  labs(
    title = "Final Weighted Vulnerability Score",
    fill = "Total Score", 
    caption = "Source: NYC MapPLUTO, NYC DOB,\nUS Census Bureau"
  )

ggsave("MCDA.png")

Final_Table <- MCDA %>% 
  select(c("name_x","ntaname","immigrant_vul","income_vul","minority_vul","n_apts","Final_Score")) %>%
  arrange(desc(Final_Score)) %>% 
  st_drop_geometry() %>% 
  head()

Final_Table