Introduction

In the context of recent economic growth in the City of Buffalo (before Covid-19), this project examines changes in single-family housing prices from 2017 to 2020. Buffalo has experienced rising real estate values in response to economic development projects, university expansions and workforce development programs. This project compares changes in housing prices across neighborhoods and investigates possible factors that influence housing prices, such as the number of bathrooms, the number of bedrooms and the total living area.

Materials and methods

  • Data Sources
    • Open Data Buffalo, Tax Assessment Data 2017 - 2020
    • Open Data Buffalo, Neighborhood Boundary Shapefile
  • Methods
    • Data Cleaning & Processing
    • Exploration of the tax assessment data
    • Plot parcel data for Single Family Homes
    • Map Buffalo Neighborhoods
    • Compare median price of single family homes by neighborhood
    • Run regression analysis to examine possible indicators of price

Packages

library(tidyverse)
library(ggplot2)
library(ggmap)
library(maptools)
library(ggthemes)
library(rgeos)
library(broom)
library(plyr)
library(dplyr)
library(grid)
library(gridExtra)
library(reshape2)
library(scales)
library(sp)
library(sf)
library(rgdal)
library(RColorBrewer)
library(kableExtra)
library(leaflet)
knitr::opts_chunk$set(cache=TRUE) 

Data Download & Cleaning

Using the tax assessor's manual, all property classes that represent single-family residential properties were extracted from the Buffalo Assessment Roll.

# Property class codes that identify single family residential parcels
SingleFam_propclass <- c("210", "215", "240", "241", "250", "270")

# 2017 - 2018 Buffalo Assessment Roll, restricted to single family classes
Parcel17 <- read.csv(
  file = "https://raw.githubusercontent.com/geo511-2020/geo511-2020-project-erikwoyc/master/2017-2018_Assessment_Roll.csv"
)
Buffalo_17 <- Parcel17 %>%
  filter(PROPERTY.CLASS %in% SingleFam_propclass)

# 2019 - 2020 Buffalo Assessment Roll, restricted to the same classes
Parcel20 <- read.csv(
  file = "https://raw.githubusercontent.com/geo511-2020/geo511-2020-project-erikwoyc/master/2019-2020_Assessment_Roll.csv"
)
Buffalo_20 <- Parcel20 %>%
  filter(PROPERTY.CLASS %in% SingleFam_propclass)

# Neighborhood boundaries: read the GeoJSON export as an sf object, then
# convert it to an sp object for the code that needs the Spatial* classes
Neighborhood_URL <- "https://data.buffalony.gov/api/geospatial/q9bk-zu3p?method=export&format=GeoJSON"
Buffalo_Neighborhoods <- st_read(dsn = Neighborhood_URL)
Buffalo_sp <- as_Spatial(Buffalo_Neighborhoods)

Snapshot of Assessment Data from 2017 - 2018

The assessment data contains each parcel's property class and total value, along with structural details such as the number of bedrooms and the total living area.

# Show the first ten rows of the 2017 - 2018 roll for a handful of columns,
# rendered as a styled, centre-aligned table
Parcel17 %>%
  select(
    PROPERTY.CLASS,
    PROP.CLASS.DESCRIPTION,
    TOTAL.VALUE,
    TOTAL.LIVING.AREA,
    NEIGHBORHOOD,
    LOCATION
  ) %>%
  head(n = 10) %>%
  kable(digits = 2, align = "c") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )
PROPERTY.CLASS PROP.CLASS.DESCRIPTION TOTAL.VALUE TOTAL.LIVING.AREA NEIGHBORHOOD LOCATION
710 MANUFACTURING & PROCESSING 825000 0
350 URBAN RENEWAL VACANT LAND 6900 0
841 MOTOR VEHICLE 22440000 0 Parkside (42.93973696004812, -78.83864068992023)
311 RESIDENTIAL VACANT LAND 400 0
220 TWO FAMILY DWELLING 54500 2302 Elmwood Bidwell (42.91848032397568, -78.86218043101054)
482 DOWNTOWN ROW TYPE (DETACHED) 275000 0 Elmwood Bidwell (42.9194355694349, -78.87709280994117)
482 DOWNTOWN ROW TYPE (DETACHED) 34500 0 Elmwood Bidwell (42.920238159038476, -78.85944723608432)
210 ONE FAMILY DWELLING 210000 2106 Elmwood Bidwell (42.918710564992416, -78.87441389506147)
482 DOWNTOWN ROW TYPE (DETACHED) 407500 0 Elmwood Bidwell (42.91772431790134, -78.87710775061227)
220 TWO FAMILY DWELLING 110000 2076 Elmwood Bidwell (42.9198347309033, -78.87709262249685)

Data Exploration

Plotting single-family property prices illustrates a high degree of variability in prices across Buffalo.

# 2017 - 2018 Single Family Housing Price Histogram
# The bar colour is set directly on the geom: scale_fill_manual() only affects
# a mapped fill aesthetic, and none is mapped here, so it never took effect.
# bins = 30 is the value ggplot2 falls back to; stating it silences the
# "pick better value" message without changing the binning.
# Axis titles come from labs() alone (it overrode the earlier xlab()/ylab()).
Plot_2017 <- ggplot(data = Buffalo_17, mapping = aes(x = TOTAL.VALUE)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(x = "Total Value($)", y = "Count",
       title = "Distribution of Buffalo Home Prices",
       subtitle = "Single Family Property Prices (2017 - 2018)",
       caption = "Source: Buffalo Open Data")
plot(Plot_2017)

# 2019 - 2020 Single Family Housing Price Histogram
# The bar colour is set directly on the geom: scale_fill_manual() only affects
# a mapped fill aesthetic, and none is mapped here, so it never took effect.
# bins = 30 is the value ggplot2 falls back to; stating it silences the
# "pick better value" message without changing the binning.
# Axis titles come from labs() alone (it overrode the earlier xlab()/ylab()).
Plot_2019 <- ggplot(data = Buffalo_20, mapping = aes(x = TOTAL.VALUE)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(x = "Total Value($)", y = "Count",
       title = "Distribution of Buffalo Home Prices",
       subtitle = "Single Family Property Prices (2019 - 2020)",
       caption = "Source: Buffalo Open Data")
plot(Plot_2019)

Buffalo Base Map

# Buffalo bounding box, taken from the neighborhood polygons
Buffalo_bbox <- sp::bbox(Buffalo_sp)

# Download the basemap tiles covering that bounding box
# NOTE(review): newer ggmap releases appear to replace get_stamenmap() with
# get_stadiamap() - confirm against the installed ggmap version.
basemap <- get_stamenmap(
  maptype = "toner-lite",
  zoom = 13,
  bbox = Buffalo_bbox
)

2017 - 2018 Assessment Roll Plot

# Draw every 2017 - 2018 single family parcel on the basemap, colouring each
# point by its total value on a log-transformed green gradient
SingleFam17 <- ggmap(basemap) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, color = TOTAL.VALUE),
    data = Buffalo_17,
    alpha = 0.7,
    size = .025
  ) +
  labs(
    title = "Distribution of Buffalo Home Prices",
    subtitle = "Property Prices (2017 - 2018)",
    caption = "Open Data Buffalo"
  ) +
  scale_color_gradient(
    name = "Single Family Home Price",
    low = "light green",
    high = "dark green",
    trans = "log",
    labels = scales::dollar_format(prefix = "$")
  )
SingleFam17

2019 - 2020 Assessment Roll Plot

# Draw every 2019 - 2020 single family parcel on the basemap, colouring each
# point by its total value on a log-transformed green gradient
SingleFam20 <- ggmap(basemap) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, color = TOTAL.VALUE),
    data = Buffalo_20,
    alpha = 0.7,
    size = .025
  ) +
  labs(
    title = "Distribution of Buffalo Home Prices",
    subtitle = "Property Prices (2019 - 2020)",
    caption = "Open Data Buffalo"
  ) +
  scale_color_gradient(
    name = "Single Family Home Price",
    low = "light green",
    high = "dark green",
    trans = "log",
    labels = scales::dollar_format(prefix = "$")
  )
SingleFam20

Interactive Map

Single Family Home Prices 2017 - 2020

Based on the map, the highest single family property values are in the Allentown, Central, Elmwood & Parkside neighborhoods. There is a sharp drop in property value in the surrounding neighborhoods.

# Color Palette
# Colours are assigned on the log of total value. The palette gets one fixed
# domain covering both assessment rolls so that a given colour means the same
# value in either layer and in the legend (a NULL domain rescales per call).
log_values <- log(c(Buffalo_17$TOTAL.VALUE, Buffalo_20$TOTAL.VALUE))
log_values <- log_values[is.finite(log_values)]  # drop NA and log(0) = -Inf
pallete <- colorNumeric("viridis", domain = log_values)

# Layer names; the same strings must be used for the circle layers and for the
# layers control, otherwise the control's checkboxes do not toggle anything.
price_groups <- c("2017 - 2018", "2019 - 2020")

Neighborhood_map <- leaflet() %>%
  setMaxBounds(lng1 = -78.91246, lat1 = 42.82603,
               lng2 = -78.79504, lat2 = 42.96641) %>%
  addProviderTiles("CartoDB") %>%
  addProviderTiles("Stamen.TonerLines",
                   options = providerTileOptions(opacity = 0.35)) %>%
  # One circle per parcel; formulas are evaluated against the `data` argument
  addCircles(data = Buffalo_17, lng = ~LONGITUDE, lat = ~LATITUDE,
             color = ~pallete(log(TOTAL.VALUE)),
             radius = .05, opacity = 0.5,
             group = price_groups[1]) %>%
  addCircles(data = Buffalo_20, lng = ~LONGITUDE, lat = ~LATITUDE,
             color = ~pallete(log(TOTAL.VALUE)),
             radius = .05, opacity = 0.5,
             group = price_groups[2]) %>%
  # Neighborhood outlines with no fill
  addPolygons(data = Buffalo_sp, fillColor = "transparent",
              color = "#444444", weight = 2) %>%
  addLayersControl(overlayGroups = price_groups) %>%
  # The legend is built on the same log scale as the circle colours; the
  # labels are transformed back to dollars for display.
  addLegend(position = "bottomleft", pal = pallete, values = log_values,
            labFormat = labelFormat(prefix = "$", big.mark = ",",
                                    transform = function(x) round(exp(x))),
            title = "Single Family Home Value")
Neighborhood_map

Further Data Exploration

Distribution of Single Family Homes by Year Built

# Histogram of construction year for single family homes (2019 - 2020 roll)
# The bar colour is set directly on the geom: scale_fill_manual() only affects
# a mapped fill aesthetic, and none is mapped here, so it never took effect.
# bins = 30 is the value ggplot2 falls back to; stating it silences the
# "pick better value" message without changing the binning.
Year_built <- ggplot(data = Buffalo_20, mapping = aes(x = YEAR.BUILT)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(x = "Year Built", y = "Number of Homes",
       title = "Distribution of Single Family Homes by Year Built",
       caption = "Source: Buffalo Open Data")
Year_built

Price by Living Area 2019 - 2020

# Scatter plot of total value against total living area, 2019 - 2020 roll
live_price20 <- ggplot(
  data = Buffalo_20,
  mapping = aes(x = TOTAL.LIVING.AREA, y = TOTAL.VALUE)
) +
  geom_point() +
  labs(
    title = "Price by Square ft of Living Space",
    x = "Total Living Area (sqft)",
    y = "Total Value Single Family Home"
  )
live_price20

Price by Bedrooms 2019 - 2020

# Price by Bedrooms  2019 - 2020
# Each bar is the median total value of the homes with that bedroom count.
# geom_col() on the raw rows stacks every home, so bar height was the summed
# value of all homes in the group and mostly reflected how many homes there
# were, not what they are worth.
bed_price <- ggplot(data = Buffalo_20, aes(x = X..OF.BEDS, y = TOTAL.VALUE)) +
  labs(x = "Number of Bedrooms", y = "Median Total Value Single Family Home",
       title = "Price by Number of Bedrooms") +
  stat_summary(fun = median, geom = "col")
bed_price

Median Price by Neighborhood

# Median total value of single family homes per neighborhood, one table per
# assessment roll. na.rm = TRUE keeps a single missing value from turning a
# whole neighborhood's median into NA.
data_20 <- ddply(Buffalo_20, c("NEIGHBORHOOD"), summarise,
                 medianPrice = median(TOTAL.VALUE, na.rm = TRUE))
data_17 <- ddply(Buffalo_17, c("NEIGHBORHOOD"), summarise,
                 medianPrice = median(TOTAL.VALUE, na.rm = TRUE))

# Both tables have a medianPrice column, so the join suffixes them:
# medianPrice.x comes from data_17 and medianPrice.y from data_20.
data.neighborhoods <- left_join(data_17, data_20, by = "NEIGHBORHOOD")
colnames(data.neighborhoods)[1] <- "nhbdname"

# NOTE(review): rows 1 and 34 are dropped by position; presumably these are
# blank or placeholder neighborhood names - confirm, and prefer filtering on
# nhbdname so the selection survives a change in the data.
data.neigh <- data.neighborhoods[-c(1, 34), ]
# View(data.neigh) was removed here: it opens an interactive data viewer,
# which has no place in a knitted document.

# Bar chart of the 2019 - 2020 median (medianPrice.y) for each neighborhood
Median_Price <- ggplot(data = data.neigh, mapping = aes(x = nhbdname, y = medianPrice.y)) + 
  geom_col() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x="Neighborhood", y = "Price ($)", title="Median Price Single Family Home",
       subtitle="Median Price by Neighborhood", 
       caption="Source: Buffalo Open Data") + scale_y_continuous(labels=scales::dollar_format())
plot(Median_Price)

Regression Analysis

## Transform Data for Regression
# The response is log10 of total value, stored in a column named `log`
Buffalo_20$log <- log10(Buffalo_20$TOTAL.VALUE)
# View(Buffalo_20) was removed here: it opens an interactive data viewer,
# which has no place in a knitted document.

## Multiple Linear Regression
# A stray unary "+" before YEAR.BUILT was dropped; the fitted model is the same.
# NOTE(review): PROPERTY.CLASS and BASEMENT.TYPE get a single slope each in the
# summary, so they enter as numeric; if they are category codes, wrapping them
# in factor() would be more appropriate - confirm before changing the model.
MLR <- lm(log ~ X..OF.BATHS + X..OF.BEDS + YEAR.BUILT + TOTAL.LIVING.AREA +
            PROPERTY.CLASS + BASEMENT.TYPE,
          data = Buffalo_20)
summary(MLR)
## 
## Call:
## lm(formula = log ~ X..OF.BATHS + X..OF.BEDS + YEAR.BUILT + TOTAL.LIVING.AREA + 
##     PROPERTY.CLASS + BASEMENT.TYPE, data = Buffalo_20)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34156 -0.12204  0.00818  0.15185  1.12611 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -8.373e+00  4.388e+00  -1.908   0.0564 .  
## X..OF.BATHS        7.396e-02  3.214e-03  23.008   <2e-16 ***
## X..OF.BEDS        -5.799e-02  1.682e-03 -34.470   <2e-16 ***
## YEAR.BUILT         3.541e-03  4.767e-05  74.287   <2e-16 ***
## TOTAL.LIVING.AREA  3.244e-04  2.837e-06 114.327   <2e-16 ***
## PROPERTY.CLASS     2.651e-02  2.089e-02   1.269   0.2045    
## BASEMENT.TYPE      8.841e-02  1.587e-03  55.724   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2335 on 37795 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.5164, Adjusted R-squared:  0.5163 
## F-statistic:  6727 on 6 and 37795 DF,  p-value: < 2.2e-16

Conclusions

Based on the maps and the multiple linear regression, the geographic location of a single-family home appears to be a stronger predictor of price than its structural details. The regression's R-squared of 0.52 means that the structural variables chosen explain only about half of the variation in (log) price, so they are not strong indicators of price on their own. Given the large contrast in prices across neighborhoods, I would expect further analysis that includes location explicitly to support the conclusion that geographic location has a stronger influence on price.

References