::install_github("LukeCe/spflow") devtools
In-class Exercise 5
1 Getting Started
1.1 Importing the packages
Use the following code chunk to install the latest version of spflow (instead from CRAN’s R library)
Then, we will load spflow and the other packages that we need for this exericse using the following code chunk.
::p_load(tmap, sf, spdep, sp, Matrix, spflow, reshape2, knitr, tidyverse) pacman
To work with spflow, we need to have: - Spatial Weights (Lesson 2) - a tibble dataframe consists of the origins, destination, flows and distances between the origins and destinations, and - a tibble dataframe consists of the explanatory variables.
<- read_rds("data/rds/mpsz_nb.rds")
mpsz_nb <- read_rds("data/rds/mpsz_flow.rds")
mpsz_flow <- read_rds("data/rds/mpsz_var.rds") mpsz_var
2 Creating spflow network class objects
spflow network class is an S4 class that contains all information on a spatial network which is composed by a set of nodes that are linked by some neighbourhood relation. It can be created by usiing spflow_network() of spflow
For our model, we choose the contiguity based neighbourhood structure.
<- spflow_network(
mpsz_net id_net = "sg",
node_neighborhood = nb2mat(mpsz_nb$by_contiguity),
node_data = mpsz_var,
node_key_column = "SZ_CODE")
mpsz_net
Spatial network nodes with id: sg
--------------------------------------------------
Number of nodes: 313
Average number of links per node: 6.077
Density of the neighborhood matrix: 1.94% (non-zero connections)
Data on nodes:
SZ_NAME SZ_CODE BUSSTOP_COUNT AGE7_12 AGE13_24 AGE25_64
1 INSTITUTION HILL RVSZ05 2 330 360 2260
2 ROBERTSON QUAY SRSZ01 10 320 350 2200
3 FORT CANNING MUSZ02 6 0 10 30
4 MARINA EAST (MP) MPSZ05 2 0 0 0
5 SENTOSA SISZ01 1 200 260 1440
6 CITY TERMINALS BMSZ17 10 0 0 0
--- --- --- --- --- --- ---
308 NEE SOON YSSZ07 12 90 140 590
309 UPPER THOMSON BSSZ01 47 1590 3660 15980
310 SHANGRI-LA AMSZ05 12 810 1920 9650
311 TOWNSVILLE AMSZ04 9 980 2000 11320
312 MARYMOUNT BSSZ02 25 1610 4060 16860
313 TUAS VIEW EXTENSION TSSZ06 11 0 0 0
SCHOOL_COUNT BUSINESS_COUNT RETAILS_COUNT FINSERV_COUNT ENTERTN_COUNT
1 1 6 26 3 0
2 0 4 207 18 6
3 0 7 17 0 3
4 0 0 0 0 0
5 0 1 84 29 2
6 0 11 14 4 0
--- --- --- --- --- ---
308 0 0 7 0 0
309 3 21 305 30 0
310 3 0 53 9 0
311 1 0 83 11 0
312 3 19 135 8 0
313 0 53 3 1 0
FB_COUNT LR_COUNT COORD_X COORD_Y
1 4 3 103.84 1.29
2 38 11 103.84 1.29
3 4 7 103.85 1.29
4 0 0 103.88 1.29
5 38 20 103.83 1.25
6 15 0 103.85 1.26
--- --- --- --- ---
308 0 0 103.81 1.4
309 5 11 103.83 1.36
310 0 0 103.84 1.37
311 1 1 103.85 1.36
312 3 11 103.84 1.35
313 0 0 103.61 1.26
id_net - we can just name it anything (to give a name to the id?)
combines contiguity matrix and explanatory variables
Use sp_network_pair() to…
<- spflow_network_pair(
mpsz_net_pairs id_orig_net = "sg",
id_dest_net = "sg",
pair_data = mpsz_flow,
orig_key_column = "ORIGIN_SZ",
dest_key_column = "DESTIN_SZ")
mpsz_net_pairs
Spatial network pair with id: sg_sg
--------------------------------------------------
Origin network id: sg (with 313 nodes)
Destination network id: sg (with 313 nodes)
Number of pairs: 97969
Completeness of pairs: 100.00% (97969/97969)
Data on node-pairs:
DESTIN_SZ ORIGIN_SZ DISTANCE TRIPS
1 RVSZ05 RVSZ05 0 67
314 SRSZ01 RVSZ05 305.74 251
627 MUSZ02 RVSZ05 951.83 0
940 MPSZ05 RVSZ05 5254.07 0
1253 SISZ01 RVSZ05 4975 0
1566 BMSZ17 RVSZ05 3176.16 0
--- --- --- --- ---
96404 YSSZ07 TSSZ06 26972.97 0
96717 BSSZ01 TSSZ06 25582.48 0
97030 AMSZ05 TSSZ06 26714.79 0
97343 AMSZ04 TSSZ06 27572.74 0
97656 BSSZ02 TSSZ06 26681.7 0
97969 TSSZ06 TSSZ06 0 270
<- spflow_network_multi(mpsz_net, mpsz_net_pairs)
mpsz_multi_net
mpsz_multi_net
Collection of spatial network nodes and pairs
--------------------------------------------------
Contains 1 spatial network nodes
With id : sg
Contains 1 spatial network pairs
With id : sg_sg
Availability of origin-destination pair information:
ID_ORIG_NET ID_DEST_NET ID_NET_PAIR COMPLETENESS C_PAIRS C_ORIG C_DEST
sg sg sg_sg 100.00% 97969/97969 313/313 313/313
both have to be spflow network object class
flow has to be n x n (i.e. 313 x 313 = 97969 obs for mpsz_flow)
2.1 Correlation Analysis
check for multicollinearity before running regression - more impt for explanatory models can also use to detect if we have suitable variables
<- log(1 + TRIPS) ~
cor_formula +
BUSSTOP_COUNT +
AGE7_12 +
AGE13_24 +
AGE25_64 +
SCHOOL_COUNT +
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT P_(log(DISTANCE + 1))
<- pair_cor(
cor_mat
mpsz_multi_net,spflow_formula = cor_formula,
add_lags_x = FALSE)
colnames(cor_mat) <- paste0(
substr(colnames(cor_mat),1,3), "...")
cor_image(cor_mat)
“P_” always refers to impedence
3 Model Calibration
Maximum Likelihood Estimate (MLE)
Origin Destin Intra P - impedence (distance)
model 9 (see slides!)
<- spflow(
base_model spflow_formula = log(1 + TRIPS)~
O_(BUSSTOP_COUNT + AGE25_64) +
D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE + 1)),
spflow_networks = mpsz_multi_net)
base_model
--------------------------------------------------
Spatial interaction model estimated by: MLE
Spatial correlation structure: SDM (model_9)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
rho_d 0.680 0.004 192.552 0.000
rho_o 0.678 0.004 187.728 0.000
rho_w -0.396 0.006 -65.587 0.000
(Intercept) 0.410 0.065 6.265 0.000
(Intra) 1.313 0.081 16.263 0.000
D_SCHOOL_COUNT 0.017 0.002 7.885 0.000
D_SCHOOL_COUNT.lag1 0.002 0.004 0.551 0.582
D_BUSINESS_COUNT 0.000 0.000 3.015 0.003
D_BUSINESS_COUNT.lag1 0.000 0.000 -0.249 0.803
D_RETAILS_COUNT 0.000 0.000 -0.306 0.759
D_RETAILS_COUNT.lag1 0.000 0.000 0.152 0.879
D_FINSERV_COUNT 0.002 0.000 6.787 0.000
D_FINSERV_COUNT.lag1 -0.002 0.001 -3.767 0.000
O_BUSSTOP_COUNT 0.002 0.000 6.806 0.000
O_BUSSTOP_COUNT.lag1 -0.001 0.000 -2.364 0.018
O_AGE25_64 0.000 0.000 7.336 0.000
O_AGE25_64.lag1 0.000 0.000 -2.797 0.005
P_log(DISTANCE + 1) -0.050 0.007 -6.792 0.000
--------------------------------------------------
R2_corr: 0.6942932
Observations: 97969
Model coherence: Validated
rho_d : destination constrained rho_o : origin constrained rho_w : intrazonal
school_count.lag1: also tell us if school_count’s immediate neighbours can affect the flows. Notice that it is not sig. so the flow is largely due to the sch within the zone
Finserv: both finserv and its immediate have small p value.
busstop: both BS and its immediate nb are sig. so both can attract flows
3.1 Residual Diagnostic Test
If th line is very close to the diagonal, means no more spatial autocorrelation
<- par(mfrow = c(1,3),
old_par mar = c(2,2,2,2))
spflow_moran_plots(base_model)
<- pair_cor(base_model)
corr_residual colnames(corr_residual) <- substr(colnames(corr_residual), 1,3)
cor_image(corr_residual)
No violation of multicollinearity All contribute to the model
3.2 Working with Model Control
<- log(1 + TRIPS)~
spflow_formula O_(BUSSTOP_COUNT + AGE25_64) +
D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE + 1))
<-spflow_control(estimation_method = "mle",
model_control model = "model_1")
<- spflow(
mle_model1
spflow_formula, spflow_networks = mpsz_multi_net,
estimation_control = model_control)
mle_model1
--------------------------------------------------
Spatial interaction model estimated by: OLS
Spatial correlation structure: SLX (model_1)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
(Intercept) 11.384 0.069 164.255 0.000
(Intra) -6.006 0.112 -53.393 0.000
D_SCHOOL_COUNT 0.093 0.003 28.599 0.000
D_SCHOOL_COUNT.lag1 0.255 0.006 44.905 0.000
D_BUSINESS_COUNT 0.001 0.000 10.036 0.000
D_BUSINESS_COUNT.lag1 0.003 0.000 18.274 0.000
D_RETAILS_COUNT 0.000 0.000 -1.940 0.052
D_RETAILS_COUNT.lag1 0.000 0.000 -2.581 0.010
D_FINSERV_COUNT 0.005 0.000 10.979 0.000
D_FINSERV_COUNT.lag1 -0.016 0.001 -17.134 0.000
O_BUSSTOP_COUNT 0.014 0.001 25.865 0.000
O_BUSSTOP_COUNT.lag1 0.015 0.001 21.728 0.000
O_AGE25_64 0.000 0.000 14.479 0.000
O_AGE25_64.lag1 0.000 0.000 14.452 0.000
P_log(DISTANCE + 1) -1.281 0.008 -165.327 0.000
--------------------------------------------------
R2_corr: 0.2831458
Observations: 97969
Model coherence: Validated
can change the model using the model control (choose frm 1 to 9)
<- par(mfrow = c(1,3),
old_par mar = c(2,2,2,2))
spflow_moran_plots(mle_model1)