in incidenceMapR/R/appendCatchmentModel.R [16:60]
appendCatchmentModel <- function(db,shp = NULL, source='simulated_data', na.rm=TRUE){
validGeoLevels <- c('residence_puma','residence_cra_name','residence_neighborhood_district_name','residence_census_tract','residence_city',
'work_puma','work_cra_name','work_neighborhood_district_name','work_census_tract','work_city')
geo <- validGeoLevels[validGeoLevels %in% names(db$observedData)]
# get pathogen list
queryIn <- list(
SELECT =list(COLUMN=c('pathogen')),
GROUP_BY =list(COLUMN=c('pathogen')),
SUMMARIZE=list(COLUMN='pathogen', IN= 'all')
)
pathogens <- unique(selectFromDB( queryIn, source=source, na.rm=na.rm)$observedData$pathogen)
# find catchment maps for each site_type and geoLevel
# catchment represented by all samples not from target virus. Idea being participation due to other pathogens is assumed
# to be uncorrelated with pathogen of interest. Social dynamics could violate this assumption.
# making timeseries inference work for pathogen == 'all even if it maybe shouldn't...
outGroup<-setdiff(pathogens, unique(db$observedData$pathogen))
if (length(outGroup)==0){
outGroup = 'all'
}
queryIn <- list(
SELECT =list(COLUMN=c('pathogen','site_type',geo)),
WHERE =list(COLUMN=c('pathogen'), IN = outGroup),
GROUP_BY =list(COLUMN=c('site_type',geo)),
SUMMARIZE=list(COLUMN='site_type', IN= 'all')
)
catchmentDb <- expandDB( selectFromDB( queryIn, source=source, na.rm=na.rm ), shp=shp )
# at some point, we maybe should check if the catchment map is already saved,
# although this is a cheap computation relative to everything else, so that may never matter
catchmentModelDefinition <- smoothModel(db=catchmentDb, shp=shp)
catchmentModel <- modelTrainR(catchmentModelDefinition)
# append catchment as intercept covariate
db$observedData <- db$observedData %>% left_join(catchmentModel$modeledData %>% select(site_type, geo, modeled_count_median))
names(db$observedData)[names(db$observedData) %in% 'modeled_count_median'] <- 'catchment'
db$observedData$catchment <- log(db$observedData$catchment) - mean(log(db$observedData$catchment))
return(db)
}