### -*- mode: R -*- ### This file provides a collection of R functions ### for loading data from the database into into a form which can be ### proccessed by CODA/BOA. ### $Revision: 1.13 $ ### $Date: 2006-09-20 17:12:10 $ ### At this pass, we are still taking the naive approach of loading the ### entire dataset into R at once. Subsequent passes might look at the ### problem of incrementally processing the query results. #asp.file <- function(filename) { # paste(StatShop,"R","Asp",filename,sep=.Platform$file.sep) #} ### Boolean flag to report queries. ### If called with no arguments returns the current value of the flag. ### If called with an argument, sets the flag to that value. "asp.debugQueries" <- function(doDebug) { if (!exists("asp.debugMode")) { asp.debugMode <<- FALSE } if (!missing(doDebug)) { asp.debugMode <<- doDebug } asp.debugMode } ### Establish a new connection to the database. ### Uses the values of the asp.driver, asp.dbname and asp.user ### As default parameters. ### Stores result in asp.con. This function must be called before ### other functions become useful. "asp.dbConnect" <- function(database,user=Sys.info()["user"],host="localhost",password="", driver=MySQL(force.reload=FALSE), ...) { asp.lastCall <<- match.call() asp.con <<- dbConnect(driver,dbname=database,user=user, host=host,password=password, ...) } ###Closes connection to database when finished. ###This is optional, but good housekeeping. "asp.dbDisconnect" <- function() { dbDisconnect(asp.con) asp.lastCall <<- NULL } "asp.showConnection" <- function () { asp.lastCall } ###Fetches the possible list of chains from the database. "getChains" <- function () { cquery <- paste("SELECT DISTINCT CHAIN FROM chain_data_table ORDER BY CHAIN") dbchains <- dbGetQuery(asp.con,cquery) dbchains[,1] } ###Fetches a table of cycles by chain number. "getCycles" <- function () { cquery <- paste("SELECT chain_first_cycle, chain_last_cycle FROM chain_data_table ORDER BY CHAIN") dbGetQuery(asp.con,cquery) } ### Returns a list of models "listModels" <- function () { mquery <- paste("SELECT DISTINCT MODEL_NAME FROM model_description_table ORDER BY MODEL_NAME") dbGetQuery(asp.con,mquery)[,1] } ### Returns a list of model names matching pattern "grepModels" <- function(pattern) { mods <- listModels() mods[grep(pattern,mods)] } ### Returns a list of all student models "listStudentModels" <- function () { mquery <- paste("SELECT DISTINCT MODEL_NAME FROM model_description_table WHERE MODEL_TYPE='sm' ORDER BY MODEL_NAME") dbGetQuery(asp.con,mquery)[,1] } ### Returns a list of all Link (Evidence) models "listLinkModels" <- function () { mquery <- paste("SELECT DISTINCT MODEL_NAME FROM model_description_table WHERE MODEL_TYPE='lm' ORDER BY MODEL_NAME") dbGetQuery(asp.con,mquery)[,1] } ### This function calculates the name of the parameter table based on ### the name of the models. "parameterTable" <- function (model) { mquery <- paste("SELECT PARAMETER_TABLE_NAME FROM model_description_table", " WHERE MODEL_NAME =\"", model, "\"", sep="") dbGetQuery(asp.con,mquery)[,1] } ### This function calculates the name of the variable table based on ### the name of the models. "variableTable" <- function (model) { mquery <- paste("SELECT VARIABLE_TABLE_NAME FROM model_description_table", " WHERE MODEL_NAME = \"", model, "\"", sep="") dbGetQuery(asp.con,mquery)[,1] } ################################################################################### ### Parameter Queries ################################################################################### ######################################## ### Parameter Metadata ### Returns a list of parameter columns for a model. ### Old listParameters. We now use more specific information. "listParameterColumns" <- function (model) { columns <- dbListFields(asp.con,parameterTable(model)) ## First two columns are CHAIN, CYCLE ## Last column is BATCH columns <- columns [3:length(columns)] columns[columns!="BATCH"] } ### Lists the names of "Parameters" in the model. Note ### that parameters can be vector or matrix valued and correspond to more than ### one column in the table. "listParameterNames" <- function (model) { mquery <- paste("SELECT DISTINCT(PARAMETER) FROM", " parameter_description_table", " WHERE MODEL =\"", model, "\"", sep="") result <- dbGetQuery(asp.con,mquery) if (nrow(result)<1) { stop("No parameters found for model ",model) } result[,1] } ### Returns the map form column names to labels for the paraticular parameter "getParameterLabels" <- function (model, parameter) { mquery <- paste("SELECT PARAMETER_COLUMN_NAME, PARAMETER_LABEL FROM", " parameter_description_table", " WHERE MODEL =\"", model, "\"", " AND PARAMETER =\"", parameter, "\"", sep="") results <- dbGetQuery(asp.con,mquery) if (length(grep("CondMultParam",results[1,1])) >0) { ## Need to reorder rows of a matrix parameter. nums <- sapply(results[,1],function(label) { bits <- unlist(strsplit(label,"_")) as.numeric(bits[length(bits)]) }) results <- results[order(nums),] } results } ### Returns the type of a parameter. "getParameterType" <- function (model, parameter) { mquery <- paste("SELECT DISTINCT PARAMETER_TYPE FROM", " parameter_description_table", " WHERE MODEL =\"", model, "\"", " AND PARAMETER =\"", parameter, "\"", sep="") result <- dbGetQuery(asp.con,mquery) if (nrow(result)<1) { stop("Parameter ",parameter," not found for model ",model) } result[,1] } ### Returns the diminesions for a matrix valued (conditional multinomial) ### parameter. "getMatrixParameterDims" <- function (model, parameter) { mquery <- paste("SELECT MAX(TABLE_ROW_NUMBER),", " MAX(TABLE_COLUMN_NUMBER) FROM", " parameter_description_table", " WHERE MODEL =\"", model, "\"", " AND PARAMETER =\"", parameter, "\"", sep="") ## This rather unusual construct is necessary to make this look like ## a set of dims. result <- dbGetQuery(asp.con,mquery) if (nrow(result)<1) { stop("Parameter ",parameter, "not found for model ",model) } as.vector(as.matrix(result[1,])) } ### This gets a scaler parameter from the database. ### If the parameter argument is a vector of names, it gets a dataframe ### like structure, where the columns are variables. ### ### model --- String, name of model ### pname --- String giving the ### iteration --- int[2] or null which iterations to fetch, if omitted, all ### chain --- int which chain, if omitted, all "getParameterVector" <- function(model,pname, iterations=numeric(0), chain=getChains()) { labels <- getParameterLabels(model,pname) if (nrow(labels)<1) { stop("Parameters ", pname, " not found for model ",model) } result <- getParameter(model,labels[,1], iterations, chain) ## This should be unnecessary but keep it around in case database is old. if (labels[1,2] != "table") { varnames(result) <- labels[,2] } classname = paste("asp.",getParameterType(model,pname),sep="") ## Add an ASP class class(result) <- c(classname,class(result)) if (classname == "asp.table") { attr(result,"cpt.dims")<-getMatrixParameterDims(model,pname) } result } ### This fetches back a list of mcmc.list (or mcmc) objects, one for ### each parameter in the model. Note that "parameters" here are ### typically vector or matrix valued. ### model --- String, name of model ### iteration --- int[2] or null which iterations to fetch, if omitted, all ### chain --- int which chain, if omitted, all "getAllParameters" <- function (model, iterations=numeric(0), chain=getChains(),maxparams=99) { pnames <- listParameterNames(model) result <- lapply(pnames, function (pname) { if (nrow(getParameterLabels(model,pname)) > maxparams) { NA } else { getParameterVector(model,pname,iterations,chain) } }) names(result) <- pnames result } ### This gets a scaler parameter from the database. ### If the parameter argument is a vector of names, it gets a dataframe ### like structure, where the columns are variables. ### NOTE this function uses the column names in the database. ### Use getParameterVector to get the vector valued parameter. ### ### model --- String, name of model ### parameter --- String or String[] name of parameter columns or vector of ### parameter columns. If omitted, returns all parameters ### iteration --- int[2] or null which iterations to fetch, if omitted, all ### chain --- int which chain, if omitted, all ### Returns a coda "mcmc" object for a single chain and a "mcmc.list" ### object for multiple chains. getParameter <- function(model,parameter=listParameterColumns(model), iterations=numeric(0), chain=getChains()) { table <- parameterTable(model) pname <- buildSelectQuery(parameter) wclause <- buildIterQuery(iterations) if (length(chain) == 1) { ## Single chain, return this as result result <- pQuery(table,pname,wclause,chain,iterations) } else { result <- lapply(chain, function (i) { pQuery(table,pname,wclause,i,iterations) }) names(result) <- chain result <- mcmc.list(result) } result } ### Builds up the select clause of the query. If parameter ### is a single string, just return it. If it is a vector of ### strings turn it into a comma separated string. If it is empty, ### return appropriate wildcard character. buildSelectQuery <- function (parameter) { if (length(parameter) == 1) { pname <- parameter } else if (length(parameter) == 0) { pname <- "*" } else { pname <- paste(parameter,collapse=", ") } } ###Builds up the query by adding selection clauses for CYCLE. buildIterQuery <- function(iterations) { wclause <- character(0) if (length(iterations) >0) { wclause <-c(wclause,paste("CYCLE >=",min(iterations),sep=" ")) if (length(iterations) >1) { wclause <- c(wclause,paste("CYCLE <=",max(iterations),sep=" ")) } } wclause } ###Fixes the row.names attribute to match the actual interation. fixRowNames <- function(data,iterations) { result <- data if (length(iterations) > 0) { k <- min(iterations) n <- length (data[[1]])-1 row.names(result) <- paste(k:(k+n)) } result } ###Helper function for parameter queries. ### table --- String name of table (FROM clause) ### pname --- String name of variables, comma separated (SELECT clause) ### wclause --- String vector of where clauses (will be combined with AND). ### chain --- Int chain number. ### iterations --- Range of iterations, used to fix row labels. ### Returns a CODA "mcmc" object for a single chain. pQuery <- function (table, pnames, wclause, chain,iterations) { query<-paste("SELECT",pnames,"FROM",table,sep=" ") wwc <- c(wclause,paste ("CHAIN = ",chain)) wwc <- paste(wwc, collapse =" AND ") query <- paste(query," WHERE ",wwc) if (asp.debugQueries()) cat("ASP: ",query,"\n") if (length(iterations)<1) { iterations = getCycles()[chain,] } mcmc(dbGetQuery(asp.con,query),start=min(iterations),end=max(iterations)) } ################################################################################### ### Variable Queries ################################################################################### ### Returns a list of variables for a model. listVariables <- function (model) { ## Note: VARIABLE and VARIABLE_COLUMN_NAME names reversed in database. ## Now fixed. mquery <- paste("SELECT VARIABLE FROM variable_description_table", " WHERE MODEL = \"", model, "\"", sep="") result <- dbGetQuery(asp.con,mquery) if (nrow(result)<1) { stop("No variables found for model ",model) } result[,1] } ### Gets the details about the model variables in an array indexed by ### variable name. getVariableMetadata <- function (model) { ## Note: VARIABLE and VARIABLE_COLUMN_NAME names reversed in ## database (now fixed). mquery <- paste("SELECT VARIABLE, VARIABLE_COLUMN_NAME, VARIABLE_TYPE,", " VARIABLE_DOMAIN, VARIABLE_ORDERED", " FROM variable_description_table", " WHERE MODEL = \"", model, "\"", sep="") result <- dbGetQuery(asp.con,mquery) if (nrow(result)<1) { stop("No variables found for model ",model) } names(result)[2] <- "COLUMN_NAME" row.names(result) <- result[,1] result[,-1] } ### Translates from user to internal column names. Takes the variable ### data object for this model as an argument. variableColumns <- function (variableData, vars) { variableData[vars,"COLUMN_NAME"] } ### Figures out data types from variableData. variableType <- function (variableData, vars) { ## BUG in statshop causing both ordered and unordered data ## to have 0 in ordered column. ifelse(variableData[vars,"VARIABLE_TYPE"] == "Discrete", ifelse(variableData[vars,"VARIABLE_ORDERED"] == 0, "ordered", "ordered"), # "factor", "ordered" "numeric") } ### Returns a list of possible values for an ordered factor. ### Ordered in increasing order. variableDomain <- function (variableData, vars) { if (length(vars) == 1) { ## BUG in statshop causing both ordered and unordered data ## to have 0 in ordered column. vals <- unlist(strsplit(variableData[vars,"VARIABLE_DOMAIN"],",")) if (variableData[vars,"VARIABLE_ORDERED"] == 1) { result <- rev(vals) } else { ##result <- vals result <- rev(vals) } } else { result <- sapply(vars, function(v) {variableDomain(variableData,v)}) } result } ### Returns a list of subjects whom we monitored for this model. listSubjects <- function (model) { table <- "Statistics" if (!missing(model)) table <- variableTable(model) cquery <- paste("SELECT DISTINCT USER FROM ",table) subj <- dbGetQuery(asp.con,cquery) if (nrow(subj)<1) { stop("No subjects found for model ",model) } subj[,1] } ### This gets a variable from the database. ### If the variable argument is a vector of names, it gets a three dimensional ### structure, where the rows are iteration, the columns are variables, and the third dimension is subjects. ### ### model --- String, name of model ### variable --- String or String[] name of variable or vector of ### names. If omitted, returns all variables ### subject --- String or String[] ID of subject or vector of IDs. If omitted, ### returns all subjects ### iteration --- int[2] or null which iterations to fetch, if omitted, all ### chain --- int which chain, if omitted, all getVariable <- function(model,variable=listVariables(model), subject=listSubjects(model), iterations=numeric(0), chain=getChains()) { table <- variableTable(model) vd <- getVariableMetadata(model) wclause <- buildIterQuery(iterations) if (length(chain) == 1) { ## Single chain, return this as result result <- vQuery(table,variable,wclause,chain,subject,iterations,vd) } else { ## Multiple chains, return a list result <- lapply (chain, function (i) { vQuery(table,variable,wclause,i,subject,iterations,vd) }) names(result) <- chain class(result) <- "data.hypercube" } result } ###Helper function for variable queries. ### table --- String name of table (FROM clause) ### vars --- Array of variable names ### wclause --- String vector of where clauses (will be combined with AND). ### chain --- Int chain number. ### iterations --- used to fix row names. ### vd --- Variable Data structure vQuery <- function (table, vars, wclause, chain, subject,iterations, vd ) { vnames <- buildSelectQuery(variableColumns(vd,vars)) query<-paste("SELECT",vnames,"FROM",table,sep=" ") wwc <- c(wclause,paste ("CHAIN = ",chain)) wwc <- paste(wwc, collapse =" AND ") query <- paste(query," WHERE ",wwc) if (length(subject) > 1) { result <- lapply(subject, function (subj) { vSubQuery(query,subj,vars,vd,iterations) }) names(result) <- subject class(result) <- "data.cube" } else { result <- vSubQuery(query,subject,vars,vd,iterations) } result } ### This funciton does the work of setting up a query. It finds a ### table for a single subject and chain and builds up an appropriate ### table of factor variables. ### query --- Partial query (minus subject clause) ### subj --- name of this subject ### vars --- name of variables ### vd --- variable data table ### number of iterations vSubQuery <- function (query,subj,vars,vd,iterations) { q <- paste(query, " AND USER = '",subj,"'",sep="") if (asp.debugQueries()) cat("ASP: ",q,"\n") dat <- dbGetQuery(asp.con,q) names(dat) <- vars ##result <- sapply(vars, function(var) {fixTypes(dat[,var],var,vd)}) result <- NULL for (var in vars) { if (is.null(result)) { result <- data.frame(fixTypes(dat[,var],var,vd)) } else { result <- data.frame(result,fixTypes(dat[,var],var,vd)) } } names(result) <- vars fixRowNames(result,iterations) } ### This takes care of coercing variables into factor types. fixTypes <- function (dat,var,vd) { type <- variableType(vd,var) if (type == "numeric") { result <- as.numeric(dat) } else { levels <- variableDomain(vd,var) result <- do.call(type,list(dat,levels=levels)) } result } getDeviances <- function(subject="*ALL*",iterations=numeric(0), chain=getChains()) { wclause <- buildIterQuery(iterations) wclause <- paste(wclause, " AND USER = '",subject,"'",sep="") if (length(chain) == 1) { ## Single chain, return this as result result <- pQuery("Statistics","Deviance",wclause,chain,iterations) } else { ## Multiple chains, return a list result <- lapply (chain, function (i) { pQuery("Statistics","Deviance",wclause,i,iterations) }) names(result) <- chain result <- mcmc.list(result) } result } getMetadata <- function(keys = c("date","version","config","run_family", "amdf","tmdf","pdf","database","recordEveryCycle", "stepSize","interseptSize","slopeStep","stdStep","diffIncrementStep","anchorMultiplier","notes")) { mquery <- "SELECT * from calibration_log_table" table <- dbGetQuery(asp.con,mquery) row.names(table) <- as.character(table[,1]) result <- as.list(table[keys,2]) names(result) <- keys result }