## Tools for writing out StatShop objects as XML. ######################################################################### ## XML Writer for GM files. xmlns.xsi <- "http://www.w3.org/2001/XMLSchema-instance" gm.schemaLocation <- "http://research.ets.org/~ralmond/StatShop/StatShop-xml/gm.xsd" gm.namespace <- "http://pdra.ets.org/StatShop/gm" gm.prefix <- "g" amdf.schemaLocation <- "http://research.ets.org/~ralmond/StatShop/StatShop-xml/assessmentDescription.xsd" amdf.namespace <- "http://pdra.ets.org/StatShop/AssessmentDescription" amdf.prefix <- "amdf" tdd.schemaLocation <- "http://research.ets.org/~ralmond/StatShop/StatShop-xml/TaskModelData.xsd" tdd.namespace <- "http://pdra.ets.org/StatShop/TaskModelData" tdd.prefix <- "tdd" pd.schemaLocation <- "http://research.ets.org/~ralmond/StatShop/StatShop-xml/PretestDescription.xsd" pd.namespace <- "http://pdra.ets.org/StatShop/result/PretestDescription" pd.prefix <- "pd" xmlHypergraph <- function (model, toplevel =TRUE) { hypergraph <- xmlNode("Hypergraph") if (toplevel) { attrs <- c("xmlns:xsi"=xmlns.xsi, "xsi:schemaLocation"=paste(gm.namespace,gm.schemaLocation,sep=" "), "xmlns"=gm.namespace) names(attrs)[3] <- paste("xmlns",gm.prefix,sep=":") hypergraph <- xmlNode("Hypergraph", attrs=attrs, namespace=gm.prefix) } if (model$type == "SM") { hypergraph <- xmlAddChild(hypergraph, xmlNode("StudentModelID",model$id)) } else { hypergraph <- xmlAddChild(hypergraph, xmlNode("LinkID",model$id)) } if (length (model$taskID) >0) { hypergraph <- xmlAddChild(hypergraph, xmlNode("TaskID",model$taskID)) } hypergraph <- append.XMLNode(hypergraph, lapply(model$variables,xmlDiscreteVariable)) hypergraph <- append.XMLNode(hypergraph, lapply(model$distributions,xmlDistribution)) hypergraph } "xmlDiscreteVariable" <- function (var) { if (length(var$nodeName) >0) { ## The as.vector gets rid of a nasty problem where names are concatenated ## as part of the c() operation attr <- c(externalName=as.vector(var$name), varName=as.vector(var$nodeName), role=as.vector(var$role), nStates=length(var$level), isOrdered=ifelse(var$isOrdered,"true","false")) } else { attr <- c(varName=as.vector(var$name), role=as.vector(var$role), nStates=length(var$level), isOrdered=ifelse(var$isOrdered,"true","false")) } varNode <- xmlNode("DiscreteVariable",attrs=attr) ## For now punt on isTrue attribute. We will just make it true by default states <- lapply(var$level, function (x) { xmlNode("State",x,attrs=c(isTrue="true")) }) ## Make last state the false one. states[[length(states)]]$attributes[["isTrue"]] <- "false" xmlChildren(varNode) <- states varNode } "xmlDistribution" <- function (dist) { distNode <- xmlNode("Distribution",attrs=c("type"=dist$type)) condNode <- xmlNode("Conditions") xmlChildren(condNode) <- lapply(dist$conditions, function (c) xmlNode("VarRef", attrs=c(varName=c))) distNode <- xmlAddChild(distNode,condNode) consNode <- xmlNode("Consequences") xmlChildren(consNode) <- lapply(dist$consequences, function (c) xmlNode("VarRef", attrs=c(varName=c))) distNode <- xmlAddChild(distNode,consNode) if (!is.null(dist$table)) { ## Should write Distribution table here. distNode <- xmlAddChild(distNode,xmlBuildTable(dist$table, dist$conditions, "DistributionTable")) } if (!is.null(dist$parameterSet)) { distNode <- xmlAddChild(distNode,xmlParameterSet(dist$parameterSet)) } else if (!is.null(dist$parameterTable)) { distNode <- xmlAddChild(distNode,xmlBuildTable(dist$parameterTable, dist$conditions, "ParameterTable")) } else { if (dist$type != "Fixed Distribution") { warning (paste("Distribution for ",dist$consequences," has neither parameter set nor table.")) } } distNode } "xmlParameterSet" <- function (param) { pNode <- xmlNode("ParameterSet") xmlChildren(pNode) <- list() for (pname in names(param$value)) { pNode <- xmlAddChild(pNode,xmlParamVal(pname,param$value[pname])) } pNode <- xmlAddChild(pNode,xmlCovMatrix(param$covariance)) pNode } "xmlParamVal" <- function(pname,pval) { parens <- regexpr("\\(.*\\)",pname) if (parens <0) { xmlNode("ParameterValue",pval, attrs=c(parameterName=pname)) } else { name <- substring(pname,1,parens-1) vname <- substring(pname,parens+1,parens+attr(parens,"match.length")-2) comma <- regexpr(",",vname) if (comma[1] < 0) { # Gets rid of unnecessary warning xmlNode("VarParameterValue",pval, attrs=c(parameterName=name, varName=vname)) } else { sname <- substring(vname,comma+1) vname <- substring(vname,1,comma-1) xmlNode("StateParameterValue",pval, attrs=c(parameterName=name, varName=vname, state=sname)) } } } ## Used to label rows of convariance matrix. "xmlParamName" <- function(pname) { parens <- regexpr("\\(.*\\)",pname) if (parens <0) { xmlNode("ParName",pname) } else { name <- substring(pname,1,parens-1) vname <- substring(pname,parens+1,parens+attr(parens,"match.length")-2) comma <- regexpr(",",vname) if (comma[1] < 0) { # Gets rid of unnecessary warning xmlNode("VarParName",name, attrs=c(varName=vname)) } else { sname <- substring(vname,comma+1) vname <- substring(vname,1,comma-1) xmlNode("StateParName",name, attrs=c(varName=vname, state=sname)) } } } "xmlCovMatrix" <- function (cov) { ## Right now we really only support diagonal matrixes anyway. cNode <- xmlNode("CovMatrix", attrs=c(type="DIAGONAL")) xmlChildren(cNode) <- lapply(row.names(cov),xmlParamName) cNode <- append.XMLNode(cNode, lapply(diag(cov), function (c) xmlNode("CovMatrixRow",c))) cNode } xmlBuildTable <- function (table,conditions,tagname="DistributionTable") { node <- xmlNode(tagname) for(i in 1:nrow(table)) { node <- xmlAddChild(node,xmlTableRow(table[i,],conditions)) } node } xmlTableRow <- function (tabrow,conditions) { header <- xmlNode("TableRowHeader") for (cond in conditions) { header <- xmlAddChild(header, xmlNode("TableHeaderCell", attrs=c(varName=cond, state=as.character(tabrow[1,cond])))) } row <- xmlNode("TableRow",header) for (state in setdiff(names(tabrow),conditions)) { row <- xmlAddChild(row,xmlNode("TableCell",tabrow[1,state], attrs=c(state=state))) } row } xmlStatistic <- function (stat) { attrs <- c("name"=stat$name,"class"=stat$class, "reportOnUpdate"=ifelse(stat$reportOnUpdate,"Yes","No")) ## Workaround for a "feature" of R where the name "class" gets turned into "class.margin" names(attrs) <- c("name","class","reportOnUpdate") statNode <- xmlNode("Statistic", attrs=attrs) if (length(stat$model) >0) { statNode <- xmlAddChild(statNode, xmlNode("UseStudentModel", attrs=c(model=stat$model))) } statNode <- append.XMLNode(statNode, lapply(stat$reportingVars, function (v) xmlNode("UseReportingVar",attrs=c(name=v)))) if (stat$offset != 0 && stat$scale !=1) { statNode <- xmlAddChild(statNode, xmlNode("ScaleStatistic", attrs=c(offset=stat$offset, scaleFactor=stat$scale))) } statNode } ############################################################################## ## AMDs. xmlAMD <- function (amd, toplevel=TRUE) { amdNode <- xmlNode("AssessmentModelDescription") if (toplevel) { attrs <- c("xmlns:xsi"=xmlns.xsi, "xsi:schemaLocation"=paste(amdf.namespace,amdf.schemaLocation, sep=" "), "xmlns"=amdf.namespace) names(attrs)[3] <- paste("xmlns",amdf.prefix,sep=":") amdNode <- xmlNode("AssessmentModelDescription", attrs=attrs, namespace=amdf.prefix) } amdNode <- append.XMLNode(amdNode, lapply(amd$missingCodes,xmlMissingCode)) ## Assume only one SM smname <- getECDIDModel(amd$studentModels[[1]]$id) amdNode <- append.XMLNode(amdNode, lapply(amd$studentModels,xmlStudentModel)) amdNode <- append.XMLNode(amdNode, lapply(amd$evidenceModels, function(e) xmlEvidenceModelSet(e,smname,"EvidenceModel"))) amdNode <- append.XMLNode(amdNode, lapply(amd$linkModels, function(e) xmlEvidenceModelSet(e,smname,"LinkModel", getECDIDtask(e$id)))) amdNode <- append.XMLNode(amdNode, lapply(amd$statistics,xmlStatistic)) amdNode } xmlStudentModel <- function (sm) { name <- getECDIDModel(sm$id) smid <- xmlNode("StudentModelID",sm$id) smnode <- xmlNode("StudentModel",smid, attrs=c(name=name,class="Discrete Bayes Net", entityref=name)) smnode <- append.XMLNode(smnode,lapply(sm$variables,xmlReportingVar)) smnode } xmlReportingVar <- function (var) { xmlNode("ReportingVar",attrs=c(name=var$name,nodeName=var$nodeName)) } xmlEvidenceModelSet <- function (em,smname,type="EvidenceModel", emname = getECDIDModel(em$id)) { ## Assumes exactly one EM per TM linkid <- xmlNode("LinkID",em$id) tid <- xmlNode("TaskID",em$taskID) useSM <- xmlNode("UseStudentModel",attrs=c(model=smname)) emnode <- xmlNode(type,linkid,useSM, attrs=c(name=emname,class="Discrete Bayes Net", entityref=emname)) emnode <- append.XMLNode(emnode, lapply(getEMObservables(em),xmlObservable)) ems <- xmlNode(paste(type,"Set",sep=""),tid,emnode) ems } xmlObservable <- function (obs) { ## Assume that if there are two states is logical, otherwise unordered. type <- ifelse (obs$isOrdered,"OrderedObservable", ifelse(length(obs$levels)>2,"UnorderedObservable", "LogicalObservable")) attrs <- c(name=obs$name) if (length(obs$nodeName) > 0) attrs <- c(attrs,nodeName=obs$nodeName) obsnode <- xmlNode(type,attrs=attrs) ## For now punt on isTrue attribute. We will just make it true by default states <- lapply(obs$level, function (x) { xmlNode("State",x,attrs=c(isTrue="true")) }) ## Make last state the false one. states[[length(states)]]$attributes[["isTrue"]] <- "false" xmlChildren(obsnode) <- states obsnode } xmlMissingCode <- function(mc) { attrs <- c(type=mc$type,handler=mc$handler) if (length(mc$xmlCode) >0) attrs <- c(attrs,xmlCode=mc$xmlCode) if (length(mc$checksum) >0) attrs <- c(attrs,checksum=mc$checksum) if (is.null(mc$newval)) xmlNode("MissingCode",attrs=attrs) else xmlNode("MissingCode",mc$newval,attrs=attrs) } writeAMD <- function (amd, directory=getwd(), amdfile=paste(deparse(substitute(amd)),"amd.xml", sep=".")) { ## Write student models. for (name in names(amd$studentModels)) { xmlWrite(xmlHypergraph(amd$studentModels[[name]]), paste(directory, paste(name,"gm.xml",sep="."), sep=.Platform$file.sep)) } ## Write evidence models. for (name in names(amd$evidenceModels)) { xmlWrite(xmlHypergraph(amd$evidenceModels[[name]]), paste(directory, paste(name,"gm.xml",sep="."), sep=.Platform$file.sep)) } ## Write link models. for (name in names(amd$linkModels)) { xmlWrite(xmlHypergraph(amd$linkModels[[name]]), paste(directory, paste(name,"gm.xml",sep="."), sep=.Platform$file.sep)) } ## Finally, write out AMD. Do this last so amd file does not get ## created if there is an error during processing. xmlWrite(xmlAMD(amd),paste(directory,amdfile,sep=.Platform$file.sep)) invisible() } xmlTaskList <- function (Qmat, caf, anchors=character(0), toplevel=TRUE) { tddNode <- xmlNode("TaskModelDataSet") if (toplevel) { attrs <- c("xmlns:xsi"=xmlns.xsi, "xsi:schemaLocation"=paste(tdd.namespace,tdd.schemaLocation, sep=" "), "xmlns"=tdd.namespace) names(attrs)[3] <- paste("xmlns",tdd.prefix,sep=":") tddNode <- xmlNode("TaskModelDataSet", attrs=attrs, namespace=tdd.prefix) } for (irow in 1:nrow(Qmat)) { tddNode <- xmlAddChild(tddNode, xmlNode("TaskModelData", xmlNode("TaskID",makeECDID(caf,Qmat[irow,"TaskModel"], Qmat[irow,"TaskName"])))) } ## TODO Need to add scale anchor stuff here for (aname in names(anchors)) { anchorvar <- anchors[aname] ## Workaround for R "feature" names(anchorvar) <- "varName" anchorNode <- xmlNode("ScaleAnchor",attrs=c(name=aname)) anchorNode <- xmlAddChild(anchorNode, xmlNode("ParName","Task Difficulty")) anchorNode <- xmlAddChild(anchorNode, xmlNode("VarParName","Relative Importance",attrs=anchorvar)) arows <- Qmat[Qmat$Anchor == aname,] for (emname in unique(arows$EvidenceModel)) { erows <- arows[arows$EvidenceModel==emname,] for (tname in unique(erows$TaskName)) { trows <- erows[erows$TaskName==tname,] ulm <- xmlNode("UseLinkModel", xmlNode("LinkID",makeECDID(caf,emname,tname))) for (irow in 1:nrow(trows)) { ulm <- xmlAddChild(ulm, xmlNode("UseObservable",attrs=c(name=trows[irow,"ObsName"]))) } anchorNode <- xmlAddChild(anchorNode,ulm) } } tddNode <- xmlAddChild(tddNode,anchorNode) } tddNode } xmlSimpleTaskList <- function (taskIDs, toplevel=TRUE) { tddNode <- xmlNode("TaskModelDataSet") if (toplevel) { attrs <- c("xmlns:xsi"=xmlns.xsi, "xsi:schemaLocation"=paste(tdd.namespace,tdd.schemaLocation, sep=" "), "xmlns"=tdd.namespace) names(attrs)[3] <- paste("xmlns",tdd.prefix,sep=":") tddNode <- xmlNode("TaskModelDataSet", attrs=attrs, namespace=tdd.prefix) } for (tid in taskIDs) { tddNode <- xmlAddChild(tddNode, xmlNode("TaskModelData", xmlNode("TaskID",tid))) } tddNode } ###################### ## Pretest Description xmlDemographicMeta <- function(name,type,colID=name) { attrs <- c(name=name,column=as.character(nextPDColumn()), columnId=colID) if (length(type) <2) { attrs <- c("xsi:type"=paste(pd.prefix,":",type, "DemographicVariableMetaType",sep=""),attrs) xmlNode("DemographicVariableMeta",attrs=attrs) } else { attrs <- c("xsi:type"=paste(pd.prefix,":", "DiscreteDemographicVariableMetaType",sep=""), attrs) xmlNode("DemographicVariableMeta", xmlTranslationTable(names(type),type), attrs=attrs) } } xmlTranslationTable <- function (keys, values) { ttNode <- xmlNode("TranslationTable") for (i in 1:length(keys)) { ## R doens't allow "" in name column, so use special code ## when we want an empty field. key <- ifelse(keys[i]=="","",keys[i]) value <- ifelse(values[i]=="","",values[i]) ttNode <- xmlAddChild(ttNode, xmlNode("KeyValuePair",xmlNode("Key",key), xmlNode("Value",value)) ) } ttNode } demVariableType <- function (column, nacode = c("NA"="NA")) { if (is.factor(column)) { vals <- levels(column) names <- vals names[match("",names)] <- "" names(vals) <- names c(vals,nacode) } else if (is.integer(column)) { "Integer" } else if (is.numeric(column)) { "Real" } else if (is.character(column)) { "String" } else { warn("Unknown column type: ",mode(column)) "String" } } ################################################################ ## Utility function for XML package. xmlWrite <- function (node,file) { cat("Writing",deparse(substitute(node)), "to file",file,"\n") saveXML(node,file) } ############################# ## ECD ID functions. "getECDIDCaf" <- function (id) { elements <- strsplit(id, "/") sapply(elements, function(el) paste(el[1:4], collapse = "/")) } "getECDIDModel" <- function (id) { elements <- strsplit(id, "/") sapply(elements, function(el) el[5]) } "getECDIDtask" <- function (id) { sub(".*/","",id) } "makeECDID" <- function (caf, model, task = NULL, version = 1) { if (is.null(task)) { tail <- "" } else { tail <- paste(task, version, sep = ".") } paste(caf, model, tail, sep = "/") } ## This gets the Meta-object ID from and ECD ID. I.e., it converts: ## "ecd://iCT0/First/P4_AccessDefineS1NC_EM/D10kidbook.1" ## to ## "ecd://iCT0/First/P4_AccessDefineS1_EM/" getECDMetaID <- function (id) { ## Strip trailing slashes off of ids which are already meta ids. id <- ifelse(substr(id,nchar(id),nchar(id)) == "/",substr(id,1,nchar(id)-1),id) extents <- regexpr(".*/",id) length <- attr(extents,"match.length") substr(id,extents,length) } ############################### ## Missing Code functions "MissingCode" <- function (type, handler, xmlCode = character(0), checksum = character(0), newval = NULL) { mc <- list(type = type, handler = handler, xmlCode = xmlCode, checksum = checksum, newval = newval) class(mc) <- "MissingCode" mc } "print.MissingCode" <- function (x, ...) { print(paste("")) } "toString.MissingCode" <- function (x, ...) { paste("") } "parseMissingCode" <- function (mcNode) { type <- xmlGetAttr(mcNode, "type") handler <- xmlGetAttr(mcNode, "handler") xmlCode <- xmlGetAttr(mcNode, "xmlCode", character(0)) checksum <- xmlGetAttr(mcNode, "checksum", character(0)) newval <- xmlValue(mcNode) MissingCode(type, handler, xmlCode, checksum, newval) } "recodeMissingHandlers" <- function (originalHandlers, codeTable = omitCodeGENASYS) { lapply(originalHandlers, function(h) { if (!is.na(codeTable[h$type])) { MissingCode(h$type, "Recode", h$xmlCode, h$checksum, codeTable[h$type]) } else { h } }) } "defaultMissingCodes" <- list("NA" = MissingCode(type = "NA", handler = "Ignore", xmlCode = "NA", checksum = "0", newval = NULL), Omit = MissingCode(type = "Omit", handler = "Low", xmlCode = "Omit", checksum = "*", newval = NULL), "Not Reached" = MissingCode(type = "Not Reached", handler = "Ignore", xmlCode = "Not Reached", checksum = "@", newval = NULL), Invalid = MissingCode(type = "Ivalid", handler = "Error", xmlCode = character(0), checksum = character(0), newval = NULL))