## Script assumes data files are located in R working directory ##
##
## Reads ./data_model_<benchmark>.txt, derives the `sec` and `bips` columns,
## then: clusters the predictors, summarizes associations with bips, fits OLS
## models of sqrt(bips) and log(power), writes residual diagnostics to PDF,
## and writes per-benchmark prediction-error tables.

## Preliminaries ##

library(Hmisc)
# NOTE(review): Design appears to have been superseded by the rms package;
# confirm availability before running on a current R installation.
library(Design)
library(lattice)

data_model.df <- read.table("./data_model_ammp.txt", header = TRUE,
                            nrows = 4000)
# NOTE(review): the constants 1.1 and 18 are not explained in this file;
# confirm their meaning against the data description.
data_model.df$sec <- 1.1 * data_model.df$cycle * 18 / data_model.df$depth
data_model.df$bips <- data_model.df$inst / data_model.df$sec

# options(datadist) stores the *name* of the datadist object, so the
# variable below must stay named `dd`.
dd <- datadist(data_model.df)
options(datadist = "dd")

# Explicit print() so the output also appears when the script is source()'d.
print(describe(data_model.df))

## Hierarchical Clustering ##

v <- varclus(
  ~ (depth + width + gpr_phys + br_resv + dmem_lat + load_lat + br_lat +
       fix_lat + fpu_lat + d2cache_lat + l2cache_size + icache_size +
       dcache_size + bips),
  data = data_model.df
)
trellis.device("pdf", file = "./varclus_plot.pdf")
plot(v)
dev.off()
print(v)

## Association Analysis ##

s <- summary(bips ~ depth + width + gpr_phys + br_resv, data = data_model.df)
trellis.device("pdf", file = "./assoc_plot.pdf")
plot(s)
dev.off()
print(s)

## Model Specification ##

m <- (sqrt(bips) ~ (
  ## first-order effects
  rcs(depth, 4) + width + rcs(gpr_phys, 4) + rcs(br_resv, 3) +
    rcs(dmem_lat, 3) + fix_lat + rcs(fpu_lat, 3) +
    rcs(l2cache_size, 3) + rcs(icache_size, 3) + rcs(dcache_size, 3) +
    ## second-order effects
    ## interactions of pipe dimensions and
    ## in-flight resources
    width %ia% rcs(depth, 4) +
    rcs(depth, 4) %ia% rcs(gpr_phys, 4) +
    width %ia% rcs(gpr_phys, 4) +
    ## interactions of depth and hazards
    width %ia% rcs(icache_size, 3) +
    rcs(depth, 4) %ia% rcs(dcache_size, 3) +
    rcs(depth, 4) %ia% rcs(l2cache_size, 3) +
    ## interactions in memory hierarchy
    rcs(icache_size, 3) %ia% rcs(l2cache_size, 3) +
    rcs(dcache_size, 3) %ia% rcs(l2cache_size, 3)
))

# Train on the first 500 rows; g reuses f's predictors with log(power) as
# the response.
data_train.df <- data_model.df[1:500, ]
f <- ols(m, data = data_train.df)
g <- update(f, log(power) ~ .)
print(f)
print(g)

## Residual Analysis ##

# xYplot() returns a lattice object; it is only drawn when printed. Without
# the explicit print() the PDF is empty if this script is run via source().
trellis.device("pdf", file = "./resid1.pdf")
print(xYplot(
  resid(f) ~ fitted(f),
  method = "quantile", nx = 20,
  ylim = c(-0.5, 0.5), xlim = c(-0.5, 2.5),
  abline = list(h = 0, lwd = 0.5, lty = 2),
  xlab = "Fitted Values [sqrt(bips)]",
  ylab = "Residuals [sqrt(bips)]"
))
dev.off()

# Same model with an untransformed response, for comparison of residuals.
f.alt <- update(f, bips ~ .)
trellis.device("pdf", file = "./resid2.pdf")
print(xYplot(
  resid(f.alt) ~ fitted(f.alt),
  method = "quantile", nx = 20,
  ylim = c(-0.5, 0.5), xlim = c(-0.5, 2.5),
  abline = list(h = 0, lwd = 0.5, lty = 2),
  xlab = "Fitted Values [bips]",
  ylab = "Residuals [bips]"
))
dev.off()

trellis.device("pdf", file = "./qqnorm1.pdf")
qqnorm(resid(f))
qqline(resid(f))
dev.off()

trellis.device("pdf", file = "./qqnorm2.pdf")
qqnorm(resid(f.alt))
qqline(resid(f.alt))
dev.off()

## Prediction -- ammp only ##

# NOTE(review): this section validates on rows 501:600 while the
# all-benchmarks loop below uses rows 1001:1100 -- confirm this is intended.
data_valid.df <- data_model.df[501:600, ]
# The model predicts sqrt(bips); square to return to bips.
p <- (predict(object = f, newdata = data_valid.df))^2
o <- data_valid.df$bips
e <- abs(o - p) / o  # relative absolute error
results <- cbind(o, p, e)
write.table(results, file = "./pred_bips_ammp.txt", sep = "\t",
            row.names = FALSE,
            col.names = c("observed", "predicted", "error"))

pdf("box_bips_ammp.pdf")
boxplot(e)
dev.off()

## Prediction -- all benchmarks ##

bench <- c("ammp", "applu", "equake", "gcc",
           "gzip", "jbb", "mesa", "twolf")

# For each benchmark: reload its data, refit both models on rows 1:500, and
# write relative prediction errors for rows 1001:1100.
for (b in seq_along(bench)) {
  print(bench[b])

  data_model.df <- read.table(sprintf("./data_model_%s.txt", bench[b]),
                              header = TRUE, nrows = 4000)
  data_model.df$sec <- 1.1 * data_model.df$cycle * 18 / data_model.df$depth
  data_model.df$bips <- data_model.df$inst / data_model.df$sec

  # Refresh the datadist for this benchmark's data (name must remain `dd`).
  dd <- datadist(data_model.df)
  options(datadist = "dd")

  data_train.df <- data_model.df[1:500, ]
  data_valid.df <- data_model.df[1001:1100, ]

  # Performance model: fitted on sqrt(bips), so predictions are squared.
  f <- ols(m, data = data_train.df)
  p <- (predict(object = f, newdata = data_valid.df))^2
  o <- data_valid.df$bips
  e <- abs(o - p) / o
  print(summary(e))
  results <- cbind(o, p, e)
  write.table(results, file = sprintf("./pred_bips_%s.txt", bench[b]),
              sep = "\t", row.names = FALSE,
              col.names = c("observed", "predicted", "error"))

  # Power model: fitted on log(power), so predictions are exponentiated.
  g <- update(f, log(power) ~ .)
  p <- exp(predict(object = g, newdata = data_valid.df))
  o <- data_valid.df$power
  e <- abs(o - p) / o
  print(summary(e))
  results <- cbind(o, p, e)
  write.table(results, file = sprintf("./pred_power_%s.txt", bench[b]),
              sep = "\t", row.names = FALSE,
              col.names = c("observed", "predicted", "error"))
}