Comparison of Techniques used in Q3B
Table of Contents
1 CSV files
All statistics are computed from the BenchExec CSV file results.csv.
1.1 Convert to a correct CSV
First, we need to replace tabs by commas:
sed -i 's/\t/,/g' results.csv
And then we remove the standard BenchExec header rows and replace them by more readable ones. The header is located in the file results_header.csv.
cat results_header.csv > results.out.csv tail -n +4 results.csv >> results.out.csv
1.2 Split into benchmark sets
We split benchmark name into family and name.
sed -i 's#/#,##' results.out.csv sed -i 's#20170501-Heizmann#heizmann#' results.out.csv sed -i 's#UltimateAutomizer#ua#' results.out.csv
2 R environment
2.1 Load necessary libraries
library(dplyr) library(ggplot2) library(scales) library(colorspace) library(RColorBrewer) library(xtable) options("scipen"=100, "digits"=0)
2.2 Load all CSV files
res = list() res <- read.csv("results.out.csv", header=TRUE, stringsAsFactors=FALSE) configurations = c('none', 'noneUnconstrained', 'noneGoalUnconstrained', 'variables', 'variablesUnconstrained', 'variablesGoalUnconstrained', 'both', 'bothUnconstrained', 'bothGoalUnconstrained') labels = c(none = 'none', noneUnconstrained = 'none-u', noneGoalUnconstrained='none-gu', variables='approx', variablesUnconstrained='approx-u', variablesGoalUnconstrained='approx-gu', both='full', bothUnconstrained='full-u', bothGoalUnconstrained='full-gu')
We need to modify all times for results other than sat or unsat to have the maximal values of walltime and CPU-time.
timeout <- 1200 res[['trivial']] <- TRUE for (c in configurations) { res[[paste(c, 'solved', sep='.')]] <- res[[paste(c, 'result', sep='.')]] == "sat" | res[[paste(c, 'result', sep='.')]] == "unsat" res[[paste(c, 'cputime', sep='.')]][!res[[paste(c, 'solved', sep='.')]]] <- timeout res[[paste(c, 'walltime', sep='.')]][!res[[paste(c, 'solved', sep='.')]]] <- timeout res[['trivial']] <- res[['trivial']] & res[[paste(c, 'cputime', sep='.')]] < 0.1 }
2.3 Check correctness of results
We need to check that no two solvers disagreed on the status of a benchmark (i.e. sat vs unsat). The result of the following command should be empty!
res %>% mutate( sat = ((none.result == "sat") | (noneUnconstrained.result == "sat") | (noneGoalUnconstrained.result == "sat") | (variables.result == "sat") | (variablesUnconstrained.result == "sat") | (variablesGoalUnconstrained.result == "sat") | (both.result == "sat") | (bothUnconstrained.result == "sat") | (bothGoalUnconstrained.result == "sat")), unsat = ((none.result == "unsat") | (noneUnconstrained.result == "unsat") | (noneGoalUnconstrained.result == "unsat") | (variables.result == "unsat") | (variablesUnconstrained.result == "unsat") | (variablesGoalUnconstrained.result == "unsat") | (both.result == "unsat") | (bothUnconstrained.result == "unsat") | (bothGoalUnconstrained.result == "unsat")), ) %>% filter(sat & unsat) %>% select(benchmark, family)
benchmark | family |
3 Statistics
3.1 Numbers of solved formulas
3.1.1 Helper function to generate tables
getTable <- function() { counts <- list() for (c in configurations) { counts[[c]] <- count_(res, paste(c, 'result', sep='.')) colnames(counts[[c]]) <- c('result', c) } return(Reduce(function(...) merge(..., all=TRUE), counts)) }
3.1.2 Total
getTable()
result | none | noneUnconstrained | noneGoalUnconstrained | variables | variablesUnconstrained | variablesGoalUnconstrained | both | bothUnconstrained | bothGoalUnconstrained |
---|---|---|---|---|---|---|---|---|---|
OUT OF MEMORY | nil | 19 | 20 | nil | nil | nil | nil | nil | nil |
sat | 319 | 331 | 331 | 596 | 600 | 600 | 616 | 617 | 617 |
TIMEOUT (timeout) | 785 | 616 | 581 | 460 | 330 | 296 | 287 | 181 | 146 |
unsat | 4647 | 4785 | 4819 | 4695 | 4821 | 4855 | 4848 | 4953 | 4988 |
3.2 Solved formulas in benchmark families
table <- res %>% group_by(family) %>% summarise(total = sum(n()), none = sum(none.solved), variables = sum(variables.solved), both = sum(both.solved), bothUnconstrained = sum(bothUnconstrained.solved), variablesUnconstrained = sum(variablesUnconstrained.solved)) table <- rbind(table, data.frame(family='Total', total = sum(table$total), none = sum(table$none), variables = sum(table$variables), both = sum(table$both), bothUnconstrained = sum(table$bothUnconstrained), variablesUnconstrained = sum(table$variablesUnconstrained))) table %>% select(family, total, none, variables, variablesUnconstrained, both, bothUnconstrained)
family | total | none | variables | variablesUnconstrained | both | bothUnconstrained |
---|---|---|---|---|---|---|
2017-Preiner-keymaera | 4035 | 3788 | 3886 | 3890 | 4009 | 4009 |
2017-Preiner-psyco | 194 | 118 | 179 | 180 | 181 | 182 |
2017-Preiner-scholl-smt08 | 374 | 172 | 270 | 288 | 311 | 317 |
2017-Preiner-tptp | 73 | 67 | 73 | 73 | 73 | 73 |
2017-Preiner-ua | 153 | 153 | 153 | 153 | 153 | 153 |
2018-Preiner-cav18 | 600 | 435 | 448 | 531 | 453 | 530 |
heizmann-ua | 131 | 103 | 98 | 122 | 102 | 124 |
wintersteiger | 191 | 130 | 184 | 184 | 182 | 182 |
Total | 5751 | 4966 | 5291 | 5421 | 5464 | 5570 |
3.2.1 Only sat
satTable <- res %>% filter(none.result == 'sat' | variables.result == 'sat' | variablesUnconstrained.result == 'sat' | both.result == 'sat' | bothUnconstrained.result == 'sat' | bothGoalUnconstrained.result == 'sat') %>% group_by(family) %>% summarise(total = sum(n()), result = '', none = sum(none.solved), variables = sum(variables.solved), both = sum(both.solved), bothUnconstrained = sum(bothUnconstrained.solved), bothGoalUnconstrained = sum(bothGoalUnconstrained.solved), variablesUnconstrained = sum(variablesUnconstrained.solved), ) satTable <- rbind(satTable, data.frame(family='Total SAT', result = 'SAT', total = sum(satTable$total), none = sum(satTable$none), variables = sum(satTable$variables), both = sum(satTable$both), bothUnconstrained = sum(satTable$bothUnconstrained), bothGoalUnconstrained = sum(satTable$bothGoalUnconstrained), variablesUnconstrained = sum(satTable$variablesUnconstrained) )) satTable %>% select(result, family, total, none, variables, variablesUnconstrained, both, bothUnconstrained, bothGoalUnconstrained)
result | family | total | none | variables | variablesUnconstrained | both | bothUnconstrained | bothGoalUnconstrained |
---|---|---|---|---|---|---|---|---|
2017-Preiner-keymaera | 104 | 7 | 103 | 104 | 104 | 104 | 104 | |
2017-Preiner-psyco | 126 | 74 | 124 | 124 | 123 | 123 | 123 | |
2017-Preiner-scholl-smt08 | 248 | 136 | 226 | 229 | 246 | 247 | 247 | |
2017-Preiner-tptp | 17 | 13 | 17 | 17 | 17 | 17 | 17 | |
2017-Preiner-ua | 16 | 16 | 16 | 16 | 16 | 16 | 16 | |
heizmann-ua | 20 | 18 | 18 | 18 | 20 | 20 | 20 | |
wintersteiger | 92 | 55 | 92 | 92 | 90 | 90 | 90 | |
SAT | Total SAT | 623 | 319 | 596 | 600 | 616 | 617 | 617 |
3.2.2 Only unsat
unsatTable <- res %>% filter(none.result == 'unsat' | variables.result == 'unsat' | variablesUnconstrained.result == 'unsat' | both.result == 'unsat' | bothUnconstrained.result == 'unsat' | bothGoalUnconstrained.result == 'unsat') %>% group_by(family) %>% summarise(total = sum(n()), result = '', none = sum(none.solved), variables = sum(variables.solved), both = sum(both.solved), bothUnconstrained = sum(bothUnconstrained.solved), bothGoalUnconstrained = sum(bothGoalUnconstrained.solved), variablesUnconstrained = sum(variablesUnconstrained.solved)) unsatTable <- rbind(unsatTable, data.frame(family='Total UNSAT', result = 'UNSAT', total = sum(unsatTable$total), none = sum(unsatTable$none), variables = sum(unsatTable$variables), both = sum(unsatTable$both), bothUnconstrained = sum(unsatTable$bothUnconstrained), bothGoalUnconstrained = sum(unsatTable$bothGoalUnconstrained), variablesUnconstrained = sum(unsatTable$variablesUnconstrained))) unsatTable %>% select(result, family, total, none, variables, variablesUnconstrained, both, bothGoalUnconstrained)
result | family | total | none | variables | variablesUnconstrained | both | bothGoalUnconstrained |
---|---|---|---|---|---|---|---|
2017-Preiner-keymaera | 3905 | 3781 | 3783 | 3786 | 3905 | 3905 | |
2017-Preiner-psyco | 59 | 44 | 55 | 56 | 58 | 59 | |
2017-Preiner-scholl-smt08 | 71 | 36 | 44 | 59 | 65 | 70 | |
2017-Preiner-tptp | 56 | 54 | 56 | 56 | 56 | 56 | |
2017-Preiner-ua | 137 | 137 | 137 | 137 | 137 | 137 | |
2018-Preiner-cav18 | 565 | 435 | 448 | 531 | 453 | 565 | |
heizmann-ua | 107 | 85 | 80 | 104 | 82 | 104 | |
wintersteiger | 93 | 75 | 92 | 92 | 92 | 92 | |
UNSAT | Total UNSAT | 4993 | 4647 | 4695 | 4821 | 4848 | 4988 |
3.2.3 Both
bothTable <- rbind(unsatTable, satTable) bothTable <- rbind(bothTable, data.frame(family='Total', result = '', total = sum(res %>% summarize(n())), none = sum(res$none.solved), variables = sum(res$variables.solved), variablesUnconstrained = sum(res$variablesUnconstrained.solved), both = sum(res$both.solved), bothUnconstrained = sum(res$bothUnconstrained.solved), bothGoalUnconstrained = sum(res$bothGoalUnconstrained.solved))) bothTable <- bothTable %>% transmute( Result = result, Family = family, Total = total, none = none, approx = variables, approxu = variablesUnconstrained, full = both, fullu = bothUnconstrained, fullgu = bothGoalUnconstrained) xt <- xtable(bothTable, type = "latex", label="tbl:experiments:q3bSolved", caption = "Numbers of benchmarks solved by the individual Q3B configurations divided by the satisfiability/unsatisfiability and the benchmark family.") align(xt) <- c('l', 'l', 'l', 'r', 'r', 'r', 'r', 'r', 'r', 'r') print(xt, file = "Tables/solved.tex", rotate.colnames=TRUE, include.rownames=FALSE, booktabs=TRUE, hline.after = c(-1, 0, 9, nrow(bothTable)-1, nrow(bothTable)), table.placement="tbp", sanitize.text.function=function(x) x) bothTable
Result | Family | Total | none | approx | approxu | full | fullu | fullgu |
---|---|---|---|---|---|---|---|---|
2017-Preiner-keymaera | 3905 | 3781 | 3783 | 3786 | 3905 | 3905 | 3905 | |
2017-Preiner-psyco | 59 | 44 | 55 | 56 | 58 | 59 | 59 | |
2017-Preiner-scholl-smt08 | 71 | 36 | 44 | 59 | 65 | 70 | 70 | |
2017-Preiner-tptp | 56 | 54 | 56 | 56 | 56 | 56 | 56 | |
2017-Preiner-ua | 137 | 137 | 137 | 137 | 137 | 137 | 137 | |
2018-Preiner-cav18 | 565 | 435 | 448 | 531 | 453 | 530 | 565 | |
heizmann-ua | 107 | 85 | 80 | 104 | 82 | 104 | 104 | |
wintersteiger | 93 | 75 | 92 | 92 | 92 | 92 | 92 | |
UNSAT | Total UNSAT | 4993 | 4647 | 4695 | 4821 | 4848 | 4953 | 4988 |
2017-Preiner-keymaera | 104 | 7 | 103 | 104 | 104 | 104 | 104 | |
2017-Preiner-psyco | 126 | 74 | 124 | 124 | 123 | 123 | 123 | |
2017-Preiner-scholl-smt08 | 248 | 136 | 226 | 229 | 246 | 247 | 247 | |
2017-Preiner-tptp | 17 | 13 | 17 | 17 | 17 | 17 | 17 | |
2017-Preiner-ua | 16 | 16 | 16 | 16 | 16 | 16 | 16 | |
heizmann-ua | 20 | 18 | 18 | 18 | 20 | 20 | 20 | |
wintersteiger | 92 | 55 | 92 | 92 | 90 | 90 | 90 | |
SAT | Total SAT | 623 | 319 | 596 | 600 | 616 | 617 | 617 |
Total | 5751 | 4966 | 5291 | 5421 | 5464 | 5570 | 5605 |
3.3 Cross comparison
3.3.1 Helper function to generate tables
First we need a function which for given configurations computes a number of benchmarks that the first configuration has solved, but the second one has not.
firstIsBetter <- function(c1, c2) { c1Solved <- res[[paste(c1, 'solved', sep='.')]] c2Solved <- res[[paste(c2, 'solved', sep='.')]] onlyC1Solved <- c1Solved & !(c2Solved) return(onlyC1Solved) } formulasFirstIsBetter <- function(c1, c2) { return(res[firstIsBetter(c1, c2)]) } compareConfigurations <- function(c1, c2) { return(sum(firstIsBetter(c1, c2))) }
We can use this function to generate the cross table.
crossTable <- function() { results <- c() for (c1 in configurations) { for (c2 in configurations) { results <- c(results, compareConfigurations(c1, c2)) } } results.table <- matrix(results, ncol=9,byrow=TRUE) colnames(results.table) <- configurations rownames(results.table) <- configurations out <- as.data.frame(results.table) return(out) }
3.3.2 Results
table <- crossTable() #table <- merge(table, uniqSolvedSum, by='row.names') # colnames(table) <- c("", "Boolector", "CVC4", "Q3B", "Z3", "Uniquely solved") table
none | noneUnconstrained | noneGoalUnconstrained | variables | variablesUnconstrained | variablesGoalUnconstrained | both | bothUnconstrained | bothGoalUnconstrained | |
---|---|---|---|---|---|---|---|---|---|
none | 0 | 2 | 2 | 13 | 9 | 8 | 9 | 8 | 6 |
noneUnconstrained | 152 | 0 | 0 | 134 | 7 | 6 | 112 | 7 | 5 |
noneGoalUnconstrained | 186 | 34 | 0 | 168 | 41 | 7 | 146 | 41 | 6 |
variables | 338 | 309 | 309 | 0 | 0 | 0 | 6 | 6 | 5 |
variablesUnconstrained | 464 | 312 | 312 | 130 | 0 | 0 | 111 | 6 | 5 |
variablesGoalUnconstrained | 497 | 345 | 312 | 164 | 34 | 0 | 145 | 40 | 5 |
both | 507 | 460 | 460 | 179 | 154 | 154 | 0 | 0 | 0 |
bothUnconstrained | 612 | 461 | 461 | 285 | 155 | 155 | 106 | 0 | 0 |
bothGoalUnconstrained | 645 | 494 | 461 | 319 | 189 | 155 | 141 | 35 | 0 |
xt <- xtable(table, type = "latex", label="tbl:cross", caption = "The table shows cross-comparison of solved benchmarks by all pairs of the solvers. Each cell shows the number of benchmarks that were solved by the solver in the corresponding row, but not by the solver by the corresponding column. The column \\emph{Uniquely solved} shows the number of benchmarks that were solved only by the given solver.") align(xt) <- "llrrrr|r" print(xt, file = "Tables/cross.tex", include.rownames=FALSE, booktabs=TRUE, table.placement="tbp")
4 Plots
4.1 Unsolved benchmarks
plotUnsolved <- function(cs) { cs <- rev(cs) unsolved = data.frame(family=character(), configuration=character(), stringsAsFactors=TRUE) for (c in cs) { cUnsolved <- res[["family"]][!res[[paste(c, 'solved', sep='.')]]] cUnsolved <- data.frame( family = cUnsolved, configuration = labels[c]) unsolved <- rbind(unsolved, cUnsolved) } print(levels(unsolved$family)) unsolved$family<- factor(unsolved$family, levels=sort(levels(unsolved$family))) print(levels(unsolved$family)) chart.data <- unsolved %>% arrange(family) %>% group_by(family, configuration) %>% summarize(freq = n()) %>% group_by(configuration) %>% mutate(pos = cumsum(freq) - (0.5 * freq)) ggplot(data = chart.data, aes(x = configuration, y = freq, fill = family)) + geom_bar(stat="identity", position=position_stack(reverse=TRUE)) + coord_flip() + geom_text(data=chart.data, aes(x = configuration, y = pos, label = ifelse(freq < 20, "", freq)), size=3) + labs(y = "Number of unsolved benchmarks (less is better)", x = NULL, fill = "Benchmark family") + scale_fill_brewer(palette = "Set2") + theme(legend.position="bottom") + guides(fill = guide_legend(ncol = 3)) }
plotUnsolved(c('none', 'noneUnconstrained', 'noneGoalUnconstrained', 'variables', 'variablesUnconstrained', 'variablesGoalUnconstrained', 'both', 'bothUnconstrained', 'bothGoalUnconstrained'))
pdf(file="Figures/unsolved.pdf",width=7,height=8); tryCatch({ plotUnsolved(c('none', 'noneUnconstrained', 'noneGoalUnconstrained', 'variables', 'variablesUnconstrained', 'variablesGoalUnconstrained', 'both', 'bothUnconstrained', 'bothGoalUnconstrained')) },error=function(e){plot(x=-1:1, y=-1:1, type='n', xlab='', ylab='', axes=FALSE); text(x=0, y=0, labels=e$message, col='red'); paste('ERROR', e$message, sep=' : ')}); dev.off()