Publications Search
Sam Witty, David Jensen, Vikash Mansinghka
A Simulation-Based Test of Identifiability for Bayesian Causal Inference Miscellaneous
2021.
Abstract | Links | BibTeX | Tags: Causal Modeling
% arXiv preprint (2021). eprint/archiveprefix carry the arXiv identifier (the
% same one that appears in url and volume) so arXiv-aware styles can print it.
% {Bayesian} is braced so sentence-casing styles keep the capital.
@misc{DBLP:journals/corr/abs-2102-11761,
  author        = {Sam Witty and David Jensen and Vikash Mansinghka},
  title         = {A Simulation-Based Test of Identifiability for {Bayesian} Causal Inference},
  journal       = {CoRR},
  volume        = {abs/2102.11761},
  eprint        = {2102.11761},
  archiveprefix = {arXiv},
  year          = {2021},
  date          = {2021-01-01},
  url           = {https://arxiv.org/abs/2102.11761},
  abstract      = {This paper introduces a procedure for testing the identifiability of Bayesian models for causal inference. Although the do-calculus is sound and complete given a causal graph, many practical assumptions cannot be expressed in terms of graph structure alone, such as the assumptions required by instrumental variable designs, regression discontinuity designs, and within-subjects designs. We present simulation-based identifiability (SBI), a fully automated identification test based on a particle optimization scheme with simulated observations. This approach expresses causal assumptions as priors over functions in a structural causal model, including flexible priors using Gaussian processes. We prove that SBI is asymptotically sound and complete, and produces practical finite-sample bounds. We also show empirically that SBI agrees with known results in graph-based identification as well as with widely-held intuitions for designs in which graph-based methods are inconclusive.},
  keywords      = {Causal Modeling},
  pubstate      = {published},
  tppubtype     = {misc}
}
Amanda M Gentzel, Purva Pruthi, David Jensen
How and Why to Use Experimental Data to Evaluate Methods for Observational Causal Inference Proceedings Article
In: International Conference on Machine Learning, pp. 3660–3671, PMLR 2021.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, International Conference on Machine Learning (2021).
% PMLR is the publisher of the proceedings, so it is stored in publisher rather
% than organization (which is the sponsor field); series and volume mirror the
% other PMLR entry in this file, with volume 139 taken from the v139 in the url.
@inproceedings{gentzel2021and,
  author    = {Amanda M Gentzel and Purva Pruthi and David Jensen},
  title     = {How and Why to Use Experimental Data to Evaluate Methods for Observational Causal Inference},
  booktitle = {International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
  pages     = {3660--3671},
  publisher = {PMLR},
  year      = {2021},
  date      = {2021-01-01},
  url       = {http://proceedings.mlr.press/v139/gentzel21a/gentzel21a.pdf},
  abstract  = {Methods that infer causal dependence from observational data are central to many areas of science, including medicine, economics, and the social sciences. A variety of theoretical properties of these methods have been proven, but empirical evaluation remains a challenge, largely due to the lack of observational data sets for which treatment effect is known. We describe and analyze observational sampling from randomized controlled trials (OSRCT), a method for evaluating causal inference methods using data from randomized controlled trials (RCTs). This method can be used to create constructed observational data sets with corresponding unbiased estimates of treatment effect, substantially increasing the number of data sets available for evaluating causal inference methods. We show that, in expectation, OSRCT creates data sets that are equivalent to those produced by randomly sampling from empirical data sets in which all potential outcomes are available. We then perform a large-scale evaluation of seven causal inference methods over 37 data sets, drawn from RCTs, as well as simulators, real-world computational systems, and observational data sets augmented with a synthetic response variable. We find notable performance differences when comparing across data from different sources, demonstrating the importance of using data from a variety of sources when evaluating any causal inference method.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sam Witty, Kenta Takatsu, David Jensen, Vikash Mansinghka
Causal Inference using Gaussian Processes with Structured Latent Confounders Proceedings Article
In: Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event, pp. 10313–10323, PMLR, 2020.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, ICML 2020 (PMLR volume 119).
% {Gaussian} is braced so sentence-casing styles keep the capital of the proper name.
@inproceedings{DBLP:conf/icml/WittyTJM20,
  author    = {Sam Witty and Kenta Takatsu and David Jensen and Vikash Mansinghka},
  title     = {Causal Inference using {Gaussian} Processes with Structured Latent Confounders},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  pages     = {10313--10323},
  publisher = {PMLR},
  year      = {2020},
  date      = {2020-01-01},
  url       = {http://proceedings.mlr.press/v119/witty20a.html},
  abstract  = {Latent confounders---unobserved variables that influence both treatment and outcome---can bias estimates of causal effects. In some cases, these confounders are shared across observations, e.g. all students taking a course are influenced by the course's difficulty in addition to any educational interventions they receive individually. This paper shows how to semiparametrically model latent confounders that have this structure and thereby improve estimates of causal effects. The key innovations are a hierarchical Bayesian model, Gaussian processes with structured latent confounders (GP-SLC), and a Monte Carlo inference algorithm for this model based on elliptical slice sampling. GP-SLC provides principled Bayesian uncertainty estimates of individual treatment effect with minimal assumptions about the functional forms relating confounders, covariates, treatment, and outcome. Finally, this paper shows GP-SLC is competitive with or more accurate than widely used causal inference techniques on three benchmark datasets, including the Infant Health and Development Program and a dataset showing the effect of changing temperatures on state-wide energy consumption across New England.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Amanda Gentzel, Dan Garant, David Jensen
The Case for Evaluating Causal Models Using Interventional Measures and Empirical Data Proceedings Article
In: Advances in Neural Information Processing Systems, Curran Associates, Inc., 2019.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, Advances in Neural Information Processing Systems, volume 32 (2019).
@inproceedings{gentzel2019case,
  author    = {Amanda Gentzel and Dan Garant and David Jensen},
  title     = {The Case for Evaluating Causal Models Using Interventional Measures and Empirical Data},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {32},
  publisher = {Curran Associates, Inc.},
  year      = {2019},
  date      = {2019-01-01},
  url       = {https://proceedings.neurips.cc/paper/2019/file/a87c11b9100c608b7f8e98cfa316ff7b-Paper.pdf},
  abstract  = {Causal inference is central to many areas of artificial intelligence, including complex reasoning, planning, knowledge-base construction, robotics, explanation, and fairness. An active community of researchers develops and enhances algorithms that learn causal models from data, and this work has produced a series of impressive technical advances. However, evaluation techniques for causal modeling algorithms have remained somewhat primitive, limiting what we can learn from experimental studies of algorithm performance, constraining the types of algorithms and model representations that researchers consider, and creating a gap between theory and practice. We argue for more frequent use of evaluation techniques that examine interventional measures rather than structural or observational measures, and that evaluate those measures on empirical data rather than synthetic data. We survey the current practice in evaluation and show that the techniques we recommend are rarely used in practice. We show that such techniques are feasible and that data sets are available to conduct such evaluations. We also show that these techniques produce substantially different results than using structural measures and synthetic data.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Kaleigh Clary, Andrew McGregor, David Jensen
A/B Testing in Networks with Adversarial Nodes Proceedings Article
In: KDD Workshop on Mining and Learning with Graphs, 2017.
BibTeX | Tags: Causal Modeling
% Workshop paper, KDD Workshop on Mining and Learning with Graphs (2017).
% {A/B} is braced so sentence-casing styles do not lowercase the "B".
@inproceedings{clary2017adversaries,
  author    = {Kaleigh Clary and Andrew McGregor and David Jensen},
  title     = {{A/B} Testing in Networks with Adversarial Nodes},
  booktitle = {KDD Workshop on Mining and Learning with Graphs},
  year      = {2017},
  date      = {2017-01-01},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Dan Garant, David Jensen
Evaluating causal models by comparing interventional distributions Miscellaneous
2016.
Abstract | Links | BibTeX | Tags: Causal Modeling
% arXiv preprint (2016). The arXiv identifier from the url is stored in
% eprint/archiveprefix instead of being embedded in a free-text journal string;
% journal/volume follow the CoRR convention used by the other arXiv entries in this file.
@misc{garant2016evaluating,
  author        = {Dan Garant and David Jensen},
  title         = {Evaluating causal models by comparing interventional distributions},
  journal       = {CoRR},
  volume        = {abs/1608.04698},
  eprint        = {1608.04698},
  archiveprefix = {arXiv},
  year          = {2016},
  date          = {2016-01-01},
  url           = {https://arxiv.org/abs/1608.04698},
  abstract      = {The predominant method for evaluating the quality of causal models is to measure the graphical accuracy of the learned model structure. We present an alternative method for evaluating causal models that directly measures the accuracy of estimated interventional distributions. We contrast such distributional measures with structural measures, such as structural Hamming distance and structural intervention distance, showing that structural measures often correspond poorly to the accuracy of estimated interventional distributions. We use a number of real and synthetic datasets to illustrate various scenarios in which structural measures provide misleading results with respect to algorithm selection and parameter tuning, and we recommend that distributional measures become the new standard for evaluating causal models.},
  keywords      = {Causal Modeling},
  pubstate      = {published},
  tppubtype     = {misc}
}
David Arbour, Katerina Marazopoulou, David Jensen
Inferring Causal Direction from Relational Data Proceedings Article
In: Proceedings of the Thirty-Second Conference on Uncertainty in Artificial Intelligence, UAI 2016, June 25-29, 2016, New York City, NY, USA, AUAI Press, 2016.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, UAI 2016 (AUAI Press).
@inproceedings{DBLP:conf/uai/ArbourMJ16,
  author    = {David Arbour and Katerina Marazopoulou and David Jensen},
  title     = {Inferring Causal Direction from Relational Data},
  booktitle = {Proceedings of the Thirty-Second Conference on Uncertainty in Artificial Intelligence, UAI 2016, June 25-29, 2016, New York City, NY, USA},
  publisher = {AUAI Press},
  year      = {2016},
  date      = {2016-01-01},
  url       = {http://auai.org/uai2016/proceedings/papers/217.pdf},
  abstract  = {Inferring the direction of causal dependence from observational data is a fundamental problem in many scientific fields. Significant progress has been made in inferring causal direction from data that are independent and identically distributed (i.i.d.), but little is understood about this problem in the more general relational setting with multiple types of interacting entities. This work examines the task of inferring the causal direction of peer dependence in relational data. We show that, in contrast to the i.i.d. setting, the direction of peer dependence can be inferred using simple procedures, regardless of the form of the underlying distribution, and we provide a theoretical characterization on the identifiability of direction. We then examine the conditions under which the presence of confounding can be detected. Finally, we demonstrate the efficacy of the proposed methods with synthetic experiments, and we provide an application on real-world data.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Arbour, Dan Garant, David Jensen
Inferring Network Effects from Observational Data Proceedings Article
In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Francisco, CA, USA, August 13-17, 2016, pp. 715–724, ACM, 2016.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, ACM SIGKDD 2016.
% The abstract's "real- world" (a line-break hyphenation artifact) is repaired to "real-world".
@inproceedings{DBLP:conf/kdd/ArbourGJ16,
  author    = {David Arbour and Dan Garant and David Jensen},
  title     = {Inferring Network Effects from Observational Data},
  booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Francisco, CA, USA, August 13-17, 2016},
  pages     = {715--724},
  publisher = {ACM},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.1145/2939672.2939791},
  url       = {https://doi.org/10.1145/2939672.2939791},
  abstract  = {We present Relational Covariate Adjustment (RCA), a general method for estimating causal effects in relational data. Relational Covariate Adjustment is implemented through two high-level operations: identification of an adjustment set and relational regression adjustment. The former is achieved through an extension of Pearl’s back-door criterion to relational domains. We demonstrate how this extended definition can be used to estimate causal effects in the presence of network interference and confounding. RCA is agnostic to functional form, and it can easily model both discrete and continuous treatments as well as estimate the effects of a wider array of network interventions than existing experimental approaches. We show that RCA can yield robust estimates of causal effects using common regression models without extensive parameter tuning. Through a series of simulation experiments on a variety of synthetic and real-world network structures, we show that causal effects estimated on observational data with RCA are nearly as accurate as those estimated from well-designed network experiments.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Katerina Marazopoulou, Marc Maier, David Jensen
Learning the Structure of Causal Models with Relational and Temporal Dependence Proceedings Article
In: Proceedings of the UAI 2015 Workshop on Advances in Causal Inference co-located with the 31st Conference on Uncertainty in Artificial Intelligence (UAI 2015), Amsterdam, The Netherlands, July 16, 2015, pp. 66–75, CEUR-WS.org, 2015.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Workshop paper, UAI 2015 Workshop on Advances in Causal Inference (CEUR Workshop Proceedings, volume 1504).
@inproceedings{DBLP:conf/uai/MarazopoulouMJ15,
  author    = {Katerina Marazopoulou and Marc Maier and David Jensen},
  title     = {Learning the Structure of Causal Models with Relational and Temporal Dependence},
  booktitle = {Proceedings of the UAI 2015 Workshop on Advances in Causal Inference co-located with the 31st Conference on Uncertainty in Artificial Intelligence (UAI 2015), Amsterdam, The Netherlands, July 16, 2015},
  series    = {CEUR Workshop Proceedings},
  volume    = {1504},
  pages     = {66--75},
  publisher = {CEUR-WS.org},
  year      = {2015},
  date      = {2015-01-01},
  url       = {http://ceur-ws.org/Vol-1504/uai2015aci_paper6.pdf},
  abstract  = {Many real-world domains are inherently relational and temporal—they consist of heterogeneous entities that interact with each other over time. Effective reasoning about causality in such domains requires representations that explicitly model relational and temporal dependence. In this work, we provide a formalization of temporal relational models. We define temporal extensions to abstract ground graphs—a lifted representation that abstracts paths of dependence over all possible ground graphs. Temporal abstract ground graphs enable a sound and complete method for answering d-separation queries on temporal relational models. These methods provide the foundation for a constraint-based algorithm, TRCD, that learns causal models from temporal relational data. We provide experimental evidence that demonstrates the need to explicitly represent time when inferring causal dependence. We also demonstrate the expressive gain of TRCD compared to earlier algorithms that do not explicitly represent time.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Arbour, Katerina Marazopoulou, Dan Garant, David Jensen
Propensity Score Matching for Causal Inference with Relational Data Proceedings Article
In: Proceedings of the UAI 2014 Workshop Causal Inference: Learning and Prediction co-located with 30th Conference on Uncertainty in Artificial Intelligence (UAI 2014), Quebec City, Canada, July 27, 2014, pp. 25–34, CEUR-WS.org, 2014.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Workshop paper, UAI 2014 Workshop Causal Inference: Learning and Prediction (CEUR Workshop Proceedings, volume 1274).
@inproceedings{DBLP:conf/uai/ArbourMGJ14,
  author    = {David Arbour and Katerina Marazopoulou and Dan Garant and David Jensen},
  title     = {Propensity Score Matching for Causal Inference with Relational Data},
  booktitle = {Proceedings of the UAI 2014 Workshop Causal Inference: Learning and Prediction co-located with 30th Conference on Uncertainty in Artificial Intelligence (UAI 2014), Quebec City, Canada, July 27, 2014},
  series    = {CEUR Workshop Proceedings},
  volume    = {1274},
  pages     = {25--34},
  publisher = {CEUR-WS.org},
  year      = {2014},
  date      = {2014-01-01},
  url       = {http://ceur-ws.org/Vol-1274/uai2014ci_paper5.pdf},
  abstract  = {Propensity score matching (PSM) is a widely used method for performing causal inference with observational data. PSM requires fully specifying the set of confounding variables of treatment and outcome. In the case of relational data, this set may include non-intuitive relational variables, i.e., variables derived from the relational structure of the data. In this work, we provide an automated method to derive these relational variables based on the relational structure and a set of naive confounders. This automatic construction includes two unusual classes of variables: relational degree and entity identifiers. We provide experimental evidence that demonstrates the utility of these variables in accounting for certain latent confounders. Finally, through a set of synthetic experiments, we show that our method improves the performance of PSM for causal inference with relational data.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Marc Maier, Katerina Marazopoulou, David Arbour, David Jensen
A Sound and Complete Algorithm for Learning Causal Models from Relational Data Proceedings Article
In: Proceedings of the Twenty-Ninth Conference on Uncertainty in Artificial Intelligence, UAI 2013, Bellevue, WA, USA, August 11-15, 2013, AUAI Press, 2013.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, UAI 2013 (AUAI Press).
% NOTE(review): the abstract field below does not appear to belong to this paper.
% It describes inferring rare links between pairs of similar entities (twins,
% cell phone users), whereas the title concerns learning causal models from
% relational data. Confirm against the published abstract and replace.
@inproceedings{DBLP:conf/uai/MaierMAJ13,
title = {A Sound and Complete Algorithm for Learning Causal Models from Relational
Data},
author = {Marc Maier and Katerina Marazopoulou and David Arbour and David Jensen},
url = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=2398&proceeding_id=29},
year = {2013},
date = {2013-01-01},
booktitle = {Proceedings of the Twenty-Ninth Conference on Uncertainty in Artificial
Intelligence, UAI 2013, Bellevue, WA, USA, August 11-15, 2013},
publisher = {AUAI Press},
abstract = {In this paper, we analyze the task of inferring rare links between pairs of entities that seem too similar to have occurred by chance. Variations of this task appear in such diverse areas as social network analysis, security, fraud detection, and entity resolution. To address the task in a general form, we propose a simple, flexible mixture model in which most entities are generated independently from a distribution but a small number of pairs are constrained to be similar. We predict the true pairs using a likelihood ratio that trades off the entities’ similarity with their rarity. This method always outperforms using only similarity; however, with certain parameter settings, similarity turns out to be surprisingly competitive. Using real data, we apply the model to detect twins given their birth weights and to re-identify cell phone users based on distinctive usage patterns.},
keywords = {Causal Modeling},
pubstate = {published},
tppubtype = {inproceedings}
}
Marc Maier, Katerina Marazopoulou, David Jensen
Reasoning about Independence in Probabilistic Models of Relational Data Miscellaneous
2013.
Abstract | Links | BibTeX | Tags: Causal Modeling
% arXiv preprint (2013). eprint/archiveprefix carry the arXiv identifier (the
% same one that appears in url and volume) so arXiv-aware styles can print it.
% The url uses https, matching the other arXiv links in this file.
@misc{DBLP:journals/corr/abs-1302-4381,
  author        = {Marc Maier and Katerina Marazopoulou and David Jensen},
  title         = {Reasoning about Independence in Probabilistic Models of Relational Data},
  journal       = {CoRR},
  volume        = {abs/1302.4381},
  eprint        = {1302.4381},
  archiveprefix = {arXiv},
  year          = {2013},
  date          = {2013-01-01},
  url           = {https://arxiv.org/abs/1302.4381},
  abstract      = {We extend the theory of d-separation to cases in which data instances are not independent and identically distributed. We show that applying the rules of d-separation directly to the structure of probabilistic models of relational data inaccurately infers conditional independence. We introduce relational d-separation, a theory for deriving conditional independence facts from relational models. We provide a new representation, the abstract ground graph, that enables a sound, complete, and computationally efficient method for answering d-separation queries about relational models, and we present empirical results that demonstrate effectiveness.},
  keywords      = {Causal Modeling},
  pubstate      = {published},
  tppubtype     = {misc}
}
Matthew Rattigan, Marc Maier, David Jensen
Relational blocking for causal discovery Proceedings Article
In: Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence, 2011.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, Twenty-Fifth AAAI Conference on Artificial Intelligence (2011).
@inproceedings{rattigan2011relational,
  author    = {Matthew Rattigan and Marc Maier and David Jensen},
  title     = {Relational blocking for causal discovery},
  booktitle = {Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence},
  volume    = {25},
  number    = {1},
  year      = {2011},
  date      = {2011-01-01},
  url       = {http://www.aaai.org/ocs/index.php/AAAI/AAAI11/paper/view/3760},
  abstract  = {Blocking is a technique commonly used in manual statistical analysis to account for confounding variables. However, blocking is not currently used in automated learning algorithms. These algorithms rely solely on statistical conditioning as an operator to identify conditional independence. In this work, we present relational blocking as a new operator that can be used for learning the structure of causal models. We describe how blocking is enabled by relational data sets, where blocks are determined by the links in the network. By blocking on entities rather than conditioning on variables, relational blocking can account for both measured and unobserved variables. We explain the mechanism of these methods using graphical models and the semantics of d-separation. Finally, we demonstrate the effectiveness of relational blocking for use in causal discovery by showing how blocking can be used in the causal analysis of two real-world social media systems.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Huseyin Oktay, Brian Taylor, David Jensen
Causal discovery in social media using quasi-experimental designs Proceedings Article
In: Proceedings of the 3rd Workshop on Social Network Mining and Analysis, SNAKDD, pp. 1–9, 2010.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Workshop paper, 3rd Workshop on Social Network Mining and Analysis, SNAKDD (2010).
% The doi field is added from the DOI resolver url, matching the other entries
% in this file that carry both url and doi.
@inproceedings{oktay2010causal,
  author    = {Huseyin Oktay and Brian Taylor and David Jensen},
  title     = {Causal discovery in social media using quasi-experimental designs},
  booktitle = {Proceedings of the 3rd Workshop on Social Network Mining and Analysis, SNAKDD},
  pages     = {1--9},
  year      = {2010},
  date      = {2010-01-01},
  doi       = {10.1145/1964858.1964859},
  url       = {https://doi.org/10.1145/1964858.1964859},
  abstract  = {Social media systems have become increasingly attractive to both users and companies providing those systems. Efficient management of these systems is essential and requires knowledge of cause-and-effect relationships within the system. Online experimentation can be used to discover causal knowledge; however, this ignores the observational data that is already being collected for operational purposes. Quasi-experimental designs (QEDs) are commonly used in social sciences to discover causal knowledge from observational data, and QEDs can be exploited to discover causal knowledge about social media systems. In this paper, we apply three different QEDs to demonstrate how one can gain a causal understanding of a social media system. The conclusions drawn from using a QED can have threats to their validity, but we show how one can carefully construct sophisticated designs to overcome some of those threats.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Marc Maier, Brian Taylor, Huseyin Oktay, David Jensen
Learning causal models of relational domains Proceedings Article
In: Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2010, Atlanta, Georgia, USA, July 11-15, 2010, 2010.
Abstract | Links | BibTeX | Tags: Causal Modeling
% Conference paper, Twenty-Fourth AAAI Conference on Artificial Intelligence (AAAI 2010).
@inproceedings{maier2010learning,
  author    = {Marc Maier and Brian Taylor and Huseyin Oktay and David Jensen},
  title     = {Learning causal models of relational domains},
  booktitle = {Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2010, Atlanta, Georgia, USA, July 11-15, 2010},
  volume    = {24},
  number    = {1},
  year      = {2010},
  date      = {2010-01-01},
  url       = {http://www.aaai.org/ocs/index.php/AAAI/AAAI10/paper/view/1919},
  abstract  = {Methods for discovering causal knowledge from observational data have been a persistent topic of AI research for several decades. Essentially all of this work focuses on knowledge representations for propositional domains. In this paper, we present several key algorithmic and theoretical innovations that extend causal discovery to relational domains. We provide strong evidence that effective learning of causal models is enhanced by relational representations. We present an algorithm, relational PC, that learns causal dependencies in a state-of-the-art relational representation, and we identify the key representational and algorithmic innovations that make the algorithm possible. Finally, we prove the algorithm's theoretical correctness and demonstrate its effectiveness on synthetic and real data sets.},
  keywords  = {Causal Modeling},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Andrew Fast, Brian Taylor, Marc Maier
Automatic Identification of Quasi-Experimental Designs for Discovering Causal Knowledge Proceedings Article
In: Proceedings of the 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 372–380, Association for Computing Machinery, Las Vegas, Nevada, USA, 2008, ISBN: 9781605581934.
Abstract | Links | BibTeX | Tags: causal discovery, Causal Modeling, quasi-experimental design
% Conference paper, ACM SIGKDD 2008 (series KDD '08).
% The page range uses the BibTeX double hyphen instead of a literal en dash,
% matching every other pages field in this file.
@inproceedings{10.1145/1401890.1401938,
  author    = {David Jensen and Andrew Fast and Brian Taylor and Marc Maier},
  title     = {Automatic Identification of Quasi-Experimental Designs for Discovering Causal Knowledge},
  booktitle = {Proceedings of the 14th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  series    = {KDD '08},
  pages     = {372--380},
  publisher = {Association for Computing Machinery},
  address   = {Las Vegas, Nevada, USA},
  year      = {2008},
  date      = {2008-01-01},
  isbn      = {9781605581934},
  doi       = {10.1145/1401890.1401938},
  url       = {https://doi.org/10.1145/1401890.1401938},
  abstract  = {Researchers in the social and behavioral sciences routinely rely on quasi-experimental designs to discover knowledge from large data-bases. Quasi-experimental designs (QEDs) exploit fortuitous circumstances in non-experimental data to identify situations (sometimes called "natural experiments") that provide the equivalent of experimental control and randomization. QEDs allow researchers in domains as diverse as sociology, medicine, and marketing to draw reliable inferences about causal dependencies from non-experimental data. Unfortunately, identifying and exploiting QEDs has remained a painstaking manual activity, requiring researchers to scour available databases and apply substantial knowledge of statistics. However, recent advances in the expressiveness of databases, and increases in their size and complexity, provide the necessary conditions to automatically identify QEDs. In this paper, we describe the first system to discover knowledge by applying quasi-experimental designs that were identified automatically. We demonstrate that QEDs can be identified in a traditional database schema and that such identification requires only a small number of extensions to that schema, knowledge about quasi-experimental design encoded in first-order logic, and a theorem-proving engine. We describe several key innovations necessary to enable this system, including methods for automatically constructing appropriate experimental units and for creating aggregate variables on those units. We show that applying the resulting designs can identify important causal dependencies in real domains, and we provide examples from academic publishing, movie making and marketing, and peer-production systems. Finally, we discuss the integration of QEDs with other approaches to causal discovery, including joint modeling and directed experimentation.},
  keywords  = {causal discovery, Causal Modeling, quasi-experimental design},
  pubstate  = {published},
  tppubtype = {inproceedings}
}