@proceedings {174, title = {Efficient and Robust Independence-Based Markov Network Structure Discovery}, journal = {20th International Joint Conference on Artificial Intelligence (IJCAI)}, year = {2007}, pages = {2431-2436}, publisher = {Morgan Kaufmann Publishers Inc.}, address = {San Francisco, CA}, abstract = {

In this paper we introduce a novel algorithm for the induction of the Markov network structure of a domain from the outcomes of conditional independence tests on data. Such algorithms work by successively restricting the set of possible structures until there is only a single structure consistent with the conditional independence tests executed. Existing independence-based algorithms have well-known shortcomings, such as rigidly ordering the sequence of tests they perform, resulting in potential inefficiencies in the number of tests required, and committing fully to the test outcomes, resulting in a lack of robustness in case of unreliable tests. We address both problems through a Bayesian particle filtering approach, which uses a population of Markov network structures to maintain the posterior probability distribution over them, given the outcomes of the tests performed. Instead of a fixed ordering, our approach greedily selects, at each step, the optimally informative test from a pool of candidates according to information gain. In addition, it maintains multiple candidate structures weighted by posterior probability, which makes it more robust to errors in the test outcomes. The result is an approximate algorithm (due to the use of particle filtering) that is useful in domains where independence tests are uncertain (such as applications where little data is available) or expensive (such as cases of very large data sets and/or distributed data).
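
The particle-filtering step described here can be illustrated with a brief, hypothetical Python sketch (not the authors' implementation; all names and the 0.9 test-reliability figure are illustrative assumptions). A structure is represented as a set of undirected edges, asserting an independence exactly when the conditioning set disconnects the two variables, and each observed test outcome reweights and resamples the population.

import random
from collections import deque
from itertools import combinations

def separated(edges, x, y, z):
    # A structure asserts I(x, y | z) iff x and y are disconnected once z is removed.
    blocked, frontier, seen = set(z), deque([x]), {x}
    while frontier:
        u = frontier.popleft()
        for a, b in edges:
            v = b if a == u else a if b == u else None
            if v is None or v in blocked or v in seen:
                continue
            if v == y:
                return False
            seen.add(v)
            frontier.append(v)
    return True

def filter_step(particles, weights, test, outcome, reliability=0.9):
    # Reweight each structure by how well it agrees with the observed outcome,
    # then resample to refocus the population on plausible structures.
    x, y, z = test
    w = [wi * (reliability if separated(p, x, y, z) == outcome else 1.0 - reliability)
         for p, wi in zip(particles, weights)]
    total = sum(w) or 1.0
    w = [wi / total for wi in w]
    resampled = random.choices(particles, weights=w, k=len(particles))
    return resampled, [1.0 / len(particles)] * len(particles)

# toy run over four variables
variables = range(4)
all_edges = list(combinations(variables, 2))
particles = [frozenset(e for e in all_edges if random.random() < 0.5) for _ in range(200)]
weights = [1.0 / len(particles)] * len(particles)
particles, weights = filter_step(particles, weights, (0, 2, {1}), True)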

}, url = {http://www.aaai.org/Library/IJCAI/2007/ijcai07-391.php}, author = {Bromberg, Facundo and Margaritis, Dimitris} } @proceedings {176, title = {Efficient Markov network structure discovery using independence tests}, journal = {Proceedings of the SIAM Conference on Data Mining}, year = {2006}, pages = {141--152}, address = {Bethesda, Maryland, USA}, abstract = {

We present two algorithms for learning the structure of a Markov network from discrete data: GSMN and GSIMN. Both algorithms use statistical conditional independence tests on data to infer the structure by successively constraining the set of structures consistent with the results of these tests. GSMN is a natural adaptation of the Grow-Shrink algorithm of Margaritis and Thrun for learning the structure of Bayesian networks. GSIMN extends GSMN by additionally exploiting Pearl{\textquoteright}s well-known properties of conditional independence relations to infer novel independencies from known independencies, thus avoiding the need to perform these tests. Experiments on artificial and real data sets show that GSIMN can yield savings of up to 70\% with respect to GSMN, while generating a Markov network of comparable or, in several cases, considerably improved quality. In addition to GSMN, we also compare GSIMN to a forward-chaining implementation, called GSIMN-FCH, that produces all possible conditional independence results by repeatedly applying Pearl{\textquoteright}s theorems to the known conditional independence tests. The results of this comparison show that GSIMN is nearly optimal in terms of the number of tests it can infer, under a fixed ordering of the tests performed.
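
The idea of inferring test results instead of running them can be sketched in Python as follows (a hypothetical illustration, not the GSIMN code). It closes a set of known facts under symmetry, strong union, and transitivity, properties that hold for undirected-graph separation and hence for graph-isomorph independence relations; facts obtained this way never require a statistical test on data.

def close_under_properties(indeps, deps, variables):
    # indeps / deps: sets of facts (x, y, frozenset(z)); returns both sets enlarged
    # by symmetry, strong union (independencies) and transitivity (dependencies).
    indeps, deps = set(indeps), set(deps)
    changed = True
    while changed:
        changed = False
        for store in (indeps, deps):            # symmetry: facts hold in both orientations
            for (x, y, z) in list(store):
                if (y, x, z) not in store:
                    store.add((y, x, z))
                    changed = True
        for (x, y, z) in list(indeps):          # strong union: I(x,y|z) gives I(x,y|z plus w)
            for w in variables:
                if w not in (x, y) and w not in z and (x, y, z | {w}) not in indeps:
                    indeps.add((x, y, z | {w}))
                    changed = True
        for (x, w, z) in list(deps):            # transitivity: D(x,w|z), D(w,y|z) give D(x,y|z)
            for (w2, y, z2) in list(deps):
                if w == w2 and z == z2 and y != x and (x, y, z) not in deps:
                    deps.add((x, y, z))
                    changed = True
    return indeps, deps

# toy usage: two dependencies and one independence seed several inferred facts,
# e.g. ("a", "c", frozenset()) becomes a known dependence without touching the data
V = {"a", "b", "c", "d"}
I1, D1 = close_under_properties({("a", "c", frozenset({"b"}))},
                                {("a", "b", frozenset()), ("b", "c", frozenset())}, V)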

}, isbn = {978-0-89871-611-5}, doi = {10.1137/1.9781611972764.13}, url = {http://epubs.siam.org/doi/abs/10.1137/1.9781611972764.13}, author = {Bromberg, Facundo and Margaritis, Dimitris and Honavar, Vasant} } @proceedings {173, title = {Learning Markov Network Structure using Few Independence Tests}, journal = {Proceedings of the SIAM Conference on Data Mining}, year = {2008}, pages = {680--691}, abstract = {
In this paper we present the Dynamic Grow-Shrink Inference-based Markov network learning algorithm (abbreviated DGSIMN), which improves on GSIMN, the state-of-the-art algorithm for learning the structure of the Markov network of a domain from independence tests on data. DGSIMN, like other independence-based algorithms, works by conducting a series of statistical conditional independence tests toward the goal of restricting the number of possible structures to one, thus inferring that structure as the only possibly correct one. During this process, DGSIMN, like the GSIMN algorithm, uses the axioms that govern the probabilistic independence relation to avoid unnecessary tests, i.e., tests that can be inferred from the results of known ones. This results in both efficiency and reliability advantages over the simple application of statistical tests. However, one weakness of GSIMN is its rigid and heuristic ordering of the execution of tests, which results in potentially inefficient execution. DGSIMN instead uses a principled strategy, dynamically selecting the locally optimal test that is expected to increase the state of our knowledge about the structure the most. This is done by calculating the expected number of independence facts that will become known (through inference) after executing a particular test (before it is actually evaluated on data), and by selecting the one that is expected to maximize the number of such inferences, thus avoiding their potentially expensive evaluation on data. As we demonstrate in our experiments, this results in an overall decrease in the computational requirements of the algorithm, sometimes dramatically, due to the decreased number of tests required to be evaluated on data. Experiments show that DGSIMN yields savings of up to 88\% on both sampled and benchmark data while achieving similar or better accuracy in most cases.
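
The test-selection strategy described above can be sketched in Python as follows (hypothetical names; a simplification of the idea, not the DGSIMN implementation). Given any procedure that closes a knowledge base of test results under inference rules, each candidate test is scored by how many additional facts its two possible outcomes would let us infer, weighted by a rough prior on the outcome, and the highest-scoring test is run next.

def expected_gain(candidate, known, infer, p_indep=0.5):
    # known: dict mapping (x, y, conditioning set) to True (independent) / False.
    # infer: function returning the knowledge base closed under the inference rules.
    baseline = len(infer(known))
    gains = []
    for outcome in (True, False):
        trial = dict(known)
        trial[candidate] = outcome
        gains.append(len(infer(trial)) - baseline - 1)   # exclude the candidate itself
    return p_indep * gains[0] + (1.0 - p_indep) * gains[1]

def pick_next_test(candidates, known, infer):
    # Greedy choice: the test expected to settle the most other tests for free.
    return max(candidates, key=lambda c: expected_gain(c, known, infer))

# toy usage with a deliberately weak inference rule (symmetry only); a richer
# closure, e.g. the full set of Markov axioms, differentiates the candidates more
def symmetry_closure(kb):
    out = dict(kb)
    for (x, y, z), val in kb.items():
        out.setdefault((y, x, z), val)
    return out

kb = {("a", "b", frozenset()): False}
best = pick_next_test([("b", "c", frozenset()), ("a", "c", frozenset({"b"}))],
                      kb, symmetry_closure)
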
}, isbn = {978-1-61197-278-8}, issn = {978-0-89871-654-2}, doi = {10.1137/1.9781611972788.62}, url = {http://epubs.siam.org/doi/abs/10.1137/1.9781611972788.62}, author = {Gandhi, Parichey and Bromberg, Facundo and Margaritis, Dimitris} } @article {170, title = {Efficient Markov network discovery using particle filters}, journal = {Computational Intelligence}, volume = {25}, year = {2009}, month = {11/2009}, pages = {367{\textendash}394}, abstract = {

In this paper, we introduce an efficient independence-based algorithm for the induction of the Markov network (MN) structure of a domain from the outcomes of independence tests conducted on data. Our algorithm utilizes a particle filter (sequential Monte Carlo) method to maintain a population of MN structures that represent the posterior probability distribution over structures, given the outcomes of the tests performed. This enables us to select, at each step, the maximally informative test to conduct next from a pool of candidates according to information gain, thereby minimizing the cost of the statistical tests conducted on data. This makes our approach useful in domains where independence tests are expensive, such as cases of very large data sets and/or distributed data. In addition, our method maintains multiple candidate structures weighted by posterior probability, which allows flexibility in the presence of potential errors in the test outcomes.
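
The information-gain criterion mentioned above can be sketched as follows (a hypothetical simplification, not the paper's derivation): assuming a noise-free test model, the expected information gain of a test reduces to the entropy of its predicted outcome under the current particle weights, so the selection rule picks the test whose outcome the population is most unsure about.

from math import log2

def entropy(p):
    p = min(max(p, 0.0), 1.0)
    return 0.0 if p in (0.0, 1.0) else -(p * log2(p) + (1 - p) * log2(1 - p))

def predicted_p_indep(particles, weights, asserts_indep, test):
    # Particle-weighted probability that the structure entails the independence;
    # asserts_indep(structure, test) is supplied by the structure representation.
    return sum(w for s, w in zip(particles, weights) if asserts_indep(s, test))

def most_informative_test(candidates, particles, weights, asserts_indep):
    return max(candidates,
               key=lambda t: entropy(predicted_p_indep(particles, weights, asserts_indep, t)))

# toy usage: structures as edge sets, unconditional tests only, so a structure
# asserts independence of a pair exactly when it lacks that edge
particles = [frozenset({("a", "b")}), frozenset(), frozenset({("a", "b"), ("b", "c")})]
weights = [0.5, 0.3, 0.2]
no_edge = lambda s, t: (t[0], t[1]) not in s and (t[1], t[0]) not in s
best = most_informative_test([("a", "b", frozenset()), ("b", "c", frozenset())],
                             particles, weights, no_edge)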

}, keywords = {graphical model structure learning, Markov networks, particle filters, sequential Monte Carlo}, doi = {10.1111/j.1467-8640.2009.00347.x}, url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1467-8640.2009.00347.x/abstract}, author = {Margaritis, Dimitris and Bromberg, Facundo} } @article {171, title = {Efficient Markov network structure discovery using independence tests}, journal = {Journal of Artificial Intelligence Research}, volume = {35}, year = {2009}, pages = {449{\textendash}484}, abstract = {

We present two algorithms for learning the structure of a Markov network from data: GSMN* and GSIMN. Both algorithms use statistical independence tests to infer the structure by successively constraining the set of structures consistent with the results of these tests. Until very recently, algorithms for structure learning were based on maximum likelihood estimation, which has been proved to be NP-hard for Markov networks due to the difficulty of estimating the parameters of the network, needed for the computation of the data likelihood. The independence-based approach does not require the computation of the likelihood, and thus both GSMN* and GSIMN can compute the structure efficiently (as shown in our experiments). GSMN* is an adaptation of the Grow-Shrink algorithm of Margaritis and Thrun for learning the structure of Bayesian networks. GSIMN extends GSMN* by additionally exploiting Pearl{\textquoteright}s well-known properties of the conditional independence relation to infer novel independences from known ones, thus avoiding the need to perform statistical tests to estimate them. To accomplish this efficiently, GSIMN uses the Triangle theorem, also introduced in this work, which is a simplified version of the set of Markov axioms. Experimental comparisons on artificial and real-world data sets show that GSIMN can yield significant savings with respect to GSMN*, while generating a Markov network of comparable or, in some cases, improved quality. We also compare GSIMN to a forward-chaining implementation, called GSIMN-FCH, that produces all possible conditional independences resulting from repeatedly applying Pearl{\textquoteright}s theorems to the known conditional independence tests. The results of this comparison show that GSIMN, by the sole use of the Triangle theorem, is nearly optimal in terms of the set of independence tests that it infers.
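
One inference of the kind the Triangle theorem enables can be illustrated with the following hedged Python sketch (the theorem's exact statement in the paper is richer; the rule below is stated only as a property of undirected-graph separation): if X and Y are dependent given Z1, and Y and W are dependent given Z2, then X and W are dependent given the intersection of Z1 and Z2, so that third test never has to be evaluated on data.

def chain_dependencies(deps):
    # deps: set of dependence facts (x, y, frozenset(z)); chains pairs of facts
    # through a shared variable until no new fact can be added.
    deps = set(deps)
    changed = True
    while changed:
        changed = False
        for (x, y, z1) in list(deps):
            for (y2, w, z2) in list(deps):
                if y == y2 and w != x:
                    new = (x, w, z1.intersection(z2))
                    if new not in deps and (w, x, z1.intersection(z2)) not in deps:
                        deps.add(new)
                        changed = True
    return deps

# toy usage: D(a,b | {c}) and D(b,d | {c,e}) yield D(a,d | {c}) for free
facts = {("a", "b", frozenset({"c"})), ("b", "d", frozenset({"c", "e"}))}
facts = chain_dependencies(facts)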

}, doi = {10.1613/jair.2773}, url = {http://www.jair.org/papers/paper2773.html}, author = {Bromberg, Facundo and Margaritis, Dimitris} } @article {172, title = {Improving the reliability of causal discovery from small data sets using argumentation}, journal = {The Journal of Machine Learning Research}, volume = {10}, year = {2009}, month = {02/2009}, pages = {301{\textendash}340}, abstract = {

We address the low reliability of independence-based causal discovery algorithms that results from the execution of statistical independence tests on small data sets, where such tests are typically unreliable. We model the problem as a knowledge base containing a set of independence facts that are related through Pearl{\textquoteright}s well-known axioms. Statistical tests on finite data sets may result in errors in these tests and hence in inconsistencies in the knowledge base. We resolve these inconsistencies through the use of an instance of the class of defeasible logics called argumentation, augmented with a preference function, which is used to reason about and possibly correct errors in these tests. This results in a more robust conditional independence test, called an argumentative independence test. Our experimental evaluation shows clear positive improvements in the accuracy of argumentative over purely statistical tests. We also demonstrate significant improvements in the accuracy of causal structure discovery from the outcomes of independence tests, both on data sampled from randomly generated causal models and on real-world data sets.
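
A drastically simplified stand-in for the argumentative test can be sketched in Python (this is not the paper's argumentation framework; the names, the single strong-union conflict rule, and the confidence-based preference are illustrative assumptions): each statistical outcome carries a confidence, outcomes that clash under a property of Markov networks (strong union: an independence given Z also holds given any superset of Z) attack one another, and the preference function keeps the more confident side.

def resolve(facts):
    # facts: list of dicts with keys pair, cond (frozenset), indep (bool), conf (float);
    # pairs are assumed to be stored in a single, normalized orientation.
    alive = list(facts)
    for f in facts:
        for g in facts:
            if f is g or f["pair"] != g["pair"]:
                continue
            # I(x,y|cond_f) entails I(x,y|cond_g) whenever cond_f is a subset of
            # cond_g, so a dependence claim for the larger set is in conflict
            if f["indep"] and not g["indep"] and f["cond"] <= g["cond"]:
                loser = f if f["conf"] < g["conf"] else g
                if loser in alive:
                    alive.remove(loser)
    return alive

# toy usage: the weaker, conflicting dependence claim is discarded
facts = [
    {"pair": ("a", "b"), "cond": frozenset(),      "indep": True,  "conf": 0.95},
    {"pair": ("a", "b"), "cond": frozenset({"c"}), "indep": False, "conf": 0.60},
]
kept = resolve(facts)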

}, keywords = {argumentation, causal Bayesian networks, independence-based causal discovery, reliability improvement, structure learning}, url = {http://www.jmlr.org/papers/v10/bromberg09a.html}, author = {Bromberg, Facundo and Margaritis, Dimitris} } @mastersthesis {175, title = {Markov network structure discovery using independence tests}, type = {Doctor of Philosophy}, year = {2007}, pages = {182}, school = {Iowa State University}, address = {Ames, IA, USA}, abstract = {

We investigate efficient algorithms for learning the structure of a Markov network from data using the independence-based approach. Such algorithms conduct a series of conditional independence tests on data, successively restricting the set of possible structures until there is only a single structure consistent with the outcomes of the conditional independence tests executed (if possible). As Pearl has shown, the instances of the conditional independence relation in any domain are theoretically interdependent, as made explicit in his well-known conditional independence axioms. The first two algorithms we discuss, GSMN and GSIMN, exploit Pearl{\textquoteright}s independence axioms to reduce the number of tests required to learn a Markov network. This is useful in domains where independence tests are expensive, such as cases of very large data sets or distributed data. Subsequently, we explore how these axioms can be exploited to {\textquotedblleft}correct{\textquotedblright} the outcome of unreliable statistical independence tests, such as in applications where little data is available. We show how the problem of incorrect tests can be mapped to inference in inconsistent knowledge bases, a problem studied extensively in the field of non-monotonic logic. We present an algorithm for inferring independence values based on a sub-class of non-monotonic logics: the argumentation framework. Our results show the advantage of using our approach in the learning of structures, with improvements in the accuracy of learned networks of up to 20\%. As an alternative to logic-based interdependence among independence tests, we also explore probabilistic interdependence. Our algorithm, called PFMN, takes a Bayesian particle filtering approach, using a population of Markov network structures to maintain the posterior probability distribution over them given the outcomes of the tests performed. The result is an approximate algorithm (due to the use of particle filtering) that is useful in domains where independence tests are expensive.

}, isbn = {9780549334941}, url = {http://lib.dr.iastate.edu/rtd/15575/}, author = {Bromberg, Facundo and Margaritis, Dimitris} }