Publications Search
Katerina Marazopoulou, David Arbour, David Jensen
Refining the Semantics of Social Influence Miscellaneous
2014.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@misc{DBLP:journals/corr/MarazopoulouAJ14,
  title         = {Refining the Semantics of Social Influence},
  author        = {Katerina Marazopoulou and David Arbour and David Jensen},
  url           = {http://arxiv.org/abs/1412.5238},
  eprint        = {1412.5238},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SI},
  year          = {2014},
  date          = {2014-01-01},
  journal       = {CoRR},
  volume        = {abs/1412.5238},
  abstract      = {With the proliferation of network data, researchers are increasingly focusing on questions investigating phenomena occurring on networks. This often includes analysis of peer-effects, i.e., how the connections of an individual affect that individual's behavior. This type of influence is not limited to direct connections of an individual (such as friends), but also to individuals that are connected through longer paths (for example, friends of friends, or friends of friends of friends). In this work, we identify an ambiguity in the definition of what constitutes the extended neighborhood of an individual. This ambiguity gives rise to different semantics and supports different types of underlying phenomena. We present experimental results, both on synthetic and real networks, that quantify differences among the sets of extended neighbors under different semantics. Finally, we provide experimental evidence that demonstrates how the use of different semantics affects model selection.},
  keywords      = {Statistical Relational Learning},
  pubstate      = {published},
  tppubtype     = {misc},
}
Andrew Fast, David Jensen
Why stacked models perform effective collective classification Proceedings Article
In: Proceedings of the 8th IEEE International Conference on Data Mining (ICDM 2008), December 15-19, 2008, Pisa, Italy, pp. 785–790, IEEE Computer Society, 2008.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{fast2008stacked,
  title     = {Why stacked models perform effective collective classification},
  author    = {Andrew Fast and David Jensen},
  url       = {https://doi.org/10.1109/ICDM.2008.126},
  doi       = {10.1109/ICDM.2008.126},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Proceedings of the 8th IEEE International Conference on Data Mining (ICDM 2008), December 15-19, 2008, Pisa, Italy},
  pages     = {785--790},
  publisher = {IEEE Computer Society},
  abstract  = {Collective classification techniques jointly infer all class labels of a relational data set, using the inferences about one class label to influence inferences about related class labels. Kou and Cohen recently introduced an efficient relational model based on stacking that, despite its simplicity, has equivalent accuracy to more sophisticated joint inference approaches. Using experiments on both real and synthetic data, we show that the primary cause for the performance of the stacked model is the reduction in bias from learning the stacked model on inferred labels rather than true labels. The reduction in variance due to conditional inference also contributes to the effect but it is not as strong. In addition, we show that the performance of the joint inference and stacked learners can be attributed to an implicit weighting of local and relational features at learning time.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Jennifer Neville, David Jensen
Relational Dependency Networks Journal Article
In: J. Mach. Learn. Res., vol. 8, pp. 653–692, 2007.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@article{DBLP:journals/jmlr/NevilleJ07,
  title     = {Relational Dependency Networks},
  author    = {Jennifer Neville and David Jensen},
  url       = {http://dl.acm.org/citation.cfm?id=1314522},
  year      = {2007},
  date      = {2007-01-01},
  journal   = {J. Mach. Learn. Res.},
  volume    = {8},
  pages     = {653--692},
  abstract  = {Recent work on graphical models for relational data has demonstrated significant improvements in classification and inference when models represent the dependencies among instances. Despite its use in conventional statistical models, the assumption of instance independence is contradicted by most relational data sets. For example, in citation data there are dependencies among the topics of a paper’s references, and in genomic data there are dependencies among the functions of interacting proteins. In this paper, we present relational dependency networks (RDNs), graphical models that are capable of expressing and reasoning with such dependencies in a relational setting. We discuss RDNs in the context of relational Bayes networks and relational Markov networks and outline the relative strengths of RDNs—namely, the ability to represent cyclic dependencies, simple methods for parameter estimation, and efficient structure learning techniques. The strengths of RDNs are due to the use of pseudolikelihood learning techniques, which estimate an efficient approximation of the full joint distribution. We present learned RDNs for a number of real-world data sets and evaluate the models in a prediction context, showing that RDNs identify and exploit cyclic relational dependencies to achieve significant performance gains over conventional conditional models. In addition, we use synthetic data to explore model performance under various relational data characteristics, showing that RDN learning and inference techniques are accurate over a wide range of conditions.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {article},
}
Jennifer Neville, David Jensen
Leveraging Relational Autocorrelation with Latent Group Models Proceedings Article
In: Proceedings of the 5th IEEE International Conference on Data Mining (ICDM 2005), 27-30 November 2005, Houston, Texas, USA, pp. 322–329, IEEE Computer Society, 2005.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{DBLP:conf/icdm/NevilleJ05,
  title     = {Leveraging Relational Autocorrelation with Latent Group Models},
  author    = {Jennifer Neville and David Jensen},
  url       = {https://doi.org/10.1109/ICDM.2005.89},
  doi       = {10.1109/ICDM.2005.89},
  year      = {2005},
  date      = {2005-01-01},
  booktitle = {Proceedings of the 5th IEEE International Conference on Data Mining (ICDM 2005), 27-30 November 2005, Houston, Texas, USA},
  pages     = {322--329},
  publisher = {IEEE Computer Society},
  abstract  = {The presence of autocorrelation provides a strong motivation for using relational learning and inference techniques. Autocorrelation is a statistical dependence between the values of the same variable on related entities and is a nearly ubiquitous characteristic of relational data sets. Recent research has explored the use of collective inference techniques to exploit this phenomenon. These techniques achieve significant performance gains by modeling observed correlations among class labels of related instances, but the models fail to capture a frequent cause of autocorrelation - the presence of underlying groups that influence the attributes on a set of entities. We propose a latent group model (LGM) for relational data, which discovers and exploits the hidden structures responsible for the observed autocorrelation among class labels. Modeling the latent group structure improves model performance, increases inference efficiency, and enhances our understanding of the datasets. We evaluate performance on three relational classification tasks and show that LGM outperforms models that ignore latent group structure, particularly when there is little information with which to seed inference.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
David Jensen, Jennifer Neville, Brian Gallagher
Why collective inference improves relational classification Proceedings Article
In: Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Seattle, Washington, USA, August 22-25, 2004, pp. 593–598, ACM, 2004.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{DBLP:conf/kdd/JensenNG04,
  title     = {Why collective inference improves relational classification},
  author    = {David Jensen and Jennifer Neville and Brian Gallagher},
  url       = {https://doi.org/10.1145/1014052.1014125},
  doi       = {10.1145/1014052.1014125},
  year      = {2004},
  date      = {2004-01-01},
  booktitle = {Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Seattle, Washington, USA, August 22-25, 2004},
  pages     = {593--598},
  publisher = {ACM},
  abstract  = {Procedures for collective inference make simultaneous statistical judgments about the same variables for a set of related data instances. For example, collective inference could be used to simultaneously classify a set of hyperlinked documents or infer the legitimacy of a set of related financial transactions. Several recent studies indicate that collective inference can significantly reduce classification error when compared with traditional inference techniques. We investigate the underlying mechanisms for this error reduction by reviewing past work on collective inference and characterizing different types of statistical models used for making inference in relational data. We show important differences among these models, and we characterize the necessary and sufficient conditions for reduced classification error based on experiments with real and simulated data.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Jennifer Neville, David Jensen, Lisa Friedland, Michael Hay
Learning relational probability trees Proceedings Article
In: Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 - 27, 2003, pp. 625–630, ACM, 2003.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{DBLP:conf/kdd/NevilleJFH03,
  title     = {Learning relational probability trees},
  author    = {Jennifer Neville and David Jensen and Lisa Friedland and Michael Hay},
  url       = {https://doi.org/10.1145/956750.956830},
  doi       = {10.1145/956750.956830},
  year      = {2003},
  date      = {2003-01-01},
  booktitle = {Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 - 27, 2003},
  pages     = {625--630},
  publisher = {ACM},
  abstract  = {Classification trees are widely used in the machine learning and data mining communities for modeling propositional data. Recent work has extended this basic paradigm to probability estimation trees. Traditional tree learning algorithms assume that instances in the training data are homogenous and independently distributed. Relational probability trees (RPTs) extend standard probability estimation trees to a relational setting in which data instances are heterogeneous and interdependent. Our algorithm for learning the structure and parameters of an RPT searches over a space of relational features that use aggregation functions (e.g. AVERAGE, MODE, COUNT) to dynamically propositionalize relational data and create binary splits within the RPT. Previous work has identified a number of statistical biases due to characteristics of relational data such as autocorrelation and degree disparity. The RPT algorithm uses a novel form of randomization test to adjust for these biases. On a variety of relational learning tasks, RPTs built using randomization tests are significantly smaller than other models and achieve equivalent, or better, performance.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Jennifer Neville, David Jensen, Brian Gallagher
Simple Estimators for Relational Bayesian Classifiers Proceedings Article
In: Proceedings of the 3rd IEEE International Conference on Data Mining (ICDM 2003), 19-22 December 2003, Melbourne, Florida, USA, pp. 609–612, IEEE Computer Society, 2003.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{DBLP:conf/icdm/NevilleJG03,
  title     = {Simple Estimators for Relational Bayesian Classifiers},
  author    = {Jennifer Neville and David Jensen and Brian Gallagher},
  url       = {https://doi.org/10.1109/ICDM.2003.1250989},
  doi       = {10.1109/ICDM.2003.1250989},
  year      = {2003},
  date      = {2003-01-01},
  booktitle = {Proceedings of the 3rd IEEE International Conference on Data Mining (ICDM 2003), 19-22 December 2003, Melbourne, Florida, USA},
  pages     = {609--612},
  publisher = {IEEE Computer Society},
  abstract  = {We present the relational Bayesian classifier (RBC), a modification of the simple Bayesian classifier (SBC) for relational data. There exist several Bayesian classifiers that learn predictive models of relational data, but each uses a different estimation technique for modelling heterogeneous sets of attribute values. The effects of data characteristics on estimation have not been explored. We consider four simple estimation techniques and evaluate them on three real-world data sets. The estimator that assumes each multiset value is independently drawn from the same distribution (INDEPVAL) achieves the best empirical results. We examine bias and variance tradeoffs over a range of data sets and show that INDEPVAL's ability to model more multiset information results in lower bias estimates and contributes to its superior performance.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings},
}