Publications Search
Özgür Şimşek, David Jensen
Navigating networks by using homophily and degree Journal Article
In: Proceedings of the National Academy of Sciences, vol. 105, no. 35, pp. 12758–12762, 2008.
Abstract | Links | BibTeX | Tags: Navigation and Routing in Networks
@article{csimcsek2008navigating,
  author    = {{\"O}zg{\"u}r {\c{S}}im{\c{s}}ek and David Jensen},
  title     = {Navigating networks by using homophily and degree},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {105},
  number    = {35},
  pages     = {12758--12762},
  publisher = {National Academy of Sciences},
  year      = {2008},
  date      = {2008-01-01},
  url       = {https://www.pnas.org/content/pnas/105/35/12758.full.pdf},
  abstract  = {Many large distributed systems can be characterized as networks where short paths exist between nearly every pair of nodes. These include social, biological, communication, and distribution networks, which often display power-law or small-world structure. A central challenge of distributed systems is directing messages to specific nodes through a sequence of decisions made by individual nodes without global knowledge of the network. We present a probabilistic analysis of this navigation problem that produces a surprisingly simple and effective method for directing messages. This method requires calculating only the product of the two measures widely used to summarize all local information. It outperforms prior approaches reported in the literature by a large margin, and it provides a formal model that may describe how humans make decisions in sociological studies intended to explore the social network as well as how they make decisions in more naturalistic settings.},
  keywords  = {Navigation and Routing in Networks},
  pubstate  = {published},
  tppubtype = {article}
}
Amy McGovern, David Jensen
Optimistic pruning for multiple instance learning Journal Article
In: Pattern Recognition Letters, vol. 29, no. 9, pp. 1252–1260, 2008.
Abstract | Links | BibTeX | Tags:
@article{mcgovern2008optimistic,
  author    = {Amy McGovern and David Jensen},
  title     = {Optimistic pruning for multiple instance learning},
  journal   = {Pattern Recognition Letters},
  volume    = {29},
  number    = {9},
  pages     = {1252--1260},
  publisher = {North-Holland},
  year      = {2008},
  date      = {2008-01-01},
  doi       = {10.1016/j.patrec.2008.01.024},
  url       = {https://doi.org/10.1016/j.patrec.2008.01.024},
  abstract  = {This paper introduces a simple evaluation function for multiple instance learning that admits an optimistic pruning strategy. We demonstrate comparable results to state-of-the-art methods using significantly fewer computational resources.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Michael Hay, Gerome Miklau, David Jensen, Don Towsley, Philipp Weis
Resisting structural re-identification in anonymized social networks Journal Article
In: Proceedings of the VLDB Endowment, vol. 1, no. 1, pp. 102–114, 2008.
Abstract | Links | BibTeX | Tags: Privacy and Networks
@article{hay2008resisting,
  author    = {Michael Hay and Gerome Miklau and David Jensen and Don Towsley and Philipp Weis},
  title     = {Resisting structural re-identification in anonymized social networks},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {1},
  number    = {1},
  pages     = {102--114},
  publisher = {VLDB Endowment},
  year      = {2008},
  date      = {2008-01-01},
  doi       = {10.14778/1453856.1453873},
  url       = {https://dl.acm.org/doi/pdf/10.14778/1453856.1453873},
  abstract  = {We identify privacy risks associated with releasing network data sets and provide an algorithm that mitigates those risks. A network consists of entities connected by links representing relations such as friendship, communication, or shared activity. Maintaining privacy when publishing networked data is uniquely challenging because an individual's network context can be used to identify them even if other identifying information is removed. In this paper, we quantify the privacy risks associated with three classes of attacks on the privacy of individuals in networks, based on the knowledge used by the adversary. We show that the risks of these attacks vary greatly based on network structure and size. We propose a novel approach to anonymizing network data that models aggregate network structure and then allows samples to be drawn from that model. The approach guarantees anonymity for network entities while preserving the ability to estimate a wide variety of network measures with relatively little bias.},
  keywords  = {Privacy and Networks},
  pubstate  = {published},
  tppubtype = {article}
}
Andrew Fast, David Jensen
Why stacked models perform effective collective classification Proceedings Article
In: Proceedings of the 8th IEEE International Conference on Data Mining (ICDM 2008), December 15-19, 2008, Pisa, Italy, pp. 785–790, IEEE Computer Society, 2008.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{fast2008stacked,
  author    = {Andrew Fast and David Jensen},
  title     = {Why stacked models perform effective collective classification},
  booktitle = {Proceedings of the 8th IEEE International Conference on Data Mining (ICDM 2008), December 15-19, 2008, Pisa, Italy},
  pages     = {785--790},
  publisher = {IEEE Computer Society},
  year      = {2008},
  date      = {2008-01-01},
  doi       = {10.1109/ICDM.2008.126},
  url       = {https://doi.org/10.1109/ICDM.2008.126},
  abstract  = {Collective classification techniques jointly infer all class labels of a relational data set, using the inferences about one class label to influence inferences about related class labels. Kou and Cohen recently introduced an efficient relational model based on stacking that, despite its simplicity, has equivalent accuracy to more sophisticated joint inference approaches. Using experiments on both real and synthetic data, we show that the primary cause for the performance of the stacked model is the reduction in bias from learning the stacked model on inferred labels rather than true labels. The reduction in variance due to conditional inference also contributes to the effect but it is not as strong. In addition, we show that the performance of the joint inference and stacked learners can be attributed to an implicit weighting of local and relational features at learning time.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Michael Hay, Gerome Miklau, David Jensen, Philipp Weis, Siddharth Srivastava
Anonymizing social networks Technical Report
Computer Science Department Faculty Publication Series, no. 180, University of Massachusetts Amherst, 2007.
Abstract | Links | BibTeX | Tags:
@techreport{hay2007anonymizing,
  author      = {Michael Hay and Gerome Miklau and David Jensen and Philipp Weis and Siddharth Srivastava},
  title       = {Anonymizing social networks},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  type        = {Computer Science Department Faculty Publication Series},
  number      = {180},
  year        = {2007},
  date        = {2007-01-01},
  url         = {https://scholarworks.umass.edu/cgi/viewcontent.cgi?article=1175&context=cs_faculty_pubs},
  abstract    = {Advances in technology have made it possible to collect data about individuals and the connections between them, such as email correspondence and friendships. Agencies and researchers who have collected such social network data often have a compelling interest in allowing others to analyze the data. However, in many cases the data describes relationships that are private (e.g., email correspondence) and sharing the data in full can result in unacceptable disclosures. In this paper, we present a framework for assessing the privacy risk of sharing anonymized network data. This includes a model of adversary knowledge, for which we consider several variants and make connections to known graph theoretical results. On several real-world social networks, we show that simple anonymization techniques are inadequate, resulting in substantial breaches of privacy for even modestly informed adversaries. We propose a novel anonymization technique based on perturbing the network and demonstrate empirically that it leads to substantial reduction of the privacy threat. We also analyze the effect that anonymizing the network has on the utility of the data for social network analysis.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
David Jensen
Beyond Prediction: Directions for Probabilistic and Relational Learning Proceedings Article
In: Inductive Logic Programming, 17th International Conference, ILP 2007, Corvallis, OR, USA, June 19-21, 2007, Revised Selected Papers, pp. 4–21, Springer, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/ilp/Jensen07,
  author    = {David Jensen},
  title     = {Beyond Prediction: Directions for Probabilistic and Relational Learning},
  booktitle = {Inductive Logic Programming, 17th International Conference, ILP 2007, Corvallis, OR, USA, June 19-21, 2007, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4894},
  pages     = {4--21},
  publisher = {Springer},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1007/978-3-540-78469-2_2},
  url       = {https://doi.org/10.1007/978-3-540-78469-2_2},
  abstract  = {Research over the past several decades in learning logical and probabilistic models has greatly increased the range of phenomena that machine learning can address. Recent work has extended these boundaries even further by unifying these two powerful learning frameworks. However, new frontiers await. Current techniques are capable of learning only a subset of the knowledge needed by practitioners in important domains, and further unification of probabilistic and logical learning offers a unique ability to produce the full range of knowledge needed in a wide range of applications.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, David Jensen
Bias/Variance Analysis for Relational Domains Proceedings Article
In: Inductive Logic Programming, 17th International Conference, ILP 2007, Corvallis, OR, USA, June 19-21, 2007, Revised Selected Papers, pp. 27–28, Springer, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/ilp/NevilleJ07,
  author    = {Jennifer Neville and David Jensen},
  title     = {Bias/Variance Analysis for Relational Domains},
  booktitle = {Inductive Logic Programming, 17th International Conference, ILP 2007, Corvallis, OR, USA, June 19-21, 2007, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4894},
  pages     = {27--28},
  publisher = {Springer},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1007/978-3-540-78469-2_6},
  url       = {https://doi.org/10.1007/978-3-540-78469-2_6},
  abstract  = {Bias/variance analysis is a useful tool for investigating the performance of machine learning algorithms. Conventional analysis decomposes loss into errors due to aspects of the learning process with an underlying assumption that there is no variation in model predictions due to the inference process used for prediction. This assumption is often violated when collective inference models are used for classification of relational data. In relational data, when there are dependencies among the class labels of related instances, the inferences about one object can be used to improve the inferences about other related objects. Collective inference techniques exploit these dependencies by jointly inferring the class labels in a test set. This approach can produce more accurate predictions than conditional inference for each instance independently, but it also introduces an additional source of error, both through the use of approximate inference algorithms and through variation in the availability of test set information. To date, the impact of inference error on relational model performance has not been investigated.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Matthew Rattigan, Marc Maier, David Jensen, Bin Wu, Xin Pei, Jianbin Tan, Yi Wang
Exploiting Network Structure for Active Inference in Collective Classification Proceedings Article
In: Workshops Proceedings of the 7th IEEE International Conference on Data Mining (ICDM 2007), October 28-31, 2007, Omaha, Nebraska, USA, pp. 429–434, IEEE Computer Society, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/icdm/RattiganMJWPTW07,
  author    = {Matthew Rattigan and Marc Maier and David Jensen and Bin Wu and Xin Pei and Jianbin Tan and Yi Wang},
  title     = {Exploiting Network Structure for Active Inference in Collective Classification},
  booktitle = {Workshops Proceedings of the 7th IEEE International Conference on Data Mining (ICDM 2007), October 28-31, 2007, Omaha, Nebraska, USA},
  pages     = {429--434},
  publisher = {IEEE Computer Society},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1109/ICDMW.2007.124},
  url       = {https://doi.org/10.1109/ICDMW.2007.124},
  abstract  = {Active inference seeks to maximize classification performance while minimizing the amount of data that must be labeled ex ante. This task is particularly relevant in the context of relational data, where statistical dependencies among instances can be exploited to improve classification accuracy. We show that efficient methods for indexing network structure can be exploited to select high-value nodes for labeling. This approach substantially outperforms random selection and selection based on simple measures of local structure. We demonstrate the relative effectiveness of this selection approach through experiments with a relational neighbor classifier on a variety of real and synthetic data sets, and identify the necessary characteristics of the data set that allow this approach to perform well.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Lisa Friedland, David Jensen
Finding tribes: identifying close-knit individuals from employment patterns Proceedings Article
In: Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Jose, California, USA, August 12-15, 2007, pp. 290–299, ACM, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/kdd/FriedlandJ07,
  author    = {Lisa Friedland and David Jensen},
  title     = {Finding tribes: identifying close-knit individuals from employment patterns},
  booktitle = {Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Jose, California, USA, August 12-15, 2007},
  pages     = {290--299},
  publisher = {ACM},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1145/1281192.1281226},
  url       = {https://doi.org/10.1145/1281192.1281226},
  abstract  = {We present a family of algorithms to uncover tribes---groups of individuals who share unusual sequences of affiliations. While much work inferring community structure describes large-scale trends, we instead search for small groups of tightly linked individuals who behave anomalously with respect to those trends. We apply the algorithms to a large temporal and relational data set consisting of millions of employment records from the National Association of Securities Dealers. The resulting tribes contain individuals at higher risk for fraud, are homogenous with respect to risk scores, and are geographically mobile, all at significant levels compared to random or to other sets of individuals who share affiliations.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Matthew Rattigan, Marc Maier, David Jensen
Graph clustering with network structure indices Proceedings Article
In: Machine Learning, Proceedings of the Twenty-Fourth International Conference (ICML 2007), Corvallis, Oregon, USA, June 20-24, 2007, pp. 783–790, ACM, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/icml/RattiganMJ07,
  author    = {Matthew Rattigan and Marc Maier and David Jensen},
  title     = {Graph clustering with network structure indices},
  booktitle = {Machine Learning, Proceedings of the Twenty-Fourth International Conference (ICML 2007), Corvallis, Oregon, USA, June 20-24, 2007},
  series    = {ACM International Conference Proceeding Series},
  volume    = {227},
  pages     = {783--790},
  publisher = {ACM},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1145/1273496.1273595},
  url       = {https://doi.org/10.1145/1273496.1273595},
  abstract  = {Graph clustering has become ubiquitous in the study of relational data sets. We examine two simple algorithms: a new graphical adaptation of the k-medoids algorithm and the Girvan-Newman method based on edge betweenness centrality. We show that they can be effective at discovering the latent groups or communities that are defined by the link structure of a graph. However, both approaches rely on prohibitively expensive computations, given the size of modern relational data sets. Network structure indices (NSIs) are a proven technique for indexing network structure and efficiently finding short paths. We show how incorporating NSIs into these graph clustering algorithms can overcome these complexity limitations. We also present promising quantitative and qualitative evaluations of the modified algorithms on synthetic and real data sets.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Trevor Strohman, W. Bruce Croft, David Jensen
Recommending citations for academic papers Proceedings Article
In: SIGIR 2007: Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, Amsterdam, The Netherlands, July 23-27, 2007, pp. 705–706, ACM, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/sigir/StrohmanCJ07,
  author    = {Trevor Strohman and W. Bruce Croft and David Jensen},
  title     = {Recommending citations for academic papers},
  booktitle = {SIGIR 2007: Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, Amsterdam, The Netherlands, July 23-27, 2007},
  pages     = {705--706},
  publisher = {ACM},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1145/1277741.1277868},
  url       = {https://doi.org/10.1145/1277741.1277868},
  abstract  = {We approach the problem of academic literature search by considering an unpublished manuscript as a query to a search system. We use the text of previous literature as well as the citation graph that connects it to find relevant related material. We evaluate our technique with manual and automatic evaluation methods, and find an order of magnitude improvement in mean average precision as compared to a text similarity baseline.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Andrew Fast, Lisa Friedland, Marc Maier, Brian Taylor, David Jensen, Henry G. Goldberg, John Komoroske
Relational data pre-processing techniques for improved securities fraud detection Proceedings Article
In: Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Jose, California, USA, August 12-15, 2007, pp. 941–949, ACM, 2007.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/kdd/FastFMTJGK07,
  author    = {Andrew Fast and Lisa Friedland and Marc Maier and Brian Taylor and David Jensen and Henry G. Goldberg and John Komoroske},
  title     = {Relational data pre-processing techniques for improved securities fraud detection},
  booktitle = {Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Jose, California, USA, August 12-15, 2007},
  pages     = {941--949},
  publisher = {ACM},
  year      = {2007},
  date      = {2007-01-01},
  doi       = {10.1145/1281192.1281293},
  url       = {https://doi.org/10.1145/1281192.1281293},
  abstract  = {Commercial datasets are often large, relational, and dynamic. They contain many records of people, places, things, events and their interactions over time. Such datasets are rarely structured appropriately for knowledge discovery, and they often contain variables whose meanings change across different subsets of the data. We describe how these challenges were addressed in a collaborative analysis project undertaken by the University of Massachusetts Amherst and the National Association of Securities Dealers (NASD). We describe several methods for data pre-processing that we applied to transform a large, dynamic, and relational dataset describing nearly the entirety of the U.S. securities industry, and we show how these methods made the dataset suitable for learning statistical relational models. To better utilize social structure, we first applied known consolidation and link formation techniques to associate individuals with branch office locations. In addition, we developed an innovative technique to infer professional associations by exploiting dynamic employment histories. Finally, we applied normalization techniques to create a suitable class label that adjusts for spatial, temporal, and other heterogeneity within the data. We show how these pre-processing techniques combine to provide the necessary foundation for learning high-performing statistical models of fraudulent activity.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, David Jensen
Relational Dependency Networks Journal Article
In: Journal of Machine Learning Research, vol. 8, pp. 653–692, 2007.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@article{DBLP:journals/jmlr/NevilleJ07,
  author    = {Jennifer Neville and David Jensen},
  title     = {Relational Dependency Networks},
  journal   = {Journal of Machine Learning Research},
  volume    = {8},
  pages     = {653--692},
  year      = {2007},
  date      = {2007-01-01},
  url       = {http://dl.acm.org/citation.cfm?id=1314522},
  abstract  = {Recent work on graphical models for relational data has demonstrated significant improvements in classification and inference when models represent the dependencies among instances. Despite its use in conventional statistical models, the assumption of instance independence is contradicted by most relational data sets. For example, in citation data there are dependencies among the topics of a paper's references, and in genomic data there are dependencies among the functions of interacting proteins. In this paper, we present relational dependency networks (RDNs), graphical models that are capable of expressing and reasoning with such dependencies in a relational setting. We discuss RDNs in the context of relational Bayes networks and relational Markov networks and outline the relative strengths of RDNs---namely, the ability to represent cyclic dependencies, simple methods for parameter estimation, and efficient structure learning techniques. The strengths of RDNs are due to the use of pseudolikelihood learning techniques, which estimate an efficient approximation of the full joint distribution. We present learned RDNs for a number of real-world data sets and evaluate the models in a prediction context, showing that RDNs identify and exploit cyclic relational dependencies to achieve significant performance gains over conventional conditional models. In addition, we use synthetic data to explore model performance under various relational data characteristics, showing that RDN learning and inference techniques are accurate over a wide range of conditions.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {article}
}
Michael Hay, Andrew Fast, David Jensen
Understanding the effects of search constraints on structure learning Technical Report
University of Massachusetts Amherst, Department of Computer Science, Technical Report no. 07-21, 2007.
Abstract | Links | BibTeX | Tags:
@techreport{hay2007understanding,
  author      = {Michael Hay and Andrew Fast and David Jensen},
  title       = {Understanding the effects of search constraints on structure learning},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  number      = {07-21},
  year        = {2007},
  date        = {2007-01-01},
  url         = {https://kdl.cs.umass.edu/papers/hay-et-al-tr0721.pdf},
  abstract    = {Recently, Tsamardinos et al. [2006] presented an algorithm for Bayesian network structure learning that outperforms many state-of-the-art algorithms in terms of efficiency, structure similarity and likelihood. The Max-Min Hill Climbing algorithm is a hybrid of constraint-based and search-and-score techniques, using greedy hill climbing to search a constrained space of possible network structures. The constraints correspond to assertions of conditional independence that must hold in the network from which the data were sampled. One would expect that constraining the space would make search both faster and more accurate, focusing search on the ``right'' part of the space. The published results indicate, however, that the resulting structures are less accurate when search is constrained. We reproduce these results and explain why they occur. At small samples, the statistical test of conditional independence has low power, which causes the algorithm to exclude edges between dependent variables. Also, the constraints make search relatively harder, leading to errors in edge orientation. In an unconstrained space, search can ``repair'' these errors by adding in extra edges. We conclude by proposing and evaluating an improved algorithm.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Aaron M Ellison, Leon J Osterweil, Lori Clarke, Julian L Hadley, Alexander Wise, Emery Boose, David R Foster, Allen Hanson, David Jensen, Paul Kuzeja, et al.
Analytic webs support the synthesis of ecological data sets Journal Article
In: Ecology, vol. 87, no. 6, pp. 1345–1358, 2006.
Abstract | Links | BibTeX | Tags:
@article{ellison2006analytic,
  author    = {Aaron M Ellison and Leon J Osterweil and Lori Clarke and Julian L Hadley and Alexander Wise and Emery Boose and David R Foster and Allen Hanson and David Jensen and Paul Kuzeja and others},
  title     = {Analytic webs support the synthesis of ecological data sets},
  journal   = {Ecology},
  volume    = {87},
  number    = {6},
  pages     = {1345--1358},
  publisher = {Wiley Online Library},
  year      = {2006},
  date      = {2006-01-01},
  doi       = {10.1890/0012-9658(2006)87[1345:AWSTSO]2.0.CO;2},
  url       = {https://esajournals.onlinelibrary.wiley.com/doi/pdfdirect/10.1890/0012-9658%282006%2987%5B1345%3AAWSTSO%5D2.0.CO%3B2},
  abstract  = {A wide variety of data sets produced by individual investigators are now synthesized to address ecological questions that span a range of spatial and temporal scales. It is important to facilitate such syntheses so that ``consumers'' of data sets can be confident that both input data sets and synthetic products are reliable. Necessary documentation to ensure the reliability and validation of data sets includes both familiar descriptive metadata and formal documentation of the scientific processes used (i.e., process metadata) to produce usable data sets from collections of raw data. Such documentation is complex and difficult to construct, so it is important to help ``producers'' create reliable data sets and to facilitate their creation of required metadata. We describe a formal representation, an ``analytic web,'' that aids both producers and consumers of data sets by providing complete and precise definitions of scientific processes used to process raw and derived data sets. The formalisms used to define analytic webs are adaptations of those used in software engineering, and they provide a novel and effective support system for both the synthesis and the validation of ecological data sets. We illustrate the utility of an analytic web as an aid to producing synthetic data sets through a worked example: the synthesis of long-term measurements of whole-ecosystem carbon exchange. Analytic webs are also useful validation aids for consumers because they support the concurrent construction of a complete, Internet-accessible audit trail of the analytic processes used in the synthesis of the data sets. Finally we describe our early efforts to evaluate these ideas through the use of a prototype software tool, SciWalker. We indicate how this tool has been used to create analytic webs tailored to specific data-set synthesis and validation activities, and suggest extensions to it that will support additional forms of validation. The process metadata created by SciWalker is readily adapted for inclusion in Ecological Metadata Language (EML) files.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Jennifer Neville, David Jensen
Bias/variance analysis for network data Proceedings Article
In: Proceedings of the Workshop on Statistical Relational Learning, 23rd International Conference on Machine Learning, 2006.
Abstract | Links | BibTeX | Tags:
@inproceedings{neville2006bias,
  author    = {Jennifer Neville and David Jensen},
  title     = {Bias/variance analysis for network data},
  booktitle = {Proceedings of the Workshop on Statistical Relational Learning, 23rd International Conference on Machine Learning},
  year      = {2006},
  date      = {2006-01-01},
  url       = {http://www.cs.umd.edu/projects/srl2006/papers/srl06-neville.pdf},
  abstract  = {Bias/variance analysis is a useful tool for investigating the performance of machine learning algorithms. Conventional analysis decomposes loss into errors due to aspects of the learning process, but in relational and network applications, the inference process introduces an additional source of error. Collective inference techniques introduce additional error both through the use of approximate inference algorithms and through variation in the availability of test set information. To date, the impact of inference error on model performance has not been investigated. In this paper, we propose a new bias/variance framework that decomposes loss into errors due to both the learning and inference process. We evaluate performance of three relational models on synthetic data and use the framework to understand the reasons for poor model performance. With this understanding, we propose a number of directions to explore to improve model performance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Hendrik Blockeel, David Jensen, Stefan Kramer
Introduction to the special issue on multi-relational data mining and statistical relational learning Journal Article
In: Machine Learning, vol. 62, no. 1–2, pp. 3–5, 2006.
@article{DBLP:journals/ml/BlockeelJK06,
  author    = {Hendrik Blockeel and David Jensen and Stefan Kramer},
  title     = {Introduction to the special issue on multi-relational data mining and statistical relational learning},
  journal   = {Machine Learning},
  volume    = {62},
  number    = {1--2},
  pages     = {3--5},
  year      = {2006},
  date      = {2006-01-01},
  doi       = {10.1007/s10994-006-5856-7},
  url       = {https://doi.org/10.1007/s10994-006-5856-7},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
John Burgess, Brian Gallagher, David Jensen, Brian Neil Levine
MaxProp: Routing for Vehicle-Based Disruption-Tolerant Networks Proceedings Article
In: INFOCOM 2006. 25th IEEE International Conference on Computer Communications, Joint Conference of the IEEE Computer and Communications Societies, 23-29 April 2006, Barcelona, Catalunya, Spain, IEEE, 2006.
Abstract | Links | BibTeX | Tags: Navigation and Routing in Networks
@inproceedings{DBLP:conf/infocom/BurgessGJL06,
  author    = {John Burgess and Brian Gallagher and David Jensen and Brian Neil Levine},
  title     = {{MaxProp}: Routing for Vehicle-Based Disruption-Tolerant Networks},
  booktitle = {INFOCOM 2006. 25th IEEE International Conference on Computer Communications, Joint Conference of the IEEE Computer and Communications Societies, 23-29 April 2006, Barcelona, Catalunya, Spain},
  publisher = {IEEE},
  year      = {2006},
  date      = {2006-01-01},
  doi       = {10.1109/INFOCOM.2006.228},
  url       = {https://doi.org/10.1109/INFOCOM.2006.228},
  abstract  = {Disruption-tolerant networks (DTNs) attempt to route network messages via intermittently connected nodes. Routing in such environments is difficult because peers have little information about the state of the partitioned network and transfer opportunities between peers are of limited duration. In this paper, we propose MaxProp, a protocol for effective routing of DTN messages. MaxProp is based on prioritizing both the schedule of packets transmitted to other peers and the schedule of packets to be dropped. These priorities are based on the path likelihoods to peers according to historical data and also on several complementary mechanisms, including acknowledgments, a head-start for new packets, and lists of previous intermediaries. Our evaluations show that MaxProp performs better than protocols that have access to an oracle that knows the schedule of meetings between peers. Our evaluations are based on 60 days of traces from a real DTN network we have deployed on 30 buses. Our network, called UMassDieselNet, serves a large geographic area between five colleges. We also evaluate MaxProp on simulated topologies and show it performs well in a wide variety of DTN environments.},
  keywords  = {Navigation and Routing in Networks},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Chirag Shah, W. Bruce Croft, David Jensen
Representing documents with named entities for story link detection (SLD) Proceedings Article
In: Proceedings of the 2006 ACM CIKM International Conference on Information and Knowledge Management, Arlington, Virginia, USA, November 6-11, 2006, pp. 868–869, ACM, 2006.
Abstract | Links | BibTeX | Tags:
% CIKM 2006 paper on named-entity document representations for story link detection.
% The acronym in the title is brace-protected so sentence-casing styles keep it upper-case;
% only "year" is given because the exact publication day is not recorded here.
@inproceedings{DBLP:conf/cikm/ShahCJ06,
  author    = {Shah, Chirag and Croft, W. Bruce and Jensen, David},
  title     = {Representing documents with named entities for story link detection ({SLD})},
  booktitle = {Proceedings of the 2006 ACM CIKM International Conference on Information and Knowledge Management, Arlington, Virginia, USA, November 6-11, 2006},
  pages     = {868--869},
  publisher = {ACM},
  year      = {2006},
  doi       = {10.1145/1183614.1183771},
  url       = {https://doi.org/10.1145/1183614.1183771},
  abstract  = {Several information organization, access, and filtering systems can benefit from different kind of document representations than those used in traditional Information Retrieval (IR). Topic Detection and Tracking (TDT) is an example of such an application. In this paper we demonstrate that named entities serve as better choices of units for document representation over all words. In order to test this hypothesis we study the effect of words-based and entity-based representations on Story Link Detection (SLD) - a core task in TDT research. The experiments on TDT corpora show that entity-based representations give significant improvements for SLD. We also propose a mechanism to expand the set of named entities used for document representation, which enhances the performance in some cases. We then take a step further and analyze the limitations of using only named entities for the document representation. Our studies and experiments indicate that adding additional topical terms can help in addressing such limitations.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Andrew Fast, David Jensen
The NFL Coaching Network: Analysis of the Social Network among Professional Football Coaches Proceedings Article
In: Capturing and Using Patterns for Evidence Detection, Papers from the 2006 AAAI Fall Symposium, Washington, DC, USA, October 13-15, 2006, pp. 112–119, AAAI Press, 2006.
Abstract | Links | BibTeX | Tags:
% AAAI 2006 Fall Symposium paper analysing the social network of NFL coaches.
% The acronym in the title is brace-protected so sentence-casing styles keep it upper-case;
% only "year" is given because the exact publication day is not recorded here.
@inproceedings{DBLP:conf/aaaifs/FastJ06,
  author    = {Fast, Andrew and Jensen, David},
  title     = {The {NFL} Coaching Network: Analysis of the Social Network among Professional Football Coaches},
  booktitle = {Capturing and Using Patterns for Evidence Detection, Papers from the 2006 AAAI Fall Symposium, Washington, DC, USA, October 13-15, 2006},
  series    = {AAAI Technical Report},
  volume    = {FS-06-02},
  pages     = {112--119},
  publisher = {AAAI Press},
  year      = {2006},
  url       = {https://www.aaai.org/Library/Symposia/Fall/2006/fs06-02-017.php},
  abstract  = {The interactions of professional football coaches and teams in the National Football League (NFL) form a complex social network. This network provides a great opportunity to analyze the influence that coaching mentors have on their proteges. In this paper, we use this social network to identify notable coaches and characterize championship coaches. We also utilize the coaching network to learn a model of which teams will make the playoffs in a given year. Developing comprehensive models of complex adaptive networks, such as the network of NFL coaches, poses a difficult challenge for researchers. From our analysis of the NFL, we identify three types of dependencies that any model of complex network data must be able to represent.},
  pubstate  = {published},
  tppubtype = {inproceedings}
}