Publications
2003
Jennifer Neville, David Jensen, Brian Gallagher
Simple Estimators for Relational Bayesian Classifiers Proceedings Article
In: Proceedings of the 3rd IEEE International Conference on Data Mining (ICDM 2003), 19-22 December 2003, Melbourne, Florida, USA, pp. 609–612, IEEE Computer Society, 2003.
@inproceedings{DBLP:conf/icdm/NevilleJG03,
  author    = {Jennifer Neville and David Jensen and Brian Gallagher},
  title     = {Simple Estimators for Relational {Bayesian} Classifiers},
  booktitle = {Proceedings of the 3rd IEEE International Conference on Data Mining (ICDM 2003), 19-22 December 2003, Melbourne, Florida, USA},
  pages     = {609--612},
  publisher = {IEEE Computer Society},
  year      = {2003},
  date      = {2003-01-01},
  doi       = {10.1109/ICDM.2003.1250989},
  url       = {https://doi.org/10.1109/ICDM.2003.1250989},
  abstract  = {We present the relational Bayesian classifier (RBC), a modification of the simple Bayesian classifier (SBC) for relational data. There exist several Bayesian classifiers that learn predictive models of relational data, but each uses a different estimation technique for modelling heterogeneous sets of attribute values. The effects of data characteristics on estimation have not been explored. We consider four simple estimation techniques and evaluate them on three real-world data sets. The estimator that assumes each multiset value is independently drawn from the same distribution (INDEPVAL) achieves the best empirical results. We examine bias and variance tradeoffs over a range of data sets and show that INDEPVAL's ability to model more multiset information results in lower bias estimates and contributes to its superior performance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, Micah Adler, David Jensen
Clustering relational data using attribute and link information Proceedings Article
In: Proceedings of the text mining and link analysis workshop, 18th international joint conference on artificial intelligence, pp. 9–15, Citeseer 2003.
@inproceedings{neville2003clustering,
  author    = {Jennifer Neville and Micah Adler and David Jensen},
  title     = {Clustering relational data using attribute and link information},
  booktitle = {Proceedings of the Text Mining and Link Analysis Workshop, 18th International Joint Conference on Artificial Intelligence},
  pages     = {9--15},
  year      = {2003},
  date      = {2003-01-01},
  url       = {https://www.cs.purdue.edu/homes/neville/papers/neville-et-al-textlink2003.pdf},
  abstract  = {Clustering is a descriptive task that seeks to identify natural groupings in data. Relational data offer a wealth of information for identifying groups of similar items. Both attribute information and the structure of relationships can be used for clustering. Graph partitioning and data clustering techniques can be applied independently to relational data but a technique that exploits both sources of information simultaneously may produce more meaningful clusters. This paper will describe our work synthesizing data clustering and graph partitioning techniques into improved clustering algorithms for relational data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Jennifer Neville, Michael Hay
Avoiding Bias when Aggregating Relational Data with Degree Disparity Proceedings Article
In: Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA, pp. 274–281, AAAI Press, 2003.
@inproceedings{DBLP:conf/icml/JensenNH03,
  author    = {David Jensen and Jennifer Neville and Michael Hay},
  title     = {Avoiding Bias when Aggregating Relational Data with Degree Disparity},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA},
  pages     = {274--281},
  publisher = {AAAI Press},
  year      = {2003},
  date      = {2003-01-01},
  url       = {http://www.aaai.org/Library/ICML/2003/icml03-038.php},
  abstract  = {A common characteristic of relational data sets -- degree disparity -- can lead relational learning algorithms to discover misleading correlations. Degree disparity occurs when the frequency of a relation is correlated with the values of the target variable. In such cases, aggregation functions used by many relational learning algorithms will result in misleading correlations and added complexity in models. We examine this problem through a combination of simulations and experiments. We show how two novel hypothesis testing procedures can adjust for the effects of using aggregation functions in the presence of degree disparity.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Matthew Rattigan, Hannah Blau
Information awareness: a prospective technical assessment Proceedings Article
In: Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 - 27, 2003, pp. 378–387, ACM, 2003.
@inproceedings{DBLP:conf/kdd/JensenRB03,
  author    = {David Jensen and Matthew Rattigan and Hannah Blau},
  title     = {Information awareness: a prospective technical assessment},
  booktitle = {Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 - 27, 2003},
  pages     = {378--387},
  publisher = {ACM},
  year      = {2003},
  date      = {2003-01-01},
  doi       = {10.1145/956750.956794},
  url       = {https://doi.org/10.1145/956750.956794},
  abstract  = {Recent proposals to apply data mining systems to problems in law enforcement, national security, and fraud detection have attracted both media attention and technical critiques of their expected accuracy and impact on privacy. Unfortunately, the majority of technical critiques have been based on simplistic assumptions about data, classifiers, inference procedures, and the overall architecture of such systems. We consider these critiques in detail, and we construct a simulation model that more closely matches realistic systems. We show how both the accuracy and privacy impact of a hypothetical system could be substantially improved, and we discuss the necessary and sufficient conditions for this improvement to be achieved. This analysis is neither a defense nor a critique of any particular system concept. Rather, our model suggests alternative technical designs that could mitigate some concerns, but also raises more specific conditions that must be met for such systems to be both accurate and socially desirable.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Amy McGovern, David Jensen
Identifying Predictive Structures in Relational Data Using Multiple Instance Learning Proceedings Article
In: Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA, pp. 528–535, AAAI Press, 2003.
@inproceedings{DBLP:conf/icml/McGovernJ03,
  author    = {Amy McGovern and David Jensen},
  title     = {Identifying Predictive Structures in Relational Data Using Multiple Instance Learning},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA},
  pages     = {528--535},
  publisher = {AAAI Press},
  year      = {2003},
  date      = {2003-01-01},
  url       = {http://www.aaai.org/Library/ICML/2003/icml03-070.php},
  abstract  = {This paper introduces an approach for identifying predictive structures in relational data using the multiple-instance framework. By a predictive structure, we mean a structure that can explain a given labeling of the data and can predict labels of unseen data. Multiple-instance learning has previously only been applied to flat, or propositional, data and we present a modification to the framework that allows multiple-instance techniques to be used on relational data. We present experimental results using a relational modification of the diverse density method and of a method based on the chi-squared statistic. We demonstrate that multiple-instance learning can be used to identify predictive structures on both a small illustrative data set and the Internet Movie Database. We compare the classification results to a k-nearest neighbor approach.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, Matthew Rattigan, David Jensen
Statistical relational learning: Four claims and a survey Proceedings Article
In: Workshop SRL, Int. Joint Conf. on AI, 2003.
@inproceedings{neville2003statistical,
  author    = {Jennifer Neville and Matthew Rattigan and David Jensen},
  title     = {Statistical relational learning: Four claims and a survey},
  booktitle = {Proceedings of the Workshop on Learning Statistical Models from Relational Data, 18th International Joint Conference on Artificial Intelligence},
  year      = {2003},
  date      = {2003-01-01},
  url       = {https://www.cs.purdue.edu/homes/neville/papers/neville-et-al-srl2003.pdf},
  abstract  = {Statistical relational learning (SRL) research has made significant progress over the last 5 years. We have successfully demonstrated the feasibility of a number of probabilistic models for relational data, including probabilistic relational models, Bayesian logic programs, and relational probability trees, and the interest in SRL is growing. However, in order to sustain and nurture the growth of SRL as a subfield we need to refocus our efforts on the science of machine learning — moving from demonstrations to comparative and ablation studies. We will outline four assertions that are implicit to SRL research but which have been only minimally evaluated. We hope to stimulate discussion as to how, as a community, these claims can be addressed in future research.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Jennifer Neville, Matthew Rattigan
Randomization tests for relational learning Proceedings Article
In: International Joint Conference on Artificial Intelligence (submitted), Citeseer 2003.
@inproceedings{jensen2003randomization,
  author    = {David Jensen and Jennifer Neville and Matthew Rattigan},
  title     = {Randomization tests for relational learning},
  booktitle = {International Joint Conference on Artificial Intelligence (submitted)},
  year      = {2003},
  date      = {2003-01-01},
  url       = {https://www.cs.purdue.edu/homes/neville/papers/jensen-et-al-tr0305.pdf},
  abstract  = {Algorithms for relational learning and propositional learning face different statistical challenges. In contrast to propositional learners, relational learners often make statistical inferences about data that exhibit linkage and autocorrelation. Recent work has shown that these characteristics of relational data can bias inferences made by relational learners. In this paper, we develop a novel variant of a known statistical procedure — a randomization test — that produces accurate hypothesis tests for relational data. We show that our procedure produces unbiased inferences in situations where more obvious adaptations of existing randomization tests fail.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Lise Getoor
IJCAI 2003 workshop on learning statistical models from relational data Miscellaneous
2003.
@misc{jensen2003ijcai,
  author    = {David Jensen and Lise Getoor},
  title     = {{IJCAI} 2003 Workshop on Learning Statistical Models from Relational Data},
  year      = {2003},
  date      = {2003-01-01},
  url       = {https://scholarworks.umass.edu/cgi/viewcontent.cgi?article=1163&context=cs_faculty_pubs},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
Amy McGovern, David Jensen
Chi-squared: A simpler evaluation function for multiple-instance learning Technical Report
University of Massachusetts Amherst, Department of Computer Science, 2003.
@techreport{mcgovern2003chi,
  author      = {Amy McGovern and David Jensen},
  title       = {Chi-squared: A simpler evaluation function for multiple-instance learning},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  number      = {UM-CS-2003-014},
  year        = {2003},
  date        = {2003-01-01},
  url         = {https://web.cs.umass.edu/publication/docs/2003/UM-CS-2003-014.pdf},
  abstract    = {This paper introduces a new evaluation function for solving the multiple instance problem. Our approach makes use of the main idea of diverse density (Maron, 1998; Maron \& Lozano-P{\'e}rez, 1998) but finds the best concept using the chi-square statistic. This approach is simpler than diverse density and allows us to search more extensively by using properties of the contingency table to prune in a guaranteed manner. We demonstrate that this approach solves the multiple-instance problem as well as or better than diverse density and that the pruning mechanism allows chi-squared to identify the best concepts more quickly.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
2002
Hannah Blau, Neil Immerman, David Jensen
A visual language for querying and updating graphs Journal Article
In: University of Massachusetts Amherst Computer Science Technical Report 02-37, 2002.
@techreport{blau2002visual,
  author      = {Hannah Blau and Neil Immerman and David Jensen},
  title       = {A visual language for querying and updating graphs},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  number      = {02-37},
  year        = {2002},
  date        = {2002-01-01},
  url         = {https://kdl.cs.umass.edu/papers/blau-et-al-tr0237.pdf},
  abstract    = {QGRAPH is a new visual language for querying and updating graph databases. In QGRAPH the user can draw a query consisting of some vertices and edges with specified relations between their attributes. The response will be the collection of all subgraphs of the database that have the desired pattern. QGRAPH is very useful for knowledge discovery. QGRAPH has a powerful and elegant counting feature that enables the user to easily specify how many of certain objects and links should exist in order for a subgraph to match a query. QGRAPH has a clean formal semantics which we describe in detail. We show that QGRAPH has expressive power corresponding to a well-defined subset of FO(COUNT), i.e., first-order logic with counting quantifiers.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
David Jensen
Knowledge evaluation: Statistical evaluations Book Section
In: Handbook of data mining and knowledge discovery, pp. 475–489, 2002.
@incollection{jensen2002knowledge,
  author    = {David Jensen},
  title     = {Knowledge evaluation: Statistical evaluations},
  booktitle = {Handbook of Data Mining and Knowledge Discovery},
  editor    = {Kl{\"o}sgen, Willi and {\.Z}ytkow, Jan M.},
  pages     = {475--489},
  publisher = {Oxford University Press},
  year      = {2002},
  date      = {2002-01-01},
  url       = {https://dl.acm.org/doi/abs/10.5555/778212.778284},
  abstract  = {This article introduces basic features of error estimators, including bias, variance, and loss functions. It outlines the logic behind classical hypothesis tests and explains the special challenges faced by knowledge discovery algorithms that search large model spaces. It discusses the statistical effects of multiple comparison procedures (MCPs), and several methods to adjust for those effects, including mathematical adjustments, cross-validation, and randomization tests. Finally, it outlines the basic concepts behind overfitting reduction and pruning.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}
David Jensen, Jennifer Neville
Linkage and Autocorrelation Cause Feature Selection Bias in Relational Learning Proceedings Article
In: Machine Learning, Proceedings of the Nineteenth International Conference (ICML 2002), University of New South Wales, Sydney, Australia, July 8-12, 2002, pp. 259–266, Morgan Kaufmann, 2002.
@inproceedings{DBLP:conf/icml/JensenN02,
  author    = {David Jensen and Jennifer Neville},
  title     = {Linkage and Autocorrelation Cause Feature Selection Bias in Relational Learning},
  booktitle = {Machine Learning, Proceedings of the Nineteenth International Conference (ICML 2002), University of New South Wales, Sydney, Australia, July 8-12, 2002},
  pages     = {259--266},
  publisher = {Morgan Kaufmann},
  year      = {2002},
  date      = {2002-01-01},
  abstract  = {Two common characteristics of relational data sets — concentrated linkage and relational autocorrelation — can cause learning algorithms to be strongly biased toward certain features, irrespective of their predictive power. We identify these characteristics, define quantitative measures of their severity, and explain how they produce this bias. We show how linkage and autocorrelation affect a representative algorithm for feature selection by applying the algorithm to synthetic data and to data drawn from the Internet Movie Database.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, David Jensen
Supporting relational knowledge discovery: Lessons in architecture and algorithm design Proceedings Article
In: Proceedings of the Data Mining Lessons Learned Workshop, 19th International Conference on Machine Learning, 2002.
@inproceedings{neville2002supporting,
  author    = {Jennifer Neville and David Jensen},
  title     = {Supporting relational knowledge discovery: Lessons in architecture and algorithm design},
  booktitle = {Proceedings of the Data Mining Lessons Learned Workshop, 19th International Conference on Machine Learning},
  year      = {2002},
  date      = {2002-01-01},
  url       = {https://www.researchgate.net/publication/228425396_Supporting_relational_knowledge_discovery_Lessons_in_architecture_and_algorithm_design},
  abstract  = {This paper discusses a few of the lessons we have learned developing a relational knowledge discovery system. The relationships among data instances in relational data provide extra information for "mining." This additional information has the potential to greatly improve the quality of learned models. However, the dependencies among instances in the data also introduce new statistical challenges for learning algorithms. Relational data provide an ideal environment in which to examine a central challenge of knowledge discovery – its "chicken and egg" character. Data representation can impair the ability to learn important knowledge, but knowing the "right" data representation often requires just that knowledge. With relational data, representation is often a choice; many alternate views of the data provide abundant fodder for reasoning about transformations. In light of this, we discuss representation and design choices that support a co-evolutionary process of knowledge discovery and data transformation in relation data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Victor Lesser
Social pathologies of adaptive agents Proceedings Article
In: Safe Learning Agents: Papers from the 2002 AAAI Spring Symposium, pp. 13–19, AAAI Press, Menlo Park, CA, 2002.
@inproceedings{jensen2002social,
  author    = {David Jensen and Victor Lesser},
  title     = {Social pathologies of adaptive agents},
  booktitle = {Safe Learning Agents: Papers from the 2002 AAAI Spring Symposium},
  pages     = {13--19},
  publisher = {AAAI Press},
  address   = {Menlo Park, CA},
  year      = {2002},
  date      = {2002-01-01},
  url       = {https://www.aaai.org/Papers/Symposia/Spring/2002/SS-02-07/SS02-07-003.pdf},
  abstract  = {We describe, briefly characterize, and give examples of social pathologies of multiagent systems. Social pathologies are system behaviors in which two or more agents interact such that improvements in local performance do not improve system performance. They are widely observed in both natural and artificial systems.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Jennifer Neville
Autocorrelation and Linkage Cause Bias in Evaluation of Relational Learners Proceedings Article
In: Inductive Logic Programming, 12th International Conference, ILP 2002, Sydney, Australia, July 9-11, 2002. Revised Papers, pp. 101–116, Springer, 2002.
@inproceedings{DBLP:conf/ilp/JensenN02,
  author    = {David Jensen and Jennifer Neville},
  title     = {Autocorrelation and Linkage Cause Bias in Evaluation of Relational Learners},
  booktitle = {Inductive Logic Programming, 12th International Conference, ILP 2002, Sydney, Australia, July 9-11, 2002. Revised Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {2583},
  pages     = {101--116},
  publisher = {Springer},
  year      = {2002},
  date      = {2002-01-01},
  doi       = {10.1007/3-540-36468-4_7},
  url       = {https://doi.org/10.1007/3-540-36468-4_7},
  abstract  = {Two common characteristics of relational data sets -- concentrated linkage and relational auto-correlation -- can cause traditional methods of evaluation to greatly overestimate the accuracy of induced models on test sets. We identify these characteristics, define quantitative measures of their severity, and explain how they produce this bias. We show how linkage and autocorrelation affect estimates of model accuracy by applying FOIL to synthetic data and to data drawn from the Internet Movie Database. We show how a modified sampling procedure can eliminate the bias.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Jennifer Neville
Schemas and models Proceedings Article
In: Proceedings of the Multi-Relational Data Mining Workshop, 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Citeseer 2002.
@inproceedings{jensen2002schemas,
  author    = {David Jensen and Jennifer Neville},
  title     = {Schemas and models},
  booktitle = {Proceedings of the Multi-Relational Data Mining Workshop, 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year      = {2002},
  date      = {2002-01-01},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.3.9704&rep=rep1&type=pdf},
  abstract  = {We propose the Schema-Model Framework, which characterizes algorithms that learn probabilistic models from relational data as having two parts: a schema that identifies sets of related data items and groups them into relevant categories; and a model that allows probabilistic inference about those data items. The framework highlights how relational learning techniques must structure their own learning tasks in ways that propositional learners do not. The framework also highlights interesting directions for future research in relational learning.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen, Jennifer Neville
Data mining in networks Proceedings Article
In: Symposium on Dynamic Social Network Modelling and Analysis, National Academy of Sciences. National Academy Press, Washington, 2002.
@inproceedings{jensen2002data,
  author       = {David Jensen and Jennifer Neville},
  title        = {Data mining in networks},
  booktitle    = {Symposium on Dynamic Social Network Modelling and Analysis},
  organization = {National Academy of Sciences},
  publisher    = {National Academy Press},
  address      = {Washington, DC},
  year         = {2002},
  date         = {2002-01-01},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
2001
Foster Provost, David Jensen, Tim Oates
Progressive sampling Book Section
In: Instance Selection and Construction for Data Mining, pp. 151–170, Springer, Boston, MA, 2001.
@incollection{provost2001progressive,
  author    = {Foster Provost and David Jensen and Tim Oates},
  title     = {Progressive sampling},
  booktitle = {Instance Selection and Construction for Data Mining},
  pages     = {151--170},
  publisher = {Springer},
  address   = {Boston, MA},
  year      = {2001},
  date      = {2001-01-01},
  abstract  = {Having access to massive amounts of data does not necessarily imply that induction algorithms must use them all. Samples often provide the same accuracy with far less computational cost. However, the correct sample size rarely is obvious. We analyze methods for progressive sampling --- using progressively larger samples as long as model accuracy improves. We explore several notions of efficient progressive sampling. We analyze efficiency relative to induction with all instances; we show that a simple, geometric sampling schedule is asymptotically optimal, and we describe how best to take into account prior expectations of accuracy convergence. We then describe the issues involved in instantiating an efficient progressive sampler, including how to detect convergence. Finally, we provide empirical results comparing a variety of progressive sampling methods. We conclude that progressive sampling can be remarkably efficient.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}
David Jensen, Jennifer Neville
Correlation and sampling in relational data mining Proceedings Article
In: Proceedings of the 33rd Symposium on the Interface of Computing Science and Statistics, 2001.
@inproceedings{jensen2001correlation,
  author    = {David Jensen and Jennifer Neville},
  title     = {Correlation and sampling in relational data mining},
  booktitle = {Proceedings of the 33rd Symposium on the Interface of Computing Science and Statistics},
  year      = {2001},
  date      = {2001-01-01},
  url       = {https://www.cs.purdue.edu/homes/neville/papers/jensen-neville-interf2001.pdf},
  abstract  = {Data mining in relational data poses unique opportunities and challenges. In particular, relational autocorrelation provides an opportunity to increase the predictive power of statistical models, but it can also mislead investigators using traditional sampling approaches to evaluate data mining algorithms. We investigate the problem and provide new sampling approaches that correct the bias associated with traditional sampling.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Hannah Blau, Neil Immerman, David Jensen
A Visual Query Language for Relational Knowledge Discovery Miscellaneous
2001.
@misc{blau2001visual,
  author    = {Hannah Blau and Neil Immerman and David Jensen},
  title     = {A Visual Query Language for Relational Knowledge Discovery},
  publisher = {University of Massachusetts},
  year      = {2001},
  date      = {2001-01-01},
  url       = {https://scholarworks.umass.edu/cgi/viewcontent.cgi?article=1104&context=cs_faculty_pubs},
  abstract  = {QGRAPH is a visual query language for knowledge discovery in relational data. Using QGRAPH, a user can query and update relational data in ways that support data exploration, data transformation, and sampling. When combined with modeling algorithms, such as those developed in inductive logic programming and relational learning, the language assists analysis of relational data, such as data drawn from the Web, chemical structure-activity relationships, and social networks. Several features distinguish QGRAPH from other query languages such as SQL and Datalog. It is a visual language, so its queries are annotated graphs that reflect potential structures within a database. QGRAPH treats objects, links, and attributes as first-class entities, so its queries can dynamically alter a data schema by adding and deleting those entities. Finally, the language provides grouping and counting constructs that facilitate calculation of attributes that can capture features of local graph structure. We describe the language in detail, discuss key aspects of the underlying data model and implementation, and discuss several uses of QGRAPH for knowledge discovery.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
2000
David Jensen, Paul R. Cohen
Multiple Comparisons in Induction Algorithms Journal Article
In: Mach. Learn., vol. 38, no. 3, pp. 309–338, 2000.
@article{DBLP:journals/ml/JensenC00,
  author    = {David Jensen and Paul R. Cohen},
  title     = {Multiple Comparisons in Induction Algorithms},
  journal   = {Machine Learning},
  volume    = {38},
  number    = {3},
  pages     = {309--338},
  year      = {2000},
  date      = {2000-01-01},
  doi       = {10.1023/A:1007631014630},
  url       = {https://doi.org/10.1023/A:1007631014630},
  abstract  = {A single mechanism is responsible for three pathologies of induction algorithms: attribute selection errors, overfitting, and oversearching. In each pathology, induction algorithms compare multiple items based on scores from an evaluation function and select the item with the maximum score. We call this a multiple comparison procedure (MCP). We analyze the statistical properties of MCPs and show how failure to adjust for these properties leads to the pathologies. We also discuss approaches that can control pathological behavior, including Bonferroni adjustment, randomization testing, and cross-validation.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
David Jensen
Data Snooping, Dredging and Fishing: The Dark Side of Data Mining, A SIGKDD99 Panel Report Journal Article
In: SIGKDD Explor., vol. 1, no. 2, pp. 52–54, 2000.
@article{DBLP:journals/sigkdd/Jensen00,
  author    = {David Jensen},
  title     = {Data Snooping, Dredging and Fishing: The Dark Side of Data Mining, A {SIGKDD99} Panel Report},
  journal   = {SIGKDD Explorations},
  volume    = {1},
  number    = {2},
  pages     = {52--54},
  year      = {2000},
  date      = {2000-01-01},
  doi       = {10.1145/846183.846195},
  url       = {https://doi.org/10.1145/846183.846195},
  abstract  = {This article briefly describes a panel discussion at SIGKDD99.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Jennifer Neville, David Jensen
Iterative classification in relational data Proceedings Article
In: Proc. AAAI-2000 workshop on learning statistical models from relational data, pp. 13–20, 2000.
@inproceedings{neville2000iterative,
  author    = {Jennifer Neville and David Jensen},
  title     = {Iterative classification in relational data},
  booktitle = {Proceedings of the AAAI-2000 Workshop on Learning Statistical Models from Relational Data},
  pages     = {13--20},
  year      = {2000},
  date      = {2000-01-01},
  url       = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.3.5425&rep=rep1&type=pdf},
  abstract  = {Relational data offer a unique opportunity for improving the classification accuracy of statistical models. If two objects are related, inferring something about one object can aid inferences about the other. We present an iterative classification procedure that exploits this characteristic of relational data. This approach uses simple Bayesian classifiers in an iterative fashion, dynamically updating the attributes of some objects as inferences are made about related objects. Inferences made with high confidence in initial iterations are fed back into the data and are used to inform subsequent inferences about related objects. We evaluate the performance of this approach on a binary classification task. Experiments indicate that iterative classification significantly increases accuracy when compared to a single-pass approach.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
David Jensen
Knowledge Discovery from Graphs (Invited Talk) Proceedings Article
In: Graph Drawing, 8th International Symposium, GD 2000, Colonial Williamsburg, VA, USA, September 20-23, 2000, Proceedings, pp. 170, Springer, 2000.
@inproceedings{DBLP:conf/gd/Jensen00,
  author    = {David Jensen},
  title     = {Knowledge Discovery from Graphs (Invited Talk)},
  booktitle = {Graph Drawing, 8th International Symposium, GD 2000, Colonial Williamsburg, VA, USA, September 20-23, 2000, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1984},
  pages     = {170},
  publisher = {Springer},
  year      = {2000},
  date      = {2000-01-01},
  doi       = {10.1007/3-540-44541-2_16},
  url       = {https://doi.org/10.1007/3-540-44541-2_16},
  abstract  = {Knowledge discovery is the process of discovering useful and previously unknown knowledge by analyzing large databases. Knowledge discovery is also sometimes called “data mining” or “applied machine learning.” A new generation of knowledge discovery tools are beginning to address data that can be expressed as large graphs. Example applications include fraud detection in telecommunication networks and classifying Web pages based on hyperlink structure. These new technologies for knowledge discovery are becoming increasingly relevant to graph drawing. Specifically, graph drawing can aid the process of knowledge discovery by providing visualizations that reveal useful patterns in the data. Conversely, knowledge discovery can provide guidance for graph drawing by identifying recurring substructures or by classifying nodes into distinct types. Attempts to exploit the synergy between the two fields raises interesting new research questions. How should knowledge about a domain affect the drawing of graphs about that domain? What types of knowledge are most easily discovered using visualization, as opposed to automated statistical algorithms? These questions were posed in the context of several examples of knowledge discovery applied to large graphical data sets.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
P Utgoff, Victor Lesser, David Jensen
Inferring task structure from data Miscellaneous
2000.
@techreport{utgoff2000inferring,
  author      = {P Utgoff and Victor Lesser and David Jensen},
  title       = {Inferring task structure from data},
  institution = {University of Massachusetts, Department of Computer Science},
  number      = {UM-CS-2000-054},
  year        = {2000},
  date        = {2000-01-01},
  url         = {https://web.cs.umass.edu/publication/docs/2000/UM-CS-2000-054.pdf},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Lise Getoor, David Jensen
Learning Statistical Models from Relational Data: Papers from the AAAI Workshop Miscellaneous
2000.
@misc{getoor2000learning,
  author    = {Lise Getoor and David Jensen},
  title     = {Learning Statistical Models from Relational Data: Papers from the {AAAI} Workshop},
  publisher = {AAAI Press},
  year      = {2000},
  date      = {2000-01-01},
  url       = {https://aaai.org/Papers/Workshops/2000/WS-00-06/WS-00-06.pdf},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
1998
David Jensen, H Goldberg
AAAI Fall Symposium on AI and Link Analysis Miscellaneous
1998.
@misc{jensen1998aaai,
  author    = {David Jensen and H Goldberg},
  title     = {{AAAI} Fall Symposium on {AI} and Link Analysis},
  publisher = {AAAI Press},
  address   = {Menlo Park, CA},
  year      = {1998},
  date      = {1998-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}