Publications Search
Matthew Rattigan, Marc Maier, David Jensen
Using structure indices for efficient approximation of network properties Proceedings Article
In: Proceedings of the Twelfth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Philadelphia, PA, USA, August 20-23, 2006, pp. 357–366, ACM, 2006.
Abstract | Links | BibTeX | Tags: Navigation and Routing in Networks
@inproceedings{DBLP:conf/kdd/RattiganMJ06,
  author    = {Matthew Rattigan and Marc Maier and David Jensen},
  title     = {Using structure indices for efficient approximation of network properties},
  booktitle = {Proceedings of the Twelfth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Philadelphia, PA, USA, August 20-23, 2006},
  pages     = {357--366},
  publisher = {ACM},
  year      = {2006},
  date      = {2006-01-01},
  doi       = {10.1145/1150402.1150443},
  url       = {https://doi.org/10.1145/1150402.1150443},
  abstract  = {Statistics on networks have become vital to the study of relational data drawn from areas such as bibliometrics, fraud detection, bioinformatics, and the Internet. Calculating many of the most important measures --- such as betweenness centrality, closeness centrality, and graph diameter --- requires identifying short paths in these networks. However, finding these short paths can be intractable for even moderate-size networks. We introduce the concept of a network structure index (NSI), a composition of (1) a set of annotations on every node in the network and (2) a function that uses the annotations to estimate graph distance between pairs of nodes. We present several varieties of NSIs, examine their time and space complexity, and analyze their performance on synthetic and real data sets. We show that creating an NSI for a given network enables extremely efficient and accurate estimation of a wide variety of network statistics on that network.},
  keywords  = {Navigation and Routing in Networks},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ozgur Simsek, David Jensen
A probabilistic framework for decentralized search in networks Proceedings Article
In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), 2005.
BibTeX | Tags:
@inproceedings{simsek2005probabilistic,
  author    = {Ozgur Simsek and David Jensen},
  title     = {A probabilistic framework for decentralized search in networks},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  year      = {2005},
  date      = {2005-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Stephen Hart, Roderic A. Grupen, David Jensen
A Relational Representation for Procedural Task Knowledge Proceedings Article
In: Proceedings, The Twentieth National Conference on Artificial Intelligence and the Seventeenth Innovative Applications of Artificial Intelligence Conference, July 9-13, 2005, Pittsburgh, Pennsylvania, USA, pp. 1280–1285, AAAI Press / The MIT Press, 2005.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/aaai/HartGJ05,
  author    = {Stephen Hart and Roderic A. Grupen and David Jensen},
  title     = {A Relational Representation for Procedural Task Knowledge},
  booktitle = {Proceedings, The Twentieth National Conference on Artificial Intelligence and the Seventeenth Innovative Applications of Artificial Intelligence Conference, July 9-13, 2005, Pittsburgh, Pennsylvania, USA},
  pages     = {1280--1285},
  publisher = {AAAI Press / The MIT Press},
  year      = {2005},
  date      = {2005-01-01},
  url       = {http://www.aaai.org/Library/AAAI/2005/aaai05-203.php},
  abstract  = {This paper proposes a methodology for learning joint probability estimates regarding the effect of sensorimotor features on the predicated quality of desired behavior. These relationships can then be used to choose actions that will most likely produce success. Relational dependency networks are used to learn statistical models of procedural task knowledge. An example task expert for picking up objects is learned through actual experience with a humanoid robot. We believe that this approach is widely applicable and has great potential to allow a robot to autonomously determine which features in the world are salient and should be used to recommend policy for action.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Andrew Fast, David Jensen, Brian Neil Levine
Creating social networks to improve peer-to-peer networking Proceedings Article
In: Proceedings of the Eleventh ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Chicago, Illinois, USA, August 21-24, 2005, pp. 568–573, ACM, 2005.
Abstract | Links | BibTeX | Tags: Navigation and Routing in Networks
@inproceedings{DBLP:conf/kdd/FastJL05,
  author    = {Andrew Fast and David Jensen and Brian Neil Levine},
  title     = {Creating social networks to improve peer-to-peer networking},
  booktitle = {Proceedings of the Eleventh ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Chicago, Illinois, USA, August 21-24, 2005},
  pages     = {568--573},
  publisher = {ACM},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1145/1081870.1081938},
  url       = {https://doi.org/10.1145/1081870.1081938},
  abstract  = {We use knowledge discovery techniques to guide the creation of efficient overlay networks for peer-to-peer file sharing. An overlay network specifies the logical connections among peers in a network and is distinct from the physical connections of the network. It determines the order in which peers will be queried when a user is searching for a specific file. To better understand the role of the network overlay structure in the performance of peer-to-peer file sharing protocols, we compare several methods for creating overlay networks. We analyze the networks using data from a campus network for peer-to-peer file sharing that recorded anonymized data on 6,528 users sharing 291,925 music files over an 81-day period. We propose a novel protocol for overlay creation based on a model of user preference identified by latent-variable clustering with hierarchical Dirichlet processes (HDPs). Our simulations and empirical studies show that the clusters of songs created by HDPs effectively model user behavior and can be used to create desirable network overlays that outperform alternative approaches.},
  keywords  = {Navigation and Routing in Networks},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Ozgur Simsek, David Jensen
Decentralized Search in Networks Using Homophily and Degree Disparity Proceedings Article
In: IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005, pp. 304–310, Professional Book Center, 2005.
Abstract | Links | BibTeX | Tags:
@inproceedings{SimsekJ05,
  author    = {Ozgur Simsek and David Jensen},
  title     = {Decentralized Search in Networks Using Homophily and Degree Disparity},
  booktitle = {IJCAI-05, Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence, Edinburgh, Scotland, UK, July 30 - August 5, 2005},
  pages     = {304--310},
  publisher = {Professional Book Center},
  year      = {2005},
  date      = {2005-01-01},
  url       = {http://ijcai.org/Proceedings/05/Papers/1509.pdf},
  abstract  = {We propose a new algorithm for finding a target node in a network whose topology is known only locally. We formulate this task as a problem of decision making under uncertainty and use the statistical properties of the graph to guide this decision. This formulation uses the homophily and degree structure of the network simultaneously, differentiating our algorithm from those previously proposed in the literature. Because homophily and degree disparity are characteristics frequently observed in real-world networks, the algorithm we propose is applicable to a wide variety of networks, including two families that have received much recent attention: small-world and scale-free networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Brian Gallagher, David Jensen, Brian Neil Levine, others
Explaining routing performance in disruption tolerant networks Journal Article
In: University of Massachusetts Amherst, Technical Report, 2005.
Abstract | Links | BibTeX | Tags:
@techreport{gallagher2005explaining,
  author      = {Brian Gallagher and David Jensen and Brian Neil Levine and others},
  title       = {Explaining routing performance in disruption tolerant networks},
  institution = {University of Massachusetts Amherst},
  year        = {2005},
  date        = {2005-01-01},
  url         = {https://kdl.cs.umass.edu/papers/gallagher-et-al-tr0557.pdf},
  abstract    = {Many routing algorithms for both traditional and ad hoc networks require a complete and contemporaneous path of peers from source to destination. Disruption Tolerant Networks (DTNs) attempt to deliver messages despite a frequently disconnected link layer (e.g., due to peer mobility, limited communication range, and power management limitations). While several algorithms have been proposed for routing in DTNs, this has not yet led to an understanding of the fundamental issues underlying routing performance in these networks. In this paper we explain the performance of routing algorithms for DTNs in terms of their ability to utilize a set of three no-cost drop criteria. The criteria are necessary and sufficient for identifying messages that may be dropped without degrading the overall delivery rate. The criteria identify whether a route exists with sufficient bandwidth, whether a message has been delivered already, and whether some other peer will deliver the message. We also use the criteria to design a new routing algorithm that we call NoCostDrop, which appears to be the first routing algorithm to take advantage of all three criteria. We show that NoCostDrop outperforms existing algorithms over a wide range of network conditions. Most novel in our approach is the use of a distributed list of delivered messages, which can easily be combined with existing routing algorithms to improve},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Jennifer Neville, David Jensen
Leveraging Relational Autocorrelation with Latent Group Models Proceedings Article
In: Proceedings of the 5th IEEE International Conference on Data Mining (ICDM 2005), 27-30 November 2005, Houston, Texas, USA, pp. 322–329, IEEE Computer Society, 2005.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{DBLP:conf/icdm/NevilleJ05,
  author    = {Jennifer Neville and David Jensen},
  title     = {Leveraging Relational Autocorrelation with Latent Group Models},
  booktitle = {Proceedings of the 5th IEEE International Conference on Data Mining (ICDM 2005), 27-30 November 2005, Houston, Texas, USA},
  pages     = {322--329},
  publisher = {IEEE Computer Society},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1109/ICDM.2005.89},
  url       = {https://doi.org/10.1109/ICDM.2005.89},
  abstract  = {The presence of autocorrelation provides a strong motivation for using relational learning and inference techniques. Autocorrelation is a statistical dependence between the values of the same variable on related entities and is a nearly ubiquitous characteristic of relational data sets. Recent research has explored the use of collective inference techniques to exploit this phenomenon. These techniques achieve significant performance gains by modeling observed correlations among class labels of related instances, but the models fail to capture a frequent cause of autocorrelation - the presence of underlying groups that influence the attributes on a set of entities. We propose a latent group model (LGM) for relational data, which discovers and exploits the hidden structures responsible for the observed autocorrelation among class labels. Modeling the latent group structure improves model performance, increases inference efficiency, and enhances our understanding of the datasets. We evaluate performance on three relational classification tasks and show that LGM outperforms models that ignore latent group structure, particularly when there is little information with which to seed inference.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
George Dean Bissias, Marc Liberatore, David Jensen, Brian Neil Levine
Privacy Vulnerabilities in Encrypted HTTP Streams Proceedings Article
In: Privacy Enhancing Technologies, 5th International Workshop, PET 2005, Cavtat, Croatia, May 30-June 1, 2005, Revised Selected Papers, pp. 1–11, Springer, 2005.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/pet/BissiasLJL05,
  author    = {George Dean Bissias and Marc Liberatore and David Jensen and Brian Neil Levine},
  title     = {Privacy Vulnerabilities in Encrypted HTTP Streams},
  booktitle = {Privacy Enhancing Technologies, 5th International Workshop, PET 2005, Cavtat, Croatia, May 30-June 1, 2005, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {3856},
  pages     = {1--11},
  publisher = {Springer},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1007/11767831_1},
  url       = {https://doi.org/10.1007/11767831_1},
  abstract  = {Encrypting traffic does not prevent an attacker from performing some types of traffic analysis. We present a straightforward traffic analysis attack against encrypted HTTP streams that is surprisingly effective in identifying the source of the traffic. An attacker starts by creating a profile of the statistical characteristics of web requests from interesting sites, including distributions of packet sizes and inter-arrival times. Later, candidate encrypted streams are compared against these profiles. In our evaluations using real traffic, we find that many web sites are subject to this attack. With a training period of 24 hours and a 1 hour delay afterwards, the attack achieves only 23\% accuracy. However, an attacker can easily pre-determine which of trained sites are easily identifiable. Accordingly, against 25 such sites, the attack achieves 40\% accuracy; with three guesses, the attack achieves 100\% accuracy for our data. Longer delays after training decrease accuracy, but not substantially. We also propose some countermeasures and improvements to our current method. Previous work analyzed SSL traffic to a proxy, taking advantage of a known flaw in SSL that reveals the length of each web object. In contrast, we exploit the statistical characteristics of web streams that are encrypted as a single flow, which is the case with WEP/WPA, IPsec, and SSH tunnels.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
George Dean Bissias, Marc Liberatore, David Jensen, Brian Neil Levine
Privacy vulnerabilities in encrypted HTTP streams Proceedings Article
In: International Workshop on Privacy Enhancing Technologies, pp. 1–11, Springer 2005.
Abstract | Links | BibTeX | Tags: Privacy and Networks
@inproceedings{bissias2005privacy,
  author    = {George Dean Bissias and Marc Liberatore and David Jensen and Brian Neil Levine},
  title     = {Privacy vulnerabilities in encrypted HTTP streams},
  booktitle = {International Workshop on Privacy Enhancing Technologies},
  pages     = {1--11},
  publisher = {Springer},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1007/11767831_1},
  url       = {https://scholarworks.umass.edu/cgi/viewcontent.cgi?referer=https://www.google.com/&httpsredir=1&article=1097&context=cs_faculty_pubs},
  abstract  = {Encrypting traffic does not prevent an attacker from performing some types of traffic analysis. We present a straightforward traffic analysis attack against encrypted HTTP streams that is surprisingly effective in identifying the source of the traffic. An attacker starts by creating a profile of the statistical characteristics of web requests from interesting sites, including distributions of packet sizes and inter-arrival times. Later, candidate encrypted streams are compared against these profiles. In our evaluations using real traffic, we find that many web sites are subject to this attack. With a training period of 24 hours and a 1 hour delay afterwards, the attack achieves only 23\% accuracy. However, an attacker can easily pre-determine which of trained sites are easily identifiable. Accordingly, against 25 such sites, the attack achieves 40\% accuracy; with three guesses, the attack achieves 100\% accuracy for our data. Longer delays after training decrease accuracy, but not substantially. We also propose some countermeasures and improvements to our current method. Previous work analyzed SSL traffic to a proxy, taking advantage of a known flaw in SSL that reveals the length of each web object. In contrast, we exploit the statistical characteristics of web streams that are encrypted as a single flow, which is the case with WEP/WPA, IPsec, and SSH tunnels.},
  keywords  = {Privacy and Networks},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Matthew Rattigan, David Jensen
The case for anomalous link detection Proceedings Article
In: Proceedings of the 4th international workshop on multi-relational mining, pp. 69–74, 2005.
Abstract | Links | BibTeX | Tags:
@inproceedings{rattigan2005case,
  author    = {Matthew Rattigan and David Jensen},
  title     = {The case for anomalous link detection},
  booktitle = {Proceedings of the 4th international workshop on multi-relational mining},
  pages     = {69--74},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1145/1090193.1090205},
  url       = {https://dl.acm.org/doi/pdf/10.1145/1090193.1090205},
  abstract  = {In this paper, we describe the challenges inherent to the Link Prediction (LP) problem in multirelational data mining, and explore the reasons why many LP models have performed poorly. We present the alternate (and complimentary) task of Anomalous Link Discovery (ALD) and qualitatively demonstrate the effectiveness of simple LP models for the ALD task.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Matthew Rattigan, David Jensen
The case for anomalous link discovery Journal Article
In: SIGKDD Explor., vol. 7, no. 2, pp. 41–47, 2005.
Abstract | Links | BibTeX | Tags:
@article{DBLP:journals/sigkdd/RattiganJ05,
  author    = {Matthew Rattigan and David Jensen},
  title     = {The case for anomalous link discovery},
  journal   = {SIGKDD Explorations},
  volume    = {7},
  number    = {2},
  pages     = {41--47},
  year      = {2005},
  date      = {2005-01-01},
  doi       = {10.1145/1117454.1117460},
  url       = {https://doi.org/10.1145/1117454.1117460},
  abstract  = {In this paper, we describe the challenges inherent to the task of link prediction, and we analyze one reason why many link prediction models perform poorly. Specifically, we demonstrate the effects of the extremely large class skew associated with the link prediction task. We then present an alternate task --- anomalous link discovery (ALD) --- and qualitatively demonstrate the effectiveness of simple link prediction models for the ALD task. We show that even the simplistic structural models that perform poorly on link prediction can perform quite well at the ALD task.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Jennifer Neville, Ozgur Simsek, David Jensen
Autocorrelation and relational learning: Challenges and opportunities Technical Report
University of Massachusetts Amherst, Department of Computer Science, 2004.
Abstract | Links | BibTeX | Tags:
@techreport{neville2004autocorrelation,
  author      = {Jennifer Neville and Ozgur Simsek and David Jensen},
  title       = {Autocorrelation and relational learning: Challenges and opportunities},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  year        = {2004},
  date        = {2004-01-01},
  url         = {http://www.cs.umd.edu/projects/srl2004/Papers/neville.pdf},
  abstract    = {Autocorrelation, a common characteristic of many datasets, refers to correlation between values of the same variable on related objects. It violates the critical assumption of instance independence that underlies most conventional models. In this paper, we provide an overview of research on autocorrelation in a number of fields with an emphasis on implications for relational learning, and outline a number of challenges and opportunities for model learning and inference.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Jennifer Neville, David Jensen
Dependency Networks for Relational Data Proceedings Article
In: Proceedings of the 4th IEEE International Conference on Data Mining (ICDM 2004), 1-4 November 2004, Brighton, UK, pp. 170–177, IEEE Computer Society, 2004.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/icdm/NevilleJ04,
  author    = {Jennifer Neville and David Jensen},
  title     = {Dependency Networks for Relational Data},
  booktitle = {Proceedings of the 4th IEEE International Conference on Data Mining (ICDM 2004), 1-4 November 2004, Brighton, UK},
  pages     = {170--177},
  publisher = {IEEE Computer Society},
  year      = {2004},
  date      = {2004-01-01},
  doi       = {10.1109/ICDM.2004.10101},
  url       = {https://doi.org/10.1109/ICDM.2004.10101},
  abstract  = {Instance independence is a critical assumption of traditional machine learning methods contradicted by many relational datasets. For example, in scientific literature datasets, there are dependencies among the references of a paper. Recent work on graphical models for relational data has demonstrated significant performance gains for models that exploit the dependencies among instances. In this paper, we present relational dependency networks (RDNs), a new form of graphical model capable of reasoning with such dependencies in a relational setting. We describe the details of RDN models and outline their strengths, most notably the ability to learn and reason with cyclic relational dependencies. We present RDN models learned on a number of real-world datasets, and evaluate the models in a classification context, showing significant performance improvements. In addition, we use synthetic data to evaluate the quality of model learning and inference procedures.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, Micah Adler, David Jensen
Spectral clustering with links and attributes Technical Report
University of Massachusetts Amherst, Department of Computer Science, 2004.
Abstract | Links | BibTeX | Tags:
@techreport{neville2004spectral,
  author      = {Jennifer Neville and Micah Adler and David Jensen},
  title       = {Spectral clustering with links and attributes},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  year        = {2004},
  date        = {2004-01-01},
  url         = {https://www.cs.purdue.edu/homes/neville/papers/neville-et-al-tr0442.pdf},
  abstract    = {If relational data contain communities—groups of inter-related items with similar attribute values—a clustering technique that considers attribute information and the structure of relations simultaneously should produce more meaningful clusters than those produced by considering attributes alone. We investigate this hypothesis in the context of a spectral graph partitioning technique, considering a number of hybrid similarity metrics that combine both sources of information. Through simulation, we find that two of the hybrid metrics achieve superior performance over a wide range of data characteristics. We analyze the spectral decomposition algorithm from a statistical perspective and show that the successful hybrid metrics exaggerate the separation between cluster similarity values, at the expense of increased variance. We cluster several relational datasets using the best hybrid metric and show that the resulting clusters exhibit significant community structure, and that they significantly improve performance in a related classification task.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
David Jensen, Jennifer Neville, Brian Gallagher
Why collective inference improves relational classification Proceedings Article
In: Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Seattle, Washington, USA, August 22-25, 2004, pp. 593–598, ACM, 2004.
Abstract | Links | BibTeX | Tags: Statistical Relational Learning
@inproceedings{DBLP:conf/kdd/JensenNG04,
  author    = {David Jensen and Jennifer Neville and Brian Gallagher},
  title     = {Why collective inference improves relational classification},
  booktitle = {Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Seattle, Washington, USA, August 22-25, 2004},
  pages     = {593--598},
  publisher = {ACM},
  year      = {2004},
  date      = {2004-01-01},
  doi       = {10.1145/1014052.1014125},
  url       = {https://doi.org/10.1145/1014052.1014125},
  abstract  = {Procedures for collective inference make simultaneous statistical judgments about the same variables for a set of related data instances. For example, collective inference could be used to simultaneously classify a set of hyperlinked documents or infer the legitimacy of a set of related financial transactions. Several recent studies indicate that collective inference can significantly reduce classification error when compared with traditional inference techniques. We investigate the underlying mechanisms for this error reduction by reviewing past work on collective inference and characterizing different types of statistical models used for making inference in relational data. We show important differences among these models, and we characterize the necessary and sufficient conditions for reduced classification error based on experiments with real and simulated data.},
  keywords  = {Statistical Relational Learning},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Andrew McCallum, David Jensen
A note on the unification of information extraction and data mining using conditional-probability, relational models Journal Article
In: Computer Science Department Faculty Publication Series, pp. 42, 2003.
Abstract | Links | BibTeX | Tags:
@article{mccallum2003note,
  author    = {Andrew McCallum and David Jensen},
  title     = {A note on the unification of information extraction and data mining using conditional-probability, relational models},
  journal   = {Computer Science Department Faculty Publication Series},
  pages     = {42},
  year      = {2003},
  date      = {2003-01-01},
  url       = {http://ciir.cs.umass.edu/pubfiles/ir-306.pdf},
  abstract  = {Although information extraction and data mining appear together in many applications, their interface in most current systems would better be described as serial juxtaposition than as tight integration. Information extraction populates slots in a database by identifying relevant subsequences of text, but is usually not aware of the emerging patterns and regularities in the database. Data mining methods begin from a populated database, and are often unaware of where the data came from, or its inherent uncertainties. The result is that the accuracy of both suffers, and significant mining of complex text sources is beyond reach. This position paper proposes the use of unified, relational, undirected graphical models for information extraction and data mining, in which extraction decisions and data-mining decisions are made in the same probabilistic “currency,” with a common inference procedure—each component thus being able to make up for the weaknesses of the other and therefore improving the performance of both. For example, data mining run on a partially-filled database can find patterns that provide “top-down” accuracy-improving constraints to information extraction. Information extraction can provide a much richer set of “bottom-up” hypotheses to data mining if the mining is set up to handle additional uncertainty information from extraction. We outline an approach and describe several models, but provide no experimental results.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
David Jensen, Jennifer Neville, Michael Hay
Avoiding Bias when Aggregating Relational Data with Degree Disparity Proceedings Article
In: Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA, pp. 274–281, AAAI Press, 2003.
Abstract | Links | BibTeX | Tags:
@inproceedings{DBLP:conf/icml/JensenNH03,
  author    = {David Jensen and Jennifer Neville and Michael Hay},
  title     = {Avoiding Bias when Aggregating Relational Data with Degree Disparity},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA},
  pages     = {274--281},
  publisher = {AAAI Press},
  year      = {2003},
  date      = {2003-01-01},
  url       = {http://www.aaai.org/Library/ICML/2003/icml03-038.php},
  abstract  = {A common characteristic of relational data sets --- degree disparity --- can lead relational learning algorithms to discover misleading correlations. Degree disparity occurs when the frequency of a relation is correlated with the values of the target variable. In such cases, aggregation functions used by many relational learning algorithms will result in misleading correlations and added complexity in models. We examine this problem through a combination of simulations and experiments. We show how two novel hypothesis testing procedures can adjust for the effects of using aggregation functions in the presence of degree disparity.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Amy McGovern, David Jensen
Chi-squared: A simpler evaluation function for multiple-instance learning Technical Report
University of Massachusetts Amherst, Department of Computer Science, 2003.
Abstract | Links | BibTeX | Tags:
@techreport{mcgovern2003chi,
  author      = {Amy McGovern and David Jensen},
  title       = {Chi-squared: A simpler evaluation function for multiple-instance learning},
  institution = {University of Massachusetts Amherst, Department of Computer Science},
  number      = {UM-CS-2003-014},
  year        = {2003},
  date        = {2003-01-01},
  url         = {https://web.cs.umass.edu/publication/docs/2003/UM-CS-2003-014.pdf},
  abstract    = {This paper introduces a new evaluation function for solving the multiple instance problem. Our approach makes use of the main idea of diverse density (Maron, 1998; Maron \& Lozano-P{\'e}rez, 1998) but finds the best concept using the chi-square statistic. This approach is simpler than diverse density and allows us to search more extensively by using properties of the contingency table to prune in a guaranteed manner. We demonstrate that this approach solves the multiple-instance problem as well as or better than diverse density and that the pruning mechanism allows chi-squared to identify the best concepts more quickly.},
  keywords    = {},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Jennifer Neville, Micah Adler, David Jensen
Clustering relational data using attribute and link information Proceedings Article
In: Proceedings of the Text Mining and Link Analysis Workshop, 18th International Joint Conference on Artificial Intelligence, pp. 9–15, Citeseer, 2003.
Abstract | Links | BibTeX | Tags:
@inproceedings{neville2003clustering,
  author    = {Jennifer Neville and Micah Adler and David Jensen},
  title     = {Clustering relational data using attribute and link information},
  booktitle = {Proceedings of the Text Mining and Link Analysis Workshop, 18th International Joint Conference on Artificial Intelligence},
  pages     = {9--15},
  year      = {2003},
  date      = {2003-01-01},
  url       = {https://www.cs.purdue.edu/homes/neville/papers/neville-et-al-textlink2003.pdf},
  abstract  = {Clustering is a descriptive task that seeks to identify natural groupings in data. Relational data offer a wealth of information for identifying groups of similar items. Both attribute information and the structure of relationships can be used for clustering. Graph partitioning and data clustering techniques can be applied independently to relational data but a technique that exploits both sources of information simultaneously may produce more meaningful clusters. This paper will describe our work synthesizing data clustering and graph partitioning techniques into improved clustering algorithms for relational data.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jennifer Neville, David Jensen
Collective classification with relational dependency networks Proceedings Article
In: Workshop on Multi-Relational Data Mining (MRDM-2003), pp. 77, 2003.
Abstract | Links | BibTeX | Tags:
@inproceedings{neville2003collective,
  author    = {Jennifer Neville and David Jensen},
  title     = {Collective classification with relational dependency networks},
  booktitle = {Workshop on Multi-Relational Data Mining (MRDM-2003)},
  pages     = {77},
  year      = {2003},
  date      = {2003-01-01},
  url       = {https://www.cs.purdue.edu/homes/neville/papers/neville-jensen-mrdm2003.pdf},
  abstract  = {Collective classification models exploit the dependencies in a network of objects to improve predictions. For example, in a network of web pages, the topic of a page may depend on the topics of hyperlinked pages. A relational model capable of expressing and reasoning with such dependencies should achieve superior performance to relational models that ignore such dependencies. In this paper, we present relational dependency networks (RDNs), extending recent work in dependency networks to a relational setting. RDNs are a collective classification model that offers simple parameter estimation and efficient structure learning. On two real-world data sets, we compare RDNs to ordinary classification with relational probability trees and show that collective classification improves performance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}