Paolo Missier Publications

2019 (7)

Why-Diff: Exploiting Provenance to Understand Outcome Differences from non-identical Reproduced Workflows. Thavasimani, P.; Cala, J.; and Missier, P. IEEE Access,1–1. 2019.

Why-Diff: Exploiting Provenance to Understand Outcome Differences from non-identical Reproduced Workflows [link]

Paper doi link bibtex abstract 5 downloads

% NOTE(review): pages = 1--1 looks like an early-access placeholder; confirm the final volume/pages.
@article{8662612,
  author   = {Thavasimani, Priyaa and Cala, Jacek and Missier, Paolo},
  title    = {{Why-Diff: Exploiting Provenance to Understand Outcome Differences from non-identical Reproduced Workflows}},
  journal  = {IEEE Access},
  pages    = {1--1},
  year     = {2019},
  issn     = {2169-3536},
  doi      = {10.1109/ACCESS.2019.2903727},
  url      = {https://ieeexplore.ieee.org/document/8662612/},
  keywords = {Alzheimer's disease,Big Data,Databases,Genetics,Libraries,Provenance,Reproducibility,Sentiment analysis,Software,Why-Diff,Workflow,eScience Central},
  abstract = {Data analytics processes such as scientific workflows tend to be executed repeatedly, with varying dependencies and input datasets. The case has been made in the past for tracking the provenance of the final information products through the workflow steps, to enable their reproducibility. In this work, we explore the hypothesis that provenance traces recorded during execution are also instrumental to answering questions about the observed differences between sets of results obtained from similar but not identical workflow configurations. Such differences in configurations may be introduced deliberately, i.e., to explore process variations, or accidentally, typically as the result of porting efforts or of changes in the computing environment. Using a commonly used workflow programming model as a reference, we consider both structural variations in the workflows as well as variations within their individual components. Our whydiff algorithm compares the graph representations of two provenance traces derived from two workflow variations. It produces a delta graph that can be used to produce human-readable explanations of the impact of workflow differences on observed output differences. We report on our Neo4j graph database. We also report explanations of difference between workflow results using a suite of synthetic workflows as well as real-world workflows.}
}

Increasing phenotypic annotation improves the diagnostic rate of exome sequencing in a rare neuromuscular disorder. Thompson, R.; Papakonstantinou Ntalis, A.; Beltran, S.; Tapf, A.; de Paula Estephan, E.; Polavarapu, K.; ’t Hoen, P. A. C.; Missier, P.; and Lochmüller, H. Human Mutation. 2019.

Increasing phenotypic annotation improves the diagnostic rate of exome sequencing in a rare neuromuscular disorder [pdf]

Paper doi link bibtex abstract

% NOTE(review): `eprint` holds a publisher PDF URL rather than an archive identifier; kept as-is, confirm intended use.
@article{doi:10.1002/humu.23792,
  author   = {Thompson, Rachel and Papakonstantinou Ntalis, Anastasios and Beltran, Sergi and Tapf, Ana and de Paula Estephan, Eduardo and Polavarapu, Kiran and 't Hoen, Peter A. C. and Missier, Paolo and Lochm{\"u}ller, Hanns},
  title    = {Increasing phenotypic annotation improves the diagnostic rate of exome sequencing in a rare neuromuscular disorder},
  journal  = {Human Mutation},
  year     = {2019},
  doi      = {10.1002/humu.23792},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/humu.23792},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/humu.23792},
  keywords = {congenital myasthenic syndromes, deep phenotyping, diagnosis, exome sequencing, Exomiser, human phenotype ontology, variant prioritization},
  abstract = {Phenotype-based filtering and prioritization contribute to the interpretation of genetic variants detected in exome sequencing. However, it is currently unclear how extensive this phenotypic annotation should be. In this study, we compare methods for incorporating phenotype into the interpretation process and assess the extent to which phenotypic annotation aids prioritization of the correct variant. Using a cohort of 29 patients with congenital myasthenic syndromes with causative variants in known or newly discovered disease genes, exome data and the Human Phenotype Ontology (HPO)-coded phenotypic profiles, we show that gene-list filters created from phenotypic annotations perform similarly to curated disease-gene virtual panels. We use Exomiser, a prioritization tool incorporating phenotypic comparisons, to rank candidate variants while varying phenotypic annotation. Analyzing 3,712 combinations, we show that increasing phenotypic annotation improved prioritization of the causative variant, from 62\% ranked first on variant alone to 90\% with seven HPO annotations. We conclude that any HPO-based phenotypic annotation aids variant discovery and that annotation with over five terms is recommended in our context. Although focused on a constrained cohort, this provides real-world validation of the utility of phenotypic annotation for variant prioritization. Further research is needed to extend this concept to other diseases and more diverse cohorts.},
  urlpaper = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/PhenotypicannotationHumanMutation2019.pdf}
}

Parametrised Data Sampling for Fairness Optimisation. González Zelaya, C. V.; Missier, P.; and Prangle, D. In Proceedings of Explainable AI for Fairness, Accountability & Transparency Workshop (KDD XAI), 2019. ACM

Parametrised Data Sampling for Fairness Optimisation [pdf]

Paper link bibtex

@inproceedings{zelaya2019correction,
  author       = {Gonz{\'a}lez Zelaya, Carlos Vladimiro and Missier, Paolo and Prangle, Dennis},
  title        = {Parametrised Data Sampling for Fairness Optimisation},
  booktitle    = {Proceedings of Explainable AI for Fairness, Accountability \& Transparency Workshop (KDD XAI)},
  year         = {2019},
  organization = {ACM},
  urlpaper     = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/kddSubmission.pdf}
}

Toward a Decentralized, Trust-less Marketplace for Brokered IoT Data Trading using Blockchain. Bajoudah, S.; Dong, C.; and Missier, P. In Procs. 2nd IEEE International Conference on Blockchain (Blockchain 2019), Atlanta, USA, 2019. IEEE

Toward a Decentralized, Trust-less Marketplace for Brokered IoT Data Trading using Blockchain [pdf]

Paper link bibtex

@inproceedings{Bajoudah2019,
  author    = {Bajoudah, Shaimaa and Dong, Changyu and Missier, Paolo},
  title     = {{Toward a Decentralized, Trust-less Marketplace for Brokered IoT Data Trading using Blockchain}},
  booktitle = {Procs. 2nd IEEE International Conference on Blockchain (Blockchain 2019)},
  address   = {Atlanta, USA},
  publisher = {IEEE},
  year      = {2019},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/Decentralised_Marketplace_USA_Conference___Accepted_Version_.pdf}
}

Efficient Re-computation of Big Data Analytics Processes in the Presence of Changes: Computational Framework, Reference Architecture, and Applications. Missier, P.; and Cala, J. In Procs. IEEE Big Data Congress, Milano, Italy, 2019. IEEE

Paper link bibtex

@inproceedings{Missier2019,
  author        = {Missier, Paolo and Cala, Jacek},
  title         = {{Efficient Re-computation of Big Data Analytics Processes in the Presence of Changes: Computational Framework, Reference Architecture, and Applications}},
  booktitle     = {Procs. IEEE Big Data Congress},
  address       = {Milano, Italy},
  publisher     = {IEEE},
  year          = {2019},
  keywords      = {{\#}provenance,{\#}re-computation,{\#}workflow},
  mendeley-tags = {{\#}re-computation,{\#}workflow,{\#}provenance},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/PID5953159.pdf}
}

A customisable pipeline for continuously harvesting socially-minded Twitter users. Primo, F.; Missier, P.; Romanovsky, A.; Figueredo, M.; and Cacho, N. In Procs. ICWE'19, Daejeon, Korea, 2019.

Paper link bibtex abstract

@inproceedings{Primo2019,
  author        = {Primo, Flavio and Missier, Paolo and Romanovsky, Alexander and Figueredo, Mickael and Cacho, Nelio},
  title         = {{A customisable pipeline for continuously harvesting socially-minded Twitter users}},
  booktitle     = {Procs. ICWE'19},
  address       = {Daejeon, Korea},
  year          = {2019},
  url           = {https://arxiv.org/abs/1903.07061},
  keywords      = {online activism,twitter analytics},
  mendeley-tags = {twitter analytics,online activism},
  abstract      = {On social media platforms and Twitter in particular, specific classes of users such as influencers have been given satisfactory operational definitions in terms of network and content metrics. Others, for instance online activists, are not less important but their characterisation still requires experimenting. We make the hypothesis that such interesting users can be found within temporally and spatially localised contexts, i.e., small but topical fragments of the network containing interactions about social events or campaigns with a significant footprint on Twitter. To explore this hypothesis, we have designed a continuous user profile discovery pipeline that produces an ever-growing dataset of user profiles by harvesting and analysing contexts from the Twitter stream. The profiles dataset includes key network and content-based users metrics, enabling experimentation with user-defined score functions that characterise specific classes of online users. The paper describes the design and implementation of the pipeline and its empirical evaluation on a case study consisting of healthcare-related campaigns in the UK, showing how it supports the operational definitions of online activism, by comparing three experimental ranking functions. The code is publicly available.}
}

Targeted therapies for congenital myasthenic syndromes: systematic review and steps towards a treatabolome. Thompson, R.; Bonne, G.; Missier, P.; and Lochmüller, H. Emerging Topics in Life Sciences,ETLS20180100. jan 2019.

Targeted therapies for congenital myasthenic syndromes: systematic review and steps towards a treatabolome [link]

Paper doi link bibtex abstract

@article{Thompson2019,
  author   = {Thompson, Rachel and Bonne, Gis{\`{e}}le and Missier, Paolo and Lochm{\"{u}}ller, Hanns},
  title    = {{Targeted therapies for congenital myasthenic syndromes: systematic review and steps towards a treatabolome}},
  journal  = {Emerging Topics in Life Sciences},
  pages    = {ETLS20180100},
  month    = jan,
  year     = {2019},
  doi      = {10.1042/ETLS20180100},
  url      = {http://www.emergtoplifesci.org/content/early/2019/01/25/ETLS20180100.abstract},
  abstract = {Despite recent scientific advances, most rare genetic diseases --- including most neuromuscular diseases --- do not currently have curative gene-based therapies available. However, in some cases, such as vitamin, cofactor or enzyme deficiencies, channelopathies and disorders of the neuromuscular junction, a confirmed genetic diagnosis provides guidance on treatment, with drugs available that may significantly alter the disease course, improve functional ability and extend life expectancy. Nevertheless, many treatable patients remain undiagnosed or do not receive treatment even after genetic diagnosis. The growth of computer-aided genetic analysis systems that enable clinicians to diagnose their undiagnosed patients has not yet been matched by genetics-based decision-support systems for treatment guidance. Generating a `treatabolome' of treatable variants and the evidence for the treatment has the potential to increase treatment rates for treatable conditions. Here, we use the congenital myasthenic syndromes (CMS), a group of clinically and genetically heterogeneous but frequently treatable neuromuscular conditions, to illustrate the steps in the creation of a treatabolome for rare inherited diseases. We perform a systematic review of the evidence for pharmacological treatment of each CMS type, gathering evidence from 207 studies of over 1000 patients and stratifying by genetic defect, as treatment varies depending on the underlying cause. We assess the strength and quality of the evidence and create a dataset that provides the foundation for a computer-aided system to enable clinicians to gain easier access to information about treatable variants and the evidence they need to consider. Abbreviations: 3,4-DAP, 3,4-diaminopyridine; AChE, acetylcholinesterase; AChR, acetylcholine receptor; CEBM, Centre for evidence-based medicine; CMS, congenital myasthenic syndrome; NGS, next-generation sequencing; NMJ, neuromuscular junction}
}

2018 (8)

Selective and Recurring Re-computation of Big Data Analytics Tasks: Insights from a Genomics Case Study. Cała, J.; and Missier, P. Big Data Research, 13: 76–94. 2018. Big Medical/Healthcare Data Analytics

Selective and Recurring Re-computation of Big Data Analytics Tasks: Insights from a Genomics Case Study [link]

Paper doi link bibtex abstract 1 download

@article{CALA201876,
  author   = {Ca{\l}a, Jacek and Missier, Paolo},
  title    = {Selective and Recurring Re-computation of Big Data Analytics Tasks: Insights from a Genomics Case Study},
  journal  = {Big Data Research},
  volume   = {13},
  pages    = {76--94},
  year     = {2018},
  note     = {Big Medical/Healthcare Data Analytics},
  issn     = {2214-5796},
  doi      = {10.1016/j.bdr.2018.06.001},
  url      = {http://www.sciencedirect.com/science/article/pii/S2214579617303520},
  keywords = {Re-computation, Knowledge decay, Big data analysis, Genomics},
  abstract = {The value of knowledge assets generated by analytics processes using Data Science techniques tends to decay over time, as a consequence of changes in the elements the process depends on: external data sources, libraries, and system dependencies. For large-scale problems, refreshing those outcomes through greedy re-computation is both expensive and inefficient, as some changes have limited impact. In this paper we address the problem of refreshing past process outcomes selectively, that is, by trying to identify the subset of outcomes that will have been affected by a change, and by only re-executing fragments of the original process. We propose a technical approach to address the selective re-computation problem by combining multiple techniques, and present an extensive experimental study in Genomics, namely variant calling and their clinical interpretation, to show its effectiveness. In this case study, we are able to decrease the number of required re-computations on a cohort of individuals from 495 (blind) down to 71, and that we can reduce runtime by at least 60\% relative to the na{\"\i}ve blind approach, and in some cases by 90\%. Starting from this experience, we then propose a blueprint for a generic re-computation meta-process that makes use of process history metadata to make informed decisions about selective re-computations in reaction to a variety of changes in the data.}
}

Versioned-PROV: A PROV extension to support mutable data entities. Pimentel, J. F.; Missier, P.; Murta, L.; and Braganholo, V. In Procs. IPAW 2018, London, 2018. Springer
link bibtex

@inproceedings{PMMB18,
  author        = {Pimentel, Jo{\~a}o Felipe and Missier, Paolo and Murta, Leonardo and Braganholo, Vanessa},
  title         = {{Versioned-PROV: A PROV extension to support mutable data entities}},
  booktitle     = {Procs. IPAW 2018},
  address       = {London},
  publisher     = {Springer},
  year          = {2018},
  keywords      = {{\#}provenance,{\#}recomputation,process re-computation,provenance annotations},
  mendeley-tags = {{\#}recomputation,{\#}provenance}
}

Editorial: Special Issue on Improving the Veracity and Value of Big Data. Geerts, F.; Missier, P.; and Paton, N. W. J. Data and Information Quality, 9(3): 13:1–13:2. 2018.

Editorial: Special Issue on Improving the Veracity and Value of Big Data [link]

Paper doi link bibtex

@article{DBLP:journals/jdiq/GeertsMP18,
  author    = {Geerts, Floris and Missier, Paolo and Paton, Norman W.},
  title     = {Editorial: Special Issue on Improving the Veracity and Value of Big Data},
  journal   = {J. Data and Information Quality},
  volume    = {9},
  number    = {3},
  pages     = {13:1--13:2},
  year      = {2018},
  doi       = {10.1145/3174791},
  url       = {http://doi.acm.org/10.1145/3174791},
  timestamp = {Wed, 28 Mar 2018 12:00:19 +0200},
  biburl    = {https://dblp.org/rec/bib/journals/jdiq/GeertsMP18},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

Design and evaluation of a genomics variant analysis pipeline using GATK Spark tools. Tucci, N.; Cala, J.; Steyn, J.; and Missier, P. In Procs. SEBD '18 – 26th Italian Symposium on Advanced Database Systems, Bari, Italy, 2018.

Design and evaluation of a genomics variant analysis pipeline using GATK Spark tools [pdf]

Paper link bibtex abstract

@inproceedings{Tucci2018,
  author        = {Tucci, Nicholas and Cala, Jacek and Steyn, Jannetta and Missier, Paolo},
  title         = {{Design and evaluation of a genomics variant analysis pipeline using GATK Spark tools}},
  booktitle     = {Procs. SEBD '18 -- 26th Italian Symposium on Advanced Database Systems},
  address       = {Bari, Italy},
  year          = {2018},
  keywords      = {{\#}genomics,{\#}spark},
  mendeley-tags = {{\#}spark,{\#}genomics},
  abstract      = {Scalable and efficient processing of genome sequence data, i.e. for variant discovery, is key to the mainstream adoption of High Throughput technology for disease prevention and for clinical use. Achieving scalability, however, requires a significant effort to enable the parallel execution of the analysis tools that make up the pipelines. This is facilitated by the new Spark versions of the well-known GATK toolkit, which offer a black-box approach by transparently exploiting the underlying Map Reduce architecture. In this paper we report on our experience implementing a standard variant discovery pipeline using GATK 4.0 with Docker-based deployment over a cluster. We provide a preliminary performance analysis, comparing the processing times and cost to those of the new Microsoft Genomics Services.},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/SEBD-18-CR.pdf}
}

Provenance Annotation and Analysis to Support Process Re-Computation. Cala, J.; and Missier, P. In Procs. IPAW 2018, London, 2018. Springer

Provenance Annotation and Analysis to Support Process Re-Computation [pdf]

Paper link bibtex abstract

@inproceedings{Cala2018,
  author        = {Cala, Jacek and Missier, Paolo},
  title         = {{Provenance Annotation and Analysis to Support Process Re-Computation}},
  booktitle     = {Procs. IPAW 2018},
  address       = {London},
  publisher     = {Springer},
  year          = {2018},
  keywords      = {{\#}provenance,{\#}recomputation,process re-computation,provenance annotations},
  mendeley-tags = {{\#}recomputation,{\#}provenance},
  abstract      = {Many resource-intensive analytics processes evolve over time following new versions of the reference datasets and software dependencies they use. We focus on scenarios in which any version change has the potential to affect many outcomes, as is the case for instance in high throughput genomics where the same process is used to analyse large cohorts of patient genomes, or cases. As any version change is unlikely to affect the entire population, an efficient strategy for restoring the currency of the outcomes requires first to identify the scope of a change, i.e., the subset of affected data products. In this paper we describe a generic and reusable provenance-based approach to address this scope discovery problem. It applies to a scenario where the process consists of complex hierarchical components, where different input cases are processed using different version configurations of each component, and where separate provenance traces are collected for the executions of each of the components. We show how a new data structure, called a restart tree, is computed and exploited to manage the change scope discovery problem.},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/recomp-core-prov.pdf}
}

Analyzing Social Network Images with Deep Learning Models to Fight Zika Virus. Barros, P. H.; Lima, B. G. C.; Crispim, F. C.; Vieira, T.; Missier, P.; and Fonseca, B. In Procs. 15th International Conference on Image Analysis and Recognition (ICIAR'18), 2018.

Paper link bibtex

@inproceedings{Barros2018,
  author        = {Barros, Pedro H. and Lima, Bruno G. C. and Crispim, Felipe C. and Vieira, Tiago and Missier, Paolo and Fonseca, Baldoino},
  title         = {{Analyzing Social Network Images with Deep Learning Models to Fight Zika Virus}},
  booktitle     = {Procs. 15th International Conference on Image Analysis and Recognition (ICIAR'18)},
  year          = {2018},
  keywords      = {{\#}zika},
  mendeley-tags = {{\#}zika},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/analyzing-social-network.pdf}
}

VazaDengue: An information system for preventing and combating mosquito-borne diseases with social networks. Sousa, L.; de Mello, R.; Cedrim, D.; Garcia, A.; Missier, P.; Uchôa, A.; Oliveira, A.; and Romanovsky, A. Information Systems, 75: 26–42. 2018.

Paper doi link bibtex 1 download

@article{SOUSA201826,
  author   = {Sousa, Leonardo and de Mello, Rafael and Cedrim, Diego and Garcia, Alessandro and Missier, Paolo and Uch{\^o}a, Anderson and Oliveira, Anderson and Romanovsky, Alexander},
  title    = {VazaDengue: An information system for preventing and combating mosquito-borne diseases with social networks},
  journal  = {Information Systems},
  volume   = {75},
  pages    = {26--42},
  year     = {2018},
  issn     = {0306-4379},
  doi      = {10.1016/j.is.2018.02.003},
  url      = {http://www.sciencedirect.com/science/article/pii/S030643791730618X},
  keywords = {Dengue, Mosquito, Social media, Surveillance, Tweets},
  urlpaper = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/1-s2.0-S030643791730618X-main.pdf}
}

Loom: Query-aware Partitioning of Online Graphs. Firth, H.; and Missier, P. In Procs. 21st International Conference on Extending Database Technology (EDBT), Vienna, Austria, 2018. EDBT

Loom: Query-aware Partitioning of Online Graphs [pdf]

Paper link bibtex abstract

@inproceedings{Firth2018,
  author    = {Firth, Hugo and Missier, Paolo},
  title     = {{Loom: Query-aware Partitioning of Online Graphs}},
  booktitle = {Procs. 21st International Conference on Extending Database Technology (EDBT)},
  address   = {Vienna, Austria},
  publisher = {EDBT},
  year      = {2018},
  url       = {http://edbticdt2018.at/},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/1711.06608.pdf},
  keywords  = {distributed graphs,graph partitioning},
  abstract  = {As with general graph processing systems, partitioning data over a cluster of machines improves the scalability of graph database management systems. However, these systems will incur additional network cost during the execution of a query workload, due to inter-partition traversals. Workload-agnostic partitioning algorithms typically minimise the likelihood of any edge crossing partition boundaries. However, these partitioners are sub-optimal with respect to many workloads, especially queries, which may require more frequent traversal of specific subsets of inter-partition edges. Furthermore, they are largely unsuited to operating incrementally on dynamic, growing graphs. We present a new graph partitioning algorithm, Loom, that operates on a stream of graph updates and continuously allocates the new vertices and edges to partitions, taking into account a query workload of graph pattern expressions along with their relative frequencies. First we capture the most common patterns of edge traversals which occur when executing queries. We then compare sub-graphs, which present themselves incrementally in the graph update stream, against these common patterns. Finally we attempt to allocate each match to single partitions, reducing the number of inter-partition edges within frequently traversed sub-graphs and improving average query performance. Loom is extensively evaluated over several large test graphs with realistic query workloads and various orderings of the graph updates. We demonstrate that, given a workload, our prototype produces partitionings of significantly better quality than existing streaming graph partitioning algorithms Fennel {\&} LDG.}
}

2017 (8)

Why-Diff: Explaining differences amongst similar workflow runs by exploiting scientific metadata. Thavasimani, P.; Cala, J.; and Missier, P. In 2017 IEEE International Conference on Big Data, BigData 2017, Boston, MA, USA, December 11-14, 2017, pages 3031–3041, 2017.

Why-Diff: Explaining differences amongst similar workflow runs by exploiting scientific metadata [link]

Paper doi link bibtex

% NOTE(review): `crossref` points at DBLP:conf/bigdataconf/2017, which is not visible in this part of the file; confirm the parent entry exists.
@inproceedings{DBLP:conf/bigdataconf/ThavasimaniCM17,
  author    = {Thavasimani, Priyaa and Cala, Jacek and Missier, Paolo},
  title     = {Why-Diff: Explaining differences amongst similar workflow runs by exploiting scientific metadata},
  booktitle = {2017 {IEEE} International Conference on Big Data, BigData 2017, Boston, MA, USA, December 11-14, 2017},
  pages     = {3031--3041},
  year      = {2017},
  crossref  = {DBLP:conf/bigdataconf/2017},
  doi       = {10.1109/BigData.2017.8258275},
  url       = {https://doi.org/10.1109/BigData.2017.8258275},
  timestamp = {Tue, 23 Jan 2018 12:40:42 +0100},
  biburl    = {https://dblp.org/rec/bib/conf/bigdataconf/ThavasimaniCM17},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

Adaptive Incremental Learning for Statistical Relational Models Using Gradient-Based Boosting. Gu, Y.; and Missier, P. In Procs. ILP '17, 27th International Conference on Inductive Logic Programming (late-breaking paper), Orleans, France, 2017. CEUR-WS

Adaptive Incremental Learning for Statistical Relational Models Using Gradient-Based Boosting [pdf]

Paper link bibtex abstract

@inproceedings{Gu2017,
  author    = {Gu, Yulong and Missier, Paolo},
  title     = {{Adaptive Incremental Learning for Statistical Relational Models Using Gradient-Based Boosting}},
  booktitle = {Procs. ILP '17, 27th International Conference on Inductive Logic Programming (late-breaking paper)},
  address   = {Orleans, France},
  publisher = {CEUR-WS},
  year      = {2017},
  url       = {https://ilp2017.sciencesconf.org/data/pages/ILP_2017_paper_27.pdf},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/ILP_2017_paper_27.pdf},
  abstract  = {We consider the problem of incrementally learning models from relational data. Most existing learning methods for statistical relational models use batch learning, which becomes computationally expensive and eventually infeasible for large datasets. The majority of the previous work in relational incremental learning assumes the model's structure is given and only the model's parameters needed to be learned. In this paper, we propose algorithms that can incrementally learn the model's parameters and structure simultaneously. These algorithms are based on the successful formalisation of the relational functional gradient boosting system (RFGB), and extend the classical propositional ensemble methods to relational learning for handling evolving data streams.}
}

Mind My Value: a Decentralized Infrastructure for Fair and Trusted IoT Data Trading. Missier, P.; Bajoudah, S.; Capossele, A.; Gaglione, A.; and Nati, M. In Procs. 7th International Conference on the Internet of Things, Linz, Austria, 2017.

Mind My Value: a Decentralized Infrastructure for Fair and Trusted IoT Data Trading [pdf]

Paper link bibtex abstract

% NOTE(review): the `file` field is a machine-local path written by the reference manager; consider dropping it before publishing.
@inproceedings{Missier2017d,
  author        = {Missier, Paolo and Bajoudah, Shaimaa and Capossele, Angelo and Gaglione, Andrea and Nati, Michele},
  title         = {{Mind My Value: a Decentralized Infrastructure for Fair and Trusted IoT Data Trading}},
  booktitle     = {Procs. 7th International Conference on the Internet of Things},
  address       = {Linz, Austria},
  year          = {2017},
  url           = {http://iot-conference.org/iot2017/},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/iot-conf.pdf},
  file          = {:Users/npm65/Documents/Newcastle/CURRENT/DECatapult/IoT/IoT-prov-prototype/dissemination/IOT-conf/iot-conf.pdf:pdf},
  keywords      = {{\#}IoT,{\#}marketplace},
  mendeley-tags = {{\#}IoT,{\#}marketplace},
  abstract      = {Internet of Things (IoT) data are increasingly viewed as a new form of massively distributed and large scale digital assets, which are continuously generated by millions of connected devices. The real value of such assets can only be realized by allowing IoT data trading to occur on a marketplace that rewards every single producer and consumer, at a very granular level. Crucially, we believe that such a marketplace should not be owned by anybody, and should instead fairly and transparently self-enforce a well defined set of governance rules. In this paper we address some of the technical challenges involved in realizing such a marketplace. We leverage emerging blockchain technologies to build a decentralized, trusted, transparent and open architecture for IoT traffic metering and contract compliance, on top of the largely adopted IoT brokered data infrastructure. We discuss an Ethereum-based prototype implementation and experimentally evaluate the overhead cost associated with Smart Contract transactions, concluding that a viable business model can indeed be associated with our technical approach.}
}

Preserving the value of large scale data analytics over time through selective re-computation. Missier, P.; Cala, J.; and Rathi, M. In Procs. 31st British International Conference on Databases - BICOD, 2017.

Preserving the value of large scale data analytics over time through selective re-computation [pdf]

Paper link bibtex

@inproceedings{Missier2017c,
  author    = {Missier, Paolo and Cala, Jacek and Rathi, Manisha},
  title     = {{Preserving the value of large scale data analytics over time through selective re-computation}},
  booktitle = {Procs. 31st British International Conference on Databases - BICOD},
  year      = {2017},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/RecompVision.pdf}
}

Recruiting from the Network: Discovering Twitter Users Who Can Help Combat Zika Epidemics. Missier, P.; McClean, C.; Carlton, J.; Cedrim, D.; Silva, L.; Garcia, A.; Plastino, A.; and Romanovsky, A. In Cabot, J.; De Virgilio, R.; and Torlone, R., editor(s), Web Engineering: 17th International Conference, ICWE 2017, Rome, Italy, June 5-8, 2017, Proceedings, pages 437–445, Roma, Italy, 2017. Springer International Publishing

Paper doi link bibtex

@inproceedings{Missier2017b,
  author    = {Missier, Paolo and McClean, Callum and Carlton, Jonathan and Cedrim, Diego and Silva, Leonardo and Garcia, Alessandro and Plastino, Alexandre and Romanovsky, Alexander},
  editor    = {Cabot, Jordi and De Virgilio, Roberto and Torlone, Riccardo},
  title     = {Recruiting from the Network: Discovering Twitter Users Who Can Help Combat Zika Epidemics},
  booktitle = {Web Engineering: 17th International Conference, ICWE 2017, Rome, Italy, June 5-8, 2017, Proceedings},
  pages     = {437--445},
  publisher = {Springer International Publishing},
  address   = {Roma, Italy},
  year      = {2017},
  isbn      = {978-3-319-60131-1},
  doi       = {10.1007/978-3-319-60131-1_30},
  url       = {http://dx.doi.org/10.1007/978-3-319-60131-1_30},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/chp%253A10.1007%252F978-3-319-60131-1_30.pdf}
}

Provenance Standards. Missier, P. In Liu, L.; and Özsu, M. T., editor(s), Encyclopedia of Database Systems, pages 1–8. Springer New York, New York, NY, 2017.

Paper doi link bibtex

% Encyclopedia article by its own author inside an edited volume: `title` is the
% article and `booktitle` the encyclopedia, which is the incollection layout.
@incollection{Missier2017,
  author    = {Missier, Paolo},
  title     = {{Provenance Standards}},
  booktitle = {Encyclopedia of Database Systems},
  editor    = {Liu, Ling and {\"{O}}zsu, M Tamer},
  pages     = {1--8},
  publisher = {Springer New York},
  address   = {New York, NY},
  year      = {2017},
  isbn      = {978-1-4899-7993-3},
  doi       = {10.1007/978-1-4899-7993-3_80749-1},
  url       = {https://doi.org/10.1007/978-1-4899-7993-3_80749-1},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/Provenance-standards.pdf}
}

TAPER: query-aware, partition-enhancement for large, heterogenous graphs. Firth, H.; and Missier, P. Distributed and Parallel Databases,1–31. 2017.

TAPER: query-aware, partition-enhancement for large, heterogenous graphs [link]

Paper doi link bibtex abstract

@article{Firth2017,
  author   = {Firth, Hugo and Missier, Paolo},
  title    = {TAPER: query-aware, partition-enhancement for large, heterogenous graphs},
  journal  = {Distributed and Parallel Databases},
  year     = {2017},
  pages    = {1--31},
  abstract = {Graph partitioning has long been seen as a viable approach to addressing Graph DBMS scalability. A partitioning, however, may introduce extra query processing latency unless it is sensitive to a specific query workload, and optimised to minimise inter-partition traversals for that workload. Additionally, it should also be possible to incrementally adjust the partitioning in reaction to changes in the graph topology, the query workload, or both. Because of their complexity, current partitioning algorithms fall short of one or both of these requirements, as they are designed for offline use and as one-off operations. The TAPER system aims to address both requirements, whilst leveraging existing partitioning algorithms. TAPER takes any given initial partitioning as a starting point, and iteratively adjusts it by swapping chosen vertices across partitions, heuristically reducing the probability of inter-partition traversals for a given path queries workload. Iterations are inexpensive thanks to time and space optimisations in the underlying support data structures. We evaluate TAPER on two different large test graphs and over realistic query workloads. Our results indicate that, given a hash-based partitioning, TAPER reduces the number of inter-partition traversals by {$\sim$}80{\%}; given an unweighted Metis partitioning, by {$\sim$}30{\%}. These reductions are achieved within eight iterations and with the additional advantage of being workload-aware and usable online.},
  issn     = {1573-7578},
  doi      = {10.1007/s10619-017-7196-y},
  url      = {http://dx.doi.org/10.1007/s10619-017-7196-y}
}

Revealing the Detailed Lineage of Script Outputs using Hybrid Provenance. Zhang, Q.; Cao, Y.; Wang, Q.; Vu, D.; Thavasimani, P.; McPhillips, T.; Missier, P.; Slaughter, P.; Jones, C.; Jones, M. B; and Ludäscher, B. In Procs. 11th Intl. Digital Curation Conference (IDCC), Edinburgh, Scotland, UK, 2017. Digital Curation Center

Revealing the Detailed Lineage of Script Outputs using Hybrid Provenance [pdf]

Paper link bibtex abstract

@inproceedings{Zhang2017,
  author        = {Zhang, Qian and Cao, Yang and Wang, Qiwen and Vu, Duc and Thavasimani, Priyaa and McPhillips, Tim and Missier, Paolo and Slaughter, Peter and Jones, Christopher and Jones, Matthew B and Lud{\"a}scher, Bertram},
  title         = {{Revealing the Detailed Lineage of Script Outputs using Hybrid Provenance}},
  booktitle     = {Procs. 11th Intl. Digital Curation Conference (IDCC)},
  publisher     = {Digital Curation Center},
  address       = {Edinburgh, Scotland, UK},
  year          = {2017},
  abstract      = {We illustrate how combining retrospective and prospective provenance can yield scientifically meaningful hybrid provenance representations of the computational histories of data produced during a script run. We use scripts from multiple disciplines (astrophysics, climate science, biodiversity data curation, and social network analysis), implemented in Python, R, and MATLAB, to highlight the usefulness of diverse forms of retrospective provenance when coupled with prospective provenance. Users provide prospective provenance (i.e., the conceptual workflows latent in scripts) via simple YesWorkflow annotations, embedded as script comments. Runtime observables, hidden in filenames or folder structures, recorded in log-files, or automatically captured using tools such as noWorkflow or the DataONE RunManagers can be linked to prospective provenance via relational views and queries. The YesWorkflow toolkit, example scripts, and demonstration code are available via an open source repository.},
  keywords      = {{\#}provenance},
  mendeley-tags = {{\#}provenance},
  file          = {:Users/paolo/Documents/myGRID/refs/Zhang-Cao{\_}etal.pdf:pdf},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/Zhang-Cao_etal.pdf}
}

2016 (10)

Facilitating reproducible research by investigating computational metadata. Thavasimani, P.; and Missier, P. In 2016 IEEE International Conference on Big Data, BigData 2016, Washington DC, USA, December 5-8, 2016, pages 3045–3051, 2016.

Facilitating reproducible research by investigating computational metadata [link]

Paper doi link bibtex 1 download

@inproceedings{DBLP:conf/bigdataconf/ThavasimaniM16,
  author    = {Thavasimani, Priyaa and Missier, Paolo},
  title     = {Facilitating reproducible research by investigating computational metadata},
  booktitle = {2016 {IEEE} International Conference on Big Data, BigData 2016, Washington DC, USA, December 5-8, 2016},
  crossref  = {DBLP:conf/bigdataconf/2016},
  pages     = {3045--3051},
  year      = {2016},
  doi       = {10.1109/BigData.2016.7840958},
  url       = {https://doi.org/10.1109/BigData.2016.7840958},
  biburl    = {https://dblp.org/rec/bib/conf/bigdataconf/ThavasimaniM16},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 20 Jul 2017 12:20:57 +0200}
}

Clustering Provenance Facilitating Provenance Exploration Through Data Abstraction. Karsai, L.; Fekete, A.; Kay, J.; and Missier, P. In Proceedings of the Workshop on Human-In-the-Loop Data Analytics, of HILDA '16, pages 6:1–6:5, New York, NY, USA, 2016. ACM

Clustering Provenance Facilitating Provenance Exploration Through Data Abstraction [link]

Paper doi link bibtex

@inproceedings{Karsai:2016:CPF:2939502.2939508,
  author    = {Karsai, Linus and Fekete, Alan and Kay, Judy and Missier, Paolo},
  title     = {{Clustering Provenance Facilitating Provenance Exploration Through Data Abstraction}},
  booktitle = {Proceedings of the Workshop on Human-In-the-Loop Data Analytics},
  series    = {HILDA '16},
  pages     = {6:1--6:5},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2016},
  isbn      = {978-1-4503-4207-0},
  doi       = {10.1145/2939502.2939508},
  url       = {http://doi.acm.org/10.1145/2939502.2939508},
  keywords  = {provenance,visualisation,large-scale graphs}
}

Alan Turing Institute Symposium on Reproducibility for Data-Intensive Research – Final Report. Burgess, L. C; Crotty, D.; de Roure, D.; Gibbons, J.; Goble, C.; Missier, P.; Mortier, R.; Nichols, T. E; and O'Beirne, R. 2016.

Alan Turing Institute Symposium on Reproducibility for Data-Intensive Research – Final Report [link]

Paper link bibtex

@misc{burgess2016alan,
  author = {Burgess, Lucie C and Crotty, David and de Roure, David and Gibbons, Jeremy and Goble, Carole and Missier, Paolo and Mortier, Richard and Nichols, Thomas E and O'Beirne, Richard},
  title  = {Alan Turing Institute Symposium on Reproducibility for Data-Intensive Research -- Final Report},
  doi    = {10.6084/m9.figshare.3487382},
  url    = {https://dx.doi.org/10.6084/m9.figshare.3487382},
  year   = {2016}
}

The lifecycle of provenance metadata and its associated challenges and opportunities. Missier, P. In Lemieux, V., editor(s), Building Trust in Financial Information - Perspectives on the Frontiers of Provenance. Springer, 2016.

The lifecycle of provenance metadata and its associated challenges and opportunities [link]

Paper link bibtex abstract

@incollection{Missier2016b,
  author        = {Missier, Paolo},
  title         = {{The lifecycle of provenance metadata and its associated challenges and opportunities}},
  editor        = {Lemieux, Victoria},
  booktitle     = {Building Trust in Financial Information - Perspectives on the Frontiers of Provenance},
  publisher     = {Springer},
  year          = {2016},
  abstract      = {This chapter outlines some of the challenges and opportunities associated with adopting provenance principles [CFLV12] and standards [MGC+15] in a variety of disciplines, including data publication and reuse, and information sciences.},
  keywords      = {{\#}provenance},
  mendeley-tags = {{\#}provenance},
  url           = {http://arxiv.org/abs/1605.01229}
}

The data, they are a-changin'. Missier, P.; Cala, J.; and Wijaya, E. In Cohen-Boulakia, S., editor(s), Proc. TAPP'16 (Theory and Practice of Provenance), Washington D.C., USA, 2016. USENIX Association

Paper link bibtex abstract

@inproceedings{Paolo2016,
  author        = {Missier, Paolo and Cala, Jacek and Wijaya, Eldarina},
  title         = {{The data, they are a-changin'}},
  editor        = {Cohen-Boulakia, Sarah},
  booktitle     = {Proc. TAPP'16 (Theory and Practice of Provenance)},
  publisher     = {USENIX Association},
  address       = {Washington D.C., USA},
  year          = {2016},
  abstract      = {The cost of deriving actionable knowledge from large datasets has been decreasing thanks to a convergence of positive factors: low cost data generation, inexpensively scalable storage and processing infrastructure (cloud), software frameworks and tools for massively distributed data processing, and parallelisable data analytics algorithms. One observation that is often overlooked, however, is that each of these elements is not immutable, rather they all evolve over time. This suggests that the value of such derivative knowledge may decay over time, unless it is preserved by reacting to those changes. Our broad research goal is to develop models, methods, and tools for selectively reacting to changes by balancing costs and benefits, i.e. through complete or partial re-computation of some of the underlying processes. In this paper we present an initial model for reasoning about change and re-computations, and show how analysis of detailed provenance of derived knowledge informs re-computation decisions. We illustrate the main ideas through a real-world case study in genomics, namely on the interpretation of human variants in support of genetic diagnosis.},
  keywords      = {{\#}big data processing,{\#}data change,{\#}provenance,{\#}re-computation},
  mendeley-tags = {{\#}big data processing,{\#}data change,{\#}provenance,{\#}re-computation},
  url           = {https://arxiv.org/abs/1604.06412}
}

Analyzing Provenance across Heterogeneous Provenance Graphs. Oliveira, W.; Missier, P.; Ocana, K.; de Oliveira, D.; and Braganholo, V. In Procs. IPAW 2016, Washington D.C., USA, 2016. Springer
link bibtex abstract

@inproceedings{Oliveira2016,
  author        = {Oliveira, Wellington and Missier, Paolo and Ocana, Kary and de Oliveira, Daniel and Braganholo, Vanessa},
  title         = {{Analyzing Provenance across Heterogeneous Provenance Graphs}},
  booktitle     = {Procs. IPAW 2016},
  publisher     = {Springer},
  address       = {Washington D.C., USA},
  year          = {2016},
  abstract      = {Provenance generated by different workflow systems is generally expressed using different formats. This is not an issue when scientists analyze provenance graphs in isolation, or when they use the same workflow system. However, when analyzing heterogeneous provenance graphs from multiple systems poses a challenge. To address this problem we adopt ProvONE as an integration model, and show how different provenance databases can be converted to a global ProvONE schema. Scientists can then query this integrated database, exploring and linking provenance across several different workflows that may represent different implementations of the same experiment. To illustrate the feasibility of our approach, we developed conceptual mappings between the provenance databases of two workflow systems (e-Science Central and SciCumulus). We provide cartridges that implement these mappings and generate an integrated provenance database expressed as Prolog facts. To demonstrate its usage, we have developed Prolog rules that enable scientists to query the integrated database.},
  keywords      = {{\#}provenance},
  mendeley-tags = {{\#}provenance}
}

Tracking Dengue Epidemics using Twitter Content Classification and Topic Modelling. Missier, P.; Romanovsky, A; Miu, T; Pal, A; Daniilakis, M; Garcia, A; Cedrim, D; and Sousa, L In Procs. SoWeMine workshop, co-located with ICWE 2016, Lugano, Switzerland, 2016.

Paper link bibtex abstract

@inproceedings{Missier2016a,
  author        = {Missier, Paolo and Romanovsky, A and Miu, T and Pal, A and Daniilakis, M and Garcia, A and Cedrim, D and Sousa, L},
  title         = {{Tracking Dengue Epidemics using Twitter Content Classification and Topic Modelling}},
  booktitle     = {Procs. SoWeMine workshop, co-located with ICWE 2016},
  address       = {Lugano, Switzerland},
  year          = {2016},
  abstract      = {Detecting and preventing outbreaks of mosquito-borne diseases such as Dengue and Zika in Brasil and other tropical regions has long been a priority for governments in affected areas. Streaming social media content, such as Twitter, is increasingly being used for health vigilance applications such as flu detection. However, previous work has not addressed the complexity of drastic seasonal changes on Twitter a across multiple epidemic outbreaks. In order to address this gap, this paper contrasts two complementary approaches to detecting Twitter content that is relevant for Dengue outbreak detection, namely supervised classification and unsupervised clustering using topic modelling. Each approach has benefits and shortcomings. Our classifier achieves a prediction accuracy of about 80{\%} based on a small training set of about 1,000 instances, but the need for manual annotation makes it hard to track seasonal changes in the nature of the epidemics, such as the emergence of new types of virus in certain geographical locations. In contrast, LDA-based topic modelling scales well, generating cohesive and well-separated clusters from larger samples. While clusters can be easily re-generated following changes in epidemics, however, this approach makes it hard to clearly segregate relevant tweets into well-defined clusters.},
  keywords      = {{\#}social media analytics,{\#}twitter analytics},
  mendeley-tags = {{\#}social media analytics,{\#}twitter analytics},
  url           = {http://arxiv.org/abs/1605.00968}
}

Workload-aware streaming graph partitioning. Firth, H.; and Missier, P. In Procs. GraphQ Workshop, co-located with EDBT'16, Bordeaux, 2016.
link bibtex

@inproceedings{Firth2016,
  author    = {Firth, Hugo and Missier, Paolo},
  title     = {{Workload-aware streaming graph partitioning}},
  booktitle = {Procs. GraphQ Workshop, co-located with EDBT'16},
  address   = {Bordeaux},
  year      = {2016}
}

Data trajectories: tracking reuse of published data for transitive credit attribution. Missier, P. International Journal of Digital Curation, 11(1): 1–16. 2016.

Data trajectories: tracking reuse of published data for transitive credit attribution [link]

Paper

Data trajectories: tracking reuse of published data for transitive credit attribution [pdf]

paper

slides doi link bibtex abstract

@article{Missier2016,
  author        = {Missier, Paolo},
  title         = {{Data trajectories: tracking reuse of published data for transitive credit attribution}},
  journal       = {International Journal of Digital Curation},
  volume        = {11},
  number        = {1},
  pages         = {1--16},
  publisher     = {DCC},
  address       = {Amsterdam},
  year          = {2016},
  doi           = {10.2218/ijdc.v11i1.425},
  abstract      = {The ability to measure the use and impact of published data sets is key to the success of the open data / open science paradigm. A direct measure of impact would require tracking data (re)use in the wild, which however is difficult to achieve. This is therefore commonly replaced by simpler metrics based on data download and citation counts. In this paper we describe a scenario where it is possible to track the trajectory of a dataset after its publication, and we show how this enables the design of accurate models for ascribing credit to data originators. A Data Trajectory (DT) is a graph that encodes knowledge of how, by whom, and in which context data has been re-used, possibly after several generations. We provide a theoretical model of DTs that is grounded in the W3C PROV data model for provenance, and we show how DTs can be used to automatically propagate a fraction of the credit associated with transitively derived datasets, back to original data contributors. We also show this model of transitive credit in action by means of a Data Reuse Simulator. Ultimately, our hope is that, in the longer term, credit models based on direct measures of data reuse will provide further incentives to data publication. We conclude by outlining a research agenda to address the hard questions of creating, collecting, and using DTs systematically across a large number of data reuse instances, in the wild.},
  keywords      = {data reuse,data trajectories,provenance},
  mendeley-tags = {data reuse,data trajectories,provenance},
  file          = {:Users/paolo/Documents/myGRID/refs/DT.pdf:pdf},
  url           = {http://bibbase.org/network/publication/missier-datatrajectoriestrackingreuseofpublisheddatafortransitivecreditattribution-2016},
  url_Paper     = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/425-1828-1-PB.pdf},
  url_Slides    = {http://www.slideshare.net/pmissier/data-trajectories-tracking-the-reuse-of-published-datafor-transitive-credit-attribution}
}

Scalable and Efficient Whole-exome Data Processing Using Workflows on the Cloud. Cala, J.; Marei, E.; Yu, Y.; Takeda, K.; and Missier, P. Future Generation Computer Systems, In press(Special Issue: Big Data in the Cloud - Best paper award at the FGCS forum 2016). 2016.

Scalable and Efficient Whole-exome Data Processing Using Workflows on the Cloud [pdf]

Paper link bibtex abstract

@article{Cala2015,
  author          = {Cala, Jacek and Marei, Eyad and Xu, Yaobo and Takeda, Kenji and Missier, Paolo},
  title           = {{Scalable and Efficient Whole-exome Data Processing Using Workflows on the Cloud}},
  journal         = {Future Generation Computer Systems},
  publisher       = {Elsevier},
  year            = {2016},
  note            = {In press. Special Issue: Big Data in the Cloud -- Best paper award at the FGCS forum 2016},
  abstract        = {Dataflow-style workflows offer a simple, high-level programming model for flexible prototyping of scientific applications as an attractive alternative to low-level scripting. At the same time, workflow management systems (WFMS) may support data parallelism over big datasets by providing scalable, distributed deployment and execution of the workflow over a cloud infrastructure. In theory, the combination of these properties makes workflows a natural choice for implementing Big Data processing pipelines, common for instance in bioinformatics. In practice, however, correct workflow design for parallel Big Data problems can be complex and very time-consuming. In this paper we present our experience in porting a genomics data processing pipeline from an existing scripted implementation deployed on a closed HPC cluster, to a workflow-based design deployed on the Microsoft Azure public cloud. We draw two contrasting and general conclusions from this project. On the positive side, we show that our solution based on the e-Science Central WFMS and deployed in the cloud clearly outperforms the original HPC-based implementation achieving up to 2.3x speed-up. However, in order to deliver such performance we describe the importance of optimising the workflow deployment model to best suit the characteristics of the cloud computing infrastructure. The main reason for the performance gains was the availability of fast, node-local SSD disks delivered by D-series Azure VMs combined with the implicit use of local disk resources by e-Science Central workflow engines. These conclusions suggest that, on parallel Big Data problems, it is important to couple understanding of the cloud computing architecture and its software stack with simplicity of design, and that further efforts in automating parallelisation of complex pipelines are required.},
  keywords        = {Cloud computing,HPC,Performance analysis,Whole-exome sequencing,Workflow-based application,cloud,genomics,workflow},
  mendeley-groups = {Paolo-public},
  mendeley-tags   = {workflow,cloud,genomics},
  urlpaper        = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/1-s2.0-S0167739X16000030-main.pdf}
}

2015 (4)

Bootstrapping Personalised Human Activity Recognition Models Using Online Active Learning. Miu, T.; Missier, P.; and Plötz, T. In Proceedings of the 14th IEEE International Conference on Ubiquitous Computing and Communications, 2015.

Bootstrapping Personalised Human Activity Recognition Models Using Online Active Learning [pdf]

Paper link bibtex

@inproceedings{Miu2015,
  author    = {Miu, T. and Missier, P. and Pl{\"o}tz, T.},
  title     = {Bootstrapping Personalised Human Activity Recognition Models Using Online Active Learning},
  booktitle = {Proceedings of the 14th IEEE International Conference on Ubiquitous Computing and Communications},
  year      = {2015},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/tudor-Liverpool.pdf}
}

SVI: a simple single-nucleotide Human Variant Interpretation tool for Clinical Use. Missier, P.; Wijaya, E.; Kirby, R.; and Keogh, M. In Procs. 11th International conference on Data Integration in the Life Sciences, Los Angeles, CA, 2015. Springer

SVI: a simple single-nucleotide Human Variant Interpretation tool for Clinical Use [pdf]

Paper link bibtex

@inproceedings{Missier2015,
  author        = {Missier, Paolo and Wijaya, Eldarina and Kirby, Ryan and Keogh, Michael},
  title         = {{SVI: a simple single-nucleotide Human Variant Interpretation tool for Clinical Use}},
  booktitle     = {Procs. 11th International conference on Data Integration in the Life Sciences},
  publisher     = {Springer},
  address       = {Los Angeles, CA},
  year          = {2015},
  keywords      = {\#NGS,\#variant interpretation},
  mendeley-tags = {\#NGS,\#variant interpretation},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/dils15-main.pdf}
}

Access control and view generation for provenance graphs. Danger, R.; Curcin, V.; Missier, P.; and Bryans, J. Future Generation Computer Systems, 49: 8–27. February 2015.

Access control and view generation for provenance graphs [link]

Paper doi link bibtex abstract

@article{Danger2015,
  author   = {Danger, Roxana and Curcin, Vasa and Missier, Paolo and Bryans, Jeremy},
  title    = {{Access control and view generation for provenance graphs}},
  journal  = {Future Generation Computer Systems},
  volume   = {49},
  pages    = {8--27},
  month    = feb,
  year     = {2015},
  issn     = {0167739X},
  doi      = {10.1016/j.future.2015.01.014},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167739X1500031X},
  keywords = {Access Control Language,Provenance,Semantic Web},
  abstract = {Data provenance refers to the knowledge about data sources and operations carried out to obtain some piece of data. A provenance-enabled system maintains record of the interoperation of processes across different modules, stages and authorities to capture the full lineage of the resulting data, and typically allows data-focused audits using semantic technologies, such as ontologies, that capture domain knowledge. However, regulating access to captured provenance data is a non-trivial problem, since execution records form complex, overlapping graphs with individual nodes possibly being subject to different access policies. Applying traditional access control to provenance queries can either hide from the user the entire graph with nodes that had access to them denied, reveal too much information, or return a semantically invalid graph. An alternative approach is to answer queries with a new graph that abstracts over the missing nodes and fragments. In this paper, we present TACLP, an access control language for provenance data that supports this approach, together with an algorithm that transforms graphs according to sets of access restrictions. The algorithm produces safe and valid provenance graphs that retain the maximum amount of information allowed by the security model. The approach is demonstrated on an example of restricting access to a clinical trial provenance trace.}
}

Recent advances in Scalable Workflow Enactment Engines and Technologies. Hidders, J.; Missier, P.; and Sroka, J. Future Generation Computer Systems, 46: 1–2. May 2015.

Recent advances in Scalable Workflow Enactment Engines and Technologies [link]

Paper doi link bibtex

@article{Hidders2015,
  author  = {Hidders, Jan and Missier, Paolo and Sroka, Jacek},
  title   = {{Recent advances in Scalable Workflow Enactment Engines and Technologies}},
  journal = {Future Generation Computer Systems},
  volume  = {46},
  pages   = {1--2},
  month   = may,
  year    = {2015},
  issn    = {0167739X},
  doi     = {10.1016/j.future.2015.01.003},
  url     = {http://www.sciencedirect.com/science/article/pii/S0167739X15000047}
}

2014 (9)

On Strategies for Budget-based Online Annotation in Human Activity Recognition. Miu, T.; Plötz, T.; Missier, P.; and Roggen, D. In Proceedings of the 2014 ACM International Joint Conference on Pervasive and Ubiquitous Computing: Adjunct Publication, of UbiComp '14 Adjunct, pages 767–776, New York, NY, USA, 2014. ACM

On Strategies for Budget-based Online Annotation in Human Activity Recognition [link]

Paper doi link bibtex

@inproceedings{Miu:2014:SBO:2638728.2641300,
  author    = {Miu, Tudor and Pl{\"o}tz, Thomas and Missier, Paolo and Roggen, Daniel},
  title     = {{On Strategies for Budget-based Online Annotation in Human Activity Recognition}},
  booktitle = {Proceedings of the 2014 ACM International Joint Conference on Pervasive and Ubiquitous Computing: Adjunct Publication},
  series    = {UbiComp '14 Adjunct},
  pages     = {767--776},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2014},
  isbn      = {978-1-4503-3047-3},
  doi       = {10.1145/2638728.2641300},
  url       = {http://doi.acm.org/10.1145/2638728.2641300},
  keywords  = {budget-based annotation,online learning,activity recognition}
}

Forget Dimensions: Define Your Information Quality Using Quality View Patterns. Embury, S. M.; and Missier, P. In Floridi, L.; and Illari, P., editor(s), The Philosophy of Information Quality, volume 358, of Synthese Library, pages 25–41. Springer International Publishing, 2014.

Forget Dimensions: Define Your Information Quality Using Quality View Patterns [link]

Paper doi link bibtex

@incollection{Embury2014,
  author        = {Embury, Suzanne M. and Missier, Paolo},
  title         = {{Forget Dimensions: Define Your Information Quality Using Quality View Patterns}},
  editor        = {Floridi, Luciano and Illari, Phyllis},
  booktitle     = {The Philosophy of Information Quality},
  series        = {Synthese Library},
  volume        = {358},
  pages         = {25--41},
  publisher     = {Springer International Publishing},
  year          = {2014},
  isbn          = {978-3-319-07120-6},
  doi           = {10.1007/978-3-319-07121-3_3},
  url           = {http://dx.doi.org/10.1007/978-3-319-07121-3_3},
  language      = {English},
  keywords      = {\#information quality},
  mendeley-tags = {\#information quality}
}

Measuring the impact of cognitive distractions on driving performance using time series analysis. Garcia-Constantino, M.; Missier, P.; Blythe, P.; and Guo, A. W. In Procs. IEEE conference on Intelligent Transport Systems (ITSC'14), August 2014.

Measuring the impact of cognitive distractions on driving performance using time series analysis [link]

Paper link bibtex abstract

@inproceedings{Garcia-Constantino2014,
  author        = {Garcia-Constantino, Matias and Missier, Paolo and Blythe, Phil and Guo, Amy Weihong},
  title         = {{Measuring the impact of cognitive distractions on driving performance using time series analysis}},
  booktitle     = {Procs. IEEE conference on Intelligent Transport Systems (ITSC'14)},
  month         = aug,
  year          = {2014},
  abstract      = {Using current sensing technology, a wealth of data on driving sessions is potentially available through a combination of vehicle sensors and drivers' physiology sensors (heart rate, breathing rate, skin temperature, etc.). Our hypothesis is that it should be possible to exploit the combination of time series produced by such multiple sensors during a driving session, in order to (i) learn models of normal driving behaviour, and (ii) use such models to detect important and potentially dangerous deviations from the norm in real-time, and thus enable the generation of appropriate alerts. Crucially, we believe that such models and interventions should and can be personalised and tailor-made for each individual driver. As an initial step towards this goal, in this paper we present techniques for assessing the impact of cognitive distraction on drivers, based on simple time series analysis. We have tested our method on a rich dataset of driving sessions, carried out in a professional simulator, involving a panel of volunteer drivers. Each session included a different type of cognitive distraction, and resulted in multiple time series from a variety of on-board sensors as well as sensors worn by the driver. Crucially, each driver also recorded an initial session with no distractions. In our model, such initial session provides the baseline times series that make it possible to quantitatively assess driver performance under distraction conditions.},
  annote        = {presented at IEEE ITS 2014},
  archivePrefix = {arXiv},
  arxivId       = {1408.5573},
  eprint        = {1408.5573},
  keywords      = {\#ITS},
  mendeley-tags = {\#ITS},
  url           = {http://arxiv.org/abs/1408.5573},
  urlpaper      = {http://arxiv.org/abs/1408.5573}
}

Tweet My Street: A Cross-Disciplinary Collaboration for the Analysis of Local Twitter Data. Mearns, G.; Simmonds, R.; Richardson, R.; Turner, M.; Watson, P.; and Missier, P. Future Internet, 6(2): 378–396. 2014.

Paper doi link bibtex 1 download

@article{fi6020378,
  author  = {Mearns, Graeme and Simmonds, Rebecca and Richardson, Ranald and Turner, Mark and Watson, Paul and Missier, Paolo},
  title   = {{Tweet My Street: A Cross-Disciplinary Collaboration for the Analysis of Local Twitter Data}},
  journal = {Future Internet},
  volume  = {6},
  number  = {2},
  pages   = {378--396},
  year    = {2014},
  issn    = {1999-5903},
  doi     = {10.3390/fi6020378},
  url     = {http://www.mdpi.com/1999-5903/6/2/378}
}

ProvGen: generating synthetic PROV graphs with predictable structure. Firth, H.; and Missier, P. In Procs. IPAW 2014 (Provenance and Annotations), Koln, Germany, 2014. Springer

ProvGen: generating synthetic PROV graphs with predictable structure [pdf]

Paper link bibtex 12 downloads

@inproceedings{Firth2014,
  author    = {Firth, Hugo and Missier, Paolo},
  title     = {{ProvGen: generating synthetic PROV graphs with predictable structure}},
  booktitle = {Procs. IPAW 2014 (Provenance and Annotations)},
  publisher = {Springer},
  address   = {K{\"o}ln, Germany},
  year      = {2014},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/provGen2014.pdf}
}

ProvAbs: model, policy, and tooling for abstracting PROV graphs. Missier, P.; Bryans, J.; Gamble, C.; Curcin, V.; and Danger, R. In Procs. IPAW 2014 (Provenance and Annotations), Koln, Germany, 2014. Springer

ProvAbs: model, policy, and tooling for abstracting PROV graphs [link]

Paper link bibtex 8 downloads

@inproceedings{Missier2014,
  author    = {Missier, Paolo and Bryans, Jeremy and Gamble, Carl and Curcin, Vasa and Danger, Roxana},
  title     = {{ProvAbs: model, policy, and tooling for abstracting PROV graphs}},
  booktitle = {Procs. IPAW 2014 (Provenance and Annotations)},
  publisher = {Springer},
  address   = {K{\"o}ln, Germany},
  year      = {2014},
  urlpaper  = {http://uk.arxiv.org/abs/1406.1998}
}

From scripted HPC-based NGS pipelines to workflows on the cloud. Cala, J.; Xu, Y.; Wijaya, E. A.; and Missier, P. In Procs. C4Bio workshop, co-located with the 2014 CCGrid conference, Chicago, IL, 2014. IEEE

From scripted HPC-based NGS pipelines to workflows on the cloud [pdf]

Paper link bibtex

@inproceedings{Cala2014,
  author        = {Cala, Jacek and Xu, Yaobo and Wijaya, Eldarina Azfar and Missier, Paolo},
  title         = {{From scripted HPC-based NGS pipelines to workflows on the cloud}},
  booktitle     = {Procs. C4Bio workshop, co-located with the 2014 CCGrid conference},
  publisher     = {IEEE},
  address       = {Chicago, IL},
  year          = {2014},
  keywords      = {NGS,pipeline,scientific workflows,workflow},
  mendeley-tags = {NGS,pipeline,scientific workflows,workflow},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/C4Bio.pdf}
}

The PBase Scientific Workflow Provenance Repository. Cuevas-Vicenttín, V.; Kianmajd, P.; Ludäscher, B.; Missier, P.; Chirigati, F.; Wei, Y.; Koop, D.; and Dey, S. In Procs. 9th International Digital Curation Conference, San Francisco, CA, USA, 2014.

The PBase Scientific Workflow Provenance Repository [pdf]

Paper link bibtex

@inproceedings{Cuevas-Vicenttin2014,
address = {San Francisco, CA, USA},
author = {Cuevas-Vicentt{\'{\i}}n, V{\'{\i}}ctor and Kianmajd, Parisa and Lud{\"{a}}scher, Bertram and Missier, Paolo and Chirigati, Fernando and Wei, Yaxing and Koop, David and Dey, Saumen},
booktitle = {Procs. 9th International Digital Curation Conference},
file = {:Users/paolo/Documents/myGRID/refs/idcc14-pbasefinal.pdf:pdf},
keywords = {\#DataONE,\#provenance,\#workflow},
mendeley-tags = {\#DataONE,\#provenance,\#workflow},
title = {{The PBase Scientific Workflow Provenance Repository}},
year = {2014},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/idcc14-pbasefinal.pdf}
}

Distilling structure in Taverna scientific workflows: a refactoring approach. Cohen-Boulakia, S.; Chen, J.; Missier, P.; Goble, C.; Williams, A.; and Froidevaux, C. BMC Bioinformatics, 15(Suppl 1): S12. 2014.

Distilling structure in Taverna scientific workflows: a refactoring approach [pdf]

Paper doi link bibtex abstract 2 downloads

@article{Cohen-Boulakia2014,
abstract = {BACKGROUND: Scientific workflows management systems are increasingly used to specify and manage bioinformatics experiments. Their programming model appeals to bioinformaticians, who can use them to easily specify complex data processing pipelines. Such a model is underpinned by a graph structure, where nodes represent bioinformatics tasks and links represent the dataflow. The complexity of such graph structures is increasing over time, with possible impacts on scientific workflows reuse. In this work, we propose effective methods for workflow design, with a focus on the Taverna model. We argue that one of the contributing factors for the difficulties in reuse is the presence of "anti-patterns", a term broadly used in program design, to indicate the use of idiomatic forms that lead to over-complicated design. The main contribution of this work is a method for automatically detecting such anti-patterns, and replacing them with different patterns which result in a reduction in the workflow's overall structural complexity. Rewriting workflows in this way will be beneficial both in terms of user experience (easier design and maintenance), and in terms of operational efficiency (easier to manage, and sometimes to exploit the latent parallelism amongst the tasks). RESULTS: We have conducted a thorough study of the workflows structures available in Taverna, with the aim of finding out workflow fragments whose structure could be made simpler without altering the workflow semantics. We provide four contributions. Firstly, we identify a set of anti-patterns that contribute to the structural workflow complexity. Secondly, we design a series of refactoring transformations to replace each anti-pattern by a new semantically-equivalent pattern with less redundancy and simplified structure. Thirdly, we introduce a distilling algorithm that takes in a workflow and produces a distilled semantically-equivalent workflow. Lastly, we provide an implementation of our refactoring approach that we evaluate on both the public Taverna workflows and on a private collection of workflows from the BioVel project. CONCLUSION: We have designed and implemented an approach to improving workflow structure by way of rewriting preserving workflow semantics. Future work includes considering our refactoring approach during the phase of workflow design and proposing guidelines for designing distilled workflows.},
author = {Cohen-Boulakia, Sarah and Chen, Jiuqiang and Missier, Paolo and Goble, Carole and Williams, Alan and Froidevaux, Christine},
doi = {10.1186/1471-2105-15-S1-S12},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Cohen-Boulakia et al. - 2014 - Distilling structure in Taverna scientific workflows a refactoring approach(2).pdf:pdf},
issn = {1471-2105},
journal = {BMC Bioinformatics},
keywords = {\#taverna,\#workflow},
mendeley-tags = {\#taverna,\#workflow},
number = {Suppl 1},
pages = {S12},
title = {{Distilling structure in Taverna scientific workflows: a refactoring approach}},
url = {http://www.biomedcentral.com/1471-2105/15/S1/S12},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/BMCBionf-Sarah.pdf},
volume = {15},
year = {2014}
}

BACKGROUND:Scientific workflows management systems are increasingly used to specify and manage bioinformatics experiments. Their programming model appeals to bioinformaticians, who can use them to easily specify complex data processing pipelines. Such a model is underpinned by a graph structure, where nodes represent bioinformatics tasks and links represent the dataflow. The complexity of such graph structures is increasing over time, with possible impacts on scientific workflows reuse. In this work, we propose effective methods for workflow design, with a focus on the Taverna model. We argue that one of the contributing factors for the difficulties in reuse is the presence of "anti-patterns", a term broadly used in program design, to indicate the use of idiomatic forms that lead to over-complicated design. The main contribution of this work is a method for automatically detecting such anti-patterns, and replacing them with different patterns which result in a reduction in the workflow's overall structural complexity. Rewriting workflows in this way will be beneficial both in terms of user experience (easier design and maintenance), and in terms of operational efficiency (easier to manage, and sometimes to exploit the latent parallelism amongst the tasks).RESULTS:We have conducted a thorough study of the workflows structures available in Taverna, with the aim of finding out workflow fragments whose structure could be made simpler without altering the workflow semantics. We provide four contributions. Firstly, we identify a set of anti-patterns that contribute to the structural workflow complexity. Secondly, we design a series of refactoring transformations to replace each anti-pattern by a new semantically-equivalent pattern with less redundancy and simplified structure. Thirdly, we introduce a distilling algorithm that takes in a workflow and produces a distilled semantically-equivalent workflow. 
Lastly, we provide an implementation of our refactoring approach that we evaluate on both the public Taverna workflows and on a private collection of workflows from the BioVel project.CONCLUSION:We have designed and implemented an approach to improving workflow structure by way of rewriting preserving workflow semantics. Future work includes considering our refactoring approach during the phase of workflow design and proposing guidelines for designing distilled workflows.

2013 (5)

Extracting PROV provenance traces from Wikipedia history pages. Missier, P.; and Chen, Z. In EDBT/ICDT Workshops, pages 327–330, 2013.

Extracting PROV provenance traces from Wikipedia history pages [pdf]

Paper link bibtex 2 downloads

@inproceedings{DBLP:conf/edbt/MissierC13,
  author    = {Missier, Paolo and Chen, Ziyu},
  title     = {{Extracting PROV provenance traces from Wikipedia history pages}},
  booktitle = {EDBT/ICDT Workshops},
  pages     = {327--330},
  year      = {2013},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/p327-missier.pdf},
}

Fundamenta Informaticae – Special issue on Scalable Workflow Enactment Engines and Technology. Hidders, J.; Missier, P.; and Sroka, J., editors. Volume 128 IOS Press, 2013.

Fundamenta Informaticae – Special issue on Scalable Workflow Enactment Engines and Technology [link]

Paper link bibtex

@book{Hidders2013,
  editor        = {Hidders, Jan and Missier, Paolo and Sroka, Jacek},
  title         = {{Fundamenta Informaticae -- Special issue on Scalable Workflow Enactment Engines and Technology}},
  type          = {Special Issue},
  publisher     = {IOS Press},
  volume        = {128},
  number        = {3},
  year          = {2013},
  issn          = {0169-2968},
  url           = {http://iospress.metapress.com/content/n8802x1448hr/?p=c2c17be2c8c64e1195aaa3c93db188c6\&pi=1},
  annote        = {from SWEET'12},
  keywords      = {\#cloud,\#workflow},
  mendeley-tags = {\#cloud,\#workflow},
}

Provenance and data differencing for workflow reproducibility analysis. Missier, P.; Woodman, S.; Hiden, H.; and Watson, P. Concurrency and Computation: Practice and Experience. 2013.

Provenance and data differencing for workflow reproducibility analysis [pdf]

Paper doi link bibtex abstract 8 downloads

@article{CPE:CPE3035,
  author   = {Missier, Paolo and Woodman, Simon and Hiden, Hugo and Watson, Paul},
  title    = {{Provenance and data differencing for workflow reproducibility analysis}},
  journal  = {Concurrency and Computation: Practice and Experience},
  year     = {2013},
  issn     = {1532-0634},
  doi      = {10.1002/cpe.3035},
  url      = {http://dx.doi.org/10.1002/cpe.3035},
  urlpaper = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/CCPE-2012.pdf},
  file     = {:Users/paolo/Documents/Newcastle/CURRENT/repro-paper-CCPE-2012/CCPE-2012.pdf:pdf},
  keywords = {e-science,provenance,reproducibility,scientific workflow},
  abstract = {One of the foundations of science is that researchers must publish the methodology used to achieve their results so that others can attempt to reproduce them. This has the added benefit of allowing methods to be adopted and adapted for other purposes. In the field of e-Science, services – often choreographed through workflow, process data to generate results. The reproduction of results is often not straightforward as the computational objects may not be made available or may have been updated since the results were generated. For example, services are often updated to fix bugs or improve algorithms. This paper addresses these problems in three ways. Firstly, it introduces a new framework to clarify the range of meanings of ‘reproducibility’. Secondly, it describes a new algorithm, PDIFF, that uses a comparison of workflow provenance traces to determine whether an experiment has been reproduced; the main innovation is that if this is not the case then the specific point(s) of divergence are identified through graph analysis, assisting any researcher wishing to understand those differences. One key feature is support for user-defined, semantic data comparison operators. Finally, the paper describes an implementation of PDIFF that leverages the power of the e-Science Central platform that enacts workflows in the cloud. As well as automatically generating a provenance trace for consumption by PDIFF, the platform supports the storage and reuse of old versions of workflows, data and services; the paper shows how this can be powerfully exploited to achieve reproduction and reuse. Copyright © 2013 John Wiley \& Sons, Ltd.},
}

The W3C PROV family of specifications for modelling provenance metadata. Missier, P.; Belhajjame, K.; and Cheney, J. In Procs. EDBT'13 (Tutorial), Genova, Italy, 2013. ACM

The W3C PROV family of specifications for modelling provenance metadata [pdf]

Paper link bibtex abstract 2 downloads

@inproceedings{Missier2013b,
abstract = {Provenance, a form of structured metadata designed to record the origin or source of information, can be instrumental in deciding whether information is to be trusted, how it can be integrated with other diverse information sources, and how to establish attribution of information to authors throughout its history. The PROV set of specifications, produced by the World Wide Web Consortium (W3C), is designed to promote the publication of provenance information on the Web, and offers a basis for interoperability across diverse provenance management systems. The PROV provenance model is deliberately generic and domain-agnostic, but extension mechanisms are available and can be exploited for modelling specific domains. This tutorial provides an account of these specifications. Starting from intuitive and informal examples that present idiomatic provenance patterns, it progressively introduces the relational model of provenance along with the constraints model for validation of provenance documents, and concludes with example applications that show the extension points in use.},
address = {Genova, Italy},
author = {Missier, Paolo and Belhajjame, Khalid and Cheney, James},
booktitle = {Procs. EDBT'13 (Tutorial)},
keywords = {\#PROV,\#provenance},
mendeley-tags = {\#PROV,\#provenance},
publisher = {ACM},
title = {{The W3C PROV family of specifications for modelling provenance metadata}},
url = {http://www.edbt.org/Proceedings/2013-Genova/papers/edbt/a80-missier.pdf},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/PROV-Tutorial-paper.pdf},
year = {2013}
}

D-PROV: extending the PROV provenance model with workflow structure. Missier, P.; Dey, S.; Belhajjame, K.; Cuevas, V.; and Ludaescher, B. In Procs. TAPP'13, Lombard, IL, 2013.

D-PROV: extending the PROV provenance model with workflow structure [pdf]

Paper link bibtex 6 downloads

@inproceedings{Missier2013a,
  author        = {Missier, Paolo and Dey, Saumen and Belhajjame, Khalid and Cuevas, Victor and Ludaescher, Bertram},
  title         = {{D-PROV}: extending the {PROV} provenance model with workflow structure},
  booktitle     = {Procs. TAPP'13},
  address       = {Lombard, IL},
  year          = {2013},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/D-PROV-TAPP-2013.pdf},
  keywords      = {PROV,workflow-provenance},
  mendeley-tags = {PROV,workflow-provenance},
}

2012 (8)

Report from the first workshop on Scalable Workflow Enactment Engines and Technology (SWEET'12). Hidders, J.; Sroka, J.; and Missier, P. In SIGMOD Record, volume 41. December 2012.

Report from the first workshop on Scalable Workflow Enactment Engines and Technology (SWEET'12) [pdf]

Paper link bibtex

@article{SWEETReport2011,
author = {Hidders, Jan and Sroka, Jacek and Missier, Paolo},
title = {Report from the first workshop on {Scalable Workflow Enactment Engines and Technology} ({SWEET'12})},
journal = {SIGMOD Record},
url={http://www.sigmod.org/publications/sigmod-record/1212/pdfs/12.report.hidders.pdf},
volume={41},
number={4},
month=dec,
year={2012}
}

SWEET '12: Proceedings of the 1st ACM SIGMOD Workshop on Scalable Workflow Execution Engines and Technologies. Hidders, J.; Missier, P.; and Sroka, J., editors. SIGMOD Record. New York, NY, USA, December 2012.

SWEET '12: Proceedings of the 1st ACM SIGMOD Workshop on Scalable Workflow Execution Engines and Technologies [pdf]

Paper link bibtex 1 download

@proceedings{Hidders:2012:2443416,
address = {New York, NY, USA},
editor = {Hidders, Jan and Missier, Paolo and Sroka, Jacek},
isbn = {978-1-4503-1876-1},
publisher = {ACM},
title = {{SWEET '12: Proceedings of the 1st ACM SIGMOD Workshop on Scalable Workflow Execution Engines and Technologies}},
url = {http://dl.acm.org/citation.cfm?id=2443416},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/SweetReport.pdf},
month=dec,
year = {2012}
}

Predicting the Execution Time of Workflow Activities Based on Their Input Features. Miu, T.; and Missier, P. In Taylor, I.; and Montagnat, J., editor(s), Procs. WORKS 2012, Salt Lake City, US, 2012. ACM

Predicting the Execution Time of Workflow Activities Based on Their Input Features [pdf]

Paper link bibtex 3 downloads

@inproceedings{Miu2012,
  author    = {Miu, Tudor and Missier, Paolo},
  title     = {{Predicting the Execution Time of Workflow Activities Based on Their Input Features}},
  editor    = {Taylor, Ian and Montagnat, Johan},
  booktitle = {Procs. WORKS 2012},
  publisher = {ACM},
  address   = {Salt Lake City, US},
  year      = {2012},
  urlpaper  = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/WORKS12.pdf},
  file      = {:Users/paolo/Dropbox/tudor-phd/WORKS 2012/escience2012.pdf:pdf},
}

Modelling Provenance using Structured Occurrence Networks. Missier, P.; Randell, B.; and Koutny, M. In Proc. IPAW'12, Santa Barbara, California, 2012. Springer-Verlag, Lecture Notes in Computer Science

Modelling Provenance using Structured Occurrence Networks [pdf]

Paper link bibtex 7 downloads

@inproceedings{Missier2012,
address = {Santa Barbara, California},
author = {Missier, Paolo and Randell, Brian and Koutny, Maciej},
booktitle = {Procs. IPAW'12},
keywords = {\#provenance},
mendeley-tags = {\#provenance},
publisher = {Springer-Verlag},
series = {Lecture Notes in Computer Science},
title = {{Modelling Provenance using Structured Occurrence Networks}},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/IPAW12-040312.pdf},
year = {2012}
}

A PROV encoding for provenance analysis using deductive rules. Missier, P.; and Belhajjame, K. In Procs. IPAW'12, Santa Barbara, California, 2012. Springer-Verlag, Lecture Notes in Computer Science

A PROV encoding for provenance analysis using deductive rules [pdf]

Paper link bibtex

@inproceedings{Missier2012a,
address = {Santa Barbara, California},
author = {Missier, Paolo and Belhajjame, Khalid},
booktitle = {Procs. IPAW'12},
keywords = {\#PROV,\#datalog,\#provenance},
publisher = {Springer-Verlag},
series = {Lecture Notes in Computer Science},
title = {{A PROV encoding for provenance analysis using deductive rules}},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/IPAW2012-datalog.pdf},
year = {2012}
}

Detecting Duplicate Records in Scientific Workflow Results. Belhajjame, K.; Missier, P.; and Goble, C. In Procs. IPAW'12, Santa Barbara, California, 2012. Springer-Verlag, Lecture Notes in Computer Science
link bibtex

@inproceedings{Missier2012b,
address = {Santa Barbara, California},
author = {Belhajjame, Khalid and Missier, Paolo and Goble, Carole},
booktitle = {Procs. IPAW'12},
keywords = {\#PROV,\#provenance},
publisher = {Springer-Verlag},
series = {Lecture Notes in Computer Science},
title = {{Detecting Duplicate Records in Scientific Workflow Results}},
year = {2012}
}

Principles of Provenance (Dagstuhl Seminar 12091). Cheney, J.; Finkelstein, A.; Ludaescher, B.; and Vansummeren, S. Dagstuhl Reports, 2(2): 84–113. 2012.

Principles of Provenance (Dagstuhl Seminar 12091) [link]

Paper doi link bibtex 1 download

@article{cheney_et_al:DR:2012:3507,
address = {Dagstuhl, Germany},
annote = {Keywords: Provenance, Lineage, Metadata, Trust, Repeatability, Accountability},
author = {Cheney, James and Finkelstein, Anthony and Ludaescher, Bertram and Vansummeren, Stijn},
doi = {10.4230/DagRep.2.2.84},
editor = {Cheney, James and Finkelstein, Anthony and Ludaescher, Bertram and Vansummeren, Stijn},
issn = {2192-5283},
journal = {Dagstuhl Reports},
keywords = {\#provenance},
mendeley-tags = {\#provenance},
number = {2},
pages = {84--113},
publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
title = {{Principles of Provenance (Dagstuhl Seminar 12091)}},
url = {http://drops.dagstuhl.de/opus/volltexte/2012/3507},
volume = {2},
year = {2012}
}

Golden Trail: Retrieving the Data History that Matters from a Comprehensive Provenance Repository. Missier, P.; Ludascher, B.; Bowers, S.; Altintas, I.; Dey, S.; and Agun, M. International Journal of Digital Curation, 7(1). 2012.

Golden Trail: Retrieving the Data History that Matters from a Comprehensive Provenance Repository [pdf]

Paper link bibtex 1 download

@article{Missier2011c,
  author        = {Missier, Paolo and Ludascher, Bertram and Bowers, Shawn and Altintas, Ilkay and Dey, Saumen and Agun, Michael},
  title         = {{Golden Trail: Retrieving the Data History that Matters from a Comprehensive Provenance Repository}},
  journal       = {International Journal of Digital Curation},
  volume        = {7},
  number        = {1},
  year          = {2012},
  publisher     = {UKOLN},
  address       = {Bristol,UK},
  url           = {http://www.dcc.ac.uk/events/idcc11},
  urlpaper      = {http://homepages.cs.ncl.ac.uk/paolo.missier/doc/221-927-1-PB.pdf},
  file          = {:Users/paolo/Downloads/221-927-1-PB.pdf:pdf},
  keywords      = {\#provenance,\#repository,\#workflow},
  mendeley-tags = {\#provenance,\#repository,\#workflow},
}

2011 (8)

Achieving Reproducibility by Combining Provenance with Service and Workflow Versioning. Woodman, S.; Hiden, H.; Watson, P.; and Missier, P. In Procs. WORKS 2011, Seattle, WA, USA, 2011.
link bibtex

@inproceedings{Woodman2011,
  author        = {Woodman, Simon and Hiden, Hugo and Watson, Paul and Missier, Paolo},
  title         = {{Achieving Reproducibility by Combining Provenance with Service and Workflow Versioning}},
  booktitle     = {Procs. WORKS 2011},
  address       = {Seattle, WA, USA},
  year          = {2011},
  keywords      = {cloud,provenance,reproducible science,workflow},
  mendeley-tags = {cloud,provenance,reproducible science,workflow},
}

Workflows for Information Integration in the Life Sciences. Missier, P.; Paton, N.; and Li, P. In Ceri, S.; and Brambilla, M., editor(s), Search Computing, volume 6585, of Lecture Notes in Computer Science, pages 215–225. Springer Berlin / Heidelberg, 2011.

Workflows for Information Integration in the Life Sciences [link]

Paper link bibtex abstract

@incollection{springerlink:10.1007/978-3-642-19668-3_20,
abstract = {The increasingly computationally- and data-intensive nature of experimental science motivates recent interest in workflows, as a way to specify complex data processing and integration pipelines in a fairly intuitive way. Such workflows orchestrate the invocation of data retrieval services in a way that resembles, to some extent, Search Computing query plans. While the former are manually specified, however, the latter are the result of an automated translation process. Using lessons learnt from experience in workflow design, in this chapter we discuss some of the requirements on service curation that make automated, on-demand data integration processes possible and realistic.},
annote = {10.1007/978-3-642-19668-3\_20},
author = {Missier, Paolo and Paton, Norman and Li, Peter},
booktitle = {Search Computing},
doi = {10.1007/978-3-642-19668-3_20},
editor = {Ceri, Stefano and Brambilla, Marco},
isbn = {978-3-642-19667-6},
pages = {215--225},
publisher = {Springer Berlin / Heidelberg},
series = {Lecture Notes in Computer Science},
title = {{Workflows for Information Integration in the Life Sciences}},
url = {http://dx.doi.org/10.1007/978-3-642-19668-3_20},
volume = {6585},
year = {2011}
}

Towards the preservation of scientific workflows. De Roure, D.; Belhajjame, K.; Missier, P.; et al. In Procs. of the 8th International Conference on Preservation of Digital Objects (iPRES 2011), Singapore, 2011.

Towards the preservation of scientific workflows [pdf]

Paper link bibtex

@inproceedings{Roure2011,
address = {Singapore},
author = {{De Roure}, David and Belhajjame, Khalid and Missier, Paolo and others},
booktitle = {Procs. of the 8th International Conference on Preservation of Digital Objects (iPRES 2011)},
title = {{Towards the preservation of scientific workflows}},
year = {2011},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/wfpreservev12.pdf}
}

Why linked data is not enough for scientists. Bechhofer, S.; Buchan, I.; De Roure, D.; Missier, P.; et al. Future Generation Computer Systems (FGCS). 2011.

Why linked data is not enough for scientists [link]

Paper doi link bibtex 2 downloads

@article{Bechhofer2011,
author = {Bechhofer, Sean and Buchan, Iain and {De Roure}, David and Missier, Paolo and others},
doi = {10.1016/j.future.2011.08.004},
journal = {Future Generation Computer Systems (FGCS)},
publisher = {Elsevier},
title = {{Why linked data is not enough for scientists}},
url = {http://www.sciencedirect.com/science/article/pii/S0167739X11001439},
year = {2011}
}

Incremental workflow improvement through analysis of its data provenance. Missier, P. In Procs. TAPP'11 (Theory and Practice of Provenance), Heraklion, Crete, Greece, June 2011.

Incremental workflow improvement through analysis of its data provenance [pdf]

Paper link bibtex abstract

@inproceedings{Missier2011a,
abstract = {Repeated executions of resource-intensive workflows over a large number of runs are commonly observed in e-science practice. We explore the hypothesis that, in some cases, provenance traces recorded for past runs of a workflow can be used to make future runs more efficient. This investigation is an initial step into the systematic study of the role that provenance analysis can play in the broader context of self-managing software systems. We have tested our hypothesis on a concrete case study involving a Chemical Engineering workflow deployed on a cloud infrastructure, where we can measure the cost of its repeated execution. Our approach involves augmenting the workflow with a feedback loop in which incremental analysis of the provenance of past runs is used to control some of the workflow steps in subsequent executions. We present initial experimental results and hint at future improvements as part of ongoing work.},
address = {Heraklion, Crete, Greece},
author = {Missier, Paolo},
booktitle = {Procs. TAPP'11 (Theory and Practice of Provenance)},
file = {:Users/paolo/Dropbox/Provenance-mining/TAPP11/TAPP-missier.pdf:pdf},
title = {{Incremental workflow improvement through analysis of its data provenance}},
year = {2011},
month=jun,
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/TAPP-missier.pdf}
}

Simulating Taverna workflows using stochastic process algebras. Curcin, V.; Missier, P.; and De Roure, D. Concurrency and Computation: Practice and Experience, In press. 2011.

Simulating Taverna workflows using stochastic process algebras [pdf]

Paper link bibtex

@article{Curcin2011,
author = {Curcin, Vasa and Missier, Paolo and {De Roure}, David},
file = {:Users/paolo/Documents/myGRID/papers/Simulating Taverna\_v6.pdf:pdf},
journal = {Concurrency and Computation: Practice and Experience},
note = {In press},
title = {{Simulating Taverna workflows using stochastic process algebras}},
year = {2011},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/Simulating%20Taverna_v6.pdf}
}

Workflows to Open Provenance Graphs, round-trip. Missier, P.; and Goble, C. Future Generation Computer Systems (FGCS), 27(6): 812–819. April 2011.

Workflows to Open Provenance Graphs, round-trip [pdf]

Paper doi link bibtex abstract

@article{Missier2011,
abstract = {The Open Provenance Model is designed to capture relationships amongst data values, and amongst processors that produce or consume those values. While OPM graphs are able to describe aspects of a workflow execution, capturing the structure of the workflows themselves is understandably beyond the scope of the OPM specification, since the graphs may be generated by a broad variety of processes, which may not be formal workflows at all. In particular, OPM does not address two questions: firstly, whether for any OPM graph there exists a \textit{plausible} workflow, in some model, which could have generated the graph. And secondly, which information should be captured as part of an OPM graph that is derived from the execution of some known type of workflow, so that the workflow structure and the execution trace can both be inferred back from the graph. Motivated by the need to address the \textit{Third Provenance Challenge} using Taverna workflows and provenance, in this paper we explore such notion of \textit{lossless-ness} of OPM graphs relative to Taverna workflows. For the first question, we show that Taverna is a suitable model for representing plausible OPM-generating processes. For the second question, we show how augmenting OPM with two types of annotations makes it lossless with respect to Taverna. We support this claim by presenting a two-way mapping between OPM graphs and Taverna workflows.},
author = {Missier, Paolo and Goble, Carole},
doi = {10.1016/j.future.2010.10.012},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier, Goble - 2011 - Workflows to Open Provenance Graphs, round-trip(2).pdf:pdf;:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier, Goble - 2011 - Workflows to Open Provenance Graphs, round-trip.pdf:pdf},
journal = {Future Generation Computer Systems (FGCS)},
keywords = {\#OPM,\#provenance,OPM},
mendeley-tags = {\#OPM,\#provenance},
number = {6},
pages = {812--819},
publisher = {Elsevier},
title = {{Workflows to Open Provenance Graphs, round-trip}},
volume = {27},
year = {2011},
month=apr,
urlpaper={http://www.mendeley.com/download/personal/212462/3538519411/e3fb0fd4c581dc383cb202183f4773dd48ff8c55/dl.pdf}
}

Extending Semantic Provenance into the Web of Data. Zhao, J.; Sahoo, S. S; Missier, P.; Sheth, A.; and Goble, C. IEEE Internet Computing, 15: 40–48. 2011.

Extending Semantic Provenance into the Web of Data [link]

Paper doi link bibtex

@article{10.1109/MIC.2011.7,
address = {Los Alamitos, CA, USA},
author = {Zhao, Jun and Sahoo, Satya S and Missier, Paolo and Sheth, Amit and Goble, Carole},
doi = {10.1109/MIC.2011.7},
issn = {1089-7801},
journal = {IEEE Internet Computing},
pages = {40--48},
publisher = {IEEE Computer Society},
url={http://doi.ieeecomputersociety.org/10.1109/MIC.2011.7},
title = {{Extending Semantic Provenance into the Web of Data}},
volume = {15},
year = {2011}
}

2010 (11)

Why Linked Data is Not Enough for Scientists. Bechhofer, S.; Ainsworth, J.; Bhagat, J.; Buchan, I.; Couch, P.; Cruickshank, D.; De Roure, D.; Delderfield, M.; Dunlop, I.; Gamble, M.; Goble, C.; Michaelides, D.; Missier, P.; Owen, S.; Newman, D.; and Sufi, S. In e-Science (e-Science), 2010 IEEE Sixth International Conference on, pages 300–307, 2010.

Why Linked Data is Not Enough for Scientists [pdf]

Paper doi link bibtex

@inproceedings{5693931,
author = {Bechhofer, Sean and Ainsworth, John and Bhagat, Jiten and Buchan, Iain and Couch, Philip and Cruickshank, Don and {De Roure}, David and Delderfield, Mark and Dunlop, Ian and Gamble, Matthew and Goble, Carole and Michaelides, Danius and Missier, Paolo and Owen, Stuart and Newman, David and Sufi, Shoaib},
booktitle = {e-Science (e-Science), 2010 IEEE Sixth International Conference on},
doi = {10.1109/eScience.2010.21},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Bechhofer et al. - 2010 - Why Linked Data is Not Enough for Scientists.pdf:pdf},
pages = {300--307},
title = {{Why Linked Data is Not Enough for Scientists}},
url = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=5693931},
year = {2010},
urlpaper={http://www.mendeley.com/download/public/212462/3654049861/b5af47a9fee58a215547315f8f82a4efce0e27ea/dl.pdf}
}

Linking Multiple Workflow Provenance Traces for Interoperable Collaborative Science. Missier, P.; Ludascher, B.; Bowers, S.; Anand, M. K.; Altintas, I.; Dey, S.; Sarkar, A.; Shrestha, B.; and Goble, C. In Proc.s 5th Workshop on Workflows in Support of Large-Scale Science (WORKS), 2010.

Linking Multiple Workflow Provenance Traces for Interoperable Collaborative Science [pdf]

Paper link bibtex abstract

@inproceedings{Missier2010e,
abstract = {Scientific collaboration increasingly involves data sharing between separate groups. We consider a scenario where data products of scientific workflows are published and then used by other researchers as inputs to their workflows. For proper interpretation, shared data must be complemented by descriptive metadata. We focus on provenance traces, a prime example of such metadata which describes the genesis and processing history of data products in terms of the computational workflow steps. Through the reuse of published data, virtual, implicitly collaborative experiments emerge, making it desirable to compose the independently generated traces into global ones that describe the combined executions as single, seamless experiments. We present a model for provenance sharing that realizes this holistic view by overcoming the various interoperability problems that emerge from the heterogeneity of workflow systems, data formats, and provenance models. At the heart lie (i) an abstract workflow and provenance model in which (ii) data sharing becomes itself part of the combined workflow. We then describe an implementation of our model that we developed in the context of the Data Observation Network for Earth (DataONE) project and that can ``stitch together'' traces from different Kepler and Taverna workflow runs. It provides a prototypical framework for seamless cross-system, collaborative provenance management and can be easily extended to include other systems. Our approach also opens the door to new ways of workflow interoperability not only through often elusive workflow standards but through shared provenance information from public repositories.},
author = {Missier, Paolo and Ludascher, Bertram and Bowers, Shawn and Anand, Manish Kumar and Altintas, Ilkay and Dey, Saumen and Sarkar, Anandarup and Shrestha, Biva and Goble, Carole},
booktitle = {Procs. 5th Workshop on Workflows in Support of Large-Scale Science (WORKS)},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier et al. - 2010 - Linking Multiple Workflow Provenance Traces for Interoperable Collaborative Science.pdf:pdf},
title = {{Linking Multiple Workflow Provenance Traces for Interoperable Collaborative Science}},
year = {2010},
urlpaper={http://homepages.cs.ncl.ac.uk/paolo.missier/doc/WORKS10.pdf}
}

The Open Provenance Model — Core Specification (v1.1). Moreau, L.; Clifford, B.; Freire, J.; Futrelle, J.; Gil, Y.; Groth, P.; Kwasnikowska, N.; Miles, S.; Missier, P.; Myers, J.; Plale, B.; Simmhan, Y.; Stephan, E.; and Van Den Bussche, J. Future Generation Computer Systems. 2010.

The Open Provenance Model — Core Specification (v1.1) [pdf]

Paper doi link bibtex

@article{Moreau2010a,
author = {Moreau, Luc and Clifford, Ben and Freire, Juliana and Futrelle, Joe and Gil, Yolanda and Groth, Paul and Kwasnikowska, Natalia and Miles, Simon and Missier, Paolo and Myers, Jim and Plale, Beth and Simmhan, Yogesh and Stephan, Eric and {Van Den Bussche}, Jan},
doi = {10.1016/j.future.2010.07.005},
file = {:Users/paolo/Documents/myGRID/refs/opm.pdf:pdf},
journal = {Future Generation Computer Systems},
title = {{The Open Provenance Model --- Core Specification (v1.1)}},
year = {2010},
urlpaper={http://www.mendeley.com/download/public/212462/3464695141/849c7a26f20b3fa9338bc8d6ab292b6c8332cf8d/dl.pdf}
}

Seamless Provenance Representation and Use in Collaborative Science Scenarios (Abstract). Missier, P.; Ludascher, B.; Bowers, S.; Anand, M. K.; Altintas, I.; Dey, S.; Sarkar, A.; Shrestha, B.; and Goble, C. In AGU Fall Meeting, San Francisco, CA, USA, 2010.

Seamless Provenance Representation and Use in Collaborative Science Scenarios (Abstract) [pdf]

Paper link bibtex

@inproceedings{Missier2010d,
  author    = {Missier, Paolo and Ludascher, Bertram and Bowers, Shawn and Anand, Manish Kumar and Altintas, Ilkay and Dey, Saumen and Sarkar, Anandarup and Shrestha, Biva and Goble, Carole},
  title     = {{Seamless Provenance Representation and Use in Collaborative Science Scenarios (Abstract)}},
  booktitle = {AGU Fall Meeting},
  address   = {San Francisco, CA, USA},
  year      = {2010},
  file      = {:Users/paolo/Documents/DataONE/AGU 2010 abstracts/AGU-IN02 submission.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/1436102841/a3435cce7917d1995fdb70485b20efa0d3ffdf81/dl.pdf}
}

A comparison of using Taverna and BPEL in building scientific workflows: the case of caGrid. Tan, W.; Missier, P.; Foster, I.; Madduri, R.; De Roure, D.; and Goble, C. Concurrency and Computation: Practice and Experience, 22(9): 1098–1117. 2010.

A comparison of using Taverna and BPEL in building scientific workflows: the case of caGrid [link]

Paper doi link bibtex

@article{CPE:CPE1547,
  author    = {Tan, Wei and Missier, Paolo and Foster, Ian and Madduri, Ravi and {De Roure}, David and Goble, Carole},
  title     = {{A comparison of using Taverna and BPEL in building scientific workflows: the case of caGrid}},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {22},
  number    = {9},
  pages     = {1098--1117},
  year      = {2010},
  publisher = {John Wiley \& Sons, Ltd.},
  issn      = {1532-0634},
  doi       = {10.1002/cpe.1547},
  url       = {http://dx.doi.org/10.1002/cpe.1547},
  keywords  = {BPEL,Taverna,caGrid,functional programming,scientific workflow},
  file      = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Tan et al. - 2010 - A comparison of using Taverna and BPEL in building scientific workflows the case of caGrid.pdf:pdf}
}

Fine-grained and efficient lineage querying of collection-based workflow provenance. Missier, P.; Paton, N.; and Belhajjame, K. In Procs. EDBT, Lausanne, Switzerland, 2010.

Fine-grained and efficient lineage querying of collection-based workflow provenance [pdf]

Paper link bibtex 1 download

@inproceedings{Missier2010a,
  author    = {Missier, P. and Paton, N. and Belhajjame, K.},
  title     = {{Fine-grained and efficient lineage querying of collection-based workflow provenance}},
  booktitle = {Procs. EDBT},
  address   = {Lausanne, Switzerland},
  year      = {2010},
  file      = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier, Paton, Belhajjame - 2010 - Fine-grained and efficient lineage querying of collection-based workflow provenance.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/1436102841/a3435cce7917d1995fdb70485b20efa0d3ffdf81/dl.pdf}
}

ERGOT: A Semantic-based System for Service Discovery in Distributed Infrastructures. Pirro', G.; Trunfio, P.; Talia, D.; Missier, P.; and Goble, C. In Procs. CCGRID '10, Melbourne, Australia, 2010.
doi link bibtex

@inproceedings{pirro10:,
address = {Melbourne, Australia},
author = {Pirr{\`o}, Giuseppe and Trunfio, Paolo and Talia, Domenico and Missier, Paolo and Goble, Carole},
booktitle = {Procs. CCGRID '10},
doi = {10.1109/CCGRID.2010.24},
file = {:Users/paolo/Documents/myGRID/refs/p299-missier.pdf:pdf},
title = {{ERGOT: A Semantic-based System for Service Discovery in Distributed Infrastructures}},
year = {2010}
}

Functional Units: Abstractions for Web Service Annotations. Missier, P.; Wolstencroft, K.; Tanoh, F.; Li, P.; Bechhofer, S.; Belhajjame, K.; and Goble, C. In Procs. IEEE 2010 Fourth International Workshop on Scientific Workflows (SWF 2010), Miami, FL, 2010.

Functional Units: Abstractions for Web Service Annotations [pdf]

Paper link bibtex

@inproceedings{Missier2010,
  author    = {Missier, Paolo and Wolstencroft, Katy and Tanoh, Franck and Li, Peter and Bechhofer, Sean and Belhajjame, Khalid and Goble, Carole},
  title     = {{Functional Units: Abstractions for Web Service Annotations}},
  booktitle = {Procs. IEEE 2010 Fourth International Workshop on Scientific Workflows (SWF 2010)},
  address   = {Miami, FL},
  year      = {2010},
  url       = {http://www.cs.wayne.edu/\~{}shiyong/swf/swf2010.html},
  keywords  = {service annotations biocatalogue},
  file      = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier et al. - 2010 - Functional Units Abstractions for Web Service Annotations.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/2916934991/f6fa568c0c07692423a097a6083e94909b3bbcd6/dl.pdf}
}

Taverna, reloaded. Missier, P.; Soiland-Reyes, S.; Owen, S.; Tan, W.; Nenadic, A.; Dunlop, I.; Williams, A.; Oinn, T.; and Goble, C. In Gertz, M; Hey, T; and Ludaescher, B, editor(s), Procs. SSDBM 2010, Heidelberg, Germany, 2010.

Paper link bibtex 2 downloads

@inproceedings{Missier2010b,
  author    = {Missier, Paolo and Soiland-Reyes, Stian and Owen, Stuart and Tan, Wei and Nenadic, Alex and Dunlop, Ian and Williams, Alan and Oinn, Tom and Goble, Carole},
  title     = {{Taverna, reloaded}},
  editor    = {Gertz, M and Hey, T and Ludaescher, B},
  booktitle = {Procs. SSDBM 2010},
  address   = {Heidelberg, Germany},
  year      = {2010},
  url       = {http://www.ssdbm2010.org/},
  file      = {:Users/paolo/Documents/myGRID/papers/T2Performance/short-SSDBM/Missier-CR/T2Architecture-SSDBM-CR.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/2916934941/e91110015e961be177247d40c8f5dc874367106b/dl.pdf}
}

Janus: from Workflows to Semantic Provenance and Linked Open Data. Missier, P.; Sahoo, S. S; Zhao, J.; Sheth, A.; and Goble, C. In Procs. IPAW 2010, Troy, NY, 2010.

Janus: from Workflows to Semantic Provenance and Linked Open Data [pdf]

Paper link bibtex

@inproceedings{Missier2010c,
  author    = {Missier, Paolo and Sahoo, Satya S and Zhao, Jun and Sheth, Amit and Goble, Carole},
  title     = {{Janus: from Workflows to Semantic Provenance and Linked Open Data}},
  booktitle = {Procs. IPAW 2010},
  address   = {Troy, NY},
  year      = {2010},
  keywords  = {provenance Taverna LOD RDF semantics},
  file      = {:Users/paolo/Dropbox/Janus/paper-IPAW2010/SP-IPAW10.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/2928549431/f166661dce9df21dd0c321fbffb0fb6eb4c85cd1/dl.pdf}
}

Understanding Collaborative Studies Through Interoperable Workflow Provenance. Altintas, I.; Anand, M. K.; Crawl, D.; Belloum, A.; Missier, P.; Goble, C.; and Sloot, P. In Procs. IPAW 2010, Troy, NY, 2010.

Understanding Collaborative Studies Through Interoperable Workflow Provenance [pdf]

Paper link bibtex

@inproceedings{Altintas2010a,
  author    = {Altintas, Ilkay and Anand, Manish Kumar and Crawl, Daniel and Belloum, Adam and Missier, Paolo and Goble, Carole and Sloot, Peter},
  title     = {{Understanding Collaborative Studies Through Interoperable Workflow Provenance}},
  booktitle = {Procs. IPAW 2010},
  address   = {Troy, NY},
  year      = {2010},
  keywords  = {provenance interoperability},
  file      = {:Users/paolo/Documents/myGRID/refs/IPAW2010-CP.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/2928549401/bcae9fcd73771e1d2dde11d3123f475178aca3fa/dl.pdf}
}

2009 (9)

Time-completeness trade-offs in record linkage using Adaptive Query Processing. Lengu, R; Missier, P; Fernandes, A A A; Guerrini, G; and Mesiti, M In Procs. EDBT, St. Petersburg, Russia, March 2009.

Time-completeness trade-offs in record linkage using Adaptive Query Processing [link]

Paper doi link bibtex

@inproceedings{Lengu09,
address = {St. Petersburg, Russia},
annote = {conference},
author = {Lengu, R and Missier, P and Fernandes, A A A and Guerrini, G and Mesiti, M},
booktitle = {Procs. EDBT},
doi = {10.1145/1516360.1516458},
file = {:Users/paolo/Documents/myGRID/refs/p299-missier.pdf:pdf},
keywords = {"Adaptive Query Processing","Record Linkage"},
month = mar,
title = {{Time-completeness trade-offs in record linkage using Adaptive Query Processing}},
url = {http://dx.doi.org/10.1145/1516360.1516458},
year = {2009}
}

A Comparison of Using Taverna and BPEL in Building Scientific Workflows: the case of caGrid. Tan, W.; Missier, P.; Foster, I.; Madduri, R.; and Goble, C. Concurrency and Computation Practice and Experience. 2009.
link bibtex

@article{Tan:2009lk,
annote = {In press},
author = {Tan, Wei and Missier, Paolo and Foster, Ian and Madduri, Ravi and Goble, Carole},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Tan et al. - 2009 - A Comparison of Using Taverna and BPEL in Building Scientific Workflows the case of caGrid.pdf:pdf},
journal = {Concurrency and Computation: Practice and Experience},
keywords = {scientific workflow BPEL Taverna},
title = {{A Comparison of Using Taverna and BPEL in Building Scientific Workflows: the case of caGrid}},
year = {2009}
}

Combining DHTs and SONs for Semantic-Based Service Discovery. Pirro', G.; Missier, P.; Trunfio, P.; Talia, D.; Falace, G.; and Goble, C. In Procs. International Conference on Intelligent System Design and Applications (ISDA'09), Pisa, Italy, November 2009.

Combining DHTs and SONs for Semantic-Based Service Discovery [pdf]

Paper link bibtex

@inproceedings{Giuseppe-Pirr:2009oa,
address = {Pisa, Italy},
annote = {conference},
author = {Pirr{\`o}, Giuseppe and Missier, Paolo and Trunfio, Paolo and Talia, Domenico and Falace, Gabriele and Goble, Carole},
booktitle = {Procs. International Conference on Intelligent System Design and Applications (ISDA'09)},
keywords = {"Distributed Systems"},
month = nov,
title = {{Combining DHTs and SONs for Semantic-Based Service Discovery}},
year = {2009},
urlpaper={http://www.mendeley.com/download/public/212462/858964852/fd27883c64acca89540c464ddd402b66d5fb3000/dl.pdf}
}

Formal semantics for the Taverna 2 Workflow Model. Sroka, J.; Hidders, J.; Missier, P.; and Goble, C. Journal of Computer and System Sciences. 2009.

Formal semantics for the Taverna 2 Workflow Model [pdf]

Paper doi link bibtex abstract

@article{Sroka:2009la,
  author   = {Sroka, Jacek and Hidders, Jan and Missier, Paolo and Goble, Carole},
  title    = {{Formal semantics for the Taverna 2 Workflow Model}},
  journal  = {Journal of Computer and System Sciences},
  year     = {2009},
  annote   = {in press.},
  doi      = {10.1016/j.jcss.2009.11.009},
  url      = {http://dx.doi.org/10.1016/j.jcss.2009.11.009},
  keywords = {workflow model semantics},
  abstract = {This paper presents a formal semantics for the Taverna 2 scientific workflow system. Taverna 2 is a successor to Taverna, an open-source workflow system broadly adopted within the e-science community worldwide. The new version improves upon the existing model in two main ways: (i) by adding support for data pipelining, which in turns enables input streams of indefinite length to be processed efficiently; and (ii) by providing new extensibility points that make it possible to add new operators to the workflow model. Consistent with previous work by some of the authors, we use trace semantics to describe the effect of workflow computations, and we show how they can be used to describe the new features in the Taverna 2 model.},
  file     = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Sroka et al. - 2009 - Formal semantics for the Taverna 2 Workflow Model.pdf:pdf},
  urlpaper = {http://www.mendeley.com/download/public/212462/858963592/08b14b4cad8f8881793405799a72a4aeb6e6aa24/dl.pdf}
}

The Data Playground: An Intuitive Workflow Specification Environment. Gibson, A; Gamble, M; Wolstencroft, K; Oinn, T; Goble, C; Belhajjame, K; and Missier, P. Future Generation Computer Systems, 25: 453–459. April 2009.

The Data Playground: An Intuitive Workflow Specification Environment [link]

Paper doi link bibtex

@article{A.-Gibson:2009ti,
annote = {journal},
author = {Gibson, A and Gamble, M and Wolstencroft, K and Oinn, T and Goble, C and Belhajjame, K and Missier, Paolo},
doi = {10.1016/j.future.2008.09.009},
file = {:Users/paolo/Documents/myGRID/refs/p299-missier.pdf:pdf},
journal = {Future Generation Computer Systems},
keywords = {"Workflow Management","e-Science"},
month = apr,
pages = {453--459},
title = {{The Data Playground: An Intuitive Workflow Specification Environment}},
url = {http://dx.doi.org/10.1016/j.future.2008.09.009},
volume = {25},
year = {2009}
}

Semantically Annotated Provenance in the Life Science Grid. Cao, B; Plale, B; Subramanian, G; Missier, P; Goble, C; and Simmhan, Y In Freire, J.; Missier, P.; and Sahoo, S. S., editor(s), 1st International Workshop on the Role of Semantic Web in Provenance Management, 2009. CEUR Proceedings

Semantically Annotated Provenance in the Life Science Grid [pdf]

Paper link bibtex

@inproceedings{Bin2009,
  author        = {Cao, B and Plale, B and Subramanian, G and Missier, P and Goble, C and Simmhan, Y},
  title         = {{Semantically Annotated Provenance in the Life Science Grid}},
  editor        = {Freire, Juliana and Missier, Paolo and Sahoo, Satya S.},
  booktitle     = {1st International Workshop on the Role of Semantic Web in Provenance Management},
  publisher     = {CEUR Proceedings},
  year          = {2009},
  url           = {http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-526/},
  keywords      = {\#provxg \#provenance,semantics provenance},
  mendeley-tags = {\#provxg \#provenance},
  file          = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Cao et al. - 2009 - Semantically Annotated Provenance in the Life Science Grid(2).pdf:pdf},
  urlpaper      = {http://www.mendeley.com/download/public/212462/982701542/3a8950b4a6495a57e59ccc5166f84737f8dc9c4a/dl.pdf}
}

Medical Image Processing Workflow Support on the EGEE Grid with Taverna. Maheshwari, K.; Missier, P.; Goble, C.; and Montagnat, J. In Procs. conference of Computer Based Medical Systems (CBMS), Albuquerque, NM, USA, 2009.

Medical Image Processing Workflow Support on the EGEE Grid with Taverna [pdf]

Paper link bibtex

@inproceedings{Ketan-Maheshwari:jh,
  author    = {Maheshwari, Ketan and Missier, Paolo and Goble, Carole and Montagnat, Johan},
  title     = {{Medical Image Processing Workflow Support on the EGEE Grid with Taverna}},
  booktitle = {Procs. conference of Computer Based Medical Systems (CBMS)},
  address   = {Albuquerque, NM, USA},
  year      = {2009},
  annote    = {conference},
  keywords  = {"Workflow Management","e-Science"},
  file      = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Maheshwari et al. - 2009 - Medical Image Processing Workflow Support on the EGEE Grid with Taverna.pdf:pdf},
  urlpaper  = {http://www.mendeley.com/download/public/212462/858962842/10cd50751591a44f384a80766f8165723ec21eda/dl.pdf}
}

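Data Provenance in Scientific Workflows. Belhajjame, K.; Missier, P.; and Goble, C. In Handbook of Research on Computational Grid Technologies for Life Sciences, Biomedicine, and Healthcare. IGI Global, 2009.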
link bibtex

@incollection{Khalid-Belhajjame:2009ix,
annote = {chapter},
author = {Belhajjame, Khalid and Missier, Paolo and Goble, Carole},
booktitle = {Handbook of Research on Computational Grid Technologies for Life Sciences, Biomedicine, and Healthcare},
keywords = {"Workflow Management",Provenance},
publisher = {IGI Global},
title = {{Data Provenance in Scientific Workflows}},
year = {2009}
}

Incorporating Domain-Specific Information Quality Constraints into Database Queries. Embury, S. M; Missier, P.; Sampaio, S.; Greenwood, R M.; and Preece, A. D J. Data and Information Quality, 1(2). 2009.

Incorporating Domain-Specific Information Quality Constraints into Database Queries [pdf]

Paper link bibtex

@article{DBLP:journals/jdiq/EmburyMSGP09,
  author   = {Embury, Suzanne M and Missier, Paolo and Sampaio, Sandra and Greenwood, R Mark and Preece, Alun D},
  title    = {{Incorporating Domain-Specific Information Quality Constraints into Database Queries}},
  journal  = {J. Data and Information Quality},
  volume   = {1},
  number   = {2},
  year     = {2009},
  file     = {:Users/paolo/Documents/myGRID/refs/Incorporating Domain-Specific Information Quality Constraints into Database Queries.pdf:pdf},
  urlpaper = {http://www.mendeley.com/download/public/212462/3145881381/2d9ae3771e97e17f7899676a5a98f57afd667fbe/dl.pdf}
}

2008 (7)

Building Scientific Workflow with Taverna and BPEL: a Comparative Study in caGrid. Tan, W.; Missier, P.; Madduri, R.; and Foster, I. In Procs. 4th International workshop on Engineering Service-Oriented applications (WESOA), Sydney, Australia, December 2008.

Building Scientific Workflow with Taverna and BPEL: a Comparative Study in caGrid [pdf]

Paper doi link bibtex

@inproceedings{Wei-Tan:2008td,
address = {Sydney, Australia},
annote = {conference},
author = {Tan, Wei and Missier, Paolo and Madduri, Ravi and Foster, Ian},
booktitle = {Procs. 4th International workshop on Engineering Service-Oriented applications (WESOA)},
doi = {10.1007/978-3-642-01247-1_11},
keywords = {"Workflow Management","e-Science"},
month = dec,
title = {{Building Scientific Workflow with Taverna and BPEL: a Comparative Study in caGrid}},
url = {http://dx.doi.org/10.1007/978-3-642-01247-1\_11},
year = {2008},
urlpaper={http://www.mendeley.com/download/public/212462/858964232/b62f315e67100842b89c100f95aac798e8b5ee9c/dl.pdf}
}

Scientific Workflows. Goble, C.; De Roure, D.; and Missier, P. In Yearbook of Science and Technology. McGraw Hill, 2008.
link bibtex

@incollection{C.Goble:2008hw,
annote = {chapter},
author = {Goble, Carole and {De Roure}, David and Missier, Paolo},
booktitle = {Yearbook of Science and Technology},
keywords = {"Workflow Management","e-Science"},
publisher = {McGraw Hill},
title = {{Scientific Workflows}},
year = {2008}
}

Exploiting provenance to make sense of automated data acceptance decisions in scientific workflows. Missier, P.; Embury, S.; and Stapenhurst, R. In IPAW, volume 5272/2008, of LNCS series, Salt Lake City, Utah, June 2008. Springer

Exploiting provenance to make sense of automated data acceptance decisions in scientific workflows [pdf]

Paper doi link bibtex

@inproceedings{Paolo-Missier:2008zk,
address = {Salt Lake City, Utah},
annote = {DOI: http://dx.doi.org/10.1007/978-3-540-89965-5\_19
conference},
author = {Missier, Paolo and Embury, Suzanne and Stapenhurst, Richard},
booktitle = {IPAW},
doi = {10.1007/978-3-540-89965-5_19},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier, Embury, Stapenhurst - 2008 - Exploiting provenance to make sense of automated data acceptance decisions in scientific workflows(4).pdf:pdf},
keywords = {"Information Quality Management",\#provxg \#provenance \#qurator,provenance data quality},
mendeley-tags = {\#provxg \#provenance \#qurator},
month = jun,
publisher = {Springer},
series = {LNCS},
title = {{Exploiting provenance to make sense of automated data acceptance decisions in scientific workflows}},
url = {http://www.springerlink.com/content/r07524068770k401/},
volume = {5272},
year = {2008},
urlpaper={http://www.mendeley.com/download/public/212462/836703912/38d5918a52b3c587f4c1d4e465bc068fa1a9cdc9/dl.pdf}
}

Information Quality in Proteomics. Stead, D; Paton, N; Missier, P; Embury, S; Hedeler, C; Jin, B; Brown, A; and Preece, A Briefings in Bioinformatics, 9: 174–188. March 2008.

Information Quality in Proteomics [link]

Paper link bibtex

@article{D.-Stead:2008zl,
annote = {journal},
author = {Stead, D and Paton, N and Missier, P and Embury, S and Hedeler, C and Jin, B and Brown, A and Preece, A},
journal = {Briefings in Bioinformatics},
keywords = {"Biological Information Management","Information Quality Management","e-Science",information quality proteomics},
month = mar,
number = {2},
pages = {174--188},
title = {{Information Quality in Proteomics}},
url = {http://bib.oxfordjournals.org/cgi/reprint/9/2/174},
volume = {9},
year = {2008}
}

Data lineage model for Taverna workflows with lightweight annotation requirements. Missier, P; Belhajjame, K; Zhao, J; and Goble, C In IPAW, volume 5272/2008, of LNCS, Salt Lake City, US, June 2008. Springer

Data lineage model for Taverna workflows with lightweight annotation requirements [pdf]

Paper doi link bibtex 1 download

@inproceedings{missier-IPAW08a:,
address = {Salt Lake City, US},
annote = {DOI: http://dx.doi.org/10.1007/978-3-540-89965-5\_4
conference},
author = {Missier, P and Belhajjame, K and Zhao, J and Goble, C},
booktitle = {IPAW},
doi = {10.1007/978-3-540-89965-5_4},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier et al. - 2008 - Data lineage model for Taverna workflows with lightweight annotation requirements(4).pdf:pdf},
keywords = {"Workflow Management",\#provxg \#provenance,provenance lineage Taverna annotations},
mendeley-tags = {\#provxg \#provenance},
month = jun,
publisher = {Springer},
series = {LNCS},
title = {{Data lineage model for Taverna workflows with lightweight annotation requirements}},
url = {http://www.springerlink.com/content/36rw83153m0v171h/},
volume = {5272},
year = {2008},
urlpaper={http://www.mendeley.com/download/public/212462/836704122/965a9eee272375793a11319f906cf7ae6dd001c4/dl.pdf}
}

Brokering infrastructure for minimum cost data procurement based on quality - quantity models. Avenali, A; Bertolazzi, P; Batini, C; and Missier, P Decision Support Systems, 45: 95–109. 2008.

Brokering infrastructure for minimum cost data procurement based on quality - quantity models [link]

Paper doi link bibtex

@article{Avenali07:,
author = {Avenali, A and Bertolazzi, P and Batini, C and Missier, P},
doi = {10.1016/j.dss.2007.10.012},
journal = {Decision Support Systems},
keywords = {"Distributed Query Processing","Information Quality Management","Optimization"},
pages = {95--109},
title = {{Brokering infrastructure for minimum cost data procurement based on quality - quantity models}},
url = {http://dx.doi.org/10.1016/j.dss.2007.10.012},
volume = {45},
year = {2008}
}

An ontology-based approach to handling information quality in e-Science. Preece, A; Missier, P; Embury, S; Jin, B; and Greenwood, M Concurrency and Computation: Practice and Experience, 20: 253–264. 2008.

An ontology-based approach to handling information quality in e-Science [link]

Paper doi link bibtex

@article{preece2008,
annote = {journal},
author = {Preece, A and Missier, P and Embury, S and Jin, B and Greenwood, M},
doi = {10.1002/cpe.1195},
journal = {Concurrency and Computation: Practice and Experience},
keywords = {"Automated Reasoning","Information Quality Management","Knowledge Representation","e-Science",\#qurator},
mendeley-tags = {\#qurator},
pages = {253--264},
title = {{An ontology-based approach to handling information quality in e-Science}},
url = {http://dx.doi.org/10.1002/cpe.1195},
volume = {20},
year = {2008}
}

2007 (7)

Taverna Workflows: Syntax and Semantics. Turi, D; Missier, P; De Roure, D; Goble, C; and Oinn, T In Proceedings of the 3rd e-Science conference, Bangalore, India, December 2007.

Taverna Workflows: Syntax and Semantics [pdf]

Paper doi link bibtex

@inproceedings{Turi07,
address = {Bangalore, India},
author = {Turi, D and Missier, P and {De Roure}, D and Goble, C and Oinn, T},
booktitle = {Proceedings of the 3rd e-Science conference},
doi = {10.1109/E-SCIENCE.2007.71},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Turi et al. - 2007 - Taverna Workflows Syntax and Semantics(5).pdf:pdf},
keywords = {"Language semantics","Workflow Management",Taverna},
month = dec,
title = {{Taverna Workflows: Syntax and Semantics}},
url = {http://dx.doi.org/10.1109/E-SCIENCE.2007.71},
year = {2007},
urlpaper={http://www.mendeley.com/download/public/212462/858964392/343a55b959d23b7c5a1e42b8fdd848f3bc8074ae/dl.pdf}
}

Accelerating Disease Gene Identification Through Integrated SNP Data Analysis. Missier, P; Embury, S; Hedeler, C; Greenwood, M; Pennock, J; and Brass, A In Proceedings 4th International Workshop on Data Integration in the Life Sciences, of LNBI, pages 215–230, 2007. Springer

Accelerating Disease Gene Identification Through Integrated SNP Data Analysis [link]

Paper doi link bibtex 2 downloads

@inproceedings{paolodils07,
author = {Missier, P and Embury, S and Hedeler, C and Greenwood, M and Pennock, J and Brass, A},
booktitle = {Proceedings 4th International Workshop on Data Integration in the Life Sciences},
doi = {10.1007/978-3-540-73255-6_18},
keywords = {"Distributed Query Processing","Information Quality Management","Web Services","Biological Information Management"},
pages = {215--230},
publisher = {Springer},
series = {LNBI},
title = {{Accelerating Disease Gene Identification Through Integrated SNP Data Analysis}},
url = {http://dx.doi.org/10.1007/978-3-540-73255-6\_18},
year = {2007}
}

Quality management challenges in the post-genomic era. Hedeler, C; and Missier, P In Database Modeling in Biology: Practices and Challenges. Artech House, 2007.

Quality management challenges in the post-genomic era [pdf]

Paper link bibtex

@incollection{hedeler06:,
author = {Hedeler, C and Missier, P},
booktitle = {Database Modeling in Biology: Practices and Challenges},
file = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Hedeler, Missier - 2007 - Database Modeling in Biology Practices and Challenges.pdf:pdf},
keywords = {"Biological Information Management","Information Quality Management"},
publisher = {Artech House},
title = {{Quality management challenges in the post-genomic era}},
year = {2007},
urlpaper={http://www.mendeley.com/download/public/212462/858964882/92edb6a59a8f3f1ef86695fd52a6c9655484d89a/dl.pdf}
}

Managing information quality in e-science: the Qurator workbench. Missier, P; Embury, S M; Greenwood, R M; Preece, A D; and Jin, B In SIGMOD '07: Proceedings of the 2007 ACM SIGMOD international conference on Management of data, pages 1150–1152, New York, NY, USA, 2007. ACM

Managing information quality in e-science: the Qurator workbench [link]

Paper doi link bibtex

@inproceedings{paolosigmod07,
address = {New York, NY, USA},
author = {Missier, P and Embury, S M and Greenwood, R M and Preece, A D and Jin, B},
booktitle = {SIGMOD '07: Proceedings of the 2007 ACM SIGMOD international conference on Management of data},
doi = {10.1145/1247480.1247638},
isbn = {978-1-59593-686-8},
keywords = {"Information Quality Management","Workflow Management","e-Science",\#qurator},
mendeley-tags = {\#qurator},
pages = {1150--1152},
publisher = {ACM},
title = {{Managing information quality in e-science: the Qurator workbench}},
url = {http://dx.doi.org/10.1145/1247480.1247638},
year = {2007}
}

Architectural patterns for the Semantic Grid. Kotsiopoulos, I; Missier, P; Alper, P; Corcho, O; Bechhofer, S; and Goble, C In Talia, D; Bilas, A; and Dikaiakos, M, editor(s), CoreGRID Institute on Knowledge and Data Management, Poznan Workshop, September 2005, volume XVIII, of CoreGRID Series, Knowledge and Data Management in GRIDs. Springer, 2007.
link bibtex

@incollection{kotsiopoulos07:_archit_seman_grid,
author = {Kotsiopoulos, I and Missier, P and Alper, P and Corcho, O and Bechhofer, S and Goble, C},
booktitle = {CoreGRID Institute on Knowledge and Data Management, Poznan Workshop, September 2005},
editor = {Talia, D and Bilas, A and Dikaiakos, M},
keywords = {"Semantic Grid"},
publisher = {Springer},
series = {CoreGRID Series, Knowledge and Data Management in GRIDs},
title = {{Architectural patterns for the Semantic Grid}},
volume = {XVIII},
year = {2007}
}

Grid Metadata Management: requirements and architecture. Corcho, O.; Alper, P.; Missier, P.; Bechhofer, S.; and Goble, C. In 8th ACM/IEEE International Conference on Grid Computing (GRID 2007), Austin, Texas, September 2007.

Grid Metadata Management: requirements and architecture [link]

Paper doi link bibtex

@inproceedings{corcho07:_grid_metad_manag,
address = {Austin, Texas},
author = {Corcho, Oscar and Alper, Pinar and Missier, Paolo and Bechhofer, Sean and Goble, Carole},
booktitle = {8th ACM/IEEE International Conference on Grid Computing ({GRID} 2007)},
doi = {10.1109/GRID.2007.4354121},
keywords = {"Semantic Grid","Semantic Web Services","metadata management"},
month = sep,
title = {{Grid Metadata Management: requirements and architecture}},
url = {http://dx.doi.org/10.1109/GRID.2007.4354121},
year = {2007}
}

Requirements and services for Metadata Management. Missier, P; Alper, P; Corcho, O; Dunlop, I; and Goble, C IEEE Internet Computing, (Special issue on Semantic-Based Knowledge Management). 2007.

Requirements and services for Metadata Management [link]

Paper doi link bibtex 1 download

@article{Missier-IC07,
  author   = {Missier, P and Alper, P and Corcho, O and Dunlop, I and Goble, C},
  title    = {{Requirements and services for Metadata Management}},
  journal  = {IEEE Internet Computing},
  number   = {Special issue on Semantic-Based Knowledge Management},
  year     = {2007},
  doi      = {10.1109/MIC.2007.113},
  url      = {http://doi.ieeecomputersociety.org/10.1109/MIC.2007.113},
  keywords = {"Semantic Grid","Semantic Web Services","metadata management"},
  annote   = {magazine},
}

2006 (6)

Towards the Management of Information Quality in Proteomics. Preece, A D; Jin, B; Missier, P; Embury, S M; Stead, D; and Brown, A In Proceedings of 19th IEEE International Symposium on Computer-Based Medical Systems (CBMS'06), pages 936–940, Salt Lake City, US, 2006. IEEE Computer Society Press

Towards the Management of Information Quality in Proteomics [link]

Paper doi link bibtex

@inproceedings{preece:2006,
  author    = {Preece, A D and Jin, B and Missier, P and Embury, S M and Stead, D and Brown, A},
  title     = {{Towards the Management of Information Quality in Proteomics}},
  booktitle = {Proceedings of 19th IEEE International Symposium on Computer-Based Medical Systems (CBMS'06)},
  pages     = {936--940},
  address   = {Salt Lake City, US},
  publisher = {IEEE Computer Society Press},
  year      = {2006},
  doi       = {10.1109/CBMS.2006.160},
  url       = {http://dx.doi.org/10.1109/CBMS.2006.160},
  keywords  = {"Information Quality Management","Workflow Management","e-Science"},
}

An overview of S-OGSA: A Reference Semantic Grid Architecture. Corcho, O.; Alper, P.; Kotsiopoulos, I.; Missier, P.; Bechhofer, S.; and Goble, C. Journal of Web Semantics, 4(2): 102–115. 2006.

An overview of S-OGSA: A Reference Semantic Grid Architecture [pdf]

Paper doi link bibtex abstract

@article{CORCHO2006,
  author   = {Corcho, Oscar and Alper, Pinar and Kotsiopoulos, Ioannis and Missier, Paolo and Bechhofer, Sean and Goble, Carole},
  title    = {{An overview of S-OGSA: A Reference Semantic Grid Architecture}},
  journal  = {Journal of Web Semantics},
  volume   = {4},
  number   = {2},
  pages    = {102--115},
  year     = {2006},
  doi      = {10.1016/j.websem.2006.03.001},
  issn     = {15708268},
  url      = {http://linkinghub.elsevier.com/retrieve/pii/S1570826806000059},
  urlpaper = {http://www.mendeley.com/download/public/212462/837355462/77dfb167a0b74918da2d3f9fb2ad72eb295e2260/dl.pdf},
  keywords = {Architecture,Explicit metadata,Grid,Semantic Grid,Semantics},
  file     = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/CORCHO et al. - 2006 - An overview of S-OGSA A Reference Semantic Grid Architecture.pdf:pdf},
  abstract = {The Grid's vision, of sharing diverse resources in a flexible, coordinated and secure manner through dynamic formation and disbanding of virtual communities, strongly depends on metadata. Currently, Grid metadata is generated and used in an ad hoc fashion, much of it buried in the Grid middleware's code libraries and database schemas. This ad hoc expression and use of metadata causes chronic dependency on human intervention during the operation of Grid machinery, leading to systems which are brittle when faced with frequent syntactic changes in resource coordination and sharing protocols. The Semantic Grid is an extension of the Grid in which rich resource metadata is exposed and handled explicitly, and shared and managed via Grid protocols. The layering of an explicit semantic infrastructure over the Grid Infrastructure potentially leads to increased interoperability and greater flexibility. In recent years, several projects have embraced the Semantic Grid vision. However, the Semantic Grid lacks a Reference Architecture or any kind of systematic framework for designing Semantic Grid components or applications. The Open Grid Service Architecture (OGSA) aims to define a core set of capabilities and behaviours for Grid systems. We propose a Reference Architecture that extends OGSA to support the explicit handling of semantics, and defines the associated knowledge services to support a spectrum of service capabilities. Guided by a set of design principles, Semantic-OGSA (S-OGSA) defines a model, the capabilities and the mechanisms for the Semantic Grid. We conclude by highlighting the commonalities and differences that the proposed architecture has with respect to other Grid frameworks.},
}

Managing Information Quality in e-Science Using Semantic Web Technology. Preece, A D; Jin, B; Pignotti, E; Missier, P; Embury, S M; Stead, D; and Brown, A In ESWC, pages 472–486, 2006.

Managing Information Quality in e-Science Using Semantic Web Technology. [pdf]

Paper doi link bibtex

@inproceedings{DBLP:conf/esws/PreeceJPMESB06,
  author        = {Preece, A D and Jin, B and Pignotti, E and Missier, P and Embury, S M and Stead, D and Brown, A},
  title         = {{Managing Information Quality in e-Science Using Semantic Web Technology.}},
  booktitle     = {ESWC},
  pages         = {472--486},
  year          = {2006},
  doi           = {10.1007/11762256_35},
  url           = {http://dx.doi.org/10.1007/11762256\_35},
  urlpaper      = {http://www.mendeley.com/download/public/212462/858962572/2589db283c873a226c1696faec2e245aba3d2f10/dl.pdf},
  keywords      = {"Information Quality Management","Knowledge Representation","Semantic Web","e-Science",\#qurator,Automated Reasoning},
  mendeley-tags = {\#qurator},
  file          = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Preece et al. - 2006 - Managing Information Quality in e-Science Using Semantic Web Technology.(4).pdf:pdf},
}

Practical data quality certification: model, architecture, and experiences. Missier, P; Oliaro, A; and Raffa, S In IQIS, International Workshop on Information Quality in Information Systems, 30 June 2006, Chicago, USA (SIGMOD 2006 Workshop), 2006. ACM

Practical data quality certification: model, architecture, and experiences [pdf]

Paper link bibtex

@inproceedings{missier06:_pract,
  author    = {Missier, P and Oliaro, A and Raffa, S},
  title     = {{Practical data quality certification: model, architecture, and experiences}},
  booktitle = {IQIS, International Workshop on Information Quality in Information Systems, 30 June 2006, Chicago, USA (SIGMOD 2006 Workshop)},
  publisher = {ACM},
  year      = {2006},
  urlpaper  = {http://www.mendeley.com/download/public/212462/858962792/1f186264788242cfea71ef0090f49ca20ba48935/dl.pdf},
  keywords  = {"Data Quality","Information Quality Management"},
  file      = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Missier, Oliaro, Raffa - 2006 - Practical data quality certification model, architecture, and experiences.pdf:pdf},
}

Quality Views: Capturing and Exploiting the User Perspective on Data Quality. Missier, P; Embury, S M; Greenwood, M; Preece, A D; and Jin, B In Procs. VLDB, pages 977–988, Seoul, Korea, September 2006.

Quality Views: Capturing and Exploiting the User Perspective on Data Quality. [pdf]

Paper link bibtex

@inproceedings{DBLP:conf/vldb/MissierEGPJ06,
  author        = {Missier, P and Embury, S M and Greenwood, M and Preece, A D and Jin, B},
  title         = {{Quality Views: Capturing and Exploiting the User Perspective on Data Quality.}},
  booktitle     = {Procs. VLDB},
  pages         = {977--988},
  address       = {Seoul, Korea},
  month         = sep,
  year          = {2006},
  url           = {http://www.vldb.org/conf/2006/p977-missier.pdf},
  keywords      = {"Information Quality Management","Workflow Management","e-Science",\#qurator},
  mendeley-tags = {\#qurator},
}

Semantic Support For Meta-Scheduling in Grids. Missier, P; Wieder, P; and Ziegler, W In Knowledge and Data Management in Grids, volume 3, of CoreGRID. Springer, 2006.
link bibtex

@incollection{missier06:_coreg,
  author    = {Missier, P and Wieder, P and Ziegler, W},
  title     = {{Semantic Support For Meta-Scheduling in Grids}},
  booktitle = {Knowledge and Data Management in Grids},
  series    = {CoreGRID},
  volume    = {3},
  publisher = {Springer},
  year      = {2006},
  keywords  = {"Grid Systems","Scheduling","Semantic Grid"},
}

2005 (6)

Provider issues in quality-constrained data provisioning. Missier, P; and Embury, S M In IQIS 2005, International Workshop on Information Quality in Information Systems, 17 June 2005, Baltimore, Maryland, USA (SIGMOD 2005 Workshop), pages 5–15, 2005.

Provider issues in quality-constrained data provisioning [link]

Paper doi link bibtex

@inproceedings{DBLP:conf/iqis/MissierE05,
  author    = {Missier, P and Embury, S M},
  title     = {{Provider issues in quality-constrained data provisioning}},
  booktitle = {IQIS 2005, International Workshop on Information Quality in Information Systems, 17 June 2005, Baltimore, Maryland, USA (SIGMOD 2005 Workshop)},
  pages     = {5--15},
  year      = {2005},
  doi       = {10.1145/1077501.1077507},
  url       = {http://dx.doi.org/10.1145/1077501.1077507},
  keywords  = {"Information Quality Management"},
}

Clustering Web pages based on their structure. Crescenzi, V.; Merialdo, P.; and Missier, P. Data Knowl. Eng., 54: 279–299. 2005.

Clustering Web pages based on their structure. [link]

Paper doi link bibtex 1 download

@article{DBLP:journals/dke/CrescenziMM05,
  author  = {Crescenzi, Valter and Merialdo, Paolo and Missier, Paolo},
  title   = {{Clustering Web pages based on their structure.}},
  journal = {Data Knowl. Eng.},
  volume  = {54},
  pages   = {279--299},
  year    = {2005},
  doi     = {10.1016/j.datak.2004.11.004},
  url     = {http://dx.doi.org/10.1016/j.datak.2004.11.004},
}

An ontology-based approach to handling information quality in e-science. Missier, P; Embury, S; Greenwood, M; Preece, A; and Jin, B In Procs. 4th e-Science All Hands Meeting, Nottingham, UK, 2005.

An ontology-based approach to handling information quality in e-science [pdf]

Paper link bibtex

@inproceedings{missier05,
  author        = {Missier, P and Embury, S and Greenwood, M and Preece, A and Jin, B},
  title         = {{An ontology-based approach to handling information quality in e-science}},
  booktitle     = {Procs. 4th e-Science All Hands Meeting},
  address       = {Nottingham, UK},
  year          = {2005},
  url           = {http://www.csd.abdn.ac.uk/\~{}apreece/qurator/resources/qurator\_ahm\_8page.pdf},
  keywords      = {"Automated Reasoning","Information Quality Management","e-Science",\#qurator},
  mendeley-tags = {\#qurator},
}

Managing Information Quality in e-Science: A Case Study in Proteomics. Missier, P; Preece, A; Embury, S; Jin, B; Greenwood, M; Stead, D; and Brown, A In 1st Workshop on Quality of Information Systems (QoIS 2005), Lecture Notes in Computer Science., volume 3770, pages 423–432, 2005. Springer

Managing Information Quality in e-Science: A Case Study in Proteomics [link]

Paper doi link bibtex

@inproceedings{paoloqois05,
  author        = {Missier, P and Preece, A and Embury, S and Jin, B and Greenwood, M and Stead, D and Brown, A},
  title         = {{Managing Information Quality in e-Science: A Case Study in Proteomics}},
  booktitle     = {1st Workshop on Quality of Information Systems (QoIS 2005)},
  series        = {Lecture Notes in Computer Science},
  volume        = {3770},
  pages         = {423--432},
  publisher     = {Springer},
  year          = {2005},
  doi           = {10.1007/11568346_45},
  url           = {http://dx.doi.org/10.1007/11568346\_45},
  keywords      = {"Automated Reasoning","Information Quality Management","e-Science",\#qurator},
  mendeley-tags = {\#qurator},
}

Improving Government-to-business relationships through data reconciliation and process re-engineering. Bertoletti, M.; Missier, P.; Scannapieco, M.; Aimetti, P.; and Batini, C. In Advances in Management Information System Monograph Series, of Advances in Management Information System Monograph Series, 5. April 2005.
link bibtex

@incollection{bertoletti05:_improv_gover,
  author    = {Bertoletti, Marco and Missier, Paolo and Scannapieco, Monica and Aimetti, Pietro and Batini, Carlo},
  title     = {{Improving Government-to-business relationships through data reconciliation and process re-engineering}},
  booktitle = {Advances in Management Information System Monograph Series},
  series    = {Advances in Management Information System Monograph Series},
  chapter   = {5},
  month     = apr,
  year      = {2005},
  isbn      = {0-7656-1133-3},
}

Data Quality at a Glance. Scannapieco, M; Missier, P; and Batini, C Datenbank-Spektrum, 14: 6–14. 2005.

Paper link bibtex

@article{DBLP:journals/dbsk/ScannapiecoMB05,
  author   = {Scannapieco, M and Missier, P and Batini, C},
  title    = {{Data Quality at a Glance.}},
  journal  = {Datenbank-Spektrum},
  volume   = {14},
  pages    = {6--14},
  year     = {2005},
  url      = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.85.555\&rep=rep1\&type=pdf},
  keywords = {Data Quality},
  annote   = {magazine},
}

2004 (4)

An Automatic Data Grabber for Large Web Sites. Crescenzi, V.; Mecca, G.; Merialdo, P.; and Missier, P. In Procs. VLDB, pages 1321–1324, 2004.

An Automatic Data Grabber for Large Web Sites. [pdf]

Paper link bibtex

@inproceedings{DBLP:conf/vldb/CrescenziMMM04,
  author    = {Crescenzi, Valter and Mecca, Giansalvatore and Merialdo, Paolo and Missier, Paolo},
  title     = {{An Automatic Data Grabber for Large Web Sites.}},
  booktitle = {Procs. VLDB},
  pages     = {1321--1324},
  year      = {2004},
  url       = {http://www.vldb.org/conf/2004/DEMP18.PDF},
}

QoS in Multichannel IS: The MAIS Approach. Cappiello, C.; Missier, P.; Pernici, B.; Plebani, P.; and Batini, C. In ICWE Workshops, pages 255–268, 2004.

QoS in Multichannel IS: The MAIS Approach. [link]

Paper link bibtex

@inproceedings{DBLP:conf/icwe/CappielloMPPB04,
  author    = {Cappiello, Cinzia and Missier, Paolo and Pernici, Barbara and Plebani, Pierluigi and Batini, Carlo},
  title     = {{QoS in Multichannel IS: The MAIS Approach.}},
  booktitle = {ICWE Workshops},
  pages     = {255--268},
  year      = {2004},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.122.3805\&rep=rep1\&type=pdf},
}

Ontology-Based Question Answering in a Federation of University Sites: The MOSES Case Study. Atzeni, P.; Basili, R.; Hansen, D H; Missier, P.; Paggio, P.; Pazienza, M. T.; and Zanzotto, F. M. In Procs. NLDB, pages 413–420, 2004.

Ontology-Based Question Answering in a Federation of University Sites: The MOSES Case Study. [link]

Paper doi link bibtex

@inproceedings{DBLP:conf/nldb/AtzeniBHMPPZ04,
  author    = {Atzeni, Paolo and Basili, Roberto and Hansen, D H and Missier, Paolo and Paggio, Patrizia and Pazienza, Maria Teresa and Zanzotto, Fabio Massimo},
  title     = {{Ontology-Based Question Answering in a Federation of University Sites: The MOSES Case Study.}},
  booktitle = {Procs. NLDB},
  pages     = {413--420},
  year      = {2004},
  doi       = {10.1007/b98754},
  url       = {http://dx.doi.org/10.1007/b98754},
}

A formulation of the Data Quality Optimization Problem in Cooperative Information Systems. Avenali, A.; Bertolazzi, P.; Batini, C.; and Missier, P. In CAiSE Workshops (2), pages 49–63, 2004.

A formulation of the Data Quality Optimization Problem in Cooperative Information Systems. [link]

Paper link bibtex

@inproceedings{DBLP:conf/caise/AvenaliBBM04,
  author    = {Avenali, Alessandro and Bertolazzi, Paola and Batini, Carlo and Missier, Paolo},
  title     = {{A formulation of the Data Quality Optimization Problem in Cooperative Information Systems.}},
  booktitle = {CAiSE Workshops (2)},
  pages     = {49--63},
  year      = {2004},
  url       = {http://dblp.uni-trier.de/db/conf/caise/caisews2004-2.html\#AvenaliBBM04},
}

2003 (6)

Fine-grain web site structure discovery. Crescenzi, V.; Merialdo, P.; and Missier, P. In Procs. WIDM, pages 15–22, 2003.

Fine-grain web site structure discovery. [link]

Paper doi link bibtex 1 download

@inproceedings{DBLP:conf/widm/CrescenziMM03,
  author    = {Crescenzi, Valter and Merialdo, Paolo and Missier, Paolo},
  title     = {{Fine-grain web site structure discovery.}},
  booktitle = {Procs. WIDM},
  pages     = {15--22},
  year      = {2003},
  doi       = {10.1145/956699.956703},
  url       = {http://dx.doi.org/10.1145/956699.956703},
}

The Service to Businesses Project: Improving Government-to-Business Relationships in Italy. Bertoletti, M.; Missier, P.; Scannapieco, M.; Aimetti, P.; and Batini, C. In Procs. EGOV, pages 468–471, 2003.

The Service to Businesses Project: Improving Government-to-Business Relationships in Italy. [link]

Paper doi link bibtex

@inproceedings{DBLP:conf/egov/BertolettiMSAB03,
  author    = {Bertoletti, Marco and Missier, Paolo and Scannapieco, Monica and Aimetti, Pietro and Batini, Carlo},
  title     = {{The Service to Businesses Project: Improving Government-to-Business Relationships in Italy.}},
  booktitle = {Procs. EGOV},
  pages     = {468--471},
  year      = {2003},
  doi       = {10.1007/b11827},
  url       = {http://dx.doi.org/10.1007/b11827},
}

An Information Quality Management Framework for Cooperative Information Systems. Missier, P; and Batini, C In Procs. ISE 2003, Montreal, Canada, July 2003.
link bibtex

@inproceedings{missier03b:,
  author    = {Missier, P and Batini, C},
  title     = {{An Information Quality Management Framework for Cooperative Information Systems}},
  booktitle = {Procs. ISE 2003},
  address   = {Montreal, Canada},
  month     = jul,
  year      = {2003},
}

A Multidimensional Model for Information Quality in Cooperative Systems. Missier, P; and Batini, C In Proceedings of 8th International Conference on Information Quality (IQ'03), pages 25–40, 2003.
link bibtex

@inproceedings{missier:2003,
  author    = {Missier, P and Batini, C},
  title     = {{A Multidimensional Model for Information Quality in Cooperative Systems}},
  booktitle = {Proceedings of 8th International Conference on Information Quality (IQ'03)},
  pages     = {25--40},
  year      = {2003},
}

A model for Information Quality management in Cooperative Information Systems. Missier, P.; and Batini, C. In SEBD, pages 191–206, 2003.
link bibtex

@inproceedings{DBLP:conf/sebd/MissierB03,
  author    = {Missier, Paolo and Batini, Carlo},
  title     = {{A model for Information Quality management in Cooperative Information Systems.}},
  booktitle = {SEBD},
  pages     = {191--206},
  year      = {2003},
}

Improving Data Quality in Practice: A Case Study in the Italian Public Administration. Missier, P; Lalk, G; Verykios, V S; Grillo, F; Lorusso, T; and Angeletti, P Distributed and Parallel Databases, 13: 135–160. 2003.

Improving Data Quality in Practice: A Case Study in the Italian Public Administration. [link]

Paper link bibtex

@article{DBLP:journals/dpd/MissierLVGLA03,
  author  = {Missier, P and Lalk, G and Verykios, V S and Grillo, F and Lorusso, T and Angeletti, P},
  title   = {{Improving Data Quality in Practice: A Case Study in the Italian Public Administration.}},
  journal = {Distributed and Parallel Databases},
  volume  = {13},
  pages   = {135--160},
  year    = {2003},
  url     = {http://www.springerlink.com/content/x80m0245551j0202/},
}

2001 (2)

Eguru: a decision support system for the assisted design of e-commerce architectures. Missier, P; Bianchi, M; Zordan, A; and Umar, A In Knowledge Management & Intelligent Enterprises - Industrial Volume. Procs. 9th IFIP 2.6 Working Conference on Database Semantics (DS-9), Hong Kong, April 2001.

Eguru: a decision support system for the assisted design of e-commerce architectures [link]

Paper link bibtex

@inproceedings{p.missier01:_eguru,
  author    = {Missier, P and Bianchi, M and Zordan, A and Umar, A},
  title     = {{Eguru: a decision support system for the assisted design of e-commerce architectures}},
  booktitle = {Knowledge Management \& Intelligent Enterprises - Industrial Volume. Procs. 9th IFIP 2.6 Working Conference on Database Semantics (DS-9)},
  address   = {Hong Kong},
  month     = apr,
  year      = {2001},
  url       = {http://books.google.co.uk/books?hl=en\&lr=\&id=K8Z8GvYrbEcC\&oi=fnd\&pg=PA69\&dq=related:d41Mrh54DIgJ:scholar.google.com/\&ots=zF1CSZ73jM\&sig=pl3HEwWOt7sQ6HFWqWiNkR5YD5w\#v=onepage\&q=\&f=false},
}

CitiTime: a system for rapid creation of portable next-generation telephony services. Anjum, F.; Caruso, F.; Jain, R.; Missier, P.; and Zordan, A. Computer Networks, 35: 579–595. 2001.

CitiTime: a system for rapid creation of portable next-generation telephony services. [link]

Paper link bibtex

@article{DBLP:journals/cn/AnjumCJMZ01,
  author  = {Anjum, Farooq and Caruso, Francesco and Jain, Ravi and Missier, Paolo and Zordan, Adalberto},
  title   = {{CitiTime: a system for rapid creation of portable next-generation telephony services.}},
  journal = {Computer Networks},
  volume  = {35},
  pages   = {579--595},
  year    = {2001},
  doi     = {10.1016/S1389-1286(00)00195-X},
  url     = {http://dx.doi.org/10.1016/S1389-1286(00)00195-X},
}

2000 (3)

Demonstration of Telcordia's Database Reconciliation and Data Quality Analysis Tool. Caruso, F; Cochinwala, M; Ganapathy, U; Lalk, G; and Missier, P In VLDB 2000, September 10-14, 2000, Cairo, Egypt, pages 615–618, 2000. Morgan Kaufmann

Demonstration of Telcordia's Database Reconciliation and Data Quality Analysis Tool [link]

Paper link bibtex

@inproceedings{DBLP:conf/vldb/CarusoCGLM00,
  author    = {Caruso, F and Cochinwala, M and Ganapathy, U and Lalk, G and Missier, P},
  title     = {{Demonstration of Telcordia's Database Reconciliation and Data Quality Analysis Tool}},
  booktitle = {VLDB 2000, September 10-14, 2000, Cairo, Egypt},
  pages     = {615--618},
  publisher = {Morgan Kaufmann},
  year      = {2000},
  isbn      = {1-55860-715-3},
  url       = {http://dblp.uni-trier.de/db/conf/vldb/CarusoCGLM00.html},
}

Java Call Control, Coordination and Transactions. Jain, R; Anjum, F; Missier, P; and Shastry, S IEEE Communications. January 2000.

Java Call Control, Coordination and Transactions [link]

Paper link bibtex

@article{jain00:,
  author  = {Jain, R and Anjum, F and Missier, P and Shastry, S},
  title   = {{Java Call Control, Coordination and Transactions}},
  journal = {IEEE Communications},
  month   = jan,
  year    = {2000},
  url     = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.16.8466\&rep=rep1\&type=pdf},
}

A Knowledge-Based Decision Support Workbench for Advanced Ecommerce. Umar, A.; Bianchi, M.; Caruso, F.; and Missier, P. In AIWoRC, pages 93–100, 2000.

A Knowledge-Based Decision Support Workbench for Advanced Ecommerce. [link]

Paper link bibtex

@inproceedings{DBLP:conf/aiworc/UmarBCM00,
  author    = {Umar, Amjad and Bianchi, Michelle and Caruso, Francesco and Missier, Paolo},
  title     = {{A Knowledge-Based Decision Support Workbench for Advanced Ecommerce.}},
  booktitle = {AIWoRC},
  pages     = {93--100},
  year      = {2000},
  url       = {http://csdl.computer.org/comp/proceedings/aiworc/2000/0628/00/06280093abs.htm},
}

1999 (4)

ChaiTime: A System for Rapid Creation of Portable Next-Generation Telephony Services Using Third-Party Software Components. Anjum, F; Caruso, F; Jain, R; Missier, P; and Zordan, A In Procs. Second IEEE Conference on Open Architectures and Network Programming (OpenArch), New York, March 1999.

ChaiTime: A System for Rapid Creation of Portable Next-Generation Telephony Services Using Third-Party Software Components [link]

Paper doi link bibtex

@inproceedings{f.anjum99:,
  author    = {Anjum, F and Caruso, F and Jain, R and Missier, P and Zordan, A},
  title     = {{ChaiTime: A System for Rapid Creation of Portable Next-Generation Telephony Services Using Third-Party Software Components}},
  booktitle = {Procs. Second IEEE Conference on Open Architectures and Network Programming (OpenArch)},
  address   = {New York},
  month     = mar,
  year      = {1999},
  doi       = {10.1109/OPNARC.1999.758431},
  url       = {http://ieeexplore.ieee.org/xpl/freeabs\_all.jsp?arnumber=758431},
}

Integration of Highly Fragmented Legacy Information Systems Through Object Modeling and Layered Wrappers. Mecella, M; Missier, P; Massari; and Batini, C In Procs. AICA99, Italy, 1999.
link bibtex

@inproceedings{m.mecella99:,
  author    = {Mecella, M and Missier, P and Massari and Batini, C},
  title     = {{Integration of Highly Fragmented Legacy Information Systems Through Object Modeling and Layered Wrappers}},
  booktitle = {Procs. AICA99},
  address   = {Italy},
  year      = {1999},
  file      = {:Users/paolo/Library/Application Support/Mendeley Desktop/Downloaded/Mecella et al. - 1999 - Integration of Highly Fragmented Legacy Information Systems Through Object Modeling and Layered Wrappers(2).pdf:pdf},
}

A Knowledge-based Decision Support Workbench for Enterprise Resource Integration and Migration. Umar, A; and Missier, P In Procs. First International Workshop on Enterprise Management and Resource Planning Systems (EMRPS99), Venice, Italy, 1999.
link bibtex

@inproceedings{umar99:,
  author    = {Umar, A and Missier, P},
  title     = {{A Knowledge-based Decision Support Workbench for Enterprise Resource Integration and Migration}},
  booktitle = {Procs. First International Workshop on Enterprise Management and Resource Planning Systems (EMRPS99)},
  address   = {Venice, Italy},
  year      = {1999},
}

A Framework for Analyzing Virtual Enterprise Infrastructure. Umar, A.; and Missier, P. In RIDE, pages 4–11, 1999.

A Framework for Analyzing Virtual Enterprise Infrastructure. [link]

Paper link bibtex

@inproceedings{DBLP:conf/ride/UmarM99,
  author    = {Umar, Amjad and Missier, Paolo},
  title     = {{A Framework for Analyzing Virtual Enterprise Infrastructure.}},
  booktitle = {RIDE},
  pages     = {4--11},
  year      = {1999},
  url       = {http://computer.org/conferen/proceed/ride/0119/01190004abs.htm},
}

1998 (2)

Multidatabase Languages. Missier, P; Rusinkiewicz, M; and Jin, W In Management of Heterogeneous and Autonomous Database Systems. Morgan Kaufmann, 1998.

Paper link bibtex 1 download

@incollection{p.missier98b:,
  author    = {Missier, P and Rusinkiewicz, M and Jin, W},
  title     = {{Multidatabase Languages}},
  booktitle = {Management of Heterogeneous and Autonomous Database Systems},
  publisher = {Morgan Kaufmann},
  year      = {1998},
  url       = {http://books.google.co.uk/books?hl=en\&lr=\&id=BvuTyyMtGbAC\&oi=fnd\&pg=PA175\&dq=IFIP+Conference+Proceedings+1995+missier\&ots=ubXv\_IiwE0\&sig=fUxrDxeLlchFL9ckMRndXZRXH9E},
}

Technology for the Copyright Protection of Digital Image. Missier, P In Monography Bulletin on archiving in Art History. Scuola Normale Superiore di Pisa, Centro Ricerche Informatiche, 1998.
link bibtex

@incollection{p.missier98:,
  author    = {Missier, P},
  title     = {{Technology for the Copyright Protection of Digital Image}},
  booktitle = {Monography Bulletin on archiving in Art History},
  publisher = {Scuola Normale Superiore di Pisa, Centro Ricerche Informatiche},
  year      = {1998},
}

1995 (2)

Providing Multidatabase Access - an Association Approach. Missier, P; Rusinkiewicz, M; and Silberschatz, A In Procs. 6th International Hong Kong Computer Society Database Workshop on Database Reengineering and Interoperability, Hong Kong, March 1995.

Providing Multidatabase Access - an Association Approach [link]

Paper link bibtex

@inproceedings{missier95:,
  author    = {Missier, P and Rusinkiewicz, M and Silberschatz, A},
  title     = {{Providing Multidatabase Access - an Association Approach}},
  booktitle = {Procs. 6th International Hong Kong Computer Society Database Workshop on Database Reengineering and Interoperability},
  address   = {Hong Kong},
  month     = mar,
  year      = {1995},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.30.4719},
}

Extending a Multidatabase Manipulation Language to Resolve Schema and Data Conflicts. Missier, P.; and Rusinkiewicz, M. In DS-6, pages 93–115, 1995.

Extending a Multidatabase Manipulation Language to Resolve Schema and Data Conflicts. [link]

Paper link bibtex 1 download

@inproceedings{DBLP:conf/ds/MissierR95,
  author    = {Missier, Paolo and Rusinkiewicz, Marek},
  title     = {{Extending a Multidatabase Manipulation Language to Resolve Schema and Data Conflicts.}},
  booktitle = {DS-6},
  pages     = {93--115},
  year      = {1995},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.5127\&rep=rep1\&type=pdf},
}

1992 (1)

Semantic unification in the inference of union types. Missier, P In Procs. GULP'92 (Logic Programming), Tremezzo, Como, Italy, 1992.
link bibtex

@inproceedings{missier92:_seman,
  author    = {Missier, P},
  title     = {{Semantic unification in the inference of union types}},
  booktitle = {Procs. GULP'92 (Logic Programming)},
  address   = {Tremezzo, Como, Italy},
  year      = {1992},
}