@article {24590,
title = {Codesign Lessons Learned from Implementing Graph Matching on Multithreaded Architectures},
journal = {IEEE Computer},
volume = {48},
year = {2015},
month = {08/2015},
pages = {46{\textendash}55},
publisher = {ACM IEEE},
doi = {10.1109/MC.2015.215},
url = {http://doi.ieeecomputersociety.org/10.1109/MC.2015.215},
author = {Halappanavar, Mahantesh and Pothen, Alex and Azad, Ariful and Manne, Fredrik and Langguth, Johannes and Arif M. {Khan}}
}
@book {24193,
title = {Digital s{\r a}rbarhet {\textendash} sikkert samfunn},
volume = {13},
number = {NOU},
year = {2015},
month = {11/15},
publisher = {Norwegian Ministry of Justice and Public Security},
organization = {Norwegian Ministry of Justice and Public Security},
address = {Oslo},
author = {Lysne, Olav and Beitland, Kristine and Gj{\o}steen, Kristian and Hagen, Janne and Holmgren, {\r A}ke and Jarbekk, Eva and Lunde, Einar and Manne, Fredrik and Nystr{\o}m, Sofie}
}
@inproceedings {23955,
title = {Optimizing Approximate Weighted Matching on Nvidia Kepler K40},
journal = {IEEE International Conference on High Performance Computing (HiPC)},
year = {2015},
month = {12/2015},
abstract = {Matching is a fundamental graph problem with numerous applications inscience and engineering. While algorithms for computing optimal matchings are difficult to parallelize, approximation algorithms on the other hand generally compute high quality solutions and are amenable to parallelization.In this paper, we present efficient implementations of the current best algorithm for half-approximate weighted matching, the Suitor algorithm, on Nvidia Kepler K-40 platform.We develop four variants of the algorithm that exploit hardware features to address key challenges for a GPU implementation.We also experiment with different combinations of work assigned to a warp.Using an exhaustive set of 269 inputs, we demonstrate that the new implementation outperforms the previous best GPU algorithm by 10x to 100x for over 100 instances, and from 100x to 1000x for 15 instances.We also demonstrate up to 20x speedup relative to 2 threads, and up to 5x relative to 16 threads on Intel Xeon platform with 16 cores for the same algorithm.The new algorithms and implementations provided in this paper will have a direct impact on several applications that repeatedly use matching as a key compute kernel. Further, algorithm designs and insights provided in this paper will benefit other researchers implementing graph algorithms on modern GPU architectures.},
author = {Naim, Md and Manne, Fredrik and Halappanavar, Mahantesh and Tumeo, Antonino and Langguth, Johannes}
}
@article {JohaSimula.simula.2866,
title = {On Parallel Push-Relabel Based Algorithms for Bipartite Maximum Matching},
journal = {Parallel Computing},
volume = {40},
number = {7},
year = {2014},
month = {July},
pages = {289-308},
publisher = {Elsevier},
abstract = {We study multithreaded push-relabel based algorithms for computing maximum cardinality matching in bipartite graphs. Matching is a fundamental combinatorial problem with applications in a wide variety of problems in science and engineering. We are motivated by its use in the context of sparse linear solvers for computing the maximum transversal of a matrix. Other applications can be found in many fields such as bioinformatics (Azad et al., 2010) [4], scheduling (Timmer and Jess, 1995) [27], and chemical structure analysis (John, 1995) [14]. We implement and test our algorithms on several multi-socket multicore systems and compare their performance to state-of-the-art augmenting path-based serial and parallel algorithms using a test set comprised of a wide range of real-world instances. Building on several heuristics for enhancing performance, we demonstrate good scaling for the parallel push-relabel algorithm. We show that it is comparable to the best augmenting path-based algorithms for bipartite matching. To the best of our knowledge, this is the first extensive study of multithreaded push-relabel based algorithms. In addition to a direct impact on the applications using matching, the proposed algorithmic techniques can be extended to preflow-push based algorithms for computing maximum flow in graphs.},
doi = {10.1016/j.parco.2014.03.004},
author = {Langguth, Johannes and Azad, Ariful and Halappanavar, Mahantesh and Manne, Fredrik}
}
@article {24589,
title = {Push-relabel based algorithms for the maximum transversal problem},
journal = {Computers \& Operations Research},
volume = {40},
year = {2013},
month = {02/2013},
pages = {1266{\textendash}1275},
publisher = {Elsevier},
doi = {10.1016/j.cor.2012.12.009},
url = {http://dx.doi.org/10.1016/j.cor.2012.12.009},
author = {Kaya, Kamer and Langguth, Johannes and Manne, Fredrik and U{\c c}ar, Bora}
}
@inproceedings {Simula.ND.347,
title = {Almost Optimal Distributed M2M Multicasting in Wireless Mesh Networks},
journal = {Proceedings of the 6th IEEE International Conference on Mobile Ad-hoc and Sensor Systems (IEEE MASS 2009)},
year = {2009},
publisher = {IEEE},
type = {1},
isbn = {978-1-4244-5113-5},
author = {Xin, Qin and Manne, Fredrik and Zhang, Yan and Wang, Jianping and Zheng, Zeyu}
}
@article {Simula.ND.77,
title = {Faster Deterministic Communication in Radio Networks},
journal = {Algorithmica},
volume = {54},
number = {2},
year = {2009},
month = {June},
pages = {226-242},
publisher = {Springer},
abstract = {We study the communication primitives of broadcasting (one-to-all communication) and gossiping (all-to-all communication) in known topology radio networks, i.e., where for each primitive the schedule of transmissions is precomputed in advance based on full knowledge about the size and the topology of the network. We show that gossiping can be completed in O(D+\fracΔ{\l}og n}{\l}ogΔ-{\l}og{\l}og n}}) time units in any radio network of size n, diameter D, and maximum degree Δ = {\O}mega({\l}og n). This is an almost optimal schedule in the sense that there exists a radio network topology, specifically a Δ-regular tree, in which the radio gossiping cannot be completed in less than {\O}mega(D+\fracΔ{\l}og n}{\l}ogΔ}) units of time. Moreover, we show a D+O(\frac{\l}og^3 n}{\l}og{\l}og n}}) schedule for the broadcast task. Both our transmission schemes significantly improve upon the currently best known schedules in G{\k a}sieniec, Peleg, and Xin [PODC\&$\#$39;05], i.e., a O(D+Δ{\l}og n) time schedule for gossiping and a D+O({\l}og^3 n) time schedule for broadcast. Our broadcasting schedule also improves, for large D, a very recent O(D+{\l}og^2 n) time broadcasting schedule by Kowalski and Pelc.},
doi = {10.1007/s00453-007-9136-0},
author = {Cicalese, Ferdinando and Manne, Fredrik and Xin, Qin}
}
@inproceedings {Simula.ND.345,
title = {Latency-Optimal Communication in Wireless Mesh Networks},
journal = {Proceedings of the 15th Asia-Pacific Conference on Communications (APCC 2009)},
year = {2009},
publisher = {IEEE},
type = {1},
abstract = {Wireless Mesh Networking (WMN) is an emerging communication paradigm to enable resilient, cost-efficient and reliable services for the future-generation wireless networks. We study the minimum-latency communication primitive of gossiping (all-to-all communication) in known topology WMNs, i.e., where the schedule of transmissions is pre-computed in advance based on full knowledge about the size and the topology of the Wireless Mesh Network (WMN). Each mesh node in the WMN is initially given a message and the objective is to design a minimum-latency schedule such that each mesh node distributes its message to all other mesh nodes. The problem of computing a minimum-latency gossiping schedule for a given WMN is NP-hard, hence it is only possible to get a polynomial approximation algorithm. In this paper, we show a deterministic approximation algorithm that can complete gossiping task in (D+\fracΔ{\l}og n}{\l}ogΔ}) time units in any WMN of size n, diameter D, and maximum degree Δ. This is an asymptotically optimal schedule in the sense that there exists a WMN topology, specifically a Δ-regular tree, in which the gossiping task cannot be accomplished in less than {\O}mega(D+\fracΔ{\l}og n}{\l}ogΔ}) units of time. Our algorithm also improves on currently best known gossiping schedule of length O(D+\fracΔ{\l}og n}{\l}ogΔ-{\l}og{\l}og n}}) in [Algorithmica 54(2):226-242, 2009].},
isbn = {978-1-4244-4784-8},
author = {Xin, Qin and Manne, Fredrik}
}
@article {Simula.ND.76,
title = {Time Efficient Radio Broadcasting in Planar Graphs},
journal = {Journal of Networks},
volume = {3},
number = {2},
year = {2008},
month = {February},
pages = {9-16},
abstract = {We study the communication primitive of broadcasting (one-to-all communication) in known topology radio networks, i.e., where for each primitive the schedule of transmissions is precomputed in advance based on full knowledge about the size and the topology of the network. We show that radio broadcasting can be completed in D+O({\l}og n) time units in planar graphs of size n, and with diameter D. This improves the currently best known D+O({\l}og^3n) time schedule proposed by Elkin and Kortsarz in \cite{EK05} [SODA{\textquoteright}05], and 3D time schedule due to G{\k a}sieniec, Peleg and Xin in \cite{GPX05} [PODC{\textquoteright}05]. In this paper, we also explore broadcasting in radio networks in the presence of edge failures.},
author = {Manne, Fredrik and Xin, Qin}
}