Developer Documentation
Histogram.hh
1/*===========================================================================*\
2 * *
3 * OpenFlipper *
4 * Copyright (c) 2001-2015, RWTH-Aachen University *
5 * Department of Computer Graphics and Multimedia *
6 * All rights reserved. *
7 * www.openflipper.org *
8 * *
9 *---------------------------------------------------------------------------*
10 * This file is part of OpenFlipper. *
11 *---------------------------------------------------------------------------*
12 * *
13 * Redistribution and use in source and binary forms, with or without *
14 * modification, are permitted provided that the following conditions *
15 * are met: *
16 * *
17 * 1. Redistributions of source code must retain the above copyright notice, *
18 * this list of conditions and the following disclaimer. *
19 * *
20 * 2. Redistributions in binary form must reproduce the above copyright *
21 * notice, this list of conditions and the following disclaimer in the *
22 * documentation and/or other materials provided with the distribution. *
23 * *
24 * 3. Neither the name of the copyright holder nor the names of its *
25 * contributors may be used to endorse or promote products derived from *
26 * this software without specific prior written permission. *
27 * *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A *
31 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER *
32 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
33 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
34 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
35 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
36 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
37 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
38 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
39 * *
40\*===========================================================================*/
41
42#pragma once
43
44#include <vector>
45#include <cassert>
46#include <memory>
47#include <exception>
48#include <stdexcept>
49#include <algorithm>
50#include <type_traits>
51#include <map>
52
53#include <QString>
54
55#include "SmartPointer.hh"
56#include "../Config/ACGDefines.hh"
57
58namespace ACG {
59class ACGDLLEXPORT Histogram {
60public:
61 enum class LabelType {
62 PerBin,
63 PerBoundary,
64 };
65
66 Histogram() = default;
67 Histogram(std::vector<size_t> &&bins,
68 std::vector<double> &&bin_widths)
69 : bins_(std::move(bins)),
70 bin_widths_(std::move(bin_widths))
71 {}
72
73 virtual ~Histogram() = default;
74 const std::vector<size_t> &getBins() const { return bins_; }
75 const std::vector<double> &getBinWidths() const { return bin_widths_; }
76 virtual double getTotalWidth() const = 0;
77
78 virtual LabelType getLabelType() const = 0;
79 virtual QString getBoundaryLabel(size_t /*idx*/) const { assert(false); return QString();}
80 virtual QString getBinLabel (size_t /*idx*/) const { assert(false); return QString();}
81
82protected:
83 std::vector<size_t> bins_;
84 std::vector<double> bin_widths_;
85};
86
87
88inline QString formatValue (int val) {
89 return QString::number(val);
90}
91inline QString formatValue (unsigned int val) {
92 return QString::number(val);
93}
94inline QString formatValue (double val) {
95 return QString::number(val, 'g', 3);
96}
97
98template<typename T>
100{
101public:
102 UnbinnedHistogram(std::vector<size_t> &&bin_counts,
103 std::vector<T> &&bin_values)
104 : Histogram(std::move(bin_counts),
105 std::vector<double>(bin_counts.size(), 1.)),
106 bin_values_(std::move(bin_values))
107 {
108 }
109 double getTotalWidth() const override { return bins_.size();};
110 LabelType getLabelType() const override { return LabelType::PerBin; };
111 QString getBinLabel (size_t idx) const override { return formatValue(bin_values_[idx]);}
112private:
113 std::vector<T> bin_values_;
114};
115
116template<typename T>
117class HistogramT : public Histogram {
118public:
119 HistogramT() = default;
120
121 HistogramT(std::vector<size_t> &&histogram,
122 std::vector<T> &&bin_boundaries,
123 std::vector<double> &&bin_widths
124 )
125 : Histogram(std::move(histogram), std::move(bin_widths)),
126 bin_boundaries_(std::move(bin_boundaries))
127 {
128 if (bins_.size() != bin_widths_.size()
129 || bins_.size() + 1 != bin_boundaries_.size()) {
130 throw std::runtime_error("Histogram constructor sizes don't match.");
131 }
132 }
133
134 const std::vector<T> &getBinBoundaries() const {
135 return bin_boundaries_;
136 }
137
138 double getTotalWidth() const override
139 {
140 return bin_boundaries_.back() - bin_boundaries_.front();
141 }
142
143 LabelType getLabelType() const override
144 {
145 return LabelType::PerBoundary;
146 }
147
148 QString getBoundaryLabel (size_t idx) const override
149 {
150 // TODO: for floating point types, choose accuracy depending on bin size
151 return formatValue(bin_boundaries_[idx]);
152 };
153
154
155private:
156 std::vector<T> bin_boundaries_;
157};
158
159
160template<typename T>
161std::unique_ptr<Histogram> create_histogram_unbinned(const std::map<T, size_t> &counts)
162{
163 std::vector<T> values;
164 std::vector<size_t> histogram;
165 values.reserve(counts.size());
166 histogram.reserve(counts.size());
167 for (const auto &entry: counts)
168 {
169 values.push_back(entry.first);
170 histogram.push_back(entry.second);
171 }
172 return ptr::make_unique<UnbinnedHistogram<T>>(std::move(histogram), std::move(values));
173}
174
175template<typename T, typename Iterable>
176std::unique_ptr<Histogram> create_histogram_autorange(const Iterable &range, size_t max_bins = 50)
177{
178// we need to be careful with ranges, some sums (e.g. INT_MAX - INT_MIN) do not fit into a signed int,
179// so we store bin sizes as doubles. With specialization or some tricks we
180// could probably use the next-biggest integer type, but if we're using
181// the biggest integer type already, we should to fall back to double anyways.
182
183 using std::begin;
184 using std::end;
185
186 std::vector<T> bin_boundaries;
187 std::vector<size_t> bins;
188 std::vector<double> bin_widths;
189
190 const size_t n = std::distance(begin(range), end(range));
191 if (n == 0) return {};
192 const auto minmax = std::minmax_element(begin(range), end(range));
193 const T min = *minmax.first;
194 const T max = *minmax.second;
195
196 const double min_dbl = static_cast<double>(min);
197 const double val_range = static_cast<double>(max) - min_dbl;
198
199 const size_t n_bins_max = std::min(max_bins, n);
200 bin_boundaries.reserve(n_bins_max + 1);
201
202 T last_boundary = min;
203 bin_boundaries.push_back(min);
204 for (size_t i = 1; i < n_bins_max; ++i) {
205 // Adding val_range/n_bins to a accumulator might seem more efficient/elegant,
206 // but might cause numeric issues.
207
208 // This multiplication order is bad for huge ranges that cause overflows,
209 // however I assume tiny ranges are more common than huge values and more
210 // important to get right. If you disagree, add a case distinction or something better.
211
212 T boundary = static_cast<T>(min + (i * val_range) / n_bins_max);
213 // avoid zero-sized bins (happens for many ints with values in a small range)
214 if (boundary != last_boundary || i == 0) {
215 bin_boundaries.push_back(boundary);
216 bin_widths.push_back(boundary - last_boundary);
217 }
218 last_boundary = boundary;
219 }
220 bin_boundaries.push_back(max); // avoid rounding issues etc by explicitly picking max.
221 bin_widths.push_back(max - last_boundary);
222
223 bin_boundaries.shrink_to_fit();
224 size_t n_bins = bin_boundaries.size() - 1;
225 bins.resize(n_bins);
226
227 // note that due to rounding, our bins may have differing sizes, which matters
228 // if we handle integral types (relative size difference worst case: bin width 1 vs 2).
229 // Be careful to select the right bin.
230 std::for_each(begin(range), end(range), [&](const T &val) {
231 auto it = std::upper_bound(bin_boundaries.begin(), bin_boundaries.end(), val);
232 if (it == bin_boundaries.end()) --it; // the last value is exactly max!
233 size_t idx = std::distance(bin_boundaries.begin(), it);
234 assert(idx > 0);
235 ++bins[idx - 1];
236 });
237 return ptr::make_unique<HistogramT<T>>(std::move(bins), std::move(bin_boundaries), std::move(bin_widths));
238}
239
240template<typename Iterable>
241std::unique_ptr<Histogram> create_histogram_auto(const Iterable &range, size_t max_bins = 50)
242{
243 using std::begin;
244 using std::end;
245 using T = typename std::remove_cv<
246 typename std::remove_reference<
247 decltype(*begin(range))
248 >::type
249 >::type;
250 const size_t n = std::distance(begin(range), end(range));
251 if (n == 0) return {};
252
253 std::map<T, size_t> elem_counts;
254 bool too_many_unique = false;
255 for (const auto &v: range)
256 {
257 ++elem_counts[v];
258 if (elem_counts.size() > max_bins)
259 {
260 too_many_unique = true;
261 break;
262 }
263 }
264
265 if (too_many_unique) {
266 return create_histogram_autorange<T>(range, max_bins);
267 } else {
268 return create_histogram_unbinned(elem_counts);
269 }
270}
271
272} // namespace ACG
273
Namespace providing different geometric functions concerning angles.