tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
Loading...
Searching...
No Matches
parallel_for.h
1/*
2 Copyright (c) 2016, Taiga Nomi
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in the
11 documentation and/or other materials provided with the distribution.
12 * Neither the name of the <organization> nor the
13 names of its contributors may be used to endorse or promote products
14 derived from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
17 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27#pragma once
28#include <vector>
29#include <type_traits>
30#include <limits>
31#include <cassert>
32#include <cstdio>
33#include <string>
34#include "aligned_allocator.h"
35#include "nn_error.h"
36#include "tiny_dnn/config.h"
37
38#ifdef CNN_USE_TBB
39#ifndef NOMINMAX
40#define NOMINMAX // tbb includes windows.h in tbb/machine/windows_api.h
41#endif
42#include <tbb/tbb.h>
43#include <tbb/task_group.h>
44#endif
45
46#ifndef CNN_USE_OMP
47#include <thread>
48#include <future>
49#endif
50
51namespace tiny_dnn {
52
53#ifdef CNN_USE_TBB
54
55static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::automatic);//tbb::task_scheduler_init::deferred);
56
57typedef tbb::blocked_range<int> blocked_range;
58
59template<typename Func>
60void parallel_for(int begin, int end, const Func& f, int grainsize) {
61 tbb::parallel_for(blocked_range(begin, end, end - begin > grainsize ? grainsize : 1), f);
62}
63template<typename Func>
64void xparallel_for(int begin, int end, const Func& f) {
65 f(blocked_range(begin, end, 100));
66}
67
68#else
69
/**
 * Index range [begin, end) handed to loop bodies when TBB is not in use.
 *
 * Bounds are stored as int; the size_t constructor narrows its arguments
 * with static_cast, so callers must pass values that fit in an int.
 */
struct blocked_range {
    typedef int const_iterator;

    /// Builds the range from int bounds.
    blocked_range(int begin, int end) : begin_(begin), end_(end) {}

    /// Builds the range from size_t bounds, narrowing each one to int.
    blocked_range(size_t begin, size_t end)
        : begin_(static_cast<int>(begin)), end_(static_cast<int>(end)) {}

    /// First index of the range.
    const_iterator begin() const { return begin_; }

    /// End index of the range (loops in this file iterate while i < end()).
    const_iterator end() const { return end_; }

private:
    int begin_;
    int end_;
};
82
83template<typename Func>
84void xparallel_for(size_t begin, size_t end, const Func& f) {
85 blocked_range r(begin, end);
86 f(r);
87}
88
89#if defined(CNN_USE_OMP)
90
91template<typename Func>
92void parallel_for(int begin, int end, const Func& f, int /*grainsize*/) {
93 #pragma omp parallel for
94 for (int i=begin; i<end; ++i)
95 f(blocked_range(i,i+1));
96}
97
98#elif defined(CNN_SINGLE_THREAD)
99
/**
 * Single-threaded stand-in for parallel_for: delegates to xparallel_for so
 * that f runs once over the whole range.
 *
 * @param begin first index of the range
 * @param end   end index of the range
 * @param f     callable forwarded to xparallel_for
 *
 * The trailing grain-size parameter is accepted for interface parity with
 * the other back ends and is ignored.
 */
template<typename Func>
void parallel_for(int begin, int end, const Func& f, int /*grainsize*/) {
    const size_t first = static_cast<size_t>(begin);
    const size_t last = static_cast<size_t>(end);
    xparallel_for(first, last, f);
}
104
105#else
106
107template<typename Func>
108void parallel_for(int start, int end, const Func &f, int /*grainsize*/) {
109 int nthreads = std::thread::hardware_concurrency();
110 int blockSize = (end - start) / nthreads;
111 if (blockSize*nthreads < end - start)
112 blockSize++;
113
114 std::vector<std::future<void>> futures;
115
116 int blockStart = start;
117 int blockEnd = blockStart + blockSize;
118 if (blockEnd > end) blockEnd = end;
119
120 for (int i = 0; i < nthreads; i++) {
121 futures.push_back(std::move(std::async(std::launch::async, [blockStart, blockEnd, &f] {
122 f(blocked_range(blockStart, blockEnd));
123 })));
124
125 blockStart += blockSize;
126 blockEnd = blockStart + blockSize;
127 if (blockStart >= end) break;
128 if (blockEnd > end) blockEnd = end;
129 }
130
131 for (auto &future : futures)
132 future.wait();
133}
134
135#endif
136
137#endif // CNN_USE_TBB
138
/**
 * Reports whether `value` can be converted to type T without changing its
 * numeric value.
 *
 * @tparam T target type to test against
 * @tparam U type of the value being tested
 * @param value the value to check
 * @return true when static_cast<T>(value) denotes the same number as value
 */
template<typename T, typename U>
bool value_representation(U const &value) {
    const T converted = static_cast<T>(value);

    // A round trip alone is not enough: converting between signed and
    // unsigned types of the same width reinterprets the sign bit and then
    // restores it on the way back (e.g. 3000000000u -> int -> unsigned),
    // so the sign of both sides must agree as well.
    const bool same_sign = (converted < T()) == (value < U());

    return same_sign && static_cast<U>(converted) == value;
}
143
144template<typename T, typename Func>
145inline
146void for_(std::true_type, bool parallelize, int begin, T end, Func f, int grainsize = 100){
147 parallelize = parallelize && value_representation<int>(end);
148 parallelize ? parallel_for(begin, static_cast<int>(end), f, grainsize) :
149 xparallel_for(begin, static_cast<int>(end), f);
150}
151
/**
 * for_ overload selected by the std::false_type tag, i.e. when the
 * dispatcher found T not to be an unsigned type.
 *
 * Runs f over [begin, end) with parallel_for (casting `end` to int) when
 * parallelization is requested, and with xparallel_for otherwise.
 *
 * @param parallelize whether a parallel run is requested
 * @param begin       first index of the range
 * @param end         end index of the range
 * @param f           loop body, called with a blocked_range
 * @param grainsize   grain size forwarded to parallel_for
 */
template<typename T, typename Func>
inline
void for_(std::false_type, bool parallelize, int begin, T end, Func f, int grainsize = 100){
    if (parallelize) {
        parallel_for(begin, static_cast<int>(end), f, grainsize);
    } else {
        xparallel_for(begin, end, f);
    }
}
157
/**
 * Entry point for range loops: checks at compile time that T is an integral
 * type, then dispatches to the tagged for_ overload matching the signedness
 * of T.
 *
 * @param parallelize whether a parallel run is requested
 * @param begin       first index of the range
 * @param end         end index of the range
 * @param f           loop body, called with a blocked_range
 * @param grainsize   grain size forwarded to the selected overload
 */
template<typename T, typename Func>
inline
void for_(bool parallelize, int begin, T end, Func f, int grainsize = 100) {
    static_assert(std::is_integral<T>::value, "end must be integral type");
    // Same type as std::is_unsigned<T>::type: std::true_type or std::false_type.
    using end_is_unsigned = std::integral_constant<bool, std::is_unsigned<T>::value>;
    for_(end_is_unsigned(), parallelize, begin, end, f, grainsize);
}
164
165template <typename T, typename Func>
166void for_i(bool parallelize, T size, Func f, int grainsize = 100)
167{
168 for_(parallelize, 0, size, [&](const blocked_range& r) {
169#ifdef CNN_USE_OMP
170#pragma omp parallel for
171#endif
172 for (int i = r.begin(); i < r.end(); i++)
173 f(i);
174 }, grainsize);
175}
176
/**
 * Convenience overload of for_i that always requests parallel execution.
 *
 * @param size      number of indices to visit
 * @param f         callable taking a single int index
 * @param grainsize grain size forwarded to the four-argument for_i
 */
template <typename T, typename Func>
void for_i(T size, Func f, int grainsize = 100) {
    const bool parallelize = true;
    for_i(parallelize, size, f, grainsize);
}
181
182} // namespace tiny_dnn
Simple image utility class.
Definition image.h:94
Definition parallel_for.h:70