tiny_dnn 1.0.0
A header only, dependency-free deep learning framework in C++11
tensor.h
/*
    COPYRIGHT

    All contributions by Taiga Nomi
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    All other contributions:
    Copyright (c) 2013-2016, the respective contributors.
    All rights reserved.

    Each contributor holds copyright over their respective contributions.
    The project versioning (Git) records all such contribution source information.

    LICENSE

    The BSD 3-Clause License

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    * Neither the name of tiny-dnn nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include <algorithm>   // std::fill
#include <array>       // std::array
#include <cassert>     // assert
#include <cmath>       // std::sqrt, std::exp
#include <functional>  // std::multiplies
#include <limits>      // std::numeric_limits
#include <memory>      // std::unique_ptr
#include <numeric>     // std::accumulate
#include <ostream>     // std::ostream
#include <vector>      // std::vector

#include "tiny_dnn/core/framework/device.fwd.h"

#if defined(USE_OPENCL) || defined(USE_CUDA)
#ifdef USE_OPENCL
#include "third_party/CLCudaAPI/clpp11.h"
#else
#include "third_party/CLCudaAPI/cupp11.h"
#endif
#endif
namespace tiny_dnn {

template<typename U = float_t>
class Tensor {
public:
    /*
     * Initializes an empty tensor.
     */
    Tensor()
    {
        reshape(0, 0, 0, 0);
    }

    /*
     * Creates a tensor of the given dimensions.
     * A tensor indexes its elements as N x W x H x D, where:
     *   N is the batch axis
     *   W is the width axis
     *   H is the height axis
     *   D is the depth axis
     *
     * Data is held in a std::vector with 64-byte alignment.
     */
    explicit Tensor(const size_t d0,
                    const size_t d1,
                    const size_t d2,
                    const size_t d3) {
        reshape(d0, d1, d2, d3);
    }

    explicit Tensor(const std::array<size_t, 4>& shape) {
        reshape(shape[0], shape[1], shape[2], shape[3]);
    }

    explicit Tensor(const std::vector<size_t>& shape) {
        assert(shape.size() == 4);
        reshape(shape[0], shape[1], shape[2], shape[3]);
    }

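    /*
     * Construction sketch (illustrative only; uses the constructors above):
     *
     *   Tensor<float> t(2, 3, 4, 5);                          // batch=2, W=3, H=4, D=5
     *   Tensor<float> u(std::array<size_t, 4>{{2, 3, 4, 5}});
     *   assert(t.size() == 2 * 3 * 4 * 5);                    // 120 zero-initialized elements
     */
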
    ~Tensor() = default;

    Tensor(const Tensor& other) {
        other.fromDevice();
        shape_ = other.shape_;
        host_data_ = other.host_data_;
        data_is_on_host_ = true;
        data_dirty_ = true;
        // device_data_ is intentionally left uninitialized.
    }

    Tensor& operator = (const Tensor& other) {
        other.fromDevice();
        shape_ = other.shape_;
        data_is_on_host_ = true;
        data_dirty_ = true;
        host_data_ = other.host_data_;

        // device_data_ is intentionally left as-is. It is erased only if the new
        // tensor does not fit, and only when the data is moved to the GPU.
        return *this;
    }

#ifdef CNN_USE_DEFAULT_MOVE_CONSTRUCTORS
    Tensor(Tensor&& other) = default;             // move ctor
    Tensor& operator = (Tensor&&) = default;      // move assign
#else
    // VS2013 does not generate move members, so implement them manually.
    Tensor(Tensor&& other) {
        shape_ = std::move(other.shape_);
        host_data_ = std::move(other.host_data_);
#if defined(USE_OPENCL) || defined(USE_CUDA)
        device_data_ = std::move(other.device_data_);
#endif
        data_is_on_host_ = other.data_is_on_host_;
        data_dirty_ = other.data_dirty_;
    }

    Tensor& operator = (Tensor&& other) {
        shape_ = std::move(other.shape_);
        host_data_ = std::move(other.host_data_);
#if defined(USE_OPENCL) || defined(USE_CUDA)
        device_data_ = std::move(other.device_data_);
#endif
        data_is_on_host_ = other.data_is_on_host_;
        data_dirty_ = other.data_dirty_;
        return *this;
    }
#endif

    // Returns the tensor shape.
    const std::array<size_t, 4>& shape() const { return shape_; }

    // Returns the value at the specified index in the tensor.
    // Checked version (throws for out-of-range access).
    U& host_at(const size_t d0,
               const size_t d1,
               const size_t d2,
               const size_t d3) {
        return *host_ptr(d0, d1, d2, d3);
    }

    U host_at(const size_t d0,
              const size_t d1,
              const size_t d2,
              const size_t d3) const {
        return *host_ptr(d0, d1, d2, d3);
    }

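    // Element (d0, d1, d2, d3) maps to the linear offset
    //   d0 * (W * H * D) + d3 * (W * H) + d2 * W + d1
    // with W = shape_[1], H = shape_[2], D = shape_[3]; storage is therefore
    // batch-major with the width axis contiguous in memory.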
    // Returns a pointer to the element at the specified index in the tensor.
    // Checked version (throws for out-of-range access).
    const U* host_ptr(const size_t d0,
                      const size_t d1,
                      const size_t d2,
                      const size_t d3) const {
        if (d0 >= shape_[0] || d1 >= shape_[1] ||
            d2 >= shape_[2] || d3 >= shape_[3]) {
            throw nn_error("Access tensor out of range.");
        }

        return host_data() + (
            shape_[1] * shape_[2] * shape_[3] * d0 +
            shape_[1] * shape_[2] * d3 +
            shape_[1] * d2 +
            d1
        );
    }

    U* host_ptr(const size_t d0,
                const size_t d1,
                const size_t d2,
                const size_t d3) {
        if (d0 >= shape_[0] || d1 >= shape_[1] ||
            d2 >= shape_[2] || d3 >= shape_[3]) {
            throw nn_error("Access tensor out of range.");
        }

        return mutable_host_data() + (
            shape_[1] * shape_[2] * shape_[3] * d0 +
            shape_[1] * shape_[2] * d3 +
            shape_[1] * d2 +
            d1
        );
    }

    const U* host_data() const {
        fromDevice();
        return host_data_.data();
    }

    U* mutable_host_data() {
        fromDevice();
        data_dirty_ = true;
        return host_data_.data();
    }

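    // Device-side accessors (OpenCL/CUDA builds only). Both calls lazily copy the
    // host buffer to the device via toDevice() and return the underlying CLCudaAPI
    // buffer handle; mutable_device_data() additionally marks the data dirty so the
    // next host access re-synchronizes.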
#if defined(USE_OPENCL) || defined(USE_CUDA)
    const void* device_data() const {
        toDevice();
        return (*device_data_)();
    }

    void* mutable_device_data() {
        toDevice();
        data_dirty_ = true;
        return (*device_data_)();
    }
#endif

    size_t size() const {
        return host_data_.size();
    }

    void fill(U value) {
        data_is_on_host_ = true;
        data_dirty_ = true;
        std::fill(std::begin(host_data_), std::end(host_data_), value);
    }

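    // reshape() resizes the host buffer: existing elements are kept and any newly
    // added elements are zero-initialized. Device buffers are not resized here.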
    void reshape(const size_t d0,
                 const size_t d1,
                 const size_t d2,
                 const size_t d3) {
        shape_[0] = d0;
        shape_[1] = d1;
        shape_[2] = d2;
        shape_[3] = d3;
        host_data_.resize(calcSize(), U(0));
    }

    void reshape(const std::array<size_t, 4>& sz) {
        shape_ = sz;
        host_data_.resize(calcSize(), U(0));
    }

private:
    size_t calcSize() const {
        return std::accumulate(std::begin(shape_), std::end(shape_), size_t(1), std::multiplies<size_t>());
    }

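    // Host/device synchronization: data_is_on_host_ records which side holds the
    // current copy and data_dirty_ whether it has changed since the last transfer.
    // toDevice()/fromDevice() copy only when both conditions require it, so repeated
    // accesses on the same side do not trigger transfers.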
    void toDevice() const {
        if (data_is_on_host_ && data_dirty_) {
#if defined(USE_OPENCL) || defined(USE_CUDA)
            CLCudaAPI::Queue queue = device_->queue();
            if (device_data_ && device_data_->GetSize() >= host_data_.size()) {
                device_data_->Write(queue, host_data_.size(), host_data_.data(), 0);
            }
            else {
                CLCudaAPI::Context ctx = device_->context();
                device_data_.reset(new CLCudaAPI::Buffer<U>(
                    ctx, queue, host_data_.begin(), host_data_.end()));
            }
#endif
            data_is_on_host_ = false;
            data_dirty_ = false;
        }
    }

    void fromDevice() const {
        if (!data_is_on_host_ && data_dirty_) {
#if defined(USE_OPENCL) || defined(USE_CUDA)
            assert(device_);
            assert(device_data_);
            device_data_->Read(device_->queue(), host_data_.size(), const_cast<U*>(host_data_.data())); // const_cast<> avoids making host_data_ entirely mutable
#endif
            data_is_on_host_ = true;
            data_dirty_ = false;
        }
    }

private:
    /* Shape of the tensor:
     * shape_[0]: batch
     * shape_[1]: width
     * shape_[2]: height
     * shape_[3]: depth
     */
    std::array<size_t, 4> shape_;

    /* Tensor data on the host */
    std::vector<U, aligned_allocator<U, 64> > host_data_;

#if defined(USE_OPENCL) || defined(USE_CUDA)
    /* Tensor data on the device */
    mutable std::unique_ptr<CLCudaAPI::Buffer<U> > device_data_;
#endif
    mutable bool data_is_on_host_;  // current data is on the host if true, on the device if false
    mutable bool data_dirty_;       // true if the current data may have been modified

    /* Pointer to the device where the data currently resides */
    Device* device_;
};

// Overloaded operator to print the Tensor class to the standard output
template<typename T>
inline std::ostream& operator<< (std::ostream& os,
                                 const Tensor<T>& tensor) {
    const std::array<size_t, 4>& shape = tensor.shape();
    for (size_t i = 0; i < shape[0]; ++i) {
        os << "-- Batch: " << i << "\n";
        for (size_t j = 0; j < shape[3]; ++j) {
            os << "-- Channel: " << j << "\n";
            os << "-- Data:\n";
            for (size_t k = 0; k < shape[1]; ++k) {
                for (size_t l = 0; l < shape[2]; ++l) {
                    os << " " << tensor.host_at(i, k, l, j) << " ";
                }
                os << ";\n";
            }
        }
    }
    os << "----------------\n"
       << "--> Tensor size: [ "
       << shape[0] << " x " << shape[1] << " x "
       << shape[2] << " x " << shape[3] << " ]\n";
    return os;
}

// utilities for element-wise and tensor-scalar/scalar-tensor operations
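// Each helper reshapes dst to the source shape and applies the functor element by
// element through for_i(), which may execute the loop in parallel.
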
template<typename TD, typename TS1, typename TS2, typename F>
void binary_tensor_tensor_elementwise_operation(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2, F f) {
    if (src1.shape() != src2.shape()) {
        throw nn_error("Tensor must have same shape");
    }

    dst.reshape(src1.shape());

    TD* pdst = dst.mutable_host_data();
    const TS1* psrc1 = src1.host_data();
    const TS2* psrc2 = src2.host_data();

    for_i(true, dst.size(), [pdst, psrc1, psrc2, &f](size_t i) {
        pdst[i] = f(psrc1[i], psrc2[i]);
    });
}

template<typename TD, typename TS, typename F>
void unary_tensor_elementwise_operation(Tensor<TD>& dst, const Tensor<TS>& src, F f) {
    dst.reshape(src.shape());

    TD* pdst = dst.mutable_host_data();
    const TS* psrc = src.host_data();

    for_i(true, dst.size(), [pdst, psrc, &f](size_t i) {
        pdst[i] = f(psrc[i]);
    });
}

template<typename TD, typename TS1, typename TS2, typename F>
void binary_tensor_scalar_operation(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2, F f) {
    dst.reshape(src1.shape());

    TD* pdst = dst.mutable_host_data();
    const TS1* psrc1 = src1.host_data();

    for_i(true, dst.size(), [pdst, psrc1, src2, &f](size_t i) {
        pdst[i] = f(psrc1[i], src2);
    });
}

template<typename TD, typename TS1, typename TS2, typename F>
void binary_scalar_tensor_operation(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2, F f) {
    dst.reshape(src2.shape());

    TD* pdst = dst.mutable_host_data();
    const TS2* psrc2 = src2.host_data();

    for_i(true, dst.size(), [pdst, src1, psrc2, &f](size_t i) {
        pdst[i] = f(src1, psrc2[i]);
    });
}

// scalar-level implementations of the element-wise operations used below

namespace details {
    template<typename TS1, typename TS2>
    auto plus(TS1 s1, TS2 s2) -> decltype(s1 + s2) { return s1 + s2; }

    template<typename TS1, typename TS2>
    auto minus(TS1 s1, TS2 s2) -> decltype(s1 - s2) { return s1 - s2; }

    template<typename TS1, typename TS2>
    auto multiplies(TS1 s1, TS2 s2) -> decltype(s1 * s2) { return s1 * s2; }

    template<typename TS1, typename TS2>
    auto divides_checked(TS1 s1, TS2 s2) -> decltype(s1 / s2) {
        typedef decltype(s1 / s2) result_type;
        return (s2 == result_type{}) ? std::numeric_limits<result_type>::quiet_NaN() : s1 / s2;
    }

    template<typename TS1, typename TS2>
    auto divides_unchecked(TS1 s1, TS2 s2) -> decltype(s1 / s2) {
        return s1 / s2;
    }

    template<typename T>
    T sqrt_checked(T s1) {
        return (s1 <= T{}) ? std::numeric_limits<T>::quiet_NaN() : std::sqrt(s1);
    }

    // do not inline - this function wraps the std::exp overloads in a single templated function
    template<typename T>
    T exp(T s1) {
        return std::exp(s1);
    }
}  // namespace details

template<typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    binary_tensor_scalar_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    binary_tensor_scalar_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    binary_tensor_scalar_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::divides_checked<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    if (src2 == TS2(0.0)) {
        dst.reshape(src1.shape());
        dst.fill(std::numeric_limits<TD>::quiet_NaN());
    } else {
        binary_tensor_scalar_operation(dst, src1, src2, details::divides_unchecked<TS1, TS2>);
    }
}

template<typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::divides_checked<TS1, TS2>);
}

template<typename TD, typename TS>
void layer_sqrt(Tensor<TD>& dst, const Tensor<TS>& src1) {
    unary_tensor_elementwise_operation(dst, src1, details::sqrt_checked<TS>);
}

template<typename TD, typename TS>
void layer_exp(Tensor<TD>& dst, const Tensor<TS>& src1) {
    unary_tensor_elementwise_operation(dst, src1, details::exp<TS>);
}

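/*
 * Usage sketch (illustrative only; uses the helpers defined above):
 *
 *   Tensor<float> a(1, 4, 4, 3), b(1, 4, 4, 3), out;
 *   a.fill(2.0f);
 *   b.fill(3.0f);
 *   layer_add(out, a, b);     // out[i] = a[i] + b[i]
 *   layer_mul(out, 0.5f, a);  // out[i] = 0.5f * a[i]
 *   layer_div(out, a, b);     // element-wise; NaN wherever b[i] == 0
 *   layer_sqrt(out, a);       // NaN for non-positive inputs
 */
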
}  // namespace tiny_dnn