// tiny_dnn 1.0.0
// A header only, dependency-free deep learning framework in C++11
//
// conv2d_op_internal.h
/*
    COPYRIGHT

    All contributions by Taiga Nomi
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    All other contributions:
    Copyright (c) 2013-2016, the respective contributors.
    All rights reserved.

    Each contributor holds copyright over their respective contributions.
    The project versioning (Git) records all such contribution source information.

    LICENSE

    The BSD 3-Clause License


    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
    list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

    * Neither the name of tiny-dnn nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

namespace tiny_dnn {
namespace kernels {

50inline void
51conv2d_op_internal(const tensor_t& in_data,
52 const vec_t& W,
53 const vec_t& bias,
54 tensor_t& out_data,
55 const core::conv_params& params,
56 const bool parallelize) {
57 for_i(parallelize, in_data.size(), [&](int sample) {
58 const vec_t& in = in_data[sample];
59 vec_t& a = out_data[sample];
60
61 for (serial_size_t o = 0; o < params.out.depth_; o++) {
62 for (serial_size_t inc = 0; inc < params.in.depth_; inc++) {
63 if (!params.tbl.is_connected(o, inc)) continue;
64
65 serial_size_t idx = 0;
66 idx = params.in.depth_ * o + inc;
67 idx = params.weight.get_index(0, 0, idx);
68 const float_t *pw = &W[idx];
69
70 idx = params.in_padded.get_index(0, 0, inc);
71 const float_t *pi = &in[idx];
72
73 idx = params.out.get_index(0, 0, o);
74 float_t *pa = &a[idx];
75
76 for (serial_size_t y = 0; y < params.out.height_; y++) {
77 for (serial_size_t x = 0; x < params.out.width_; x++) {
78 const float_t * ppw = pw;
79 const float_t * ppi = pi + params.in_padded.width_ *
80 (y * params.h_stride) +
81 x * params.w_stride;
82 float_t sum = float_t(0);
83
84 // should be optimized for small kernel(3x3,5x5)
85 for (serial_size_t wy = 0; wy < params.weight.height_; wy++) { // NOLINT
86 for (serial_size_t wx = 0; wx < params.weight.width_; wx++) { // NOLINT
87 idx = wy * params.in_padded.width_ + wx;
88 sum += *ppw++ * ppi[idx];
89 }
90 }
91 pa[y * params.out.width_ + x] += sum;
92 }
93 }
94 }
95
96 if (params.has_bias) {
97 float_t * pa = &a[params.out.get_index(0, 0, o)];
98 float_t * paa = pa + params.out.width_ * params.out.height_;
99 std::for_each(pa, paa, [&](float_t& f) { f += bias[o]; });
100 }
101 }
102 });
103}


/******************************************************************/


109template <typename tensor_t, typename vec_t>
110void
111conv2d_op_internal(const tensor_t& prev_out,
112 const vec_t& W,
113 tensor_t& dW,
114 tensor_t& db,
115 tensor_t& curr_delta,
116 tensor_t& prev_delta,
117 const core::conv_params& params,
118 const bool parallelize) {
119
120 typedef typename vec_t::value_type float_t;
121
122 for_i(parallelize, prev_out.size(), [&](int sample) {
123 // propagate delta to previous layer
124 for (serial_size_t inc = 0; inc < params.in.depth_; inc++) {
125 for (serial_size_t outc = 0; outc < params.out.depth_; outc++) {
126 if (!params.tbl.is_connected(outc, inc)) continue;
127
128 serial_size_t idx = 0;
129 idx = params.in.depth_ * outc + inc;
130 idx = params.weight.get_index(0, 0, idx);
131 const float_t *pw = &W[idx];
132
133 idx = params.out.get_index(0, 0, outc);
134 const float_t *pdelta_src = &curr_delta[sample][idx];
135
136 idx = params.in_padded.get_index(0, 0, inc);
137 //float_t *pdelta_dst = &(*prev_delta)[sample][idx];
138 float_t *pdelta_dst = &prev_delta[sample][idx];
139
140 for (serial_size_t y = 0; y < params.out.height_; y++) {
141 for (serial_size_t x = 0; x < params.out.width_; x++) {
142 const float_t * ppw = pw;
143
144 idx = y * params.out.width_ + x;
145 const float_t ppdelta_src = pdelta_src[idx];
146
147 float_t * ppdelta_dst = pdelta_dst +
148 y * params.h_stride * params.in_padded.width_ +
149 x * params.w_stride;
150
151 for (serial_size_t wy = 0; wy < params.weight.height_; wy++) { // NOLINT
152 for (serial_size_t wx = 0; wx < params.weight.width_; wx++) { // NOLINT
153 idx = wy * params.in_padded.width_ + wx;
154 ppdelta_dst[idx] += *ppw++ * ppdelta_src;
155 }
156 }
157 }
158 }
159 }
160 }
161
162 // accumulate dw
163 for (serial_size_t inc = 0; inc < params.in.depth_; inc++) {
164 for (serial_size_t outc = 0; outc < params.out.depth_; outc++) {
165 if (!params.tbl.is_connected(outc, inc)) continue;
166
167 for (serial_size_t wy = 0; wy < params.weight.height_; wy++) {
168 for (serial_size_t wx = 0; wx < params.weight.width_; wx++) {
169 float_t dst = float_t(0);
170
171 serial_size_t idx = 0;
172 idx = params.in_padded.get_index(wx, wy, inc);
173 const float_t * prevo = &prev_out[sample][idx];
174
175 idx = params.out.get_index(0, 0, outc);
176 const float_t * delta = &curr_delta[sample][idx];
177
178 if (params.w_stride > 1) {
179 for (serial_size_t y = 0; y < params.out.height_; y++) {
180 serial_size_t prevo_idx = y * params.in_padded.width_ * params.h_stride;
181 serial_size_t delta_idx = y * params.out.width_;
182
183 for (serial_size_t x = 0; x < params.out.width_; x++) {
184 dst += prevo[prevo_idx + x * params.w_stride] * delta[delta_idx + x];
185 }
186 }
187 } else {
188 for (serial_size_t y = 0; y < params.out.height_; y++) {
189 dst += vectorize::dot(
190 prevo + y * params.in_padded.width_ * params.h_stride,
191 delta + y * params.out.width_,
192 params.out.width_);
193 }
194 }
195
196
197 idx = params.in.depth_ * outc + inc;
198 dW[sample][params.weight.get_index(wx, wy, idx)] += dst;
199 }
200 }
201 }
202 }
203
204 // accumulate db
205 if (params.has_bias) {
206 for (serial_size_t outc = 0; outc < params.out.depth_; outc++) {
207 serial_size_t idx = params.out.get_index(0, 0, outc);
208 const float_t * delta = &curr_delta[sample][idx];
209 const float_t * deltaa = delta + params.out.width_ *
210 params.out.height_;
211 db[sample][outc] += std::accumulate(delta, deltaa, float_t(0));
212 }
213 }
214 });
215}

}  // namespace kernels
}  // namespace tiny_dnn