@@ -93,39 +93,38 @@ void CPUDevice<double>::copy(const int N, const double *X, double *Y) {
9393
9494template <typename Dtype>
9595void CPUDevice<Dtype>::copy_from_cpu(const int N, const Dtype *X, Dtype *Y) {
96- copy<Dtype> (N, X, Y);
96+ this -> copy (N, X, Y);
9797}
9898
9999template <typename Dtype>
100100void CPUDevice<Dtype>::set(const int N, const Dtype alpha, Dtype *X) {
101101 if (alpha == 0 ) {
102- memset (Y , 0 , sizeof (Dtype) * N);
102+ memset (X , 0 , sizeof (Dtype) * N);
103103 return ;
104104 }
105105 for (int i = 0 ; i < N; ++i) {
106- Y [i] = alpha;
106+ X [i] = alpha;
107107 }
108108}
109109
110110template <>
111- void CPUDevice<int >::set(const int N, const int alpha, int *X);
112- template <>
113- void CPUDevice<float >::set(const int N, const float alpha, float *X);
114- template <>
115- void CPUDevice<double >::set(const int N, const double alpha, double *X);
111+ void CPUDevice<int >::set(const int N, const int alpha, int *X) {
112+ if (alpha == 0 ) {
113+ memset (X, 0 , sizeof (int ) * N);
114+ return ;
115+ }
116+ for (int i = 0 ; i < N; ++i) {
117+ X[i] = alpha;
118+ }
119+ }
116120
117121template <typename Dtype>
118122void CPUDevice<Dtype>::add_scalar(const int N, const Dtype alpha, Dtype *X) {
119123 for (int i = 0 ; i < N; ++i) {
120- Y [i] += alpha;
124+ X [i] += alpha;
121125 }
122126}
123127
124- template <>
125- void CPUDevice<float >::add_scalar(const int N, const float alpha, float *X);
126- template <>
127- void CPUDevice<double >::add_scalar(const int N, const double alpha, double *X);
128-
129128template <>
130129void CPUDevice<float >::scal(const int N, const float alpha, float *X) {
131130 cblas_sscal (N, alpha, X, 1 );
@@ -138,12 +137,12 @@ void CPUDevice<double>::scal(const int N, const double alpha, double *X) {
138137
139138template <>
140139void CPUDevice<float >::sqr(const int N, const float * a, float * y) {
141- vsSqr (n , a, y);
140+ vsSqr (N , a, y);
142141}
143142
144143template <>
145144void CPUDevice<double >::sqr(const int N, const double * a, double * y) {
146- vdSqr (n , a, y);
145+ vdSqr (N , a, y);
147146}
148147
149148template <>
@@ -195,13 +194,13 @@ void CPUDevice<double>::div(const int N, const double* a, const double* b,
195194}
196195
197196template <>
198- void CPUDevice<float >::powx(const int N, const float * a, const float * b,
197+ void CPUDevice<float >::powx(const int N, const float * a, const float b,
199198 float * y) {
200199 vsPowx (N, a, b, y);
201200}
202201
203202template <>
204- void CPUDevice<double >::powx(const int N, const double * a, const double * b,
203+ void CPUDevice<double >::powx(const int N, const double * a, const double b,
205204 double * y) {
206205 vdPowx (N, a, b, y);
207206}
@@ -212,9 +211,6 @@ static Dtype _nextafter(const Dtype b) {
212211 b, std::numeric_limits<Dtype>::max ());
213212}
214213
215- template static float _nextafter (const float b);
216- template static double _nextafter (const double b);
217-
218214template <typename Dtype>
219215void CPUDevice<Dtype>::rng_uniform(const int N, const Dtype a,
220216 const Dtype b, Dtype* r) {
@@ -229,13 +225,6 @@ void CPUDevice<Dtype>::rng_uniform(const int N, const Dtype a,
229225 }
230226}
231227
232- template <>
233- void CPUDevice<float >::rng_uniform(const int N, const float a, const float b,
234- float * r);
235- template <>
236- void CPUDevice<double >::rng_uniform(const int N, const double a, const double b,
237- double * r);
238-
239228template <typename Dtype>
240229void CPUDevice<Dtype>::rng_gaussian(const int N, const Dtype mu,
241230 const Dtype sigma, Dtype* r) {
@@ -250,13 +239,6 @@ void CPUDevice<Dtype>::rng_gaussian(const int N, const Dtype mu,
250239 }
251240}
252241
253- template <>
254- void CPUDevice<float >::rng_gaussian(const int N, const float mu,
255- const float sigma, float * r);
256- template <>
257- void CPUDevice<double >::rng_gaussian(const int N, const double mu,
258- const double sigma, double * r);
259-
260242template <typename Dtype>
261243void CPUDevice<Dtype>::rng_bernoulli(const int N, const Dtype p, int * r) {
262244 CHECK_GE (N, 0 );
@@ -271,11 +253,6 @@ void CPUDevice<Dtype>::rng_bernoulli(const int N, const Dtype p, int* r) {
271253 }
272254}
273255
274- template <>
275- void CPUDevice<float >::rng_bernoulli(const int N, const float p, int * r);
276- template <>
277- void CPUDevice<double >::rng_bernoulli(const int N, const double p, int * r);
278-
279256template <typename Dtype>
280257void CPUDevice<Dtype>::rng_bernoulli(const int N, const Dtype p,
281258 unsigned int * r) {
@@ -291,13 +268,6 @@ void CPUDevice<Dtype>::rng_bernoulli(const int N, const Dtype p,
291268 }
292269}
293270
294- template <>
295- void CPUDevice<float >::rng_bernoulli(const int N, const float p,
296- unsigned int * r);
297- template <>
298- void CPUDevice<double >::rng_bernoulli(const int N, const double p,
299- unsigned int * r);
300-
301271template <>
302272void CPUDevice<float >::exp(const int N, const float * a, float * y) {
303273 vsExp (N, a, y);
@@ -321,25 +291,25 @@ void CPUDevice<double>::dot(const int N, const double* x, const double* y,
321291}
322292
323293template <>
324- void CPUDevice<float >::hamming_distance(const int N, const float * x,
325- const float * y, uint32_t * out ) {
326- int dist = 0 ;
327- for (int i = 0 ; i < n ; ++i) {
294+ uint32_t CPUDevice<float >::hamming_distance(const int N, const float * x,
295+ const float * y) {
296+ uint32_t dist = 0 ;
297+ for (int i = 0 ; i < N ; ++i) {
328298 dist += __builtin_popcount (static_cast <uint32_t >(x[i]) ^
329299 static_cast <uint32_t >(y[i]));
330300 }
331- *out = dist;
301+ return dist;
332302}
333303
334304template <>
335- void CPUDevice<double >::hamming_distance(const int N, const double * x,
336- const double * y, uint64_t * out ) {
337- int dist = 0 ;
338- for (int i = 0 ; i < n ; ++i) {
305+ uint32_t CPUDevice<double >::hamming_distance(const int N, const double * x,
306+ const double * y) {
307+ uint32_t dist = 0 ;
308+ for (int i = 0 ; i < N ; ++i) {
339309 dist += __builtin_popcount (static_cast <uint64_t >(x[i]) ^
340310 static_cast <uint64_t >(y[i]));
341311 }
342- *out = dist;
312+ return dist;
343313}
344314
345315template <>
@@ -371,11 +341,6 @@ void CPUDevice<Dtype>::sign(const int N, const Dtype* x, Dtype* y) {
371341 }
372342}
373343
374- template <>
375- void CPUDevice<float >::sign(const int N, const float * x, float * y);
376- template <>
377- void CPUDevice<double >::sign(const int N, const double * x, double * y);
378-
379344// This returns a nonzero value if the input has its sign bit set.
380345// The name sgnbit is meant to avoid conflicts with std::signbit in the macro
381346template <typename Dtype>
@@ -388,11 +353,6 @@ void CPUDevice<Dtype>::sgnbit(const int N, const Dtype* x, Dtype* y) {
388353 }
389354}
390355
391- template <>
392- void CPUDevice<float >::sgnbit(const int N, const float * x, float * y);
393- template <>
394- void CPUDevice<double >::sgnbit(const int N, const double * x, double * y);
395-
396356template <typename Dtype>
397357void CPUDevice<Dtype>::fabs(const int N, const Dtype* x, Dtype* y) {
398358 CHECK_GT (N, 0 );
@@ -403,11 +363,6 @@ void CPUDevice<Dtype>::fabs(const int N, const Dtype* x, Dtype* y) {
403363 }
404364}
405365
406- template <>
407- void CPUDevice<float >::fabs(const int N, const float * x, float * y);
408- template <>
409- void CPUDevice<double >::fabs(const int N, const double * x, double * y);
410-
411366template <>
412367void CPUDevice<float >::scale(const int N, const float alpha, const float *x,
413368 float * y) {
@@ -447,18 +402,9 @@ void CPUDevice<Dtype>::im2col(const Dtype* data_im, const int channels,
447402 }
448403}
449404
450- template <>
451- void CPUDevice<float >::im2col(const float * data_im, const int channels,
452- const int height, const int width, const int ksize, const int pad,
453- const int stride, float * data_col);
454- template <>
455- void CPUDevice<double >::im2col(const double * data_im, const int channels,
456- const int height, const int width, const int ksize, const int pad,
457- const int stride, double * data_col);
458-
459405template <typename Dtype>
460406void CPUDevice<Dtype>::col2im(const Dtype* data_col, const int channels,
461- const int height, const int width, const int psize , const int pad,
407+ const int height, const int width, const int ksize , const int pad,
462408 const int stride, Dtype* data_im) {
463409 memset (data_im, 0 , sizeof (Dtype) * height * width * channels);
464410 int height_col = (height + 2 * pad - ksize) / stride + 1 ;
@@ -480,13 +426,6 @@ void CPUDevice<Dtype>::col2im(const Dtype* data_col, const int channels,
480426 }
481427}
482428
483- void CPUDevice<float >::col2im(const float * data_col, const int channels,
484- const int height, const int width, const int psize, const int pad,
485- const int stride, float * data_im);
486- void CPUDevice<double >::col2im(const double * data_col, const int channels,
487- const int height, const int width, const int psize, const int pad,
488- const int stride, double * data_im);
489-
490429INSTANTIATE_CLASS (CPUDevice);
491430
492431} // namespace caffe
0 commit comments