Skip to content

Commit c125a97

Browse files
Rob Hess authored and jeffdonahue committed
Implement all kernel device functions under GPUDevice. Get rid of unused files.
1 parent e1c66dc commit c125a97

37 files changed

+390
-925
lines changed

include/caffe/device.hpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,16 @@
33
#ifndef CAFFE_UTIL_DEVICE_H_
44
#define CAFFE_UTIL_DEVICE_H_
55

6+
extern "C" {
7+
#include <cblas.h>
8+
}
9+
610
#include <cublas_v2.h>
711
#include <stdint.h>
812

913
#include "glog/logging.h"
1014

1115
#include "caffe/common.hpp"
12-
#include "caffe/util/im2col.hpp"
13-
#include "caffe/util/math_functions.hpp"
1416

1517
namespace caffe {
1618

@@ -252,8 +254,6 @@ class GPUDevice : public Device<Dtype> {
252254
virtual void rng_gaussian(const int N, const Dtype mu, const Dtype sigma,
253255
Dtype* r);
254256

255-
virtual void rng_bernoulli(const int N, const Dtype p, int* r);
256-
257257
virtual void exp(const int N, const Dtype* a, Dtype* y);
258258

259259
virtual void dot(const int N, const Dtype* x, const Dtype* y, Dtype* out);

include/caffe/util/im2col.hpp

Lines changed: 0 additions & 34 deletions
This file was deleted.

include/caffe/util/math_functions.hpp

Lines changed: 0 additions & 92 deletions
This file was deleted.

src/caffe/blob.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
#include "caffe/common.hpp"
33
#include "caffe/device.hpp"
44
#include "caffe/syncedmem.hpp"
5-
#include "caffe/util/math_functions.hpp"
65

76
namespace caffe {
87

src/caffe/devices/gpu_device.cpp

Lines changed: 0 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -97,17 +97,6 @@ void GPUDevice<Dtype>::copy_from_cpu(const int N, const Dtype *X, Dtype *Y) {
9797
CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyHostToDevice));
9898
}
9999

100-
template<typename Dtype>
101-
void GPUDevice<Dtype>::set(const int N, const Dtype alpha, Dtype *X) {
102-
caffe_gpu_set<Dtype>(N, alpha, X);
103-
}
104-
105-
template<typename Dtype>
106-
void GPUDevice<Dtype>::add_scalar(const int N, const Dtype alpha,
107-
Dtype *X) {
108-
caffe_gpu_add_scalar<Dtype>(N, alpha, X);
109-
}
110-
111100
template<>
112101
void GPUDevice<float>::scal(const int N, const float alpha, float *X) {
113102
CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
@@ -124,53 +113,6 @@ void GPUDevice<Dtype>::sqr(const int N, const Dtype* a, Dtype* y) {
124113
NOT_IMPLEMENTED;
125114
}
126115

127-
template<typename Dtype>
128-
void GPUDevice<Dtype>::add(const int N, const Dtype* a, const Dtype* b,
129-
Dtype* y) {
130-
caffe_gpu_add<Dtype>(N, a, b, y);
131-
}
132-
133-
template<typename Dtype>
134-
void GPUDevice<Dtype>::sub(const int N, const Dtype* a, const Dtype* b,
135-
Dtype* y) {
136-
caffe_gpu_sub<Dtype>(N, a, b, y);
137-
}
138-
139-
template<typename Dtype>
140-
void GPUDevice<Dtype>::mul(const int N, const Dtype* a, const Dtype* b,
141-
Dtype* y) {
142-
caffe_gpu_mul<Dtype>(N, a, b, y);
143-
}
144-
145-
template<typename Dtype>
146-
void GPUDevice<Dtype>::div(const int N, const Dtype* a, const Dtype* b,
147-
Dtype* y) {
148-
caffe_gpu_div<Dtype>(N, a, b, y);
149-
}
150-
151-
template<typename Dtype>
152-
void GPUDevice<Dtype>::powx(const int N, const Dtype* a, const Dtype b,
153-
Dtype* y) {
154-
caffe_gpu_powx<Dtype>(N, a, b, y);
155-
}
156-
157-
template<typename Dtype>
158-
void GPUDevice<Dtype>::rng_uniform(const int N, const Dtype a,
159-
const Dtype b, Dtype* r) {
160-
caffe_gpu_rng_uniform<Dtype>(N, a, b, r);
161-
}
162-
163-
template<typename Dtype>
164-
void GPUDevice<Dtype>::rng_gaussian(const int N, const Dtype mu,
165-
const Dtype sigma, Dtype* r) {
166-
caffe_gpu_rng_gaussian<Dtype>(N, mu, sigma, r);
167-
}
168-
169-
template<typename Dtype>
170-
void GPUDevice<Dtype>::rng_bernoulli(const int N, const Dtype p, int* r) {
171-
NOT_IMPLEMENTED;
172-
}
173-
174116
template<typename Dtype>
175117
void GPUDevice<Dtype>::exp(const int N, const Dtype* a, Dtype* y) {
176118
// TODO: implement this
@@ -189,12 +131,6 @@ void GPUDevice<double>::dot(const int N, const double* x, const double* y,
189131
CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), N, x, 1, y, 1, out));
190132
}
191133

192-
template<typename Dtype>
193-
void GPUDevice<Dtype>::hamming_distance(const int N, const Dtype* x,
194-
const Dtype* y, int* out) {
195-
*out = caffe_gpu_hamming_distance<Dtype>(N, x, y);
196-
}
197-
198134
template<>
199135
// Returns the sum of the absolute values of the elements of vector x
200136
void GPUDevice<float>::asum(const int N, const float* x, float* y) {
@@ -207,44 +143,13 @@ void GPUDevice<double>::asum(const int N, const double* x, double* y) {
207143
CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), N, x, 1, y));
208144
}
209145

210-
template<typename Dtype>
211-
void GPUDevice<Dtype>::sign(const int N, const Dtype* x, Dtype* y) {
212-
caffe_gpu_sign<Dtype>(N, x, y);
213-
}
214-
215-
template<typename Dtype>
216-
void GPUDevice<Dtype>::sgnbit(const int N, const Dtype* x, Dtype* y) {
217-
caffe_gpu_sgnbit<Dtype>(N, x, y);
218-
}
219-
220-
template<typename Dtype>
221-
void GPUDevice<Dtype>::fabs(const int N, const Dtype* x, Dtype* y) {
222-
caffe_gpu_fabs<Dtype>(N, x, y);
223-
}
224-
225146
template<typename Dtype>
226147
void GPUDevice<Dtype>::scale(const int N, const Dtype alpha, const Dtype *x,
227148
Dtype* y) {
228149
this->copy(N, x, y);
229150
this->scal(N, alpha, y);
230151
}
231152

232-
template<typename Dtype>
233-
void GPUDevice<Dtype>::im2col(const Dtype* data_im, const int channels,
234-
const int height, const int width, const int ksize, const int pad,
235-
const int stride, Dtype* data_col) {
236-
im2col_gpu(data_im, channels, height, width, ksize, pad, stride,
237-
data_col);
238-
}
239-
240-
template<typename Dtype>
241-
void GPUDevice<Dtype>::col2im(const Dtype* data_col, const int channels,
242-
const int height, const int width, const int psize, const int pad,
243-
const int stride, Dtype* data_im) {
244-
col2im_gpu(data_col, channels, height, width, psize, pad, stride,
245-
data_im);
246-
}
247-
248153
INSTANTIATE_CLASS(GPUDevice);
249154

250155
} // namespace caffe

0 commit comments

Comments
 (0)