
Commit 8a59748

A. Unique TensorFlower authored and Vijay Vasudevan committed
Use cc_binary rather than cc_library to reduce size of native library in APK from 5.5mb to 3.2mb (compressed).
Change: 113369407
1 parent: faf747a


60 files changed: +2576 -438 lines

configure

+51 -10
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+if [ "$TF_UNOFFICIAL_SETTING" == "1" ]; then
+  echo -e "\nWARNING: You are configuring unofficial settings in TensorFlow. Because some external libraries are not backward compatible, these settings are largely untested and unsupported. \n" 1>&2
+fi
+
 ## Set up python-related environment settings
 while true; do
   fromuser=""
@@ -44,32 +48,55 @@ fi
 
 # Find out where the CUDA toolkit is installed
 while true; do
+  # Configure the Cuda SDK version to use.
+  default_cuda_version="7.0"
+  if [ "$TF_UNOFFICIAL_SETTING" == "1" ]; then
+    if [ -z "$TF_CUDA_VERSION" ]; then
+      read -p "Please specify the Cuda SDK version you want to use. [Default is $default_cuda_version]: " TF_CUDA_VERSION
+    fi
+  fi
+  if [ -z "$TF_CUDA_VERSION" ]; then
+    TF_CUDA_VERSION=$default_cuda_version
+  fi
+
   fromuser=""
   if [ -z "$CUDA_TOOLKIT_PATH" ]; then
     default_cuda_path=/usr/local/cuda
-    read -p "Please specify the location where CUDA 7.0 toolkit is installed. Refer to README.md for more details. [Default is $default_cuda_path]: " CUDA_TOOLKIT_PATH
+    read -p "Please specify the location where CUDA $TF_CUDA_VERSION toolkit is installed. Refer to README.md for more details. [Default is $default_cuda_path]: " CUDA_TOOLKIT_PATH
     fromuser="1"
     if [ -z "$CUDA_TOOLKIT_PATH" ]; then
      CUDA_TOOLKIT_PATH=$default_cuda_path
     fi
   fi
-  if [ -e "$CUDA_TOOLKIT_PATH/lib64/libcudart.so.7.0" ]; then
+  if [ -e "$CUDA_TOOLKIT_PATH/lib64/libcudart.so.$TF_CUDA_VERSION" ]; then
    break
   fi
-  echo "Invalid path to CUDA 7.0 toolkit. ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so.7.0 cannot be found"
+  echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/lib64/libcudart.so.$TF_CUDA_VERSION cannot be found"
   if [ -z "$fromuser" ]; then
     exit 1
   fi
+  TF_CUDA_VERSION=""
   CUDA_TOOLKIT_PATH=""
   # Retry
 done
 
 # Find out where the cuDNN library is installed
 while true; do
+  # Configure the Cudnn version to use.
+  default_cudnn_version="6.5"
+  if [ "$TF_UNOFFICIAL_SETTING" == "1" ]; then
+    if [ -z "$TF_CUDNN_VERSION" ]; then
+      read -p "Please specify the Cudnn version you want to use. [Default is $default_cudnn_version]: " TF_CUDNN_VERSION
+    fi
+  fi
+  if [ -z "$TF_CUDNN_VERSION" ]; then
+    TF_CUDNN_VERSION=$default_cudnn_version
+  fi
+
   fromuser=""
   if [ -z "$CUDNN_INSTALL_PATH" ]; then
     default_cudnn_path=${CUDA_TOOLKIT_PATH}
-    read -p "Please specify the location where cuDNN v2 library is installed. Refer to README.md for more details. [Default is $default_cudnn_path]: " CUDNN_INSTALL_PATH
+    read -p "Please specify the location where cuDNN $TF_CUDNN_VERSION library is installed. Refer to README.md for more details. [Default is $default_cudnn_path]: " CUDNN_INSTALL_PATH
     fromuser="1"
     if [ -z "$CUDNN_INSTALL_PATH" ]; then
       CUDNN_INSTALL_PATH=$default_cudnn_path
@@ -78,32 +105,46 @@ while true; do
     # Going through one more level of expansion to handle that.
     CUDNN_INSTALL_PATH=$(bash -c "readlink -f $CUDNN_INSTALL_PATH")
   fi
-  if [ -e "$CUDNN_INSTALL_PATH/libcudnn.so.6.5" -o -e "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.6.5" ]; then
+  if [ -e "$CUDNN_INSTALL_PATH/libcudnn.so.${TF_CUDNN_VERSION}" -o -e "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.${TF_CUDNN_VERSION}" ]; then
     break
   fi
-  echo "Invalid path to cuDNN v2 toolkit. Neither of the following two files can be found:"
-  echo "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.6.5"
-  echo "$CUDNN_INSTALL_PATH/libcudnn.so.6.5"
+  echo "Invalid path to cuDNN ${TF_CUDNN_VERSION} toolkit. Neither of the following two files can be found:"
+  echo "$CUDNN_INSTALL_PATH/lib64/libcudnn.so.${TF_CUDNN_VERSION}"
+  echo "$CUDNN_INSTALL_PATH/libcudnn.so.${TF_CUDNN_VERSION}"
   if [ -z "$fromuser" ]; then
     exit 1
   fi
+  TF_CUDNN_VERSION=""
   CUDNN_INSTALL_PATH=""
   # Retry
 done
 
 cat > third_party/gpus/cuda/cuda.config <<EOF
-# CUDA_TOOLKIT_PATH refers to the CUDA toolkit. Tensorflow requires Cuda 7.0
+# CUDA_TOOLKIT_PATH refers to the CUDA toolkit. Tensorflow requires Cuda $TF_CUDA_VERSION
 # at the moment.
 CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
 
 # CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
 # files can be either in this directory, or under include/ and lib64/
 # directories separately.
 CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
+
+# The Cuda SDK version that should be used in this build
+TF_CUDA_VERSION=$TF_CUDA_VERSION
+
+# The Cudnn version that should be used in this build
+TF_CUDNN_VERSION=$TF_CUDNN_VERSION
+
 EOF
 
 function UnofficialSetting() {
-  echo -e "\nWARNING: You are configuring unofficial settings in TensorFlow. Because some external libraries are not backward compatible, these settings are largely untested and unsupported. \n" 1>&2
+  # Configure the Cuda toolkit version to work with.
+  perl -pi -e "s,CUDA_VERSION = '[0-9\.]*',CUDA_VERSION = '$TF_CUDA_VERSION',s" tensorflow/core/platform/default/build_config.bzl
+  perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
+
+  # Configure the Cudnn version to work with.
+  perl -pi -e "s,CUDNN_VERSION = '[0-9\.]*',CUDNN_VERSION = '$TF_CUDNN_VERSION',s" tensorflow/core/platform/default/build_config.bzl
+  perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
 
   # Configure the compute capabilities that TensorFlow builds for.
   # Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
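The perl one-liners in UnofficialSetting() keep the build and the runtime loader in agreement with the configured versions: they rewrite the CUDA_VERSION/CUDNN_VERSION literals in build_config.bzl and the strings returned by GetCudaVersion()/GetCudnnVersion() in dso_loader.cc. A minimal sketch of the loader-side code being patched (only the function names come from the perl patterns above; the bodies and the CudartSoName() helper are illustrative, not the actual dso_loader.cc source):

    #include <string>

    // Version literals rewritten in place by the perl substitutions in
    // configure; "7.0" and "6.5" are the defaults above.
    static std::string GetCudaVersion() { return "7.0"; }
    static std::string GetCudnnVersion() { return "6.5"; }

    // Why the rewrite matters: the loader opens version-suffixed shared
    // objects, so the name it builds must match the file configure verified,
    // e.g. $CUDA_TOOLKIT_PATH/lib64/libcudart.so.7.0.
    static std::string CudartSoName() {
      return "libcudart.so." + GetCudaVersion();
    }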

tensorflow/core/BUILD

+1 -0
@@ -298,6 +298,7 @@ tf_cuda_library(
         "graph/graph_constructor.h",
         "graph/graph_def_builder.h",
         "graph/node_builder.h",
+        "graph/validate.h",
         "public/session.h",
         "public/session_options.h",
         "public/tensor_c_api.h",

tensorflow/core/client/tensor_c_api.cc

+80 -12
@@ -316,16 +316,16 @@ Status LoadLibrary(const char* library_filename, void** result,
 
 }  // namespace tensorflow
 
-extern "C" {
-
-void TF_Run(TF_Session* s,
-            // Input tensors
-            const char** c_input_names, TF_Tensor** c_inputs, int ninputs,
-            // Output tensors
-            const char** c_output_tensor_names, TF_Tensor** c_outputs,
-            int noutputs,
-            // Target nodes
-            const char** c_target_node_names, int ntargets, TF_Status* status) {
+void TF_Run_Helper(TF_Session* s, const char* handle,
+                   // Input tensors
+                   const char** c_input_names, TF_Tensor** c_inputs,
+                   int ninputs,
+                   // Output tensors
+                   const char** c_output_tensor_names, TF_Tensor** c_outputs,
+                   int noutputs,
+                   // Target nodes
+                   const char** c_target_node_names, int ntargets,
+                   TF_Status* status) {
   status->status = Status::OK();
   for (int i = 0; i < noutputs; i++) {
     c_outputs[i] = NULL;
@@ -365,8 +365,13 @@ void TF_Run(TF_Session* s,
   for (int i = 0; i < ntargets; i++) {
     target_node_names[i] = c_target_node_names[i];
   }
-  Status result =
-      s->session->Run(inputs, output_tensor_names, target_node_names, &outputs);
+  Status result;
+  if (handle == nullptr) {
+    result = s->session->Run(inputs, output_tensor_names, target_node_names,
+                             &outputs);
+  } else {
+    result = s->session->PRun(handle, inputs, output_tensor_names, &outputs);
+  }
   if (!result.ok()) {
     status->status = result;
     return;
@@ -392,6 +397,69 @@
   }
 }
 
+extern "C" {
+
+void TF_Run(TF_Session* s,
+            // Input tensors
+            const char** c_input_names, TF_Tensor** c_inputs, int ninputs,
+            // Output tensors
+            const char** c_output_tensor_names, TF_Tensor** c_outputs,
+            int noutputs,
+            // Target nodes
+            const char** c_target_node_names, int ntargets, TF_Status* status) {
+  TF_Run_Helper(s, nullptr, c_input_names, c_inputs, ninputs,
+                c_output_tensor_names, c_outputs, noutputs, c_target_node_names,
+                ntargets, status);
+}
+
+void TF_PRunSetup(TF_Session* s,
+                  // Input names
+                  const char** c_input_names, int ninputs,
+                  // Output names
+                  const char** c_output_tensor_names, int noutputs,
+                  // Target nodes
+                  const char** c_target_node_names, int ntargets, char** handle,
+                  TF_Status* status) {
+  status->status = Status::OK();
+
+  std::vector<tensorflow::string> input_names(ninputs);
+  std::vector<tensorflow::string> output_tensor_names(noutputs);
+  std::vector<tensorflow::string> target_node_names(ntargets);
+  for (int i = 0; i < ninputs; i++) {
+    input_names[i] = c_input_names[i];
+  }
+  for (int i = 0; i < noutputs; i++) {
+    output_tensor_names[i] = c_output_tensor_names[i];
+  }
+  for (int i = 0; i < ntargets; i++) {
+    target_node_names[i] = c_target_node_names[i];
+  }
+  tensorflow::string new_handle;
+  Status result;
+  result = s->session->PRunSetup(input_names, output_tensor_names,
+                                 target_node_names, &new_handle);
+  if (result.ok()) {
+    *handle = new char[new_handle.size() + 1];
+    memcpy(*handle, new_handle.c_str(), new_handle.size() + 1);
+  } else {
+    status->status = result;
+  }
+}
+
+void TF_PRun(TF_Session* s, const char* handle,
+             // Input tensors
+             const char** c_input_names, TF_Tensor** c_inputs, int ninputs,
+             // Output tensors
+             const char** c_output_tensor_names, TF_Tensor** c_outputs,
+             int noutputs,
+             // Target nodes
+             const char** c_target_node_names, int ntargets,
+             TF_Status* status) {
+  TF_Run_Helper(s, handle, c_input_names, c_inputs, ninputs,
+                c_output_tensor_names, c_outputs, noutputs, c_target_node_names,
+                ntargets, status);
+}
+
 const void* TF_BufferData(TF_Buffer* buffer) { return buffer->data; }
 
 size_t TF_BufferLength(TF_Buffer* buffer) { return buffer->length; }
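Taken together, TF_PRunSetup and TF_PRun expose partial runs through the C API: setup declares the feeds, fetches, and targets once and returns an opaque handle, and each subsequent TF_PRun call feeds and fetches against that handle via TF_Run_Helper. Note from the diff that *handle is allocated with new char[] inside the library, and the hunks shown here add no matching free function, so at this point the caller is left to manage that buffer. A minimal caller-side sketch, assuming a live TF_Session* named session, a prepared TF_Tensor* named t_in, and placeholder node names; error handling and tensor cleanup are elided:

    const char* feeds[] = {"input:0"};
    const char* fetches[] = {"output:0"};
    char* handle = NULL;
    TF_Status* status = TF_NewStatus();

    // Declare up front which tensors will be fed and fetched.
    TF_PRunSetup(session, feeds, 1, fetches, 1,
                 /*c_target_node_names=*/NULL, /*ntargets=*/0, &handle, status);

    if (TF_GetCode(status) == TF_OK) {
      // Continue the same step: feed t_in, fetch the matching output.
      TF_Tensor* out = NULL;
      TF_PRun(session, handle, feeds, &t_in, 1, fetches, &out, 1,
              /*c_target_node_names=*/NULL, /*ntargets=*/0, status);
    }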

tensorflow/core/common_runtime/copy_tensor.cc

+48 -49
@@ -53,58 +53,57 @@ void CopyTensor::ViaDMA(const string& edge_name,
                         StatusCallback done) {
   initialization_done = true;
   port::Tracing::ScopedAnnotation annotation(edge_name);
-  VLOG(1) << "CopyViaDMA " << edge_name;
-  const size_t total_bytes = input->TotalBytes();
-
-  // Note that 0-size tensors have no backing buffer.
-  if (total_bytes > 0) {
-    const DeviceType src_device_type(src_alloc_attr.on_host()
-                                         ? DEVICE_CPU
-                                         : src->attributes().device_type());
-    const DeviceType dst_device_type(dst_alloc_attr.on_host()
-                                         ? DEVICE_CPU
-                                         : dst->attributes().device_type());
-    const bool non_cpu_src = src_device_type != DeviceType(DEVICE_CPU);
-    const bool non_cpu_dst = dst_device_type != DeviceType(DEVICE_CPU);
-
-    if (non_cpu_src) {
-      if (non_cpu_dst) {
-        // Device to device copy. Look through registry for an appropriate
-        // CopyFunction.
-        std::vector<RegistrationInfo>* registry = MutableRegistry();
-        for (const RegistrationInfo& ri : *registry) {
-          if (ri.sender_device_type == src_device_type &&
-              ri.receiver_device_type == dst_device_type) {
-            ri.copy_function(send_dev_context, recv_dev_context, src, dst,
-                             src_alloc_attr, dst_alloc_attr, input, output,
-                             done);
-            return;
-          }
-        }
-
-        // TODO(josh11b): If no CopyFunction is found, we currently fail
-        // but we could copy between devices via CPU.
-        done(errors::Unimplemented(
-            "No function registered to copy from devices of type ",
-            src_device_type.type(), " to devices of type ",
-            dst_device_type.type()));
-      } else {
-        // Device to host copy.
-        return send_dev_context->CopyDeviceTensorToCPU(input, edge_name, src,
-                                                       output, done);
+  VLOG(1) << "Copy " << edge_name;
+
+  const DeviceType src_device_type(
+      src_alloc_attr.on_host() ? DEVICE_CPU : src->attributes().device_type());
+  const DeviceType dst_device_type(
+      dst_alloc_attr.on_host() ? DEVICE_CPU : dst->attributes().device_type());
+  const bool non_cpu_src = src_device_type != DeviceType(DEVICE_CPU);
+  const bool non_cpu_dst = dst_device_type != DeviceType(DEVICE_CPU);
+
+  // E.g., gpu -> gpu
+  if (non_cpu_src && non_cpu_dst) {
+    // Device to device copy. Look through registry for an appropriate
+    // CopyFunction.
+    std::vector<RegistrationInfo>* registry = MutableRegistry();
+    for (const RegistrationInfo& ri : *registry) {
+      if (ri.sender_device_type == src_device_type &&
+          ri.receiver_device_type == dst_device_type) {
+        ri.copy_function(send_dev_context, recv_dev_context, src, dst,
+                         src_alloc_attr, dst_alloc_attr, input, output, done);
+        return;
       }
-    } else if (non_cpu_dst) {
-      // Host to Device copy.
-      // Note that this is already an async copy.
-      recv_dev_context->CopyCPUTensorToDevice(input, dst, output, done);
-    } else {
-      *output = *input;
-      done(Status::OK());
     }
-  } else {
-    // buffer is empty
-    done(Status::OK());
+
+    // TODO(josh11b): If no CopyFunction is found, we currently fail
+    // but we could copy between devices via CPU.
+    done(errors::Unimplemented(
+        "No function registered to copy from devices of type ",
+        src_device_type.type(), " to devices of type ",
+        dst_device_type.type()));
+    return;
+  }
+
+  // E.g., gpu -> cpu
+  if (non_cpu_src && !non_cpu_dst) {
+    // Device to host copy.
+    send_dev_context->CopyDeviceTensorToCPU(input, edge_name, src, output,
+                                            done);
+    return;
   }
+
+  // E.g., cpu -> gpu
+  if (!non_cpu_src && non_cpu_dst) {
+    // Host to Device copy.
+    recv_dev_context->CopyCPUTensorToDevice(input, dst, output, done);
+    return;
+  }
+
+  // cpu -> cpu
+  CHECK(!non_cpu_src && !non_cpu_dst);
+  *output = *input;
+  done(Status::OK());
 }
 
 // static
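The rewrite flattens the old nested if/else ladder into four early-returning cases keyed on source and destination placement; the separate zero-byte fast path also disappears, so empty tensors take the same branches as everything else. A self-contained sketch of the resulting control-flow shape, with stand-in types rather than the TensorFlow internals:

    #include <cstdio>

    // Stand-in for the DEVICE_CPU checks in CopyTensor::ViaDMA.
    enum class Placement { kCpu, kDevice };

    // Same shape as the new ViaDMA body: one flat, early-returning branch per
    // (src, dst) pair, with the final case doubling as the CHECK'd default.
    void DispatchCopy(Placement src, Placement dst) {
      if (src == Placement::kDevice && dst == Placement::kDevice) {
        std::puts("device -> device: look up a registered CopyFunction");
        return;
      }
      if (src == Placement::kDevice) {
        std::puts("device -> host: CopyDeviceTensorToCPU");
        return;
      }
      if (dst == Placement::kDevice) {
        std::puts("host -> device: CopyCPUTensorToDevice");
        return;
      }
      std::puts("cpu -> cpu: *output = *input");  // both on host: alias the tensor
    }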
