Skip to content

Commit bb77a7f

Browse files
authored
Fix (#3980)
1 parent 59d641b commit bb77a7f

25 files changed

+47
-47
lines changed

.github/ISSUE_TEMPLATE/bug-report-s2t.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ If applicable, add screenshots to help explain your problem.
3333
- Python Version [e.g. 3.7]
3434
- PaddlePaddle Version [e.g. 2.0.0]
3535
- Model Version [e.g. 2.0.0]
36-
- GPU/DRIVER Informationo [e.g. Tesla V100-SXM2-32GB/440.64.00]
36+
- GPU/DRIVER Information [e.g. Tesla V100-SXM2-32GB/440.64.00]
3737
- CUDA/CUDNN Version [e.g. cuda-10.2]
3838
- MKL Version
3939
- TensorRT Version

.github/ISSUE_TEMPLATE/bug-report-tts.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ If applicable, add screenshots to help explain your problem.
3232
- Python Version [e.g. 3.7]
3333
- PaddlePaddle Version [e.g. 2.0.0]
3434
- Model Version [e.g. 2.0.0]
35-
- GPU/DRIVER Informationo [e.g. Tesla V100-SXM2-32GB/440.64.00]
35+
- GPU/DRIVER Information [e.g. Tesla V100-SXM2-32GB/440.64.00]
3636
- CUDA/CUDNN Version [e.g. cuda-10.2]
3737
- MKL Version
3838
- TensorRT Version

audio/paddleaudio/backends/soundfile_backend.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def resample(y: np.ndarray,
6161
if mode == 'kaiser_best':
6262
warnings.warn(
6363
f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \
64-
we recommend the mode kaiser_fast in large scale audio trainning')
64+
we recommend the mode kaiser_fast in large scale audio training')
6565

6666
if not isinstance(y, np.ndarray):
6767
raise ParameterError(

audio/paddleaudio/compliance/kaldi.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def spectrogram(waveform: Tensor,
233233
round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
234234
to FFT. Defaults to True.
235235
sr (int, optional): Sample rate of input waveform. Defaults to 16000.
236-
snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it
236+
snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
237237
is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
238238
subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
239239
window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey".
@@ -443,7 +443,7 @@ def fbank(waveform: Tensor,
443443
round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
444444
to FFT. Defaults to True.
445445
sr (int, optional): Sample rate of input waveform. Defaults to 16000.
446-
snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it
446+
snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
447447
is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
448448
subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
449449
use_energy (bool, optional): Add a dimension with energy of spectrogram to the output. Defaults to False.
@@ -566,7 +566,7 @@ def mfcc(waveform: Tensor,
566566
round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
567567
to FFT. Defaults to True.
568568
sr (int, optional): Sample rate of input waveform. Defaults to 16000.
569-
snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it
569+
snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
570570
is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
571571
subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
572572
use_energy (bool, optional): Add a dimension with energy of spectrogram to the output. Defaults to False.

audio/paddleaudio/datasets/dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def __init__(self,
4747
files (:obj:`List[str]`): A list of absolute path of audio files.
4848
labels (:obj:`List[int]`): Labels of audio files.
4949
feat_type (:obj:`str`, `optional`, defaults to `raw`):
50-
It identifies the feature type that user wants to extrace of an audio file.
50+
It identifies the feature type that user wants to extract of an audio file.
5151
"""
5252
super(AudioClassificationDataset, self).__init__()
5353

audio/paddleaudio/datasets/esc50.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def __init__(self,
117117
split (:obj:`int`, `optional`, defaults to 1):
118118
It specifies the fold of dev dataset.
119119
feat_type (:obj:`str`, `optional`, defaults to `raw`):
120-
It identifies the feature type that user wants to extrace of an audio file.
120+
It identifies the feature type that user wants to extract of an audio file.
121121
"""
122122
files, labels = self._get_data(mode, split)
123123
super(ESC50, self).__init__(

audio/paddleaudio/datasets/gtzan.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def __init__(self,
6767
split (:obj:`int`, `optional`, defaults to 1):
6868
It specifies the fold of dev dataset.
6969
feat_type (:obj:`str`, `optional`, defaults to `raw`):
70-
It identifies the feature type that user wants to extrace of an audio file.
70+
It identifies the feature type that user wants to extract of an audio file.
7171
"""
7272
assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
7373
files, labels = self._get_data(mode, seed, n_folds, split)

audio/paddleaudio/datasets/tess.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def __init__(self,
7676
split (:obj:`int`, `optional`, defaults to 1):
7777
It specifies the fold of dev dataset.
7878
feat_type (:obj:`str`, `optional`, defaults to `raw`):
79-
It identifies the feature type that user wants to extrace of an audio file.
79+
It identifies the feature type that user wants to extract of an audio file.
8080
"""
8181
assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
8282
files, labels = self._get_data(mode, seed, n_folds, split)

audio/paddleaudio/datasets/urban_sound.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def __init__(self,
6868
split (:obj:`int`, `optional`, defaults to 1):
6969
It specifies the fold of dev dataset.
7070
feat_type (:obj:`str`, `optional`, defaults to `raw`):
71-
It identifies the feature type that user wants to extrace of an audio file.
71+
It identifies the feature type that user wants to extract of an audio file.
7272
"""
7373

7474
def _get_meta_info(self):

audio/paddleaudio/datasets/voxceleb.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ def generate_csv(self,
262262
split_chunks: bool=True):
263263
print(f'Generating csv: {output_file}')
264264
header = ["id", "duration", "wav", "start", "stop", "spk_id"]
265-
# Note: this may occurs c++ execption, but the program will execute fine
266-
# so we can ignore the execption
265+
# Note: this may cause a C++ exception, but the program will execute fine
266+
# so we can ignore the exception
267267
with Pool(cpu_count()) as p:
268268
infos = list(
269269
tqdm(

audio/paddleaudio/features/layers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
class Spectrogram(nn.Layer):
3636
"""Compute spectrogram of given signals, typically audio waveforms.
37-
The spectorgram is defined as the complex norm of the short-time Fourier transformation.
37+
The spectrogram is defined as the complex norm of the short-time Fourier transformation.
3838
3939
Args:
4040
n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.

audio/paddleaudio/functional/functional.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def create_dct(n_mfcc: int,
247247
Args:
248248
n_mfcc (int): Number of mel frequency cepstral coefficients.
249249
n_mels (int): Number of mel filterbanks.
250-
norm (Optional[str], optional): Normalizaiton type. Defaults to 'ortho'.
250+
norm (Optional[str], optional): Normalization type. Defaults to 'ortho'.
251251
dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.
252252
253253
Returns:

audio/paddleaudio/metric/eer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def compute_eer(labels: np.ndarray, scores: np.ndarray) -> List[float]:
2222
"""Compute EER and return score threshold.
2323
2424
Args:
25-
labels (np.ndarray): the trial label, shape: [N], one-dimention, N refer to the samples num
26-
scores (np.ndarray): the trial scores, shape: [N], one-dimention, N refer to the samples num
25+
labels (np.ndarray): the trial label, shape: [N], one-dimension, N refer to the samples num
26+
scores (np.ndarray): the trial scores, shape: [N], one-dimension, N refer to the samples num
2727
2828
Returns:
2929
List[float]: eer and the specific threshold

audio/paddleaudio/sox_effects/sox_effects.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ def apply_effects_tensor(
121121
122122
"""
123123
tensor_np = tensor.numpy()
124-
ret = paddleaudio._paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate,
125-
effects, channels_first)
124+
ret = paddleaudio._paddleaudio.sox_effects_apply_effects_tensor(
125+
tensor_np, sample_rate, effects, channels_first)
126126
if ret is not None:
127127
return (paddle.to_tensor(ret[0]), ret[1])
128128
raise RuntimeError("Failed to apply sox effect")
@@ -139,7 +139,7 @@ def apply_effects_file(
139139
140140
Note:
141141
This function works in the way very similar to ``sox`` command, however there are slight
142-
differences. For example, ``sox`` commnad adds certain effects automatically (such as
142+
differences. For example, ``sox`` command adds certain effects automatically (such as
143143
``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
144144
effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
145145
effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
@@ -228,14 +228,14 @@ def apply_effects_file(
228228
>>> pass
229229
"""
230230
if hasattr(path, "read"):
231-
ret = paddleaudio._paddleaudio.apply_effects_fileobj(path, effects, normalize,
232-
channels_first, format)
231+
ret = paddleaudio._paddleaudio.apply_effects_fileobj(
232+
path, effects, normalize, channels_first, format)
233233
if ret is None:
234234
raise RuntimeError("Failed to load audio from {}".format(path))
235235
return (paddle.to_tensor(ret[0]), ret[1])
236236
path = os.fspath(path)
237-
ret = paddleaudio._paddleaudio.sox_effects_apply_effects_file(path, effects, normalize,
238-
channels_first, format)
237+
ret = paddleaudio._paddleaudio.sox_effects_apply_effects_file(
238+
path, effects, normalize, channels_first, format)
239239
if ret is not None:
240240
return (paddle.to_tensor(ret[0]), ret[1])
241241
raise RuntimeError("Failed to load audio from {}".format(path))

audio/paddleaudio/src/pybind/kaldi/feature_common_inl.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ template <class F>
2626
bool StreamingFeatureTpl<F>::ComputeFeature(
2727
const std::vector<float>& wav,
2828
std::vector<float>* feats) {
29-
// append remaned waves
29+
// append remained waves
3030
int wav_len = wav.size();
3131
if (wav_len == 0) return false;
3232
int left_len = remained_wav_.size();
@@ -38,7 +38,7 @@ bool StreamingFeatureTpl<F>::ComputeFeature(
3838
wav.data(),
3939
wav_len * sizeof(float));
4040

41-
// cache remaned waves
41+
// cache remained waves
4242
knf::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
4343
int num_frames = knf::NumFrames(waves.size(), frame_opts);
4444
int frame_shift = frame_opts.WindowShift();

audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,5 @@ py::array_t<float> KaldiFeatureWrapper::ComputeFbank(
4444
return result.reshape(shape);
4545
}
4646

47-
} // namesapce kaldi
47+
} // namespace kaldi
4848
} // namespace paddleaudio

audio/paddleaudio/src/pybind/sox/effects.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ using namespace paddleaudio::sox_utils;
1212
namespace paddleaudio::sox_effects {
1313

1414
// Streaming decoding over file-like object is tricky because libsox operates on
15-
// FILE pointer. The folloing is what `sox` and `play` commands do
15+
// FILE pointer. The following is what `sox` and `play` commands do
1616
// - file input -> FILE pointer
17-
// - URL input -> call wget in suprocess and pipe the data -> FILE pointer
17+
// - URL input -> call wget in subprocess and pipe the data -> FILE pointer
1818
// - stdin -> FILE pointer
1919
//
2020
// We want to, instead, fetch byte strings chunk by chunk, consume them, and
@@ -127,12 +127,12 @@ namespace {
127127

128128
enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
129129
SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;
130-
std::mutex SOX_RESOUCE_STATE_MUTEX;
130+
std::mutex SOX_RESOURCE_STATE_MUTEX;
131131

132132
} // namespace
133133

134134
void initialize_sox_effects() {
135-
const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
135+
const std::lock_guard<std::mutex> lock(SOX_RESOURCE_STATE_MUTEX);
136136

137137
switch (SOX_RESOURCE_STATE) {
138138
case NotInitialized:
@@ -150,7 +150,7 @@ void initialize_sox_effects() {
150150
};
151151

152152
void shutdown_sox_effects() {
153-
const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
153+
const std::lock_guard<std::mutex> lock(SOX_RESOURCE_STATE_MUTEX);
154154

155155
switch (SOX_RESOURCE_STATE) {
156156
case NotInitialized:

audio/paddleaudio/src/pybind/sox/effects_chain.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ namespace {
1414

1515
/// helper classes for passing the location of input tensor and output buffer
1616
///
17-
/// drain/flow callback functions require plaing C style function signature and
17+
/// drain/flow callback functions require plain C style function signature and
1818
/// the way to pass extra data is to attach data to sox_effect_t::priv pointer.
1919
/// The following structs will be assigned to sox_effect_t::priv pointer which
2020
/// gives sox_effect_t an access to input Tensor and output buffer object.
@@ -50,7 +50,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
5050
*osamp -= *osamp % num_channels;
5151

5252
// Slice the input Tensor
53-
// refacor this module, chunk
53+
// refactor this module, chunk
5454
auto i_frame = index / num_channels;
5555
auto num_frames = *osamp / num_channels;
5656

audio/paddleaudio/src/pybind/sox/utils.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ py::dtype get_dtype(
162162
}
163163
default:
164164
// default to float32 for the other formats, including
165-
// 32-bit flaoting-point WAV,
165+
// 32-bit floating-point WAV,
166166
// MP3,
167167
// FLAC,
168168
// VORBIS etc...
@@ -177,7 +177,7 @@ py::array convert_to_tensor(
177177
const py::dtype dtype,
178178
const bool normalize,
179179
const bool channels_first) {
180-
// todo refector later(SGoat)
180+
// todo refactor later(SGoat)
181181
py::array t;
182182
uint64_t dummy = 0;
183183
SOX_SAMPLE_LOCALS;

audio/paddleaudio/src/pybind/sox/utils.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ py::dtype get_dtype(
7676
/// Tensor.
7777
/// @param dtype Target dtype. Determines the output dtype and value range in
7878
/// conjunction with normalization.
79-
/// @param noramlize Perform normalization. Only effective when dtype is not
79+
/// @param normalize Perform normalization. Only effective when dtype is not
8080
/// kFloat32. When effective, the output tensor is kFloat32 type and value range
8181
/// is [-1.0, 1.0]
8282
/// @param channels_first When True, output Tensor has shape of [num_channels,

audio/paddleaudio/third_party/sox/CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ set(patch_dir ${CMAKE_CURRENT_SOURCE_DIR}/../patches)
88
set(COMMON_ARGS --quiet --disable-shared --enable-static --prefix=${INSTALL_DIR} --with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc)
99

1010
# To pass custom environment variables to ExternalProject_Add command,
11-
# we need to do `${CMAKE_COMMAND} -E env ${envs} <COMMANAD>`.
11+
# we need to do `${CMAKE_COMMAND} -E env ${envs} <COMMAND>`.
1212
# https://stackoverflow.com/a/62437353
13-
# We constrcut the custom environment variables here
13+
# We construct the custom environment variables here
1414
set(envs
1515
"PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig"
1616
"LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}"

audio/paddleaudio/utils/download.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ def download_and_decompress(archives: List[Dict[str, str]],
4141
path: str,
4242
decompress: bool=True):
4343
"""
44-
Download archieves and decompress to specific path.
44+
Download archives and decompress to specific path.
4545
"""
4646
if not os.path.isdir(path):
4747
os.makedirs(path)
4848

4949
for archive in archives:
5050
assert 'url' in archive and 'md5' in archive, \
51-
'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}'
51+
'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archive.keys())}'
5252
download.get_path_from_url(
5353
archive['url'], path, archive['md5'], decompress=decompress)
5454

audio/paddleaudio/utils/log.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858

5959
class Logger(object):
6060
'''
61-
Deafult logger in PaddleAudio
61+
Default logger in PaddleAudio
6262
Args:
6363
name(str) : Logger name, default is 'PaddleAudio'
6464
'''

audio/paddleaudio/utils/sox_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def set_use_threads(use_threads: bool):
5555
5656
Args:
5757
use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing.
58-
To use mutlithread, the underlying ``libsox`` has to be compiled with OpenMP support.
58+
To use multithread, the underlying ``libsox`` has to be compiled with OpenMP support.
5959
6060
See Also:
6161
http://sox.sourceforge.net/sox.html

audio/paddleaudio/utils/tensor_utils.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
"""Unility functions for Transformer."""
14+
"""Utility functions for Transformer."""
1515
from typing import List
1616
from typing import Tuple
1717

@@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
8080
# assuming trailing dimensions and type of all the Tensors
8181
# in sequences are same and fetching those from sequences[0]
8282
max_size = paddle.shape(sequences[0])
83-
# (TODO Hui Zhang): slice not supprot `end==start`
83+
# (TODO Hui Zhang): slice not support `end==start`
8484
# trailing_dims = max_size[1:]
8585
trailing_dims = tuple(
8686
max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@@ -94,7 +94,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
9494
length = tensor.shape[0]
9595
# use index notation to prevent duplicate references to the tensor
9696
if batch_first:
97-
# TODO (Hui Zhang): set_value op not supprot `end==start`
97+
# TODO (Hui Zhang): set_value op not support `end==start`
9898
# TODO (Hui Zhang): set_value op not support int16
9999
# TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
100100
# out_tensor[i, :length, ...] = tensor
@@ -103,7 +103,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
103103
else:
104104
out_tensor[i, length] = tensor
105105
else:
106-
# TODO (Hui Zhang): set_value op not supprot `end==start`
106+
# TODO (Hui Zhang): set_value op not support `end==start`
107107
# out_tensor[:length, i, ...] = tensor
108108
if length != 0:
109109
out_tensor[:length, i] = tensor

0 commit comments

Comments
 (0)