As described in the main documentation, Numba translates Python functions to optimized machine code at runtime. The compiled numerical algorithms in Python can then approach the speeds of C or FORTRAN. Without the need of running a separate compilation step, Numba can be simply applied by adding decorators to the Python function. Since there are certain restriction on the Python code, we recommend to only use Numba for compiling functions that are performance-critical. Usually, this is only a small fraction of code. In the following we give a couple of illustrating example and refer to the main documentation for details.
jit
-decorator and see what you need to change.
In particular, unknown data types of a function's argument variables may cause unexpected errors when using numba, which tries to infer the types at call time. For example, when initializing a variable with var=None
in a function header may cause problems. We come back to this issue later in this notebook.
jit
¶As a first example, we consider a function that performs an average filtering over a spectrogram. To illustrate the kind of accelerations introduced by Numba, we implement this filtering as a naive double loop.
import os
import numpy as np
import librosa
from matplotlib import pyplot as plt
%matplotlib inline
fn_wav = os.path.join('..', 'data', 'B', 'FMP_B_Note-C4_Piano.wav')
x, Fs = librosa.load(fn_wav, sr=None)
X = librosa.stft(x)
Y = np.abs(X)
def spectrogram_average_filter_naive(Y, filter_len):
K, N = Y.shape
filter_left = filter_len // 2
filter_right = filter_len - filter_left - 1
Y_pad = np.concatenate((np.zeros((K, filter_left)), Y,
np.zeros((K, filter_right))), axis=1)
Y_new = np.empty_like(Y)
for k in range(K):
for n in range(N):
Y_new[k, n] = Y_pad[k, n:n+filter_len].sum() / filter_len
return Y_new
filter_length = 21
Y_filt = spectrogram_average_filter_naive(Y, filter_length)
plt.figure(figsize=(10, 3))
plt.subplot(1, 2, 1)
plt.imshow(np.log(1 + 100 * Y), aspect='auto', origin='lower', cmap='gray_r')
plt.title('Original spectrogram')
plt.colorbar()
plt.subplot(1, 2, 2)
plt.imshow(np.log(1 + 100 * Y_filt), aspect='auto', origin='lower', cmap='gray_r')
plt.title('Smoothed spectrogram')
plt.colorbar()
plt.tight_layout()
Using the same function we use the jit
-decorator to compile it. In the following code, we check that the outputs of the resulting function is the same as before and then report on the runtime.
from numba import jit
import timeit
@jit(nopython=True)
def spectrogram_average_filter_jit(Y, filter_len):
K, N = Y.shape
filter_left = filter_len // 2
filter_right = filter_len - filter_left - 1
Y_pad = np.concatenate((np.zeros((K, filter_left)), Y,
np.zeros((K, filter_right))), axis=1)
Y_new = np.empty_like(Y)
for k in range(K):
for n in range(N):
Y_new[k, n] = Y_pad[k, n:n+filter_len].sum() / filter_len
return Y_new
filter_length = 21
Y_filt_naive = spectrogram_average_filter_naive(Y, filter_length)
Y_filt_jit = spectrogram_average_filter_jit(Y, filter_length)
assert np.allclose(Y_filt_naive, Y_filt_jit)
execuctions = 3
time_nai = timeit.timeit(lambda: spectrogram_average_filter_naive(Y, filter_length),
number=execuctions) / execuctions
time_jit = timeit.timeit(lambda: spectrogram_average_filter_jit(Y, filter_length),
number=execuctions) / execuctions
print('Runtime for naive implementation: %7.5f seconds' % time_nai)
print('Runtime for jit implementation: %7.5f seconds' % time_jit)
# An alternative for measuring running time:
# %timeit spectrogram_average_filter_naive(Y, filter_length)
# %timeit spectrogram_average_filter_jit(Y, filter_length)
On a standard computer, this the jit
-compiled function may be about 100 times faster then the original one. With multiple CPU cores, one can obtain further accelerations by parallelizing the loops.
import numba
from numba import jit, prange
@jit(nopython=True, parallel=True)
def spectrogram_average_filter_parallel(Y, filter_len):
K, N = Y.shape
filter_left = filter_len // 2
filter_right = filter_len - filter_left - 1
Y_pad = np.concatenate((np.zeros((K, filter_left)), Y,
np.zeros((K, filter_right))), axis=1)
Y_new = np.empty_like(Y)
for k in prange(K):
for n in prange(N):
Y_new[k, n] = Y_pad[k, n:n+filter_len].sum() / filter_len
return Y_new
filter_length = 21
Y_filt_naive = spectrogram_average_filter_naive(Y, filter_length)
Y_filt_jit = spectrogram_average_filter_jit(Y, filter_length)
Y_filt_parallel = spectrogram_average_filter_parallel(Y, filter_length)
assert np.allclose(Y_filt_naive, Y_filt_parallel)
execuctions=3
time_nai = timeit.timeit(lambda: spectrogram_average_filter_naive(Y, filter_length),
number=execuctions) / execuctions
time_jit = timeit.timeit(lambda: spectrogram_average_filter_jit(Y, filter_length),
number=execuctions) / execuctions
time_par = timeit.timeit(lambda: spectrogram_average_filter_parallel(Y, filter_length),
number=execuctions) / execuctions
num_threads = numba.config.NUMBA_DEFAULT_NUM_THREADS
print('Runtime for naive implementation: %7.5f seconds' % time_nai)
print('Runtime for jit implementation: %7.5f seconds' % time_jit)
print('Runtime for parallel implementation (using %d threads): %7.5f seconds' % (num_threads, time_par))
The function implements the smoothing filter as a naive double-nested look. When using packages like NumPy, it is often much more efficient to use matrix operations while avoiding loop structures. Still the jit
-compiled versions may be a bit faster.
def spectrogram_average_filter_matrix(Y, filter_len):
K, N = Y.shape
filter_left = filter_len // 2
filter_right = filter_len - filter_left - 1
Y_pad = np.concatenate((np.zeros((K, filter_left)), Y, np.zeros((K, filter_right))), axis=1)
Y_new = np.empty_like(Y)
for n in range(N):
Y_new[:, n] = np.mean(Y_pad[:, n:n+filter_len], axis=1)
return Y_new
filter_length = 21
Y_filt_naive = spectrogram_average_filter_naive(Y, filter_length)
Y_filt_jit = spectrogram_average_filter_jit(Y, filter_length)
Y_filt_parallel = spectrogram_average_filter_parallel(Y, filter_length)
Y_filt_matrix = spectrogram_average_filter_matrix(Y, filter_length)
assert np.allclose(Y_filt_naive, Y_filt_matrix)
execuctions = 3
time_naive = timeit.timeit(lambda: spectrogram_average_filter_naive(Y, filter_length),
number=execuctions) / execuctions
time_jit = timeit.timeit(lambda: spectrogram_average_filter_jit(Y, filter_length),
number=execuctions) / execuctions
time_par = timeit.timeit(lambda: spectrogram_average_filter_parallel(Y, filter_length),
number=execuctions) / execuctions
time_mat = timeit.timeit(lambda: spectrogram_average_filter_matrix(Y, filter_length),
number=execuctions) / execuctions
num_threads = numba.config.NUMBA_DEFAULT_NUM_THREADS
print('Runtime for naive implementation: %7.5f seconds' % time_nai)
print('Runtime for jit implementation: %7.5f seconds' % time_jit)
print('Runtime for parallel implementation (using %d threads): %7.5f seconds' % (num_threads, time_par))
print('Runtime for matrix implementation: %7.5f seconds' % time_mat)
Note that not all features of Python and Numpy are available when compiling with Numba, see the list of supported Python features and the list of supported NumPy features for more details. For example, we cannot jit
-compile the last version of our function spectrogram_average_filter_matrix
due to the keyword axis
used in the function mean
.
@jit(nopython=True)
def spectrogram_average_filter_matrixJit(Y, filter_len):
K, N = Y.shape
filter_left = filter_len // 2
filter_right = filter_len - filter_left
Y_pad = np.concatenate((np.zeros((K, filter_left)), Y, np.zeros((K, filter_right))), axis=1)
Y_new = np.empty_like(Y)
for n in range(K):
Y_new[:, n] = np.mean(Y_pad[:, n:n+filter_len], axis=1)
return Y_new
try:
Y_filt_matrixJit = spectrogram_average_filter_matrixJit(Y, 12)
except Exception as ex:
print('Got a %s because of an unsupported numpy feature in numba.' % type(ex).__name__)
In particular, the usage of data types that are determined during runtime may cause unexpected errors. In the following, we give some examples, which work for usual Python functions, but cause problems when using jit
:
np.zeros([n, n])
does not work when using jit
. It needs to be replaced by np.zeros((n, n))
.axis
keyword in numpy function $\leadsto$ looparg=None
) $\leadsto$ use different variable name inside functiondef matrix_zeros_nojit(n=5):
result1 = np.zeros([n, n])
result2 = np.zeros((n, n), 'int')
result3 = np.zeros((n, n)).astype('int')
return result1, result2, result3
@jit(nopython=True)
def matrix_zeros_jit(n=5):
result1 = np.zeros((n, n))
result2 = np.zeros((n, n), np.int64)
result3 = np.zeros((n, n)).astype(np.int32)
return result1, result2, result3
def average_axis0_nojit(x):
result = np.mean(x, axis=0)
return result
@jit(nopython=True)
def average_axis0_jit(x):
result = np.empty(x.shape[1])
for i in range(x.shape[1]):
result[i] = np.mean(x[:, i])
return result
def optional_arg_nojit(x, idx=None):
x = np.arange(3)
if idx is None:
idx = np.argmin(x)
return x[idx]
@jit(nopython=True)
def optional_arg_jit(x, idx=None):
if idx is None:
_idx = np.argmin(x)
else:
_idx = idx
return x[_idx]
def treshold_nojit(x, tresh):
x = x.copy()
x[x > tresh] = 0
return x
@jit(nopython=True)
def treshold_jit(x, tresh):
x = x.copy()
for idx1, idx2 in zip(*np.where(x > tresh)):
x[idx1, idx2] = 0.0
return x