This notebook demonstrates the functionality of libtsm, a Python library for Time-Scale Modification (TSM) and pitch-shifting. libtsm is based on a re-implementation of the MATLAB TSM Toolbox by Jonathan Driedger and Meinard Müller.
If you are using libtsm for your work, please cite:
Sebastian Rosenzweig, Simon Schwär, Jonathan Driedger, and Meinard Müller:
Adaptive Pitch-Shifting with Applications to Intonation Adjustment in A Cappella Recordings
Proceedings of the International Conference on Digital Audio Effects (DAFx), 2021.
Further contributors:
import numpy as np
import librosa
import IPython.display as ipd
import scipy.io as sio
import libtsm
# Select the example recording.
# Exactly one `filename` assignment is active; the other candidates stay
# commented out so that they can be switched in quickly.
#filename = 'Bongo'
#filename = 'BeethovenOrchestra'
#filename = 'BeethovenPiano'
filename = 'CastanetsViolin'
#filename = 'DrumSolo'
#filename = 'Glockenspiel'
#filename = 'Stepdad'
#filename = 'Jazz'
#filename = 'Pop'
#filename = 'SingingVoice'
#filename = 'SynthMono'
#filename = 'SynthPoly'
#filename = 'Scale_Cmajor_Piano'

# Build the path of the selected WAV file inside the data directory.
directory = './data/'
audio_file = f'{directory}{filename}.wav'

# Decode the recording: `x` holds the samples, `Fs` the sampling rate
# returned by librosa. Both are reused by the demos that follow.
x, Fs = librosa.load(audio_file)
# Optional synthetic replacement for the recording (440 Hz sine, same length):
#x = 0.5 * np.sin(2*np.pi*440*np.arange(0, len(x)/Fs, 1/Fs))

# Playback widget for the unmodified input.
print('Original signal', flush=True)
original_player = ipd.Audio(x, rate=Fs, normalize=True)
ipd.display(original_player)
# OLA demo: libtsm.wsola_tsm is called with tol=0 and the result is
# presented as the OLA variant.
alpha = 1.8  # time-stretch factor
y_ola = libtsm.wsola_tsm(x, alpha, tol=0)

# Play the input followed by the stretched result (first column of the output).
for caption, samples in (
    ('Original signal', x),
    ('Time-Scale modified signal with OLA', y_ola[:, 0]),
):
    print(caption, flush=True)
    ipd.display(ipd.Audio(samples, rate=Fs, normalize=True))
# WSOLA demo: same call as the OLA case, but with the library's default
# tolerance instead of tol=0.
alpha = 1.8  # time-stretch factor
y_wsola = libtsm.wsola_tsm(x, alpha)

# Play the input followed by the stretched result (first column of the output).
for caption, samples in (
    ('Original signal', x),
    ('Time-Scale modified signal with WSOLA', y_wsola[:, 0]),
):
    print(caption, flush=True)
    ipd.display(ipd.Audio(samples, rate=Fs, normalize=True))
# Phase-vocoder demo: two runs that differ only in the `phase_locking` flag.
alpha = 1.8  # time-stretch factor
y_pv = libtsm.pv_tsm(x, alpha, phase_locking=False)
y_pvpl = libtsm.pv_tsm(x, alpha, phase_locking=True)

# Play the input, then both phase-vocoder results (first column of each output).
for caption, samples in (
    ('Original signal', x),
    ('Time-Scale modified signal with Phase Vocoder', y_pv[:, 0]),
    ('Time-Scale modified signal with Phase Vocoder (phase locking)', y_pvpl[:, 0]),
):
    print(caption, flush=True)
    ipd.display(ipd.Audio(samples, rate=Fs, normalize=True))
# HPS-TSM assembled by hand: separate, stretch each component, recombine.
alpha = 1.8

# Step 1: harmonic-percussive separation of the input.
x_harm, x_perc = libtsm.hps(x)

# Step 2: stretch each component with its own algorithm.
# Harmonic component: phase vocoder. Percussive component: wsola_tsm with tol=0 (OLA).
y_harm = libtsm.pv_tsm(x_harm, alpha)
y_perc = libtsm.wsola_tsm(x_perc, alpha, tol=0)

# Step 3: the sum of both stretched components is the final result.
y = y_harm + y_perc

# Play every intermediate signal in processing order. `x` is passed as is;
# for all other signals the first column is played.
for caption, samples in (
    ('Original signal', x),
    ('Harmonic part', x_harm[:, 0]),
    ('Percussive part', x_perc[:, 0]),
    ('Time-Scale modified harmonic part', y_harm[:, 0]),
    ('Time-Scale modified percussive part', y_perc[:, 0]),
    ('Time-Scale modified signal (HPS-TSM)', y[:, 0]),
):
    print(caption, flush=True)
    ipd.display(ipd.Audio(samples, rate=Fs, normalize=True))
# Two-step TSM demo, using a larger stretch factor than the previous cells.
alpha = 4.1
y_two_steps = libtsm.two_step_tsm(x, alpha)

# Play the input followed by the stretched result (first column of the output).
for caption, samples in (
    ('Original signal', x),
    ('Time-Scale modified signal with efficient two-step approach', y_two_steps[:, 0]),
):
    print(caption, flush=True)
    ipd.display(ipd.Audio(samples, rate=Fs, normalize=True))
# Non-linear TSM: synchronize the orchestra recording with the piano recording.

# Load the two audio files.
audio_file1 = directory + 'BeethovenOrchestra.wav'
audio_file2 = directory + 'BeethovenPiano.wav'
x1, Fs1 = librosa.load(audio_file1)
x2, Fs2 = librosa.load(audio_file2)

# Both signals are mixed into one two-channel stream below, which is only
# meaningful when they share a sampling rate.
if Fs1 != Fs2:
    raise ValueError(
        'Sampling rates differ (%s Hz vs. %s Hz); cannot mix both signals '
        'into one stream.' % (Fs1, Fs2)
    )

# Extract the anchor points from the .MAT file.
mat_file = directory + 'BeethovenAnchorpoints.mat'
mat = sio.loadmat(mat_file)
# Subtract 1 for the Python version (presumably converting 1-based MATLAB
# sample indices to 0-based ones -- confirm against the .MAT file).
anchors = mat['anchorpoints'] - 1
print(anchors)

# HPS-TSM using the anchor points to synchronize the orchestra with the piano file.
y_hpstsm = libtsm.hps_tsm(x1, anchors)

# Put the piano signal and the warped orchestra signal side by side as columns.
y = np.hstack((x2.reshape(-1, 1), y_hpstsm))

# Play each signal at the rate returned by its own load call, rather than
# the `Fs` left over from an earlier cell.
print('Original signal 1', flush=True)
ipd.display(ipd.Audio(x1, rate=Fs1, normalize=True))
print('Original signal 2', flush=True)
ipd.display(ipd.Audio(x2, rate=Fs2, normalize=True))
print('Synchronized versions', flush=True)
# ipd.Audio receives the transposed array, i.e. one row per channel.
ipd.display(ipd.Audio(y.T, rate=Fs1, normalize=True))
# Fixed pitch shift of the current signal, rendered by three implementations.
p = 100  # pitch-shift amount (presumably in cents -- TODO confirm for pitch_shift_original)

# Matlab-inspired implementation.
y_pitch_shift_1 = libtsm.pitch_shift_original(x, p)
# New implementation for fixed and adaptive pitch shifts, in both processing orders.
y_pitch_shift_2 = libtsm.pitch_shift(x, p, order="res-tsm")
y_pitch_shift_3 = libtsm.pitch_shift(x, p, order="tsm-res")

# Play the input followed by the three results (first column of each output).
for caption, samples in (
    ('Original signal', x),
    ('Pitch-shifted signal using Matlab re-implementation', y_pitch_shift_1[:, 0]),
    ('Pitch-shifted signal using new implementation (Resampling-TSM)', y_pitch_shift_2[:, 0]),
    ('Pitch-shifted signal using new implementation (TSM-Resampling)', y_pitch_shift_3[:, 0]),
):
    print(caption, flush=True)
    ipd.display(ipd.Audio(samples, rate=Fs, normalize=True))
# Adaptive pitch-shifting demo: turn a steady sine tone into a "siren".

# Input signal: a pure sine tone.
sig_len = 10  # seconds
Fs = 22050  # Hz
# Build the time axis from integer sample indices. np.arange with a
# fractional step (1/Fs) may return one element more or fewer than intended
# because of floating-point rounding; an integer range scaled afterwards
# always yields exactly sig_len * Fs samples.
t_sine = np.arange(int(round(sig_len * Fs))) / Fs
F_sine = 440  # Hz
sine = np.sin(2*np.pi*F_sine*t_sine)

# Time-varying pitch-shift (sinusoidal): one value per sample with an
# amplitude of 200, oscillating at F_shift.
F_shift = 1  # Hz
p = np.sin(2*np.pi*F_shift*t_sine) * 200

# Pitch-shifting; `t_p` supplies the time instants that belong to `p`.
siren = libtsm.pitch_shift(sine, p, t_p=t_sine)

# flush=True, as in the other cells, so that each caption is emitted
# before the corresponding audio widget.
print('Original signal', flush=True)
ipd.display(ipd.Audio(sine, rate=Fs, normalize=True))
print('Pitch-shifted signal', flush=True)
ipd.display(ipd.Audio(siren[:, 0], rate=Fs, normalize=True))
# Examples labelled "Figure 2" and "Figure 3" below.

# Load audio file
fn_in = 'data/three_sinusoidals.wav'
x, Fs = librosa.load(fn_in, sr=22050)

# TSM algorithms
alpha = 1.8  # scaling factor
y_wsola = libtsm.wsola_tsm(x, alpha)
y_pv = libtsm.pv_tsm(x, alpha)
y_hps = libtsm.hps_tsm(x, alpha)

# Fixed pitch-shifting (Figure 2)
p = 1200  # cents
y_psf = libtsm.pitch_shift(x, p)

# Adaptive pitch-shifting (Figure 3)
# One time instant per sample. The axis is derived from integer sample
# indices so that len(t) == len(x) holds exactly; np.arange with a
# fractional step can be off by one element due to floating-point rounding.
t = np.arange(len(x)) / Fs  # sec
N = len(t)
# Split the time axis into three consecutive segments.
t_1 = t[0:N//3]
t_2 = t[N//3:2*N//3]
t_3 = t[2*N//3:]
# Piecewise pitch-shift curve: no shift, then a 1 Hz sinusoid with an
# amplitude of 800, then a linear ramp from 0 to 1200.
p = np.concatenate((np.zeros(len(t_1)), 800*np.sin(2*np.pi*1*t_2), np.linspace(0, 1200, len(t_3))))  # cents
# Pass the time axis by keyword, matching the siren example.
y_psa = libtsm.pitch_shift(x, p, t_p=t)

# flush=True, as in the other cells, so that each caption is emitted
# before the corresponding audio widget.
print('Original signal', flush=True)
ipd.display(ipd.Audio(x, rate=Fs, normalize=True))
print('Pitch-shifted signal (Figure 2)', flush=True)
ipd.display(ipd.Audio(y_psf[:, 0], rate=Fs, normalize=True))
print('Pitch-shifted signal (Figure 3)', flush=True)
ipd.display(ipd.Audio(y_psa[:, 0], rate=Fs, normalize=True))