Utility code
https://web.stanford.edu/class/physics91si/2013/handouts/Pdb_Commands.pdf
import ipdb; ipdb.set_trace()
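Besides dropping a breakpoint with set_trace(), ipdb can also open the debugger right after an exception; a minimal sketch (assumes ipdb is installed, risky_function is a made-up name):
import ipdb
try:
    risky_function()        # hypothetical function that raises
except Exception:
    ipdb.post_mortem()      # inspect the frame where the exception was raised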
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--sr", type=int, default=16000, help="sampling rate")
    parser.add_argument("--in_dir", type=str, default="vctk", help="path to source dir")
    parser.add_argument("--use_cuda", default=False, action="store_true")
    parser.add_argument('-m', '--model', type=str, required=True, help='Model name')
    args = parser.parse_args()
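When experimenting in a notebook or a test, an explicit argv list can be passed instead of reading sys.argv (the values below are made up):
args = parser.parse_args(["--sr", "22050", "--in_dir", "/data/vctk", "-m", "base"])
assert args.sr == 22050 and args.model == "base" and args.use_cuda is False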
import os
from glob import glob
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

def process(wav_name):
    # process a single wav file here
    pass

filenames = glob(f'{args.in_dir}/**/*.wav', recursive=True)
# alternative: list only the files of one speaker (speaker is defined elsewhere)
# filenames = os.listdir(os.path.join(args.in_dir, speaker))

pool = Pool(processes=cpu_count() - 2)
for _ in tqdm(pool.imap_unordered(process, filenames)):
    pass
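An alternative sketch: using the pool as a context manager closes it automatically, and passing total= gives tqdm a real progress bar length.
with Pool(processes=max(1, cpu_count() - 2)) as pool:
    for _ in tqdm(pool.imap_unordered(process, filenames), total=len(filenames)):
        pass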
def print_network(model, name):
    """Print out the network information."""
    num_params = 0
    for p in model.parameters():
        num_params += p.numel()
    print(model, flush=True)
    print(name, flush=True)
    print("The number of parameters: {}".format(num_params), flush=True)
for k, v in net_g.named_parameters():
    if 'cond' not in k:
        v.requires_grad = False  # freeze this parameter
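A common follow-up (sketch; the optimizer choice and learning rate are illustrative): hand only the still-trainable parameters to the optimizer.
import torch
trainable = [p for p in net_g.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable, lr=1e-4)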
import librosa
import numpy as np
import parselmouth

def get_f0(path, p_len=None, f0_up_key=0):
    """Extract f0 with Praat (parselmouth) and quantize it into 255 mel-spaced bins."""
    x, _ = librosa.load(path, sr=16000)
    if p_len is None:
        p_len = x.shape[0] // 320          # hop size 320 samples = 20 ms at 16 kHz
    else:
        assert abs(p_len - x.shape[0] // 320) < 2, (path, p_len, x.shape)
    time_step = 320 / 16000 * 1000
    f0_min = 50
    f0_max = 1100
    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
    f0 = parselmouth.Sound(x, 16000).to_pitch_ac(
        time_step=time_step / 1000, voicing_threshold=0.6,
        pitch_floor=f0_min, pitch_ceiling=f0_max).selected_array['frequency']
    # pad the f0 track so its length matches the expected frame count
    pad_size = (p_len - len(f0) + 1) // 2
    if pad_size > 0 or p_len - len(f0) - pad_size > 0:
        f0 = np.pad(f0, [[pad_size, p_len - len(f0) - pad_size]], mode='constant')
    f0bak = f0.copy()
    f0 *= pow(2, f0_up_key / 12)           # optional pitch shift in semitones
    f0_mel = 1127 * np.log(1 + f0 / 700)
    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (f0_mel_max - f0_mel_min) + 1
    f0_mel[f0_mel <= 1] = 1
    f0_mel[f0_mel > 255] = 255
    f0_coarse = np.rint(f0_mel).astype(np.int64)
    return f0_coarse, f0bak
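The coarse f0 is typically cached to disk during preprocessing; a hypothetical call (the wav path is illustrative, the + "f0.npy" naming follows the loading snippet below):
wav_path = "dataset/p225/p225_001.wav"     # illustrative path
f0_coarse, f0_raw = get_f0(wav_path, f0_up_key=0)
np.save(wav_path + "f0.npy", f0_coarse)    # matches the np.load call below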
# in the dataset: load the cached coarse f0 and turn it into embedding indices
f0 = np.load(filename + "f0.npy")
f0 = torch.LongTensor(f0)
# in the model: one embedding vector per coarse-f0 bin
self.f0_emb = nn.Embedding(256, hidden_channels)
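A self-contained sanity check of the embedding shapes (hidden_channels=192 and the frame count are just illustrative values):
import torch
import torch.nn as nn
emb = nn.Embedding(256, 192)             # 256 coarse-f0 bins -> 192-dim vectors
f0 = torch.randint(1, 256, (1, 400))     # 1 utterance, 400 frames of coarse f0
assert emb(f0).shape == (1, 400, 192)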
import json

# load: file -> object
with open("xx.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# loads: string -> object (JSON requires double quotes)
data = '{"a": "aa", "b": "bb"}'
data = json.loads(data)

# dump: object -> file (dicts and lists both serialize)
data = {'a': 'aa', 'b': 'bb'}
# data = ['aaa', 'bbb']
with open("xx.json", "w", encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# dumps: object -> string
data = json.dumps(data, ensure_ascii=False)
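A quick round-trip check showing that dumps and loads are inverses (example values are my own):
obj = {"a": "aa", "b": ["bb", 1, None]}
assert json.loads(json.dumps(obj, ensure_ascii=False)) == obj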
import os
import subprocess
from glob import glob
from tqdm import tqdm

def link_file(from_file, to_file):
    # symlink with a relative target, so the dataset dir stays relocatable
    subprocess.check_call(
        f'ln -s "`realpath --relative-to="{os.path.dirname(to_file)}" "{from_file}"`" "{to_file}"', shell=True)

wavpaths = glob("/data/24k/**/*.wav", recursive=True)
for wavpath in tqdm(wavpaths):
    basename = os.path.basename(wavpath)
    tgtpath = os.path.join("./dataset", basename)
    tgtdir = os.path.dirname(tgtpath)
    os.makedirs(tgtdir, exist_ok=True)
    link_file(wavpath, tgtpath)
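A roughly equivalent pure-Python variant (sketch; unlike realpath, os.path.relpath does not resolve symlinks first):
def link_file_py(from_file, to_file):
    # relative target, so ./dataset can be moved together with the source data
    rel = os.path.relpath(from_file, start=os.path.dirname(to_file))
    os.symlink(rel, to_file)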
try:
    ...
except Exception as e:
    print(e)        # just the message
    print(type(e))  # the exception class
    print(str(e))   # same text as print(e)
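When the one-line message is not enough, the full stack trace can be kept as well (standard-library traceback, nothing project-specific):
import traceback
try:
    ...
except Exception:
    print(traceback.format_exc())   # message plus the full stack trace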
model = Model()
print(model.__dict__.keys())
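For a torch nn.Module, state_dict() usually tells you more than __dict__ (assumes model is an nn.Module):
print(list(model.state_dict().keys()))   # parameter/buffer names, e.g. "layer.weight"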
# https://github.com/NATSpeech/NATSpeech/blob/main/utils/commons/indexed_datasets.py
import pickle
from copy import deepcopy
import numpy as np
class IndexedDataset:
    def __init__(self, path, num_cache=0):
        super().__init__()
        self.path = path
        self.data_file = None
        self.indexs = np.load(f"{path}.idx", allow_pickle=True).item()
        self.data_file = open(f"{path}.data", 'rb', buffering=-1)
        self.cache = []
        self.num_cache = num_cache

    def check_index(self, basename):
        if basename not in self.indexs:
            raise IndexError(f'{basename} not in {self.path}')

    def __del__(self):
        if self.data_file:
            self.data_file.close()

    def __getitem__(self, basename):
        self.check_index(basename)
        if self.num_cache > 0:
            for c in self.cache:
                if c[0] == basename:
                    return c[1]
        offset = self.indexs[basename]['offset']
        size = self.indexs[basename]['size']
        self.data_file.seek(offset)
        b = self.data_file.read(size)
        item = pickle.loads(b)
        if self.num_cache > 0:
            self.cache = [(basename, deepcopy(item))] + self.cache[:-1]
        return item

    def __len__(self):
        return len(self.indexs)
class IndexedDatasetBuilder:
    def __init__(self, path):
        self.path = path
        self.out_file = open(f"{path}.data", 'wb')
        self.indexs = {}
        self.last_offset = 0

    def add_item(self, item, basename):
        s = pickle.dumps(item)
        num_bytes = self.out_file.write(s)
        self.indexs[basename] = {"offset": self.last_offset, "size": num_bytes}
        self.last_offset += num_bytes

    def finalize(self):
        self.out_file.close()
        np.save(open(f"{self.path}.idx", 'wb'), self.indexs)
if __name__ == "__main__":
import os
import random
from tqdm import tqdm
os.makedirs("./tmp", exist_ok=True)
ds_path = './tmp/indexed_ds_example'
size = 100
items = [np.random.normal(size=[10000, 10]) for i in range(size)]
builder = IndexedDatasetBuilder(ds_path)
for i in tqdm(range(size)):
builder.add_item(items[i], i)
builder.finalize()
ds = IndexedDataset(ds_path)
for i in tqdm(range(10000)):
idx = random.randint(0, size - 1)
assert (ds[idx] == items[idx]).all()
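A hedged sketch of how the indexed dataset can be wrapped for a torch DataLoader (the basenames list and the tensor conversion are placeholders, not part of the NATSpeech code):
import torch
from torch.utils.data import Dataset, DataLoader

class BinarizedDataset(Dataset):
    def __init__(self, path, basenames):
        self.ds = IndexedDataset(path)
        self.basenames = basenames

    def __len__(self):
        return len(self.basenames)

    def __getitem__(self, i):
        item = self.ds[self.basenames[i]]   # numpy array written by IndexedDatasetBuilder
        return torch.from_numpy(item).float()

# reuses the example above: 100 items keyed 0..99 under ./tmp/indexed_ds_example
loader = DataLoader(BinarizedDataset('./tmp/indexed_ds_example', list(range(100))), batch_size=4)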