diff --git a/configs/config.py b/configs/config.py
index 0609845..701e08f 100644
--- a/configs/config.py
+++ b/configs/config.py
@@ -5,6 +5,10 @@ from multiprocessing import cpu_count
 
 import torch
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 
 def use_fp32_config():
     for config_file in [
@@ -110,11 +114,11 @@ class Config:
                 or "1070" in self.gpu_name
                 or "1080" in self.gpu_name
             ):
-                print("Found GPU", self.gpu_name, ", force to fp32")
+                logger.info("Found GPU %s, force to fp32", self.gpu_name)
                 self.is_half = False
                 use_fp32_config()
             else:
-                print("Found GPU", self.gpu_name)
+                logger.info("Found GPU %s", self.gpu_name)
             self.gpu_mem = int(
                 torch.cuda.get_device_properties(i_device).total_memory
                 / 1024
@@ -128,12 +132,12 @@ class Config:
                 with open("infer/modules/train/preprocess.py", "w") as f:
                     f.write(strr)
         elif self.has_mps():
-            print("No supported Nvidia GPU found")
+            logger.info("No supported Nvidia GPU found")
             self.device = self.instead = "mps"
             self.is_half = False
             use_fp32_config()
         else:
-            print("No supported Nvidia GPU found")
+            logger.info("No supported Nvidia GPU found")
             self.device = self.instead = "cpu"
             self.is_half = False
             use_fp32_config()
@@ -160,7 +164,7 @@ class Config:
             x_center = 30
             x_max = 32
         if self.dml:
-            print("Use DirectML instead")
+            logger.info("Use DirectML instead")
             if (
                 os.path.exists(
                     "runtime\Lib\site-packages\onnxruntime\capi\DirectML.dll"
@@ -188,7 +192,7 @@ class Config:
                 self.is_half = False
         else:
             if self.instead:
-                print(f"Use {self.instead} instead")
+                logger.info(f"Use {self.instead} instead")
             if (
                 os.path.exists(
                     "runtime\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"
diff --git a/gui_v1.py b/gui_v1.py
index 174676a..23d9627 100644
--- a/gui_v1.py
+++ b/gui_v1.py
@@ -1,5 +1,5 @@
 import os
-import pdb
+import logging
 import sys
 
 os.environ["OMP_NUM_THREADS"] = "2"
@@ -11,6 +11,8 @@ now_dir = os.getcwd()
 sys.path.append(now_dir)
 import multiprocessing
 
+logger = logging.getLogger(__name__)
+
 
 class Harvest(multiprocessing.Process):
     def __init__(self, inp_q, opt_q):
@@ -356,7 +358,7 @@ if __name__ == "__main__":
                 )
             if event == "start_vc" and self.flag_vc == False:
                 if self.set_values(values) == True:
-                    print("Use CUDA:" + str(torch.cuda.is_available()))
+                    logger.info("Use CUDA:" + str(torch.cuda.is_available()))
                     self.start_vc()
                     settings = {
                         "pth_path": values["pth_path"],
@@ -545,8 +547,8 @@ if __name__ == "__main__":
             ):
                 while self.flag_vc:
                     time.sleep(self.config.block_time)
-                    print("Audio block passed.")
-            print("ENDing VC")
+                    logger.debug("Audio block passed.")
+            logger.debug("ENDing VC")
 
         def audio_callback(
             self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
@@ -623,7 +625,7 @@ if __name__ == "__main__":
                 sola_offset = sola_offset.item()
             else:
                 sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
-            print("sola_offset =" + str(int(sola_offset)))
+            logger.debug("sola_offset =" + str(int(sola_offset)))
             self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame]
             self.output_wav[: self.crossfade_frame] *= self.fade_in_window
             self.output_wav[: self.crossfade_frame] += self.sola_buffer[:]
@@ -663,7 +665,7 @@ if __name__ == "__main__":
                 outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy()
             total_time = time.perf_counter() - start_time
             self.window["infer_time"].update(int(total_time * 1000))
-            print("Infer time:" + str(total_time))
+            logger.info("Infer time:" + str(total_time))
 
         def get_devices(self, update: bool = True):
             """获取设备列表"""
@@ -716,8 +718,8 @@ if __name__ == "__main__":
             sd.default.device[1] = output_device_indices[
                 output_devices.index(output_device)
             ]
-        print("Input device:" + str(sd.default.device[0]) + ":" + str(input_device))
-        print(
+        logger.info("Input device:" + str(sd.default.device[0]) + ":" + str(input_device))
+        logger.info(
             "Output device:" + str(sd.default.device[1]) + ":" + str(output_device)
         )
diff --git a/i18n/i18n.py b/i18n/i18n.py
index a64ee23..00e91bf 100644
--- a/i18n/i18n.py
+++ b/i18n/i18n.py
@@ -18,11 +18,10 @@ class I18nAuto:
         if not os.path.exists(f"./i18n/locale/{language}.json"):
             language = "en_US"
         self.language = language
-        # print("Use Language:", language)
         self.language_map = load_language_list(language)
 
     def __call__(self, key):
         return self.language_map.get(key, key)
 
-    def print(self):
-        print("Use Language:", self.language)
+    def __repr__(self):
+        return "Use Language: " + self.language
diff --git a/infer-web.py b/infer-web.py
index dc0b5c7..5532a2c 100644
--- a/infer-web.py
+++ b/infer-web.py
@@ -32,6 +32,8 @@ from infer.modules.vc.modules import VC
 
 logging.getLogger("numba").setLevel(logging.WARNING)
 
+logger = logging.getLogger(__name__)
+
 tmp = os.path.join(now_dir, "TEMP")
 shutil.rmtree(tmp, ignore_errors=True)
 shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
@@ -58,7 +60,7 @@ if config.dml == True:
 
     fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
 i18n = I18nAuto()
-i18n.print()
+logger.info(i18n)
 # 判断是否有能用来训练和加速推理的N卡
 ngpu = torch.cuda.device_count()
 gpu_infos = []
@@ -213,7 +215,7 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
         % (trainset_dir, sr, n_p, now_dir, exp_dir)
         + str(config.noparallel)
     )
-    print(cmd)
+    logger.info(cmd)
     p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
     ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
     done = [False]
@@ -232,7 +234,7 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
             break
     with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
         log = f.read()
-    print(log)
+    logger.info(log)
     yield log
 
 
@@ -254,7 +256,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
                 f0method,
             )
         )
-        print(cmd)
+        logger.info(cmd)
         p = Popen(
             cmd, shell=True, cwd=now_dir
         )  # , stdin=PIPE, stdout=PIPE,stderr=PIPE
@@ -281,7 +283,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
                     exp_dir,
                     config.is_half,
                 )
-                print(cmd)
+                logger.info(cmd)
                 p = Popen(
                     cmd, shell=True, cwd=now_dir
                 )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
@@ -304,7 +306,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
                     exp_dir,
                 )
             )
-            print(cmd)
+            logger.info(cmd)
            p = Popen(
                cmd, shell=True, cwd=now_dir
            )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
@@ -320,7 +322,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
             break
     with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
         log = f.read()
-    print(log)
+    logger.info(log)
     yield log
     ####对不同part分别开多进程
     """
@@ -342,7 +344,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
             exp_dir,
             version19,
         )
-        print(cmd)
+        logger.info(cmd)
         p = Popen(
             cmd, shell=True, cwd=now_dir
         )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
@@ -364,7 +366,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
             break
     with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
         log = f.read()
-    print(log)
+    logger.info(log)
     yield log
 
 
@@ -378,12 +380,12 @@ def change_sr2(sr2, if_f0_3, version19):
         "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
     )
     if not if_pretrained_generator_exist:
-        print(
-            "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
-            "not exist, will not use pretrained model",
-        )
+        logger.warning(
+            "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model"
+            % (path_str, f0_str, sr2)
+        )
     if not if_pretrained_discriminator_exist:
-        print(
-            "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
-            "not exist, will not use pretrained model",
-        )
+        logger.warning(
+            "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model"
+            % (path_str, f0_str, sr2)
+        )
@@ -414,12 +416,12 @@ def change_version19(sr2, if_f0_3, version19):
         "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
     )
     if not if_pretrained_generator_exist:
-        print(
-            "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
-            "not exist, will not use pretrained model",
-        )
+        logger.warning(
+            "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model"
+            % (path_str, f0_str, sr2)
+        )
    if not if_pretrained_discriminator_exist:
-        print(
-            "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
-            "not exist, will not use pretrained model",
-        )
+        logger.warning(
+            "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model"
+            % (path_str, f0_str, sr2)
+        )
@@ -443,12 +445,12 @@ def change_f0(if_f0_3, sr2, version19):  # f0method8,pretrained_G14,pretrained_D
         "assets/pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK
     )
     if not if_pretrained_generator_exist:
-        print(
-            "assets/pretrained%s/f0G%s.pth" % (path_str, sr2),
-            "not exist, will not use pretrained model",
-        )
+        logger.warning(
+            "assets/pretrained%s/f0G%s.pth not exist, will not use pretrained model"
+            % (path_str, sr2)
+        )
     if not if_pretrained_discriminator_exist:
-        print(
-            "assets/pretrained%s/f0D%s.pth" % (path_str, sr2),
-            "not exist, will not use pretrained model",
-        )
+        logger.warning(
+            "assets/pretrained%s/f0D%s.pth not exist, will not use pretrained model"
+            % (path_str, sr2)
+        )
@@ -556,14 +558,14 @@ def click_train(
     shuffle(opt)
     with open("%s/filelist.txt" % exp_dir, "w") as f:
         f.write("\n".join(opt))
-    print("Write filelist done")
+    logger.debug("Write filelist done")
     # 生成config#无需生成config
     # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
-    print("Use gpus:", gpus16)
+    logger.info("Use gpus: %s", gpus16)
     if pretrained_G14 == "":
-        print("No pretrained Generator")
+        logger.info("No pretrained Generator")
     if pretrained_D15 == "":
-        print("No pretrained Discriminator")
+        logger.info("No pretrained Discriminator")
     if gpus16:
         cmd = get_quoted_python_cmd() + ' infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % (
             exp_dir1,
@@ -599,7 +601,7 @@ def click_train(
             version19,
         )
     )
-    print(cmd)
+    logger.info(cmd)
     p = Popen(cmd, shell=True, cwd=now_dir)
     p.wait()
     return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
@@ -646,7 +648,7 @@ def train_index(exp_dir1, version19):
         )
     except:
         info = traceback.format_exc()
-        print(info)
+        logger.info(info)
         infos.append(info)
         yield "\n".join(infos)
diff --git a/infer/lib/infer_pack/models.py b/infer/lib/infer_pack/models.py
index 9878048..c16700a 100644
--- a/infer/lib/infer_pack/models.py
+++ b/infer/lib/infer_pack/models.py
@@ -1,7 +1,6 @@
 import math
-import os
-import pdb
-from time import time as ttime
+import logging
+logger = logging.getLogger(__name__)
 
 import numpy as np
 import torch
@@ -616,7 +615,7 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
             inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
-        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+        logger.debug("gin_channels: %s, self.spk_embed_dim: %s", gin_channels, self.spk_embed_dim)
 
     def remove_weight_norm(self):
         self.dec.remove_weight_norm()
@@ -732,7 +731,7 @@ class SynthesizerTrnMs768NSFsid(nn.Module):
             inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
-        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+        logger.debug("gin_channels: %s, self.spk_embed_dim: %s", gin_channels, self.spk_embed_dim)
 
     def remove_weight_norm(self):
         self.dec.remove_weight_norm()
@@ -845,7 +844,7 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
             inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
-        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+        logger.debug("gin_channels: %s, self.spk_embed_dim: %s", gin_channels, self.spk_embed_dim)
 
     def remove_weight_norm(self):
         self.dec.remove_weight_norm()
@@ -951,7 +950,7 @@ class SynthesizerTrnMs768NSFsid_nono(nn.Module):
             inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
-        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+        logger.debug("gin_channels: %s, self.spk_embed_dim: %s", gin_channels, self.spk_embed_dim)
 
     def remove_weight_norm(self):
         self.dec.remove_weight_norm()
diff --git a/infer/lib/infer_pack/models_onnx.py b/infer/lib/infer_pack/models_onnx.py
index 4642a90..dc16e00 100644
--- a/infer/lib/infer_pack/models_onnx.py
+++ b/infer/lib/infer_pack/models_onnx.py
@@ -1,7 +1,6 @@
 import math
-import os
-import pdb
-from time import time as ttime
+import logging
+logger = logging.getLogger(__name__)
 
 import numpy as np
 import torch
@@ -620,7 +619,7 @@ class SynthesizerTrnMsNSFsidM(nn.Module):
         )
         self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
         self.speaker_map = None
-        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+        logger.debug("gin_channels: %s, self.spk_embed_dim: %s", gin_channels, self.spk_embed_dim)
 
     def remove_weight_norm(self):
         self.dec.remove_weight_norm()
diff --git a/infer/lib/infer_pack/onnx_inference.py b/infer/lib/infer_pack/onnx_inference.py
index 0e52a73..5dc9d59 100644
--- a/infer/lib/infer_pack/onnx_inference.py
+++ b/infer/lib/infer_pack/onnx_inference.py
@@ -3,10 +3,13 @@ import numpy as np
 import onnxruntime
 import soundfile
 
+import logging
+logger = logging.getLogger(__name__)
+
 
 class ContentVec:
     def __init__(self, vec_path="pretrained/vec-768-layer-12.onnx", device=None):
-        print("Load model(s) from {}".format(vec_path))
+        logger.info("Load model(s) from {}".format(vec_path))
         if device == "cpu" or device is None:
             providers = ["CPUExecutionProvider"]
         elif device == "cuda":
diff --git a/infer/lib/rmvpe.py b/infer/lib/rmvpe.py
index c3605c4..64df092 100644
--- a/infer/lib/rmvpe.py
+++ b/infer/lib/rmvpe.py
@@ -7,6 +7,10 @@ import torch.nn.functional as F
 from librosa.util import normalize, pad_center, tiny
 from scipy.signal import get_window
 
+import logging
+
+logger = logging.getLogger(__name__)
+
 
 ###stft codes from https://github.com/pseeth/torch-stft/blob/master/torch_stft/util.py
 def window_sumsquare(
@@ -691,4 +695,4 @@ if __name__ == "__main__":
     #     f0 = rmvpe.infer_from_audio(audio, thred=thred)
     #     f0 = rmvpe.infer_from_audio(audio, thred=thred)
     t1 = ttime()
-    print(f0.shape, t1 - t0)
+    logger.info("%s %s", f0.shape, t1 - t0)
diff --git a/infer/lib/train/data_utils.py b/infer/lib/train/data_utils.py
index db4e78f..a29929c 100644
--- a/infer/lib/train/data_utils.py
+++ b/infer/lib/train/data_utils.py
@@ -1,5 +1,7 @@
 import os
 import traceback
+import logging
+logger = logging.getLogger(__name__)
 
 import numpy as np
 import torch
@@ -110,7 +112,7 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
         try:
             spec = torch.load(spec_filename)
         except:
-            print(spec_filename, traceback.format_exc())
+            logger.warning("%s %s", spec_filename, traceback.format_exc())
             spec = spectrogram_torch(
                 audio_norm,
                 self.filter_length,
@@ -302,7 +304,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
         try:
             spec = torch.load(spec_filename)
         except:
-            print(spec_filename, traceback.format_exc())
+            logger.warning("%s %s", spec_filename, traceback.format_exc())
             spec = spectrogram_torch(
                 audio_norm,
                 self.filter_length,
diff --git a/infer/lib/train/mel_processing.py b/infer/lib/train/mel_processing.py
index 073047f..00ad0b9 100644
--- a/infer/lib/train/mel_processing.py
+++ b/infer/lib/train/mel_processing.py
@@ -1,6 +1,8 @@
 import torch
 import torch.utils.data
 from librosa.filters import mel as librosa_mel_fn
+import logging
+logger = logging.getLogger(__name__)
 
 
 MAX_WAV_VALUE = 32768.0
@@ -51,9 +53,9 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
     """
     # Validation
     if torch.min(y) < -1.07:
-        print("spectrogram_torch min value is ", torch.min(y))
+        logger.debug("min value is %s", torch.min(y))
     if torch.max(y) > 1.07:
-        print("spectrogram_torch max value is ", torch.max(y))
+        logger.debug("max value is %s", torch.max(y))
 
     # Window - Cache if needed
     global hann_window
diff --git a/infer/lib/train/utils.py b/infer/lib/train/utils.py
index 67693e9..47634ba 100644
--- a/infer/lib/train/utils.py
+++ b/infer/lib/train/utils.py
@@ -33,7 +33,7 @@ def load_checkpoint_d(checkpoint_path, combd, sbd, optimizer=None, load_opt=1):
         try:
             new_state_dict[k] = saved_state_dict[k]
             if saved_state_dict[k].shape != state_dict[k].shape:
-                print(
+                logger.warning(
                     "shape-%s-mismatch. need: %s, get: %s"
                     % (k, state_dict[k].shape, saved_state_dict[k].shape)
                 )  #
@@ -109,7 +109,7 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
         try:
             new_state_dict[k] = saved_state_dict[k]
             if saved_state_dict[k].shape != state_dict[k].shape:
-                print(
+                logger.warning(
                     "shape-%s-mismatch|need-%s|get-%s"
                     % (k, state_dict[k].shape, saved_state_dict[k].shape)
                 )  #
@@ -207,7 +207,7 @@ def latest_checkpoint_path(dir_path, regex="G_*.pth"):
     f_list = glob.glob(os.path.join(dir_path, regex))
     f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
     x = f_list[-1]
-    print(x)
+    logger.debug(x)
     return x
diff --git a/infer/modules/train/train.py b/infer/modules/train/train.py
index ac52cf6..f033d94 100644
--- a/infer/modules/train/train.py
+++ b/infer/modules/train/train.py
@@ -1,5 +1,7 @@
 import os
 import sys
+import logging
+logger = logging.getLogger(__name__)
 
 now_dir = os.getcwd()
 sys.path.append(os.path.join(now_dir))
@@ -82,7 +84,7 @@ def main():
         n_gpus = 1
     if n_gpus < 1:
         # patch to unblock people without gpus. there is probably a better way.
- print("NO GPU DETECTED: falling back to CPU - this may take a while") + logger.warn("NO GPU DETECTED: falling back to CPU - this may take a while") n_gpus = 1 os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = str(randint(20000, 55555)) @@ -209,7 +211,7 @@ def run(rank, n_gpus, hps): if hps.pretrainG != "": if rank == 0: logger.info("loaded pretrained %s" % (hps.pretrainG)) - print( + logger.info( net_g.module.load_state_dict( torch.load(hps.pretrainG, map_location="cpu")["model"] ) @@ -217,7 +219,7 @@ def run(rank, n_gpus, hps): if hps.pretrainD != "": if rank == 0: logger.info("loaded pretrained %s" % (hps.pretrainD)) - print( + logger.info( net_d.module.load_state_dict( torch.load(hps.pretrainD, map_location="cpu")["model"] ) diff --git a/infer/modules/uvr5/mdxnet.py b/infer/modules/uvr5/mdxnet.py index 4070b74..52c1c7e 100644 --- a/infer/modules/uvr5/mdxnet.py +++ b/infer/modules/uvr5/mdxnet.py @@ -1,4 +1,6 @@ import os +import logging +logger = logging.getLogger(__name__) import librosa import numpy as np @@ -88,7 +90,7 @@ class Predictor: def __init__(self, args): import onnxruntime as ort - print(ort.get_available_providers()) + logger.info(ort.get_available_providers()) self.args = args self.model_ = get_models( device=cpu, dim_f=args.dim_f, dim_t=args.dim_t, n_fft=args.n_fft @@ -101,7 +103,7 @@ class Predictor: "CPUExecutionProvider", ], ) - print("ONNX load done") + logger.info("ONNX load done") def demix(self, mix): samples = mix.shape[-1] diff --git a/infer/modules/uvr5/modules.py b/infer/modules/uvr5/modules.py index 91b1129..9961a31 100644 --- a/infer/modules/uvr5/modules.py +++ b/infer/modules/uvr5/modules.py @@ -1,5 +1,7 @@ import os import traceback +import logging +logger = logging.getLogger(__name__) import ffmpeg import torch @@ -92,5 +94,5 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format traceback.print_exc() if torch.cuda.is_available(): torch.cuda.empty_cache() - print("Executed torch.cuda.empty_cache()") + logger.info("Executed torch.cuda.empty_cache()") yield "\n".join(infos) diff --git a/infer/modules/uvr5/preprocess.py b/infer/modules/uvr5/preprocess.py index 96eecc4..bbb15cb 100644 --- a/infer/modules/uvr5/preprocess.py +++ b/infer/modules/uvr5/preprocess.py @@ -1,4 +1,6 @@ import os +import logging +logger = logging.getLogger(__name__) import librosa import numpy as np @@ -116,7 +118,7 @@ class AudioPre: ) else: wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) - print("%s instruments done" % name) + logger.info("%s instruments done" % name) if format in ["wav", "flac"]: sf.write( os.path.join( @@ -150,7 +152,7 @@ class AudioPre: ) else: wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) - print("%s vocals done" % name) + logger.info("%s vocals done" % name) if format in ["wav", "flac"]: sf.write( os.path.join( @@ -283,7 +285,7 @@ class AudioPreDeEcho: ) else: wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) - print("%s instruments done" % name) + logger.info("%s instruments done" % name) if format in ["wav", "flac"]: sf.write( os.path.join( @@ -317,7 +319,7 @@ class AudioPreDeEcho: ) else: wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) - print("%s vocals done" % name) + logger.info("%s vocals done" % name) if format in ["wav", "flac"]: sf.write( os.path.join( diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 97b2b7c..01d8927 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ 
@@ -1,4 +1,6 @@
 import traceback
+import logging
+logger = logging.getLogger(__name__)
 
 import numpy as np
 import soundfile as sf
@@ -30,14 +32,7 @@ class VC:
         self.config = config
 
     def get_vc(self, sid, *to_return_protect):
-        person = f'{os.getenv("weight_root")}/{sid}'
-        print(f"Loading: {person}")
-
-        self.cpt = torch.load(person, map_location="cpu")
-        self.tgt_sr = self.cpt["config"][-1]
-        self.cpt["config"][-3] = self.cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-        self.if_f0 = self.cpt.get("f0", 1)
-        self.version = self.cpt.get("version", "v1")
+        logger.info("Get sid: " + sid)
 
         to_return_protect0 = {
             "visible": self.if_f0 != 0,
@@ -54,6 +49,57 @@ class VC:
             "__type__": "update",
         }
 
+        if not sid:
+            if self.hubert_model is not None:  # polling may switch sid from a loaded model to none, so check for teardown
+                logger.info("Clean model cache")
+                del self.net_g, self.n_spk, self.vc, self.hubert_model, self.tgt_sr  # ,cpt
+                self.hubert_model = self.net_g = self.n_spk = self.vc = self.tgt_sr = None
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                ### without the rebuild-and-delete below, the cache is not fully released
+                self.if_f0 = self.cpt.get("f0", 1)
+                self.version = self.cpt.get("version", "v1")
+                if self.version == "v1":
+                    if self.if_f0 == 1:
+                        self.net_g = SynthesizerTrnMs256NSFsid(
+                            *self.cpt["config"], is_half=self.config.is_half
+                        )
+                    else:
+                        self.net_g = SynthesizerTrnMs256NSFsid_nono(*self.cpt["config"])
+                elif self.version == "v2":
+                    if self.if_f0 == 1:
+                        self.net_g = SynthesizerTrnMs768NSFsid(
+                            *self.cpt["config"], is_half=self.config.is_half
+                        )
+                    else:
+                        self.net_g = SynthesizerTrnMs768NSFsid_nono(*self.cpt["config"])
+                del self.net_g, self.cpt
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+            return (
+                {"visible": False, "__type__": "update"},
+                {
+                    "visible": True,
+                    "value": to_return_protect0,
+                    "__type__": "update",
+                },
+                {
+                    "visible": True,
+                    "value": to_return_protect1,
+                    "__type__": "update",
+                },
+                "",
+                "",
+            )
+        person = f'{os.getenv("weight_root")}/{sid}'
+        logger.info(f"Loading: {person}")
+
+        self.cpt = torch.load(person, map_location="cpu")
+        self.tgt_sr = self.cpt["config"][-1]
+        self.cpt["config"][-3] = self.cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+        self.if_f0 = self.cpt.get("f0", 1)
+        self.version = self.cpt.get("version", "v1")
+
         synthesizer_class = {
             ("v1", 1): SynthesizerTrnMs256NSFsid,
             ("v1", 0): SynthesizerTrnMs256NSFsid_nono,
@@ -77,7 +123,7 @@ class VC:
         self.pipeline = Pipeline(self.tgt_sr, self.config)
         n_spk = self.cpt["config"][-3]
         index = {"value": get_index_path_from_model(sid), "__type__": "update"}
-        print("Select index:", index["value"])
+        logger.info("Select index: " + index["value"])
 
         return (
             (
@@ -165,7 +211,7 @@ class VC:
             )
         except:
             info = traceback.format_exc()
-            print(info)
+            logger.warning(info)
             return info, (None, None)
 
     def vc_multi(
diff --git a/infer/modules/vc/pipeline.py b/infer/modules/vc/pipeline.py
index 32d973b..8a07d8c 100644
--- a/infer/modules/vc/pipeline.py
+++ b/infer/modules/vc/pipeline.py
@@ -1,6 +1,9 @@
 import os
 import sys
 import traceback
+import logging
+logger = logging.getLogger(__name__)
+
 from functools import lru_cache
 from time import time as ttime
 
@@ -139,7 +142,7 @@ class Pipeline(object):
             if not hasattr(self, "model_rmvpe"):
                 from infer.lib.rmvpe import RMVPE
 
-                print(
+                logger.info(
                     "Loading rmvpe model,%s" % "%s/rmvpe.pt" % os.environ["rmvpe_root"]
                 )
                 self.model_rmvpe = RMVPE(
@@ -152,7 +155,7 @@ class Pipeline(object):
             if "privateuseone" in str(self.device):  # clean ortruntime memory
                 del self.model_rmvpe.model
                 del self.model_rmvpe
-                print("Cleaning ortruntime memory")
memory") + logger.info("Cleaning ortruntime memory") f0 *= pow(2, f0_up_key / 12) # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) diff --git a/tools/app.py b/tools/app.py index b4328e5..583b6ef 100644 --- a/tools/app.py +++ b/tools/app.py @@ -13,9 +13,10 @@ logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("markdown_it").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) +logger = logging.getLogger(__name__) i18n = I18nAuto() -i18n.print() +logger.info(i18n) load_dotenv() config = Config() diff --git a/tools/calc_rvc_model_similarity.py b/tools/calc_rvc_model_similarity.py index 3da27ca..807dc19 100644 --- a/tools/calc_rvc_model_similarity.py +++ b/tools/calc_rvc_model_similarity.py @@ -1,7 +1,8 @@ # This code references https://huggingface.co/JosephusCheung/ASimilarityCalculatior/blob/main/qwerty.py # Fill in the path of the model to be queried and the root directory of the reference models, and this script will return the similarity between the model to be queried and all reference models. import os -import sys +import logging +logger = logging.getLogger(__name__) import torch import torch.nn as nn @@ -55,7 +56,7 @@ def main(path, root): torch.manual_seed(114514) model_a = torch.load(path, map_location="cpu")["weight"] - print("Query:\t\t%s\t%s" % (path, model_hash(path))) + logger.info("Query:\t\t%s\t%s" % (path, model_hash(path))) map_attn_a = {} map_rand_input = {} @@ -82,7 +83,7 @@ def main(path, root): sim = torch.mean(torch.cosine_similarity(attn_a, attn_b)) sims.append(sim) - print( + logger.info( "Reference:\t%s\t%s\t%s" % (path, model_hash(path), f"{torch.mean(torch.stack(sims)) * 1e2:.2f}%") ) diff --git a/tools/infer/infer-pm-index256.py b/tools/infer/infer-pm-index256.py index 6eb6d62..5d1ba47 100644 --- a/tools/infer/infer-pm-index256.py +++ b/tools/infer/infer-pm-index256.py @@ -3,7 +3,8 @@ 对源特征进行检索 """ import os -import pdb +import logging +logger = logging.getLogger(__name__) import parselmouth import torch @@ -15,7 +16,6 @@ from time import time as ttime # import pyworld import librosa import numpy as np -import scipy.signal as signal import soundfile as sf import torch.nn.functional as F from fairseq import checkpoint_utils @@ -34,7 +34,7 @@ from scipy.io import wavfile device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model_path = r"E:\codes\py39\vits_vc_gpu_train\assets\hubert\hubert_base.pt" # -print("Load model(s) from {}".format(model_path)) +logger.info("Load model(s) from {}".format(model_path)) models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( [model_path], suffix="", @@ -77,7 +77,7 @@ net_g = SynthesizerTrn256( # weights=torch.load("infer/ft-mi-freeze-vocoder_true_1k.pt") # weights=torch.load("infer/ft-mi-sim1k.pt") weights = torch.load("infer/ft-mi-no_opt-no_dropout.pt") -print(net_g.load_state_dict(weights, strict=True)) +logger.debug(net_g.load_state_dict(weights, strict=True)) net_g.eval().to(device) net_g.half() @@ -198,4 +198,4 @@ for idx, name in enumerate( wavfile.write("ft-mi-no_opt-no_dropout-%s.wav" % name, 40000, audio) ## -print(ta0, ta1, ta2) # +logger.debug(ta0, ta1, ta2) # diff --git a/tools/infer/train-index-v2.py b/tools/infer/train-index-v2.py index d90a1a4..70ba553 100644 --- a/tools/infer/train-index-v2.py +++ b/tools/infer/train-index-v2.py @@ -3,6 +3,9 @@ """ import os import traceback +import logging +logger = logging.getLogger(__name__) + from 
 from multiprocessing import cpu_count
 
 import faiss
@@ -23,11 +26,11 @@
 big_npy = np.concatenate(npys, 0)
 big_npy_idx = np.arange(big_npy.shape[0])
 np.random.shuffle(big_npy_idx)
 big_npy = big_npy[big_npy_idx]
-print(big_npy.shape)  # (6196072, 192)#fp32#4.43G
+logger.debug(big_npy.shape)  # (6196072, 192)#fp32#4.43G
 if big_npy.shape[0] > 2e5:
     # if(1):
     info = "Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]
-    print(info)
+    logger.info(info)
     try:
         big_npy = (
             MiniBatchKMeans(
@@ -42,7 +45,7 @@ if big_npy.shape[0] > 2e5:
         )
     except:
         info = traceback.format_exc()
-        print(info)
+        logger.warning(info)
 
 np.save("tools/infer/big_src_feature_mi.npy", big_npy)
 
@@ -50,14 +53,14 @@ np.save("tools/infer/big_src_feature_mi.npy", big_npy)
 # big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
 n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
 index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf)  # mi
-print("Training...")
+logger.info("Training...")
 index_ivf = faiss.extract_index_ivf(index)  #
 index_ivf.nprobe = 1
 index.train(big_npy)
 faiss.write_index(
     index, "tools/infer/trained_IVF%s_Flat_baseline_src_feat_v2.index" % (n_ivf)
 )
-print("Adding...")
+logger.info("Adding...")
 batch_size_add = 8192
 for i in range(0, big_npy.shape[0], batch_size_add):
     index.add(big_npy[i : i + batch_size_add])
diff --git a/tools/infer/train-index.py b/tools/infer/train-index.py
index b2f7997..c0f2f0e 100644
--- a/tools/infer/train-index.py
+++ b/tools/infer/train-index.py
@@ -2,6 +2,8 @@
 格式:直接cid为自带的index位;aid放不下了,通过字典来查,反正就5w个
 """
 import os
+import logging
+logger = logging.getLogger(__name__)
 
 import faiss
 import numpy as np
@@ -13,19 +15,19 @@ for name in sorted(list(os.listdir(inp_root))):
     phone = np.load("%s/%s" % (inp_root, name))
     npys.append(phone)
 big_npy = np.concatenate(npys, 0)
-print(big_npy.shape)  # (6196072, 192)#fp32#4.43G
+logger.debug(big_npy.shape)  # (6196072, 192)#fp32#4.43G
 np.save("infer/big_src_feature_mi.npy", big_npy)
 
 ##################train+add
 # big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
-print(big_npy.shape)
+logger.debug(big_npy.shape)
 index = faiss.index_factory(256, "IVF512,Flat")  # mi
-print("Training...")
+logger.info("Training...")
 index_ivf = faiss.extract_index_ivf(index)  #
 index_ivf.nprobe = 9
 index.train(big_npy)
 faiss.write_index(index, "infer/trained_IVF512_Flat_mi_baseline_src_feat.index")
-print("Adding...")
+logger.info("Adding...")
 index.add(big_npy)
 faiss.write_index(index, "infer/added_IVF512_Flat_mi_baseline_src_feat.index")
 """
diff --git a/tools/rvc_for_realtime.py b/tools/rvc_for_realtime.py
index 350a128..1b693b9 100644
--- a/tools/rvc_for_realtime.py
+++ b/tools/rvc_for_realtime.py
@@ -1,6 +1,9 @@
 import os
 import sys
 import traceback
+import logging
+logger = logging.getLogger(__name__)
+
 from time import time as ttime
 
 import fairseq
@@ -67,7 +70,7 @@ class RVC:
             if index_rate != 0:
                 self.index = faiss.read_index(index_path)
                 self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
-                print("Index search enabled")
+                logger.info("Index search enabled")
             self.index_path = index_path
             self.index_rate = index_rate
             models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
@@ -102,7 +105,7 @@ class RVC:
             else:
                 self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
             del self.net_g.enc_q
-            print(self.net_g.load_state_dict(cpt["weight"], strict=False))
+            logger.debug(self.net_g.load_state_dict(cpt["weight"], strict=False))
             self.net_g.eval().to(device)
             # print(2333333333,device,config.device,self.device)#net_g是device,hubert是config.device
             if config.is_half:
@@ -111,7 +114,7 @@ class RVC:
                 self.net_g = self.net_g.float()
             self.is_half = config.is_half
         except:
-            print(traceback.format_exc())
+            logger.warning(traceback.format_exc())
 
     def change_key(self, new_key):
         self.f0_up_key = new_key
@@ -120,7 +123,7 @@ class RVC:
         if new_index_rate != 0 and self.index_rate == 0:
             self.index = faiss.read_index(self.index_path)
             self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
-            print("Index search enabled")
+            logger.info("Index search enabled")
         self.index_rate = new_index_rate
 
     def get_f0_post(self, f0):
@@ -237,7 +240,7 @@ class RVC:
         if hasattr(self, "model_rmvpe") == False:
             from infer.lib.rmvpe import RMVPE
 
-            print("Loading rmvpe model")
+            logger.info("Loading rmvpe model")
             self.model_rmvpe = RMVPE(
                 # "rmvpe.pt", is_half=self.is_half if self.device.type!="privateuseone" else False, device=self.device if self.device.type!="privateuseone"else "cpu"####dml时强制对rmvpe用cpu跑
                 # "rmvpe.pt", is_half=False, device=self.device####dml配置
@@ -295,10 +298,10 @@ class RVC:
                         + (1 - self.index_rate) * feats[0][-leng_replace_head:]
                     )
                 else:
-                    print("Index search FAILED or disabled")
+                    logger.warning("Index search FAILED or disabled")
             except:
                 traceback.print_exc()
-                print("Index search FAILED")
+                logger.warning("Index search FAILED")
         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         t3 = ttime()
         if self.if_f0 == 1:
@@ -338,5 +341,5 @@ class RVC:
             .float()
         )
         t5 = ttime()
-        print("Spent time: fea =", t2 - t1, ", index =", t3 - t2, ", f0 =", t4 - t3, ", model =", t5 - t4)
+        logger.info("Spent time: fea = %s, index = %s, f0 = %s, model = %s", t2 - t1, t3 - t2, t4 - t3, t5 - t4)
         return infered_audio
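
Note: this patch only creates module-level loggers via logging.getLogger(__name__); it installs no handler, so the new INFO/DEBUG records are discarded unless an entry point (e.g. infer-web.py or gui_v1.py) configures the root logger; without one, Python's last-resort handler only prints WARNING and above to stderr. A minimal sketch of such a bootstrap follows; the level and format chosen here are illustrative assumptions, not part of this patch:

    import logging

    # Configure the root logger once at startup; module loggers created with
    # logging.getLogger(__name__) propagate their records up to it.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(name)s | %(levelname)s | %(message)s",
    )

    logger = logging.getLogger(__name__)
    # %-style arguments are formatted lazily, only when the record is emitted:
    logger.info("Found GPU %s, force to fp32", "GTX 1060")

Passing the values as arguments (rather than print-style extra positional arguments) is what the fixed calls above rely on: logging formats the first string with the remaining arguments, so print("x", a, "y", b) has no one-to-one logger equivalent and must be rewritten as a single format string.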