Skip to content

Commit f25ba32

Browse files
authored
Derive compatible compute type in the case of CPU use (#383)
* Fix default compute type for CPU
* Update main.py
* Rerun code formatter
1 parent 319d8e8 commit f25ba32

File tree

17 files changed

+19
-16
lines changed

17 files changed

+19
-16
lines changed

models/tta/picoaudio/picoaudio/audioldm/clap/open_clip/model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" CLAP Model
1+
"""CLAP Model
22
33
Adapted from CLIP: https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI.
44
Adapted to the Audio Task.

models/tta/picoaudio/picoaudio/audioldm/clap/open_clip/openai.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" OpenAI pretrained model functions
1+
"""OpenAI pretrained model functions
22
33
Adapted from https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI.
44
"""

models/tta/picoaudio/picoaudio/audioldm/clap/open_clip/timm_model.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" timm model adapter
1+
"""timm model adapter
22
33
Wraps timm (https://github.com/rwightman/pytorch-image-models) models for use as a vision tower in CLIP model.
44
"""

models/tta/picoaudio/picoaudio/audioldm/clap/open_clip/tokenizer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" CLIP tokenizer
1+
"""CLIP tokenizer
22
33
Copied from https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI.
44
"""

models/tts/debatts/utils/g2p/english.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" from https://github.com/keithito/tacotron """
1+
"""from https://github.com/keithito/tacotron"""
22

33
import re
44
from unidecode import unidecode

models/tts/valle_v2/modeling_llama.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2424
# See the License for the specific language governing permissions and
2525
# limitations under the License.
26-
""" PyTorch LLaMA model."""
26+
"""PyTorch LLaMA model."""
2727
import math
2828
from typing import List, Optional, Tuple, Union
2929

preprocessors/Emilia/main.py

+3
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,9 @@ def main_process(audio_path, save_path=None, audio_name=None):
509509
logger.info("Using CPU")
510510
device_name = "cpu"
511511
device = torch.device(device_name)
512+
# whisperX expects compute type: int8
513+
logger.info("Overriding the compute type to int8")
514+
args.compute_type = "int8"
512515

513516
check_env(logger)
514517

processors/audio_features_extractor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
1. Acoustic features such as Mel Spectrogram, F0, Energy, etc.
1212
2. Content features such as phonetic posteriorgrams (PPG) and bottleneck features (BNF) from pretrained models
1313
14-
Note:
14+
Note:
1515
All the features extraction are designed to utilize GPU to the maximum extent, which can ease the on-the-fly extraction for large-scale dataset.
1616
1717
"""

processors/descriptive_text_features_extractor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
The common descriptive text features include:
1212
1. Global semantic guidance features that extracted some pretrained text models like T5. It can be adopted to TTA, TTM, etc.
1313
14-
Note:
14+
Note:
1515
All the features extraction are designed to utilize GPU to the maximum extent, which can ease the on-the-fly extraction for large-scale dataset.
1616
1717
"""

processors/text_features_extractor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
The common text features include:
1212
1. phone features that are used for TTS, SVS, etc.
1313
14-
Note:
14+
Note:
1515
All the features extraction are designed to utilize GPU to the maximum extent, which can ease the on-the-fly extraction for large-scale dataset.
1616
1717
"""

text/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://github.com/keithito/tacotron """
6+
"""This code is modified from https://github.com/keithito/tacotron"""
77
import re
88
from text import cleaners
99
from text.symbols import symbols

text/cleaners.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://github.com/keithito/tacotron """
6+
"""This code is modified from https://github.com/keithito/tacotron"""
77

88
"""
99
Cleaners are transformations that run over the input text at both training and eval time.

text/cmudict.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://github.com/keithito/tacotron """
6+
"""This code is modified from https://github.com/keithito/tacotron"""
77

88
import re
99

text/numbers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://github.com/keithito/tacotron """
6+
"""This code is modified from https://github.com/keithito/tacotron"""
77

88
import inflect
99
import re

text/symbols.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://github.com/keithito/tacotron """
6+
"""This code is modified from https://github.com/keithito/tacotron"""
77

88
"""
99
Defines the set of symbols used in text input to the model.

utils/cut_by_vad.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://github.com/facebookresearch/libri-light/blob/main/data_preparation/cut_by_vad.py"""
6+
"""This code is modified from https://github.com/facebookresearch/libri-light/blob/main/data_preparation/cut_by_vad.py"""
77
import pathlib
88
import soundfile as sf
99
import numpy as np

utils/mfa_prepare.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This source code is licensed under the MIT license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
""" This code is modified from https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/performance.html"""
6+
"""This code is modified from https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/performance.html"""
77

88
import os
99
import subprocess

0 commit comments

Comments
 (0)