Skip to content

Commit 7bab36c

Browse files
committed
feat: training on colab
0 parents  commit 7bab36c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

78 files changed

+13404
-0
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.json filter=lfs diff=lfs merge=lfs -text

.gitignore

+192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
.vscode/
2+
.venv/
3+
.hermit/
4+
runs/
5+
outputs/
6+
wandb/
7+
*.parquet
8+
9+
artifacts/
10+
11+
output_audio_processor/
12+
output_tokenizer/
13+
14+
*.csv
15+
*.json
16+
epd_eval/
17+
.git/
18+
env_vars.sh
19+
sync_watch
20+
clearml.conf
21+
aml.md
22+
.DS_Store
23+
*.safetensors
24+
*.pt
25+
26+
# Byte-compiled / optimized / DLL files
27+
__pycache__/
28+
*.py[cod]
29+
*$py.class
30+
31+
# C extensions
32+
*.so
33+
34+
# Distribution / packaging
35+
.Python
36+
build/
37+
develop-eggs/
38+
dist/
39+
downloads/
40+
eggs/
41+
.eggs/
42+
lib/
43+
lib64/
44+
parts/
45+
sdist/
46+
var/
47+
wheels/
48+
share/python-wheels/
49+
*.egg-info/
50+
.installed.cfg
51+
*.egg
52+
MANIFEST
53+
54+
# PyInstaller
55+
# Usually these files are written by a python script from a template
56+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
57+
*.manifest
58+
*.spec
59+
60+
# Installer logs
61+
pip-log.txt
62+
pip-delete-this-directory.txt
63+
64+
# Unit test / coverage reports
65+
htmlcov/
66+
.tox/
67+
.nox/
68+
.coverage
69+
.coverage.*
70+
.cache
71+
nosetests.xml
72+
coverage.xml
73+
*.cover
74+
*.py,cover
75+
.hypothesis/
76+
.pytest_cache/
77+
cover/
78+
79+
# Translations
80+
*.mo
81+
*.pot
82+
83+
# Django stuff:
84+
*.log
85+
local_settings.py
86+
db.sqlite3
87+
db.sqlite3-journal
88+
89+
# Flask stuff:
90+
instance/
91+
.webassets-cache
92+
93+
# Scrapy stuff:
94+
.scrapy
95+
96+
# Sphinx documentation
97+
docs/_build/
98+
99+
# PyBuilder
100+
.pybuilder/
101+
target/
102+
103+
# Jupyter Notebook
104+
.ipynb_checkpoints
105+
106+
# IPython
107+
profile_default/
108+
ipython_config.py
109+
110+
# pyenv
111+
# For a library or package, you might want to ignore these files since the code is
112+
# intended to run in multiple environments; otherwise, check them in:
113+
# .python-version
114+
115+
# pipenv
116+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
117+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
118+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
119+
# install all needed dependencies.
120+
#Pipfile.lock
121+
122+
# poetry
123+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
124+
# This is especially recommended for binary packages to ensure reproducibility, and is more
125+
# commonly ignored for libraries.
126+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
127+
#poetry.lock
128+
129+
# pdm
130+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
131+
#pdm.lock
132+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
133+
# in version control.
134+
# https://pdm.fming.dev/#use-with-ide
135+
.pdm.toml
136+
137+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
138+
__pypackages__/
139+
140+
# Celery stuff
141+
celerybeat-schedule
142+
celerybeat.pid
143+
144+
# SageMath parsed files
145+
*.sage.py
146+
147+
# Environments
148+
.env
149+
.venv
150+
env/
151+
venv/
152+
ENV/
153+
env.bak/
154+
venv.bak/
155+
156+
# Spyder project settings
157+
.spyderproject
158+
.spyproject
159+
160+
# Rope project settings
161+
.ropeproject
162+
163+
# mkdocs documentation
164+
/site
165+
166+
# mypy
167+
.mypy_cache/
168+
.dmypy.json
169+
dmypy.json
170+
171+
# Pyre type checker
172+
.pyre/
173+
174+
# pytype static type analyzer
175+
.pytype/
176+
177+
# Cython debug symbols
178+
cython_debug/
179+
180+
# PyCharm
181+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
182+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
183+
# and can be added to the global gitignore or merged into this file. For a more nuclear
184+
# option (not recommended), the entry below is enabled here and ignores the entire .idea folder.
185+
.idea/
186+
.vscode/
187+
188+
.neptune/
189+
mds_output/
190+
mlruns/
191+
output/
192+

Justfile

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Settings exported as environment variables to the recipes below.
# WANDB_* are presumably read by the Weights & Biases integration during
# training — TODO confirm against the training code.
export WANDB_PROJECT:="ultravox"
2+
export WANDB_LOG_MODEL:="checkpoint"
3+
# Directory that the format, check and test recipes operate on.
export PROJECT_DIR:="ultravox"
4+
# Cluster and instance type handed to `mcli interactive` by the mcloud recipe.
export MCLOUD_CLUSTER:="r7z22p1"
5+
export MCLOUD_INSTANCE:="oci.bm.gpu.b4.8"
6+
7+
# First recipe in the file, so it is what a bare `just` runs: format, check
# and test are executed as its dependencies, in that order.
default: format check test
8+
9+
# Install Poetry pinned to 1.7.1 via pip, then run `poetry install` for the project.
install:
10+
pip install poetry==1.7.1
11+
poetry install
12+
13+
# Rewrite the sources under ${PROJECT_DIR} in place, in this order:
#   1. autoflake removes all unused imports recursively, excluding third_party;
#   2. isort sorts imports, forcing one import per line;
#   3. black reformats the code.
format:
14+
poetry run autoflake ${PROJECT_DIR} --remove-all-unused-imports --quiet --in-place -r --exclude third_party
15+
poetry run isort ${PROJECT_DIR} --force-single-line-imports
16+
poetry run black ${PROJECT_DIR}
17+
18+
# Verification-only counterpart of `format`: black, isort and autoflake are run
# with --check (same import options as `format`), followed by mypy on ${PROJECT_DIR}.
check:
19+
poetry run black ${PROJECT_DIR} --check
20+
poetry run isort ${PROJECT_DIR} --check --force-single-line-imports
21+
poetry run autoflake ${PROJECT_DIR} --check --quiet --remove-all-unused-imports -r --exclude third_party
22+
poetry run mypy ${PROJECT_DIR}
23+
24+
# Run pytest from inside ${PROJECT_DIR}, ignoring third_party.
# ARGS is variadic and defaults to "."; it is forwarded to pytest verbatim.
test *ARGS=".":
25+
cd ${PROJECT_DIR} && poetry run pytest --ignore third_party {{ARGS}}
26+
27+
# Verbose variant of `test`: runs pytest from inside ${PROJECT_DIR} with -vv and
# INFO-level live log output, ignoring third_party.
# ARGS is variadic and defaults to "."; it is forwarded to pytest verbatim.
# ARGS is interpolated exactly once — repeating it at the end of the command
# would hand every user-supplied argument to pytest twice.
test-verbose *ARGS=".":
28+
cd ${PROJECT_DIR} && poetry run pytest --ignore third_party {{ARGS}} -vv --log-cli-level=INFO
29+
30+
# Run `python` through Poetry, forwarding FLAGS verbatim.
# The leading @ on the recipe name keeps just from echoing the command line.
@python *FLAGS:
31+
poetry run python {{FLAGS}}
32+
33+
# Run the ultravox.training.train module through Poetry, forwarding FLAGS verbatim.
train *FLAGS:
34+
poetry run python -m ultravox.training.train {{FLAGS}}
35+
36+
# Shortcut for the `train` recipe with the asr_tinyllama.yaml config.
# Re-invokes `just`; any extra FLAGS are appended after --config_path.
train_asr *FLAGS:
37+
just train --config_path ultravox/training/configs/asr_tinyllama.yaml {{FLAGS}}
38+
39+
# Run the ultravox.tools.data_tool module through Poetry, forwarding FLAGS verbatim.
browse *FLAGS:
40+
poetry run python -m ultravox.tools.data_tool {{FLAGS}}
41+
42+
# Run the ultravox.tools.infer_tool module through Poetry, forwarding FLAGS verbatim.
infer *FLAGS:
43+
poetry run python -m ultravox.tools.infer_tool {{FLAGS}}
44+
45+
# Run the ultravox.tools.eval_tool module through Poetry, forwarding FLAGS verbatim.
eval *FLAGS:
46+
poetry run python -m ultravox.tools.eval_tool {{FLAGS}}
47+
48+
# Run the ds_tool module with `tts` as its first argument, followed by FLAGS.
# Equivalent to `just ds_tool tts ...`.
tts *FLAGS:
49+
poetry run python -m ultravox.tools.ds_tool.ds_tool tts {{FLAGS}}
50+
51+
# Run the ultravox.tools.ds_tool.ds_tool module through Poetry, forwarding FLAGS verbatim.
ds_tool *FLAGS:
52+
poetry run python -m ultravox.tools.ds_tool.ds_tool {{FLAGS}}
53+
54+
# Run the ultravox.tools.mds_tool module through Poetry, forwarding FLAGS verbatim.
mds *FLAGS:
55+
poetry run python -m ultravox.tools.mds_tool {{FLAGS}}
56+
57+
# Run the ultravox.tools.gradio_demo module through Poetry, forwarding FLAGS verbatim.
gradio *FLAGS:
58+
poetry run python -m ultravox.tools.gradio_demo {{FLAGS}}
59+
60+
# Invoke `mcli run` with mcloud.yaml as the -f file and --follow, appending FLAGS.
# Presumably submits a run to the MosaicML cloud and follows it — TODO confirm.
run *FLAGS:
61+
poetry run mcli run -f mcloud.yaml --follow {{FLAGS}}
62+
63+
# Start `mcli interactive` on ${MCLOUD_CLUSTER} / ${MCLOUD_INSTANCE}, named after
# the current user (`whoami`). The contents of setup.sh are substituted by the
# shell and passed as the `bash -c` payload of --command. FLAGS are placed
# before the fixed options.
# NOTE(review): setup.sh is spliced between escaped double quotes, so a double
# quote inside setup.sh would presumably terminate the payload early — confirm.
mcloud *FLAGS:
64+
poetry run mcli interactive {{FLAGS}} --cluster ${MCLOUD_CLUSTER} --instance ${MCLOUD_INSTANCE} --name `whoami` --command "bash -c \"$(cat setup.sh)\""

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 Fixie.ai
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

0 commit comments

Comments
 (0)