-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaudio_comp.py
45 lines (30 loc) · 937 Bytes
/
audio_comp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import deepspeech
import time
import wave
import numpy as np
import pyaudio
from audio_gen import user_audio
import glob
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
import deep_speech
import wav2vec
from text_gen import text_gen
import tensorflow as tf
import tensorflow_hub as hub
# Sentence-embedding model used to compare the transcripts of two audio files:
# the Universal Sentence Encoder (version 4) handle on TensorFlow Hub.
# NOTE: hub.load() runs at import time, so merely importing this module loads
# the model (presumably fetching it on first use -- confirm TF Hub caching).
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
embed_model = hub.load(module_url)
def cosine(u, v):
    """Return the cosine similarity of the vectors ``u`` and ``v``.

    Computed as ``dot(u, v) / (norm(u) * norm(v))``.

    Args:
        u: First vector; any array-like accepted by ``np.dot`` and
            ``np.linalg.norm``.
        v: Second vector, with the same length as ``u``.

    Returns:
        The cosine similarity. When either vector has zero norm the
        similarity is mathematically undefined; ``0.0`` is returned in that
        case instead of ``nan``.
    """
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    # A zero vector has no direction: dividing would produce nan together
    # with a NumPy RuntimeWarning, so report "no similarity" explicitly.
    if denom == 0:
        return 0.0
    return np.dot(u, v) / denom
def similarity(audio1, audio2, model='deepspeech'):
    """Score how alike two audio files are by comparing their transcripts.

    Each file is transcribed with ``text_gen(model=model).text_from_file``,
    each transcript is embedded with ``embed_model``, and the two embeddings
    are compared with ``cosine``.

    Args:
        audio1: Path to the first audio file.
        audio2: Path to the second audio file.
        model: Passed through to ``text_gen`` to choose the transcription
            backend; defaults to ``'deepspeech'``.

    Returns:
        The cosine similarity between the two transcript embeddings.
    """
    transcriber = text_gen(model=model)
    # Transcribe both files first, then embed them one at a time, in order.
    transcripts = [transcriber.text_from_file(path) for path in (audio1, audio2)]
    first_vec, second_vec = (embed_model([text])[0] for text in transcripts)
    return cosine(first_vec, second_vec)