import logging
import datetime
import argparse
-import soundfile
-import librosa
-import numpy as np
-from scipy import signal
class Logger:
- """A logging object"""
+ """A file logger"""
def __init__(self, file):
self.logger = logging.getLogger('myapp')
raw_source_formats = ['webm',]
source_formats = ['webm', 'mp4']
dest_formats = {
- 'mp3' : {'in': '', 'out': '-vn -acodec libmp3lame -aq 6'},
+ # 'mp3' : {'in': '', 'out': '-vn -acodec libmp3lame -aq 6'},
'mp4' : {'in': '', 'out': '-c:v libx264-c:a aac -b:a 96k'},
'jpg' : {'in': '-ss 0:0:10', 'out': '-frames:v 1 -y'}
}
nvidia_formats = {'mp4': {'in': '', 'out': '-c:v h264_nvenc -maxrate 1100k -c:a aac -b:a 128k'}}
vaapi_formats = {'mp4': {'in': '-hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi',
'out': '-c:v h264_vaapi -c:a aac -b:a 96k'}}
+ transcription_format = "vtt"
date_limit = datetime.datetime(year=2024, month=4, day=19)
tmp_dir = "/tmp/"
self.verbose_mode = args.verbose
self.remux_only_mode = args.remux_only
self.date_filter = args.date_filter
+ self.transcribe_mode = args.transcribe
if args.input_formats:
self.input_formats = args.input_formats
return extension in exts
def get_offset(self, within_file, find_file, window=10):
+ import librosa
+ import numpy as np
+ from scipy import signal
y_within, sr_within = librosa.load(within_file, sr=None, duration=60.0)
y_find, _ = librosa.load(find_file, sr=sr_within, duration=60.0)
c = signal.correlate(y_within, y_find[:sr_within*window], mode='valid', method='fft')
self.touch(log)
+ def transcribe(self, file):
+ import whisper
+ filename, ext = os.path.splitext(file)
+ output_dir = os.path.dirname(file)
+ model = whisper.load_model("turbo")
+ self.logger.logger.info("Transcription started...")
+ result = model.transcribe(file, language="fr", verbose=self.verbose_mode)
+ output_writer = whisper.utils.get_writer(self.transcription_format, output_dir)
+ output_writer(result, filename)
+ self.logger.logger.info("Transcription written...")
+
def is_processed(self, source_files):
processed = False
for file in source_files:
if not file in offsets:
offset = offsets[1]['offsets'][file]
self.transcode(file, offset=offset)
+ if ext == "webm" and self.transcribe_mode:
+ self.transcribe(file)
def main():
parser.add_argument('-r', '--remux_only', help='remux only mode', action="store_true")
parser.add_argument('-i','--input_formats', nargs='+', help='Required input formats')
parser.add_argument('-o','--output_formats', nargs='+', help='Required output formats')
+ parser.add_argument('-tr', '--transcribe', help='transcribe audio to vtt', action="store_true")
args = parser.parse_args()
t = TeleCasterMastering(args)