From 5bd13deb779bad8b4436488243e3a4bf83da4c88 Mon Sep 17 00:00:00 2001 From: yomguy Date: Tue, 2 Mar 2010 15:14:53 +0000 Subject: [PATCH] - fix waveform scaling - fix spectral centroid process (need a few more optimizations) - hurray \o/ ! --- grapher/core.py | 121 +++++++++++++++----------------- tests/api/examples.py | 12 ++-- tests/api/test_pipe_waveform.py | 34 ++++----- 3 files changed, 72 insertions(+), 95 deletions(-) diff --git a/grapher/core.py b/grapher/core.py index 44b4c3b..cedc2a3 100644 --- a/grapher/core.py +++ b/grapher/core.py @@ -27,6 +27,7 @@ import optparse, math, sys import ImageFilter, ImageChops, Image, ImageDraw, ImageColor import numpy import scikits.audiolab as audiolab +from timeside.core import FixedSizeInputAdapter import Queue @@ -49,11 +50,11 @@ color_schemes = { } -class AudioProcessor(object): - def __init__(self, buffer_size, fft_size, channels, nframes, samplerate, window_function=numpy.ones): - self.buffer_size = buffer_size +class SpectralCentroid(object): + + def __init__(self, fft_size, nframes, samplerate, window_function=numpy.ones): self.fft_size = fft_size - self.channels = channels + self.buffer_size = self.fft_size self.window = window_function(self.fft_size) self.spectrum_range = None self.lower = 100 @@ -64,8 +65,9 @@ class AudioProcessor(object): self.q = Queue.Queue() self.nframes = nframes self.samplerate = samplerate + self.spectrum_adapter = FixedSizeInputAdapter(self.fft_size, 1, pad=True) - def read(self, samples, start, size, resize_if_less=False): + def trim(self, samples, start, size, resize_if_less=False): """ read size samples starting at start, if resize_if_less is True and less than size samples are read, resize the array to size and fill with zeros """ @@ -103,12 +105,12 @@ class AudioProcessor(object): return samples - def spectral_centroid(self, samples, cursor, spec_range=120.0): - """ starting at seek_point read fft_size samples, and calculate the spectral centroid """ + def process(self, frames, eod, spec_range=120.0): - #print 'Buffer size = ', len(samples) - #samples = self.read(samples, cursor - self.fft_size/2, self.fft_size, True) - #print 'FFT Buffer size = ', len(samples) + for buffer, end in self.spectrum_adapter.process(frames, True): + samples = buffer[:,0].copy() + if end: + break samples *= self.window fft = numpy.fft.fft(samples) spectrum = numpy.abs(fft[:fft.shape[0] / 2 + 1]) / float(self.fft_size) # normalized abs(FFT) between 0 and 1 @@ -116,40 +118,21 @@ class AudioProcessor(object): # scale the db spectrum from [- spec_range db ... 0 db] > [0..1] db_spectrum = ((20*(numpy.log10(spectrum + 1e-30))).clip(-spec_range, 0.0) + spec_range)/spec_range - energy = spectrum.sum() spectral_centroid = 0 if energy > 1e-20: # calculate the spectral centroid - if self.spectrum_range == None: self.spectrum_range = numpy.arange(length) spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.samplerate * 0.5 - # clip > log10 > scale between 0 and 1 spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log) return (spectral_centroid, db_spectrum) - def peaks(self, samples): - """ read all samples between start_seek and end_seek, then find the minimum and maximum peak - in that range. Returns that pair in the order they were found. So if min was found first, - it returns (min, max) else the other way around. """ - - max_index = numpy.argmax(samples) - max_value = samples[max_index] - - min_index = numpy.argmin(samples) - min_value = samples[min_index] - - if min_index < max_index: - return (min_value, max_value) - else: - return (max_value, min_value) - def interpolate_colors(colors, flat=False, num_colors=256): """ given a list of colors, create a larger list of colors interpolating the first one. If flatten is True a list of numers will be returned. If @@ -182,25 +165,15 @@ def interpolate_colors(colors, flat=False, num_colors=256): class WaveformImage(object): - def __init__(self, image_width, image_height, buffer_size, fft_size, nframes, nframes_adapter, - samplerate, channels, bg_color=None, color_scheme=None, filename=None): + def __init__(self, image_width, image_height, nframes, samplerate, buffer_size, bg_color=None, color_scheme=None, filename=None): + self.image_width = image_width self.image_height = image_height - self.fft_size = fft_size - self.buffer_size = buffer_size self.nframes = nframes - self.nframes_adapter = nframes_adapter self.samplerate = samplerate - self.channels = channels + self.fft_size = buffer_size + self.buffer_size = buffer_size self.filename = filename - self.samples_per_pixel = self.nframes / float(self.image_width) - self.nbuffers = self.nframes / self.buffer_size - self.pixel_per_buffer = self.buffer_size / self.samples_per_pixel - #print 'pixels per buffer = ', self.pixel_per_buffer - #print 'nframes (orig) = ', self.nframes - #print 'nframes (adapter) = ', self.nframes_adapter - #print 'frames per pixel = ', self.samples_per_pixel - #print 'nbuffers = ', self.nbuffers self.bg_color = bg_color if not bg_color: @@ -212,14 +185,38 @@ class WaveformImage(object): # this line gets the old "screaming" colors back... # colors = [self.color_from_value(value/29.0) for value in range(0,30)] self.color_lookup = interpolate_colors(colors) + + self.samples_per_pixel = self.nframes / float(self.image_width) + #self.nbuffers = self.nframes / self.buffer_size + #self.pixel_per_buffer = self.buffer_size / self.samples_per_pixel + + self.peaks_adapter = FixedSizeInputAdapter(int(self.samples_per_pixel), 1, pad=False) + self.peaks_adapter_nframes = self.peaks_adapter.nframes(self.nframes) + self.spectral_centroid = SpectralCentroid(self.fft_size, self.nframes, self.samplerate, numpy.hanning) + self.image = Image.new("RGB", (self.image_width, self.image_height), self.bg_color) - self.processor = AudioProcessor(self.buffer_size, self.fft_size, self.channels, self.nframes, self.samplerate, numpy.hanning) + self.pixel = self.image.load() self.draw = ImageDraw.Draw(self.image) self.previous_x, self.previous_y = None, None - self.pixel = self.image.load() self.frame_cursor = 0 self.pixel_cursor = 0 + def peaks(self, samples): + """ read all samples between start_seek and end_seek, then find the minimum and maximum peak + in that range. Returns that pair in the order they were found. So if min was found first, + it returns (min, max) else the other way around. """ + + max_index = numpy.argmax(samples) + max_value = samples[max_index] + + min_index = numpy.argmin(samples) + min_value = samples[min_index] + + if min_index < max_index: + return (min_value, max_value) + else: + return (max_value, min_value) + def color_from_value(self, value): """ given a value between 0 and 1, return an (r,g,b) tuple """ @@ -272,29 +269,22 @@ class WaveformImage(object): self.pixel[x, y_min_int - 1] = (r,g,b) def process(self, frames, eod): - buffer = numpy.transpose(frames)[0].copy() - buffer_copy = buffer.copy() - - #if eod: - #buffer_size = self.nframes_adapter - self.nframes - #print buffer_size - #self.pixel_per_buffer = buffer_size / self.samples_per_pixel - #print self.pixel_per_buffer - ##buffer = buffer[0:buffer_size].copy() - - for x in range(int(self.pixel_per_buffer)): - # FIXME : the peaks don't scale to the image width beacause self.pixel_per_buffer is not an integer - # Will be fixed later... - frame_cursor = int(x * self.samples_per_pixel) - frame_next_cursor = int((x + 1) * self.samples_per_pixel) - buffer_copy_trim = buffer[frame_cursor:frame_next_cursor].copy() - peaks = self.processor.peaks(buffer_copy_trim) - (spectral_centroid, db_spectrum) = self.processor.spectral_centroid(buffer_copy, frame_cursor) + if len(frames) == 1: + frames.shape = (len(frames),1) + buffer = frames.copy() + else: + buffer = frames[:,0].copy() + buffer.shape = (len(buffer),1) + + for samples, end in self.peaks_adapter.process(buffer, eod): + if self.pixel_cursor == self.image_width: + break + peaks = self.peaks(samples) + # FIXME : too much repeated spectrum computation cycle + (spectral_centroid, db_spectrum) = self.spectral_centroid.process(buffer, True) self.draw_peaks(self.pixel_cursor, peaks, spectral_centroid) self.pixel_cursor += 1 - #print self.pixel_cursor - def save(self): a = 25 for x in range(self.image_width): @@ -377,7 +367,6 @@ def create_spectrogram_png(input_filename, output_filename_s, image_width, image print " done" - class Noise(object): """A class that mimics audiolab.sndfile but generates noise instead of reading a wave file. Additionally it can be told to have a "broken" header and thus crashing diff --git a/tests/api/examples.py b/tests/api/examples.py index 9c8a708..506b209 100644 --- a/tests/api/examples.py +++ b/tests/api/examples.py @@ -211,10 +211,9 @@ class Waveform(Processor): implements(IGrapher) BUFFER_SIZE = 1024 - FFT_SIZE = 1024 @interfacedoc - def __init__(self, width=None, height=None, output=None): + def __init__(self, width=None, height=None, output=None, bg_color=None, color_scheme=None): if width: self.width = width else: @@ -223,8 +222,8 @@ class Waveform(Processor): self.height = height else: self.height = 200 - self.bg_color = None - self.color_scheme = None + self.bg_color = bg_color + self.color_scheme = color_scheme self.filename = output self.graph = None @@ -249,9 +248,8 @@ class Waveform(Processor): if self.graph: self.graph = None self.adapter = FixedSizeInputAdapter(self.BUFFER_SIZE, channels, pad=True) - self.graph = WaveformImage(self.width, self.height, self.BUFFER_SIZE, self.FFT_SIZE, - self.nframes(), self.adapter.nframes(self.input_nframes), - self.samplerate(), self.channels(), filename=self.filename) + self.graph = WaveformImage(self.width, self.height, self.nframes(), self.samplerate(), self.BUFFER_SIZE, + bg_color=self.bg_color, color_scheme=self.color_scheme, filename=self.filename) @interfacedoc def process(self, frames, eod=False): diff --git a/tests/api/test_pipe_waveform.py b/tests/api/test_pipe_waveform.py index a07c4a8..24b5242 100644 --- a/tests/api/test_pipe_waveform.py +++ b/tests/api/test_pipe_waveform.py @@ -2,34 +2,24 @@ from timeside.tests.api import examples from timeside.core import * from timeside.api import * -from sys import stdout - import os.path -source = os.path.join(os.path.dirname(__file__), "../samples/sweep_source.wav") -waveform_image = './waveform.png' - -print "Normalizing %s" % source -decoder = examples.FileDecoder(source) -maxlevel = examples.MaxLevel() -waveform = examples.Waveform(width=936, height=200, output=waveform_image) -(decoder | maxlevel).run() +use_gst = 1 +if use_gst: + from timeside.tests.api.gstreamer import FileDecoder, WavEncoder +else: + from timeside.tests.api.examples import FileDecoder, WavEncoder -gain = 1 -if maxlevel.result() > 0: - gain = 0.99 / maxlevel.result() +image_file = './waveform.png' +source = os.path.join(os.path.dirname(__file__), "../samples/sweep_source.mp3") -print "input maxlevel: %f" % maxlevel.result() -print "gain: %f" % gain +decoder = FileDecoder(source) +waveform = examples.Waveform(width=1024, height=256, output=image_file, bg_color=(0,0,0), color_scheme='default') -gain = examples.Gain(gain) -subpipe = gain | maxlevel +(decoder | waveform).run() -(decoder | subpipe | waveform).run() - -print "render waveform to: %s" % waveform_image +print 'frames per pixel = ', waveform.graph.samples_per_pixel +print "render waveform to: %s" % image_file waveform.render() -print "output maxlevel: %f" % maxlevel.result() - -- 2.39.5