server playback
This commit is contained in:
64
audio-service/src/audio_clip.py
Normal file
64
audio-service/src/audio_clip.py
Normal file
@ -0,0 +1,64 @@
|
||||
import scipy.signal
|
||||
import scipy.io.wavfile as wavfile
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
class AudioClip:
    """A region of a WAV file, decoded once and served in fixed-size chunks.

    On construction the file is read, peak-normalised to float32, mixed down
    to mono, resampled to ``target_sample_rate`` and trimmed to the requested
    region. The result is cached in ``audio_data`` and consumed sequentially
    through :meth:`get_samples`.
    """

    def __init__(self, metadata, target_sample_rate=44100):
        """Load and cache the clip described by *metadata*.

        Args:
            metadata: dict describing the clip. Keys read here:
                'filename' (required) - path passed to ``wavfile.read``;
                'startTime' (optional, seconds, default 0);
                'endTime' (optional, seconds; a missing or falsy value
                means "play to the end of the file");
                'volume' (optional gain multiplier, default 1.0).
            target_sample_rate: sample rate (Hz) the cached audio is
                resampled to for playback.

        Raises:
            KeyError: if 'filename' is missing from *metadata*.
        """
        self.metadata = metadata
        self.file_path = metadata['filename']
        self.start = metadata.get('startTime', 0)
        self.end = metadata.get('endTime', None)
        self.target_sample_rate = target_sample_rate
        self.volume = metadata.get('volume', 1.0)
        self.finished = False
        self.position = 0  # sample index for playback
        self.audio_data, self.sample_rate = self._load_and_process_audio()
        print(f"AudioClip created for {self.file_path} with start={self.start}s, end={self.end}s, sample_rate={self.sample_rate}Hz, length={len(self.audio_data)/self.sample_rate:.2f}s")

    @staticmethod
    def _normalize_to_float32(data):
        """Return *data* as float32, peak-normalised unless already float32.

        float32 input is returned untouched. Any other dtype is converted
        and divided by its absolute peak so the loudest sample is +/-1.0.
        """
        if data.dtype == np.float32:
            return data

        is_unsigned_8bit = data.dtype == np.uint8
        # Convert before taking abs(): on the raw integer array abs() of the
        # most negative value overflows and yields a wrong peak.
        data = data.astype(np.float32)
        if is_unsigned_8bit:
            # 8-bit PCM is unsigned with silence at 128; re-centre on zero.
            data -= 128.0

        peak = float(np.max(np.abs(data))) if data.size else 0.0
        # A silent (or empty) file has no peak; dividing would produce NaN.
        if peak > 0.0:
            data /= peak
        return data

    def _load_and_process_audio(self):
        """Read the WAV file and return ``(samples, sample_rate)``.

        Returns:
            A tuple of a 1-D float32 array holding only the requested clip
            region with the volume applied, and the sample rate of that
            array (``target_sample_rate`` if resampling took place,
            otherwise the file's own rate).
        """
        sample_rate, data = wavfile.read(self.file_path)
        data = self._normalize_to_float32(data)

        # Convert to mono if needed
        if data.ndim > 1:
            data = data.mean(axis=1)

        # Resample if needed
        if sample_rate != self.target_sample_rate:
            num_samples = int(len(data) * self.target_sample_rate / sample_rate)
            if num_samples > 0:
                data = scipy.signal.resample(data, num_samples)
            else:
                # Nothing to resample; keep an empty signal.
                data = data[:0]
            sample_rate = self.target_sample_rate

        # Cache only the clip region, clamped to the available samples so a
        # negative or out-of-range time cannot wrap around via negative
        # slicing.
        total = len(data)
        start_sample = int((self.start or 0) * sample_rate)
        start_sample = min(max(start_sample, 0), total)
        end_sample = int(self.end * sample_rate) if self.end else total
        end_sample = min(max(end_sample, start_sample), total)

        # Multiply into a fresh array (never in place on a view of the
        # decoded buffer) and pin the dtype so every chunk handed out by
        # get_samples() is float32.
        cached = np.asarray(
            data[start_sample:end_sample] * self.volume, dtype=np.float32
        )
        return cached, sample_rate

    def get_samples(self, num_samples):
        """Return the next *num_samples* samples, zero-padded at the end.

        Advances ``position`` and sets ``finished`` once the cached audio
        has been fully consumed. After that, every call returns silence.
        """
        total = len(self.audio_data)
        if self.position >= total:
            self.finished = True
            return np.zeros(num_samples, dtype=np.float32)

        end_pos = min(self.position + num_samples, total)
        chunk = self.audio_data[self.position:end_pos]
        self.position = end_pos
        if self.position >= total:
            self.finished = True

        # Pad if chunk is short
        if len(chunk) < num_samples:
            chunk = np.pad(chunk, (0, num_samples - len(chunk)), mode='constant')
        return chunk

    def is_finished(self):
        """Return True once all cached samples have been handed out."""
        return self.finished

    def reset(self):
        """Rewind playback to the first cached sample."""
        self.position = 0
        self.finished = False
|
||||
Reference in New Issue
Block a user