server playback

This commit is contained in:
michalcourson
2026-02-24 19:08:27 -05:00
parent 47cdaa76b6
commit 8fda2a03af
17 changed files with 268 additions and 208 deletions

View File

@ -17,9 +17,9 @@
"endTime": 30, "endTime": 30,
"filename": "C:\\Users\\mickl\\Desktop\\cliptrim-ui\\ClipTrimApp\\audio-service\\recordings\\audio_capture_20260220_193822.wav", "filename": "C:\\Users\\mickl\\Desktop\\cliptrim-ui\\ClipTrimApp\\audio-service\\recordings\\audio_capture_20260220_193822.wav",
"name": "Pee pee\npoo poo", "name": "Pee pee\npoo poo",
"playbackType": "playStop", "playbackType": "playOverlap",
"startTime": 27.587412587412587, "startTime": 27.76674010920584,
"volume": 1 "volume": 0.25
}, },
{ {
"endTime": 27.516843118383072, "endTime": 27.516843118383072,

View File

@ -8,10 +8,10 @@
"save_path": "C:\\Users\\mickl\\Desktop\\cliptrim-ui\\ClipTrimApp\\audio-service\\recordings", "save_path": "C:\\Users\\mickl\\Desktop\\cliptrim-ui\\ClipTrimApp\\audio-service\\recordings",
"recording_length": 30, "recording_length": 30,
"output_device": { "output_device": {
"channels": 2,
"default_samplerate": 48000, "default_samplerate": 48000,
"index": 40, "index": 45,
"max_output_channels": 2, "name": "VM to Discord (VB-Audio Voicemeeter VAIO)"
"name": "Speakers (Realtek(R) Audio)"
}, },
"http_port": 5010 "http_port": 5010
} }

Binary file not shown.

View File

@ -0,0 +1,64 @@
import scipy.signal
import scipy.io.wavfile as wavfile
import numpy as np
import os
class AudioClip:
    """A trimmed, volume-scaled mono clip held in memory for chunked playback.

    The WAV file named by the metadata is read once at construction time,
    converted to mono float samples, resampled when the file's rate differs
    from ``target_sample_rate``, cropped to the ``startTime``/``endTime``
    window and multiplied by ``volume``. ``get_samples`` then hands the cached
    region out one chunk at a time.
    """

    def __init__(self, metadata, target_sample_rate=44100):
        """
        metadata: dict with the required key 'filename' and the optional keys
            'startTime' / 'endTime' (seconds) and 'volume' (gain factor)
        target_sample_rate: sample rate for playback
        """
        self.metadata = metadata
        self.file_path = metadata['filename']
        self.start = metadata.get('startTime', 0)
        self.end = metadata.get('endTime', None)
        self.target_sample_rate = target_sample_rate
        self.volume = metadata.get('volume', 1.0)
        self.finished = False
        self.position = 0  # sample index for playback
        self.audio_data, self.sample_rate = self._load_and_process_audio()
        print(f"AudioClip created for {self.file_path} with start={self.start}s, end={self.end}s, sample_rate={self.sample_rate}Hz, length={len(self.audio_data)/self.sample_rate:.2f}s")

    def _load_and_process_audio(self):
        """Read the file and return ``(samples, sample_rate)`` for the clip region."""
        sample_rate, data = wavfile.read(self.file_path)

        # Convert to float32 and peak-normalise. The peak is measured on the
        # float copy: np.abs() on the raw int16 array wraps for -32768, and a
        # silent file (peak 0) must not be divided or every sample becomes NaN.
        if data.dtype != np.float32:
            data = data.astype(np.float32)
            peak = np.max(np.abs(data)) if data.size else 0.0
            if peak > 0:
                data /= peak

        # Convert to mono if needed
        if data.ndim > 1:
            data = np.mean(data, axis=1)

        # Resample if needed
        if sample_rate != self.target_sample_rate and len(data):
            num_samples = int(len(data) * self.target_sample_rate / sample_rate)
            data = scipy.signal.resample(data, num_samples)
            sample_rate = self.target_sample_rate

        # Cache only the clip region. A missing or negative start is clamped to
        # the beginning of the file; a falsy end (None or 0) means "to the end".
        start_sample = max(int((self.start or 0) * sample_rate), 0)
        end_sample = int(self.end * sample_rate) if self.end else len(data)

        # Multiplying creates a new array, so the full decoded file is not
        # kept alive by a view and the source samples are never modified.
        cached = (data[start_sample:end_sample] * self.volume).astype(np.float32, copy=False)
        return cached, sample_rate

    def get_samples(self, num_samples):
        """Return the next ``num_samples`` samples, zero-padded past the end.

        Sets ``finished`` once the last cached sample has been handed out.
        """
        remaining = len(self.audio_data) - self.position
        if remaining <= 0:
            self.finished = True
            return np.zeros(num_samples, dtype=np.float32)

        take = min(num_samples, remaining)
        chunk = self.audio_data[self.position:self.position + take]
        self.position += take
        if self.position >= len(self.audio_data):
            self.finished = True

        # Pad if chunk is short
        if take < num_samples:
            chunk = np.pad(chunk, (0, num_samples - take), mode='constant')
        return chunk

    def is_finished(self):
        """Return True once every cached sample has been consumed."""
        return self.finished

    def reset(self):
        """Rewind to the first sample so the clip can be played again."""
        self.position = 0
        self.finished = False

View File

@ -0,0 +1,166 @@
import sounddevice as sd
import numpy as np
import os
from datetime import datetime
import scipy.io.wavfile as wavfile
from metadata_manager import MetaDataManager
from audio_clip import AudioClip
# AudioClip class for clip playback
class AudioIO:
    """Singleton that owns the capture stream, the playback stream and the clip mixer.

    Captured audio is kept in ``buffer`` with the newest frames at the end;
    ``save_last_n_seconds`` writes that buffer to a WAV file. ``play_clip``
    registers ``AudioClip`` objects in ``clip_map`` (keyed by file name) and
    ``playback_callback`` mixes every registered clip into the output stream.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # Publish the instance only after init() succeeded, so a failed
            # stream open does not leave a half-built singleton behind.
            instance = super().__new__(cls)
            instance.init()
            cls._instance = instance
        return cls._instance

    def init(self):
        """Set the defaults and open (but do not start) both streams."""
        self.duration = 30
        self.channels = 2
        self.input_sample_rate = 44100
        self.output_sample_rate = 44100
        self.recordings_dir = "recordings"
        self.clip_map = {}
        self.buffer = self._new_buffer()

        sd.default.latency = 'low'
        self.in_stream = sd.InputStream(
            callback=self.record_callback
        )
        # NOTE(review): this stream requests latency=3 while refresh_streams()
        # reopens it without a latency argument - confirm which is intended.
        self.out_stream = sd.OutputStream(
            callback=self.playback_callback,
            latency=3
        )
        self._adopt_stream_rates()

    def _new_buffer(self):
        """Return a zeroed capture buffer for the current duration and input rate."""
        frames = int(self.duration * self.input_sample_rate)
        return np.zeros((frames, self.channels), dtype=np.float32)

    def _adopt_stream_rates(self):
        """Copy the rates of the open streams and resize the buffer to match."""
        self.output_sample_rate = self.out_stream.samplerate
        self.input_sample_rate = self.in_stream.samplerate
        # A buffer sized for another rate would hold the wrong number of seconds.
        if len(self.buffer) != int(self.duration * self.input_sample_rate):
            self.buffer = self._new_buffer()

    def refresh_streams(self):
        """Reopen both streams (e.g. after the default devices changed).

        The capture buffer is cleared. If recording was active, the new
        streams are started again.
        """
        old_in, old_out = self.in_stream, self.out_stream
        was_active = old_in.active
        if was_active:
            old_in.stop()
            old_out.stop()

        new_in = sd.InputStream(
            callback=self.record_callback
        )
        try:
            new_out = sd.OutputStream(
                callback=self.playback_callback
            )
        except Exception:
            new_in.close()
            raise

        self.in_stream, self.out_stream = new_in, new_out
        # Release the replaced streams instead of leaving them open.
        old_in.close()
        old_out.close()

        self._adopt_stream_rates()
        self.buffer = self._new_buffer()

        if was_active:
            self.in_stream.start()
            self.out_stream.start()

    def record_callback(self, indata, frames, time, status):
        """Append ``indata`` to the capture buffer, dropping the oldest frames."""
        if status:
            # print(f"Recording status: {status}")
            pass

        current = self.buffer
        capacity = len(current)
        if frames <= 0 or capacity == 0:
            return

        # Devices with more channels than the buffer are truncated to the
        # first columns; a mono block is broadcast over every column.
        block = indata[:, :current.shape[1]]

        if frames >= capacity:
            # One block larger than the whole buffer: keep only its tail.
            fresh = np.empty_like(current)
            fresh[:] = block[-capacity:]
            self.buffer = fresh
            return

        # Circular buffer implementation
        rolled = np.roll(current, -frames, axis=0)
        rolled[-frames:] = block
        self.buffer = rolled

    def playback_callback(self, outdata, frames, time, status):
        """Mix every active clip into ``outdata`` and drop the finished ones."""
        if status:
            # print(f"Playback status: {status}")
            pass

        outdata.fill(0)

        # Work on snapshots: play_clip() may add or remove entries from
        # another thread while this callback runs.
        for clip_id, clip_list in list(self.clip_map.items()):
            removed_any = False
            for clip in list(clip_list):
                if not clip.is_finished():
                    samples = clip.get_samples(frames)
                    outdata[:] += samples.reshape(-1, 1)  # Mix into output
                if clip.is_finished():
                    try:
                        clip_list.remove(clip)
                    except ValueError:
                        pass
                    removed_any = True
            # Only drop the key when a clip actually ended and the map still
            # points at this very list; never re-index the map by key here.
            if removed_any and not clip_list and self.clip_map.get(clip_id) is clip_list:
                self.clip_map.pop(clip_id, None)

    def _buffer_as_int16(self):
        """Return the capture buffer scaled to half of full scale as int16."""
        snapshot = self.buffer
        peak = np.max(np.abs(snapshot)) if snapshot.size else 0.0
        if peak > 0:
            # Normalize audio to prevent clipping
            scaled = snapshot / peak * .5
        else:
            # Silent buffer: dividing by the zero peak would yield NaN.
            scaled = np.zeros_like(snapshot)
        # Convert float32 to int16 for WAV file
        return (scaled * 32767).astype(np.int16)

    def save_last_n_seconds(self):
        """Write the capture buffer to a timestamped WAV file.

        The new clip is added to the "Uncategorized" collection and its
        metadata dict is returned.
        """
        # Create output directory if it doesn't exist
        os.makedirs(self.recordings_dir, exist_ok=True)

        # Generate filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = os.path.join(self.recordings_dir, f"audio_capture_{timestamp}.wav")

        # Write buffer to file
        wavfile.write(filename, int(self.input_sample_rate), self._buffer_as_int16())

        meta = MetaDataManager()
        clip_metadata = {
            "filename": filename,
            "name": f"Clip {timestamp}",
            "playbackType":"playStop",
            "volume": 1.0,
        }
        meta.add_clip_to_collection("Uncategorized", clip_metadata)
        return clip_metadata

    def set_buffer_duration(self, duration):
        """Set the buffer length in seconds; the recorded audio is cleared."""
        self.duration = duration
        self.buffer = self._new_buffer()

    def set_recording_directory(self, directory):
        """Set the directory that save_last_n_seconds() writes into."""
        self.recordings_dir = directory

    def start_recording(self):
        """Start both streams unless the input stream is already active."""
        if self.in_stream.active:
            # print("Already recording")
            return
        # print('number of channels', self.channels)
        self._adopt_stream_rates()
        self.in_stream.start()
        self.out_stream.start()

    def stop_recording(self):
        """Stop both streams unless the input stream is already stopped."""
        if not self.in_stream.active:
            # print("Already stopped")
            return
        self.in_stream.stop()
        self.out_stream.stop()

    def is_recording(self):
        """Return True while the input stream is active."""
        return self.in_stream.active

    def play_clip(self, clip_metadata):
        """Start a clip, or stop it when a "playStop" clip is already registered.

        Clips are keyed by their "filename". With playbackType "playStop" a
        second request for a registered clip removes it; any other type adds
        another overlapping instance.
        """
        print(f"Playing clip: {clip_metadata}")
        clip_id = clip_metadata.get("filename")

        if clip_metadata.get("playbackType") == "playStop":
            if clip_id in self.clip_map:
                self.clip_map.pop(clip_id, None)
                return
            # Register a placeholder first so a second request that arrives
            # while the file is still loading cancels this one.
            pending = []
            self.clip_map[clip_id] = pending
            try:
                clip = AudioClip(clip_metadata, target_sample_rate=self.output_sample_rate)
            except Exception:
                # Do not leave an empty entry that would swallow the next request.
                if self.clip_map.get(clip_id) is pending:
                    self.clip_map.pop(clip_id, None)
                raise
            pending.append(clip)
            return

        # Load before touching the map: the playback callback may delete the
        # key while the file is being read, which would orphan a list that
        # was looked up earlier.
        clip = AudioClip(clip_metadata, target_sample_rate=self.output_sample_rate)
        self.clip_map.setdefault(clip_id, []).append(clip)

View File

@ -1,154 +0,0 @@
import sounddevice as sd
import numpy as np
import os
from datetime import datetime
import scipy.io.wavfile as wavfile
from metadata_manager import MetaDataManager
class AudioRecorder:
    """Singleton that keeps the most recent audio from the input stream.

    Captured frames are kept in ``buffer`` with the newest frames at the end;
    ``save_last_n_seconds`` writes that buffer to a WAV file.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # print("Creating new AudioRecorder instance")
            # Publish the instance only after init() succeeded, so a failed
            # stream open does not leave a half-built singleton behind.
            instance = super().__new__(cls)
            instance.init()
            cls._instance = instance
        return cls._instance

    def init(self):
        """
        Initialize the recorder with its defaults (30 s buffer, 2 channels)
        and open, but do not start, the input stream.
        """
        # print(f"Initializing AudioRecorder")
        self.duration = 30
        self.sample_rate = 44100
        self.channels = 2
        self.recordings_dir = "recordings"
        self.stream = sd.InputStream(
            callback=self.record_callback
        )
        # Use the rate the stream really runs at so the buffer length and the
        # WAV header match the captured audio.
        self.sample_rate = self.stream.samplerate
        self.buffer = self._new_buffer()

    def _new_buffer(self):
        """Return a zeroed capture buffer for the current duration and rate."""
        frames = int(self.duration * self.sample_rate)
        return np.zeros((frames, self.channels), dtype=np.float32)

    def refresh_stream(self):
        """
        Reopen the input stream and clear the buffer.

        If the stream was active, the new stream is started again.
        """
        old_stream = self.stream
        was_active = old_stream.active
        if was_active:
            old_stream.stop()

        # print(f"AudioRecorder initialized with duration={self.duration}s, sample_rate={self.sample_rate}Hz, channels={self.channels}")
        self.stream = sd.InputStream(
            callback=self.record_callback
        )
        # Release the replaced stream instead of leaving it open.
        old_stream.close()

        self.sample_rate = self.stream.samplerate
        self.buffer = self._new_buffer()

        if was_active:
            self.stream.start()

    def record_callback(self, indata, frames, time, status):
        """
        Circular buffer callback for continuous recording.

        :param indata: Input audio data
        :param frames: Number of frames
        :param time: Timestamp
        :param status: Recording status
        """
        if status:
            # print(f"Recording status: {status}")
            pass

        current = self.buffer
        capacity = len(current)
        if frames <= 0 or capacity == 0:
            return

        # Devices with more channels than the buffer are truncated to the
        # first columns; a mono block is broadcast over every column.
        block = indata[:, :current.shape[1]]

        if frames >= capacity:
            # One block larger than the whole buffer: keep only its tail.
            fresh = np.empty_like(current)
            fresh[:] = block[-capacity:]
            self.buffer = fresh
            return

        # Circular buffer implementation
        rolled = np.roll(current, -frames, axis=0)
        rolled[-frames:] = block
        self.buffer = rolled

    def _buffer_as_int16(self):
        """Return the capture buffer scaled to half of full scale as int16."""
        snapshot = self.buffer
        peak = np.max(np.abs(snapshot)) if snapshot.size else 0.0
        if peak > 0:
            # Normalize audio to prevent clipping
            scaled = snapshot / peak * .5
        else:
            # Silent buffer: dividing by the zero peak would yield NaN.
            scaled = np.zeros_like(snapshot)
        # Convert float32 to int16 for WAV file
        return (scaled * 32767).astype(np.int16)

    def save_last_n_seconds(self):
        """
        Save the buffered audio to a timestamped WAV file in the recordings
        directory and add it to the "Uncategorized" collection.

        :return: Metadata dict of the saved clip
        """
        # Create output directory if it doesn't exist
        os.makedirs(self.recordings_dir, exist_ok=True)

        # Generate filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = os.path.join(self.recordings_dir, f"audio_capture_{timestamp}.wav")

        # Write buffer to file
        wavfile.write(filename, int(self.sample_rate), self._buffer_as_int16())

        meta = MetaDataManager()
        clip_metadata = {
            "filename": filename,
            "name": f"Clip {timestamp}",
            "playbackType":"playStop",
            "volume": 1.0,
        }
        meta.add_clip_to_collection("Uncategorized", clip_metadata)
        return clip_metadata

    def set_buffer_duration(self, duration):
        """
        Set the duration of the audio buffer; the recorded audio is cleared.

        :param duration: New buffer duration in seconds
        """
        self.duration = duration
        self.buffer = self._new_buffer()

    def set_recording_directory(self, directory):
        """
        Set the directory where recordings will be saved.

        :param directory: Path to the recordings directory
        """
        self.recordings_dir = directory

    def start_recording(self):
        """
        Start continuous audio recording with circular buffer.
        """
        if self.stream.active:
            # print("Already recording")
            return
        # print('number of channels', self.channels)
        self.stream.start()

    def stop_recording(self):
        """
        Stop continuous audio recording with circular buffer.
        """
        if not self.stream.active:
            # print("Already stopped")
            return
        self.stream.stop()

    def is_recording(self):
        """
        Check if the audio stream is currently active.

        :return: True if recording, False otherwise
        """
        return self.stream.active

View File

@ -1,7 +1,7 @@
import argparse import argparse
import os import os
import sys import sys
from audio_recorder import AudioRecorder from audio_io import AudioIO
from windows_audio import WindowsAudioManager from windows_audio import WindowsAudioManager
import sounddevice as sd import sounddevice as sd
from metadata_manager import MetaDataManager from metadata_manager import MetaDataManager
@ -41,6 +41,8 @@ def main():
os.makedirs(settings.get_settings('save_path'), exist_ok=True) os.makedirs(settings.get_settings('save_path'), exist_ok=True)
io = AudioIO()
io.start_recording()
# Register blueprints # Register blueprints
app.register_blueprint(recording_bp) app.register_blueprint(recording_bp)
app.register_blueprint(device_bp) app.register_blueprint(device_bp)

View File

@ -1,11 +1,11 @@
from flask import Blueprint, request, jsonify from flask import Blueprint, request, jsonify
from windows_audio import WindowsAudioManager from windows_audio import WindowsAudioManager
from audio_recorder import AudioRecorder from audio_io import AudioIO
device_bp = Blueprint('device', __name__) device_bp = Blueprint('device', __name__)
audio_manager = WindowsAudioManager() audio_manager = WindowsAudioManager()
recorder = AudioRecorder() recorder = AudioIO()
# @device_bp.route('/device/set', methods=['POST']) # @device_bp.route('/device/set', methods=['POST'])
# def set_audio_device(): # def set_audio_device():

View File

@ -1,27 +1,27 @@
from flask import Blueprint, request, jsonify from flask import Blueprint, request, jsonify
from audio_recorder import AudioRecorder from audio_io import AudioIO
import os import os
recording_bp = Blueprint('recording', __name__) recording_bp = Blueprint('recording', __name__)
@recording_bp.route('/record/start', methods=['POST']) @recording_bp.route('/record/start', methods=['POST'])
def start_recording(): def start_recording():
recorder = AudioRecorder() recorder = AudioIO()
print('HTTP: Starting audio recording') print('HTTP: Starting audio recording')
recorder.start_recording() recorder.start_recording()
return jsonify({'status': 'recording started'}) return jsonify({'status': 'recording started'})
@recording_bp.route('/record/stop', methods=['POST']) @recording_bp.route('/record/stop', methods=['POST'])
def stop_recording(): def stop_recording():
recorder = AudioRecorder() recorder = AudioIO()
# print('HTTP: Stopping audio recording') # print('HTTP: Stopping audio recording')
recorder.stop_recording() recorder.stop_recording()
return jsonify({'status': 'recording stopped'}) return jsonify({'status': 'recording stopped'})
@recording_bp.route('/record/save', methods=['POST']) @recording_bp.route('/record/save', methods=['POST'])
def save_recording(): def save_recording():
recorder = AudioRecorder() recorder = AudioIO()
# print('HTTP: Saving audio recording') # print('HTTP: Saving audio recording')
saved_file = recorder.save_last_n_seconds() saved_file = recorder.save_last_n_seconds()
return jsonify({'status': 'recording saved', 'file': saved_file}) return jsonify({'status': 'recording saved', 'file': saved_file})
@ -29,7 +29,7 @@ def save_recording():
@recording_bp.route('/record/status', methods=['GET']) @recording_bp.route('/record/status', methods=['GET'])
def recording_status(): def recording_status():
recorder = AudioRecorder() recorder = AudioIO()
# print('HTTP: Checking recording status') # print('HTTP: Checking recording status')
status = 'recording' if recorder.is_recording() else 'stopped' status = 'recording' if recorder.is_recording() else 'stopped'
return jsonify({'status': status}) return jsonify({'status': status})
@ -45,8 +45,12 @@ def recording_delete():
@recording_bp.route('/playback/start', methods=['POST']) @recording_bp.route('/playback/start', methods=['POST'])
def playback_start(): def playback_start():
print(f"Playing clip")
# print('HTTP: Starting audio playback') # print('HTTP: Starting audio playback')
clip = request.json
try: try:
io = AudioIO()
io.play_clip(clip)
# os.remove(filename) # os.remove(filename)
return jsonify({'status': 'success'}) return jsonify({'status': 'success'})
except Exception as e: except Exception as e:

View File

@ -1,6 +1,6 @@
import os import os
import json import json
from audio_recorder import AudioRecorder from audio_io import AudioIO
from windows_audio import WindowsAudioManager from windows_audio import WindowsAudioManager
class SettingsManager: class SettingsManager:
@ -44,7 +44,6 @@ class SettingsManager:
input = self.settings["input_device"] input = self.settings["input_device"]
if("output_device" in self.settings): if("output_device" in self.settings):
output = self.settings["output_device"] output = self.settings["output_device"]
#see if input device is in "devices", if not set to the first index #see if input device is in "devices", if not set to the first index
if input is not None and any(d['name'] == input["name"] for d in input_devices): if input is not None and any(d['name'] == input["name"] for d in input_devices):
# print(f"Using saved input device index: {input}") # print(f"Using saved input device index: {input}")
@ -64,6 +63,7 @@ class SettingsManager:
if not "http_port" in self.settings: if not "http_port" in self.settings:
self.settings["http_port"] = 5010 self.settings["http_port"] = 5010
self.save_settings() self.save_settings()
@ -88,13 +88,14 @@ class SettingsManager:
json.dump(self.settings, f, indent=4) json.dump(self.settings, f, indent=4)
def refresh_settings(self): def refresh_settings(self):
recorder = AudioRecorder() recorder = AudioIO()
# Update recorder parameters based on new setting # Update recorder parameters based on new setting
recorder.set_buffer_duration(self.get_settings('recording_length')) recorder.set_buffer_duration(self.get_settings('recording_length'))
recorder.recordings_dir = self.get_settings('save_path') recorder.recordings_dir = self.get_settings('save_path')
audio_manager = WindowsAudioManager() audio_manager = WindowsAudioManager()
audio_manager.set_default_input_device(self.get_settings('input_device')['index']) audio_manager.set_default_input_device(self.get_settings('input_device')['index'])
audio_manager.set_default_output_device(self.get_settings('output_device')['index'])
recorder.refresh_stream() recorder.refresh_streams()

View File

@ -81,7 +81,7 @@ class WindowsAudioManager:
def set_default_input_device(self, device_index): def set_default_input_device(self, device_index):
if(device_index is None): if(device_index is None):
return self.get_current_input_device_sample_rate() return 0
""" """
Set the default input audio device. Set the default input audio device.
@ -95,41 +95,18 @@ class WindowsAudioManager:
device_info = sd.query_devices(device_index) device_info = sd.query_devices(device_index)
return device_info['default_samplerate'] return device_info['default_samplerate']
def get_current_input_device_sample_rate(self): def set_default_output_device(self, device_index):
if(device_index is None):
return self.get_current_output_device_sample_rate()
""" """
Get the sample rate of the current input device. Set the default output audio device.
:return: Sample rate of the current input device :param device_index: Index of the audio device
:return: Sample rate of the selected device
""" """
device_info = sd.query_devices(self.default_input) sd.default.device[1] = device_index
return device_info['default_samplerate'] self.default_output = device_index
def get_system_volume(self):
"""
Get the system master volume.
:return: Current system volume (0.0 to 1.0) # Get the sample rate of the selected device
""" device_info = sd.query_devices(device_index)
devices = AudioUtilities.GetSpeakers() return device_info['default_samplerate']
interface = devices.Activate(
IAudioEndpointVolume._iid_,
CLSCTX_ALL,
None
)
volume = interface.QueryInterface(IAudioEndpointVolume)
return volume.GetMasterVolumeLevelScalar()
def set_system_volume(self, volume_level):
"""
Set the system master volume.
:param volume_level: Volume level (0.0 to 1.0)
"""
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
IAudioEndpointVolume._iid_,
CLSCTX_ALL,
None
)
volume = interface.QueryInterface(IAudioEndpointVolume)
volume.SetMasterVolumeLevelScalar(volume_level, None)

View File

@ -43,7 +43,7 @@ export default class PythonSubprocessManager {
console.log(`Python stdout: ${data.toString()}`); console.log(`Python stdout: ${data.toString()}`);
}); });
this.process.stderr.on('data', (data: Buffer) => { this.process.stderr.on('data', (data: Buffer) => {
console.error(`Python stderr: ${data.toString()}`); // console.error(`Python stderr: ${data.toString()}`);
const lines = data.toString().split('\n'); const lines = data.toString().split('\n');
// eslint-disable-next-line no-restricted-syntax // eslint-disable-next-line no-restricted-syntax
for (const line of lines) { for (const line of lines) {