diff --git a/audio-service/metadata.json b/audio-service/metadata.json index aad62ef..386665c 100644 --- a/audio-service/metadata.json +++ b/audio-service/metadata.json @@ -17,9 +17,9 @@ "endTime": 30, "filename": "C:\\Users\\mickl\\Desktop\\cliptrim-ui\\ClipTrimApp\\audio-service\\recordings\\audio_capture_20260220_193822.wav", "name": "Pee pee\npoo poo", - "playbackType": "playStop", - "startTime": 27.587412587412587, - "volume": 1 + "playbackType": "playOverlap", + "startTime": 27.76674010920584, + "volume": 0.25 }, { "endTime": 27.516843118383072, diff --git a/audio-service/settings.json b/audio-service/settings.json index ed12104..6b19aa6 100644 --- a/audio-service/settings.json +++ b/audio-service/settings.json @@ -8,10 +8,10 @@ "save_path": "C:\\Users\\mickl\\Desktop\\cliptrim-ui\\ClipTrimApp\\audio-service\\recordings", "recording_length": 30, "output_device": { + "channels": 2, "default_samplerate": 48000, - "index": 40, - "max_output_channels": 2, - "name": "Speakers (Realtek(R) Audio)" + "index": 45, + "name": "VM to Discord (VB-Audio Voicemeeter VAIO)" }, "http_port": 5010 } \ No newline at end of file diff --git a/audio-service/src/__pycache__/audio_clip.cpython-313.pyc b/audio-service/src/__pycache__/audio_clip.cpython-313.pyc new file mode 100644 index 0000000..8850eb9 Binary files /dev/null and b/audio-service/src/__pycache__/audio_clip.cpython-313.pyc differ diff --git a/audio-service/src/__pycache__/audio_io.cpython-313.pyc b/audio-service/src/__pycache__/audio_io.cpython-313.pyc new file mode 100644 index 0000000..a24dadc Binary files /dev/null and b/audio-service/src/__pycache__/audio_io.cpython-313.pyc differ diff --git a/audio-service/src/__pycache__/settings.cpython-313.pyc b/audio-service/src/__pycache__/settings.cpython-313.pyc index 04c508e..fffd4fc 100644 Binary files a/audio-service/src/__pycache__/settings.cpython-313.pyc and b/audio-service/src/__pycache__/settings.cpython-313.pyc differ diff --git a/audio-service/src/__pycache__/windows_audio.cpython-313.pyc b/audio-service/src/__pycache__/windows_audio.cpython-313.pyc index 972b381..2dd30a5 100644 Binary files a/audio-service/src/__pycache__/windows_audio.cpython-313.pyc and b/audio-service/src/__pycache__/windows_audio.cpython-313.pyc differ diff --git a/audio-service/src/audio_clip.py b/audio-service/src/audio_clip.py new file mode 100644 index 0000000..d1a8297 --- /dev/null +++ b/audio-service/src/audio_clip.py @@ -0,0 +1,64 @@ +import scipy.signal +import scipy.io.wavfile as wavfile +import numpy as np +import os + +class AudioClip: + def __init__(self, metadata, target_sample_rate=44100): + """ + metadata: dict with keys 'filename', 'start', 'end' (seconds) + target_sample_rate: sample rate for playback + """ + self.metadata = metadata + self.file_path = metadata['filename'] + self.start = metadata.get('startTime', 0) + self.end = metadata.get('endTime', None) + self.target_sample_rate = target_sample_rate + self.volume = metadata.get('volume', 1.0) + self.finished = False + self.audio_data, self.sample_rate = self._load_and_process_audio() + print(f"AudioClip created for {self.file_path} with start={self.start}s, end={self.end}s, sample_rate={self.sample_rate}Hz, length={len(self.audio_data)/self.sample_rate:.2f}s") + self.position = 0 # sample index for playback + + def _load_and_process_audio(self): + # Load audio file + sample_rate, data = wavfile.read(self.file_path) + # Convert to float32 + if data.dtype != np.float32: + data = data.astype(np.float32) / np.max(np.abs(data)) + # Convert to mono if needed + if len(data.shape) > 1: + data = np.mean(data, axis=1) + # Resample if needed + if sample_rate != self.target_sample_rate: + num_samples = int(len(data) * self.target_sample_rate / sample_rate) + data = scipy.signal.resample(data, num_samples) + sample_rate = self.target_sample_rate + # Cache only the clip region + start_sample = int(self.start * sample_rate) + end_sample = int(self.end * sample_rate) if self.end else len(data) + cached = data[start_sample:end_sample] + cached *= self.volume # Apply volume + return cached, sample_rate + + def get_samples(self, num_samples): + # Return next chunk for playback + if self.position >= len(self.audio_data): + self.finished = True + return np.zeros(num_samples, dtype=np.float32) + end_pos = min(self.position + num_samples, len(self.audio_data)) + chunk = self.audio_data[self.position:end_pos] + self.position = end_pos + if self.position >= len(self.audio_data): + self.finished = True + # Pad if chunk is short + if len(chunk) < num_samples: + chunk = np.pad(chunk, (0, num_samples - len(chunk)), mode='constant') + return chunk + + def is_finished(self): + return self.finished + + def reset(self): + self.position = 0 + self.finished = False \ No newline at end of file diff --git a/audio-service/src/audio_io.py b/audio-service/src/audio_io.py new file mode 100644 index 0000000..72e52b9 --- /dev/null +++ b/audio-service/src/audio_io.py @@ -0,0 +1,166 @@ +import sounddevice as sd +import numpy as np +import os +from datetime import datetime +import scipy.io.wavfile as wavfile +from metadata_manager import MetaDataManager +from audio_clip import AudioClip + + +# AudioClip class for clip playback + + +class AudioIO: + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + # print("Creating new AudioRecorder instance") + cls._instance = super().__new__(cls) + cls._instance.init() + return cls._instance + def init(self): + self.duration = 30 + self.channels = 2 + self.input_sample_rate = 44100 + self.output_sample_rate = 44100 + self.buffer = np.zeros((int(self.duration * self.input_sample_rate), self.channels), dtype=np.float32) + self.recordings_dir = "recordings" + + sd.default.latency = 'low' + + self.in_stream = sd.InputStream( + callback=self.record_callback + ) + + self.out_stream = sd.OutputStream( + callback=self.playback_callback, + latency=3 + ) + + self.clip_map = {} + + + def refresh_streams(self): + was_active = self.in_stream.active + if was_active: + self.in_stream.stop() + self.out_stream.stop() + + self.buffer = np.zeros((int(self.duration * self.input_sample_rate), self.channels), dtype=np.float32) + # print(f"AudioRecorder initialized with duration={self.duration}s, sample_rate={self.sample_rate}Hz, channels={self.channels}") + self.in_stream = sd.InputStream( + callback=self.record_callback + ) + + self.out_stream = sd.OutputStream( + callback=self.playback_callback + ) + + if was_active: + self.in_stream.start() + self.out_stream.start() + + + + def record_callback(self, indata, frames, time, status): + if status: + # print(f"Recording status: {status}") + pass + + # Circular buffer implementation + self.buffer = np.roll(self.buffer, -frames, axis=0) + self.buffer[-frames:] = indata + + def playback_callback(self, outdata, frames, time, status): + if status: + # print(f"Playback status: {status}") + pass + + outdata.fill(0) + + # Iterate over a copy of the items to avoid modifying the dictionary during iteration + for clip_id, clip_list in list(self.clip_map.items()): + for clip in clip_list[:]: # Iterate over a copy of the list + if not clip.is_finished(): + samples = clip.get_samples(frames) + outdata[:] += samples.reshape(-1, 1) # Mix into output + if clip.is_finished(): + self.clip_map[clip_id].remove(clip) + if len(self.clip_map[clip_id]) == 0: + del self.clip_map[clip_id] + break # Exit inner loop since the key is deleted + + + def save_last_n_seconds(self): + # Create output directory if it doesn't exist + os.makedirs(self.recordings_dir, exist_ok=True) + + # Generate filename with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = os.path.join(self.recordings_dir, f"audio_capture_{timestamp}.wav") + + # Normalize audio to prevent clipping + audio_data = self.buffer / np.max(np.abs(self.buffer)) * .5 + + # Convert float32 to int16 for WAV file + audio_data_int16 = (audio_data * 32767).astype(np.int16) + + # Write buffer to file + wavfile.write(filename, int(self.input_sample_rate), audio_data_int16) + + meta = MetaDataManager() + + clip_metadata = { + "filename": filename, + "name": f"Clip {timestamp}", + "playbackType":"playStop", + "volume": 1.0, + } + + meta.add_clip_to_collection("Uncategorized", clip_metadata ) + + + return clip_metadata + + def set_buffer_duration(self, duration): + self.duration = duration + self.buffer = np.zeros((int(duration * self.input_sample_rate), self.channels), dtype=np.float32) + + def set_recording_directory(self, directory): + self.recordings_dir = directory + + def start_recording(self): + if(self.in_stream.active): + # print("Already recording") + return + # print('number of channels', self.channels) + + self.in_stream.start() + self.out_stream.start() + self.output_sample_rate = self.out_stream.samplerate + self.input_sample_rate = self.in_stream.samplerate + + def stop_recording(self): + if(not self.in_stream.active): + # print("Already stopped") + return + + self.in_stream.stop() + self.out_stream.stop() + + def is_recording(self): + return self.in_stream.active + + def play_clip(self, clip_metadata): + print(f"Playing clip: {clip_metadata}") + clip_id = clip_metadata.get("filename") + if clip_metadata.get("playbackType") == "playStop": + if clip_id in self.clip_map: + del self.clip_map[clip_id] + return + else: + self.clip_map[clip_id] = [] + if clip_id not in self.clip_map: + self.clip_map[clip_id] = [] + self.clip_map[clip_id].append(AudioClip(clip_metadata, target_sample_rate=self.output_sample_rate)) \ No newline at end of file diff --git a/audio-service/src/audio_recorder.py b/audio-service/src/audio_recorder.py deleted file mode 100644 index d66ed94..0000000 --- a/audio-service/src/audio_recorder.py +++ /dev/null @@ -1,154 +0,0 @@ -import sounddevice as sd -import numpy as np -import os -from datetime import datetime -import scipy.io.wavfile as wavfile -from metadata_manager import MetaDataManager - -class AudioRecorder: - _instance = None - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - # print("Creating new AudioRecorder instance") - cls._instance = super().__new__(cls) - cls._instance.init() - return cls._instance - def init(self): - """ - Initialize audio recorder with configurable parameters. - - :param duration: Length of audio buffer in seconds - :param sample_rate: Audio sample rate (if None, use default device sample rate) - :param channels: Number of audio channels - """ - # print(f"Initializing AudioRecorder") - self.duration = 30 - self.sample_rate = 44100 - self.channels = 2 - self.buffer = np.zeros((int(self.duration * self.sample_rate), self.channels), dtype=np.float32) - self.recordings_dir = "recordings" - - self.stream = sd.InputStream( - callback=self.record_callback - ) - - def refresh_stream(self): - """ - Refresh the audio stream with updated parameters. - """ - was_active = self.stream.active - if was_active: - self.stream.stop() - - self.buffer = np.zeros((int(self.duration * self.sample_rate), self.channels), dtype=np.float32) - # print(f"AudioRecorder initialized with duration={self.duration}s, sample_rate={self.sample_rate}Hz, channels={self.channels}") - self.stream = sd.InputStream( - callback=self.record_callback - ) - - if was_active: - self.stream.start() - - - - - def record_callback(self, indata, frames, time, status): - """ - Circular buffer callback for continuous recording. - - :param indata: Input audio data - :param frames: Number of frames - :param time: Timestamp - :param status: Recording status - """ - if status: - # print(f"Recording status: {status}") - pass - - # Circular buffer implementation - self.buffer = np.roll(self.buffer, -frames, axis=0) - self.buffer[-frames:] = indata - - def save_last_n_seconds(self): - """ - Save the last n seconds of audio to a file. - - :param output_dir: Directory to save recordings - :return: Path to saved audio file - """ - # Create output directory if it doesn't exist - os.makedirs(self.recordings_dir, exist_ok=True) - - # Generate filename with timestamp - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = os.path.join(self.recordings_dir, f"audio_capture_{timestamp}.wav") - - # Normalize audio to prevent clipping - audio_data = self.buffer / np.max(np.abs(self.buffer)) * .5 - - # Convert float32 to int16 for WAV file - audio_data_int16 = (audio_data * 32767).astype(np.int16) - - # Write buffer to file - wavfile.write(filename, int(self.sample_rate), audio_data_int16) - - meta = MetaDataManager() - - clip_metadata = { - "filename": filename, - "name": f"Clip {timestamp}", - "playbackType":"playStop", - "volume": 1.0, - } - - meta.add_clip_to_collection("Uncategorized", clip_metadata ) - - - return clip_metadata - - def set_buffer_duration(self, duration): - """ - Set the duration of the audio buffer. - - :param duration: New buffer duration in seconds - """ - self.duration = duration - self.buffer = np.zeros((int(duration * self.sample_rate), self.channels), dtype=np.float32) - - def set_recording_directory(self, directory): - """ - Set the directory where recordings will be saved. - - :param directory: Path to the recordings directory - """ - self.recordings_dir = directory - - def start_recording(self): - """ - Start continuous audio recording with circular buffer. - """ - if(self.stream.active): - # print("Already recording") - return - # print('number of channels', self.channels) - - self.stream.start() - - def stop_recording(self): - """ - Stop continuous audio recording with circular buffer. - """ - if(not self.stream.active): - # print("Already stopped") - return - - self.stream.stop() - - def is_recording(self): - """ - Check if the audio stream is currently active. - - :return: True if recording, False otherwise - """ - return self.stream.active \ No newline at end of file diff --git a/audio-service/src/main.py b/audio-service/src/main.py index adc432d..b14ff0e 100644 --- a/audio-service/src/main.py +++ b/audio-service/src/main.py @@ -1,7 +1,7 @@ import argparse import os import sys -from audio_recorder import AudioRecorder +from audio_io import AudioIO from windows_audio import WindowsAudioManager import sounddevice as sd from metadata_manager import MetaDataManager @@ -41,6 +41,8 @@ def main(): os.makedirs(settings.get_settings('save_path'), exist_ok=True) + io = AudioIO() + io.start_recording() # Register blueprints app.register_blueprint(recording_bp) app.register_blueprint(device_bp) diff --git a/audio-service/src/routes/__pycache__/device.cpython-313.pyc b/audio-service/src/routes/__pycache__/device.cpython-313.pyc index 7a8e875..2c050b2 100644 Binary files a/audio-service/src/routes/__pycache__/device.cpython-313.pyc and b/audio-service/src/routes/__pycache__/device.cpython-313.pyc differ diff --git a/audio-service/src/routes/__pycache__/recording.cpython-313.pyc b/audio-service/src/routes/__pycache__/recording.cpython-313.pyc index 7ed3f26..435c7c2 100644 Binary files a/audio-service/src/routes/__pycache__/recording.cpython-313.pyc and b/audio-service/src/routes/__pycache__/recording.cpython-313.pyc differ diff --git a/audio-service/src/routes/device.py b/audio-service/src/routes/device.py index b9e7974..caa988f 100644 --- a/audio-service/src/routes/device.py +++ b/audio-service/src/routes/device.py @@ -1,11 +1,11 @@ from flask import Blueprint, request, jsonify from windows_audio import WindowsAudioManager -from audio_recorder import AudioRecorder +from audio_io import AudioIO device_bp = Blueprint('device', __name__) audio_manager = WindowsAudioManager() -recorder = AudioRecorder() +recorder = AudioIO() # @device_bp.route('/device/set', methods=['POST']) # def set_audio_device(): diff --git a/audio-service/src/routes/recording.py b/audio-service/src/routes/recording.py index c9a907d..97cd300 100644 --- a/audio-service/src/routes/recording.py +++ b/audio-service/src/routes/recording.py @@ -1,27 +1,27 @@ from flask import Blueprint, request, jsonify -from audio_recorder import AudioRecorder +from audio_io import AudioIO import os recording_bp = Blueprint('recording', __name__) @recording_bp.route('/record/start', methods=['POST']) def start_recording(): - recorder = AudioRecorder() + recorder = AudioIO() print('HTTP: Starting audio recording') recorder.start_recording() return jsonify({'status': 'recording started'}) @recording_bp.route('/record/stop', methods=['POST']) def stop_recording(): - recorder = AudioRecorder() + recorder = AudioIO() # print('HTTP: Stopping audio recording') recorder.stop_recording() return jsonify({'status': 'recording stopped'}) @recording_bp.route('/record/save', methods=['POST']) def save_recording(): - recorder = AudioRecorder() + recorder = AudioIO() # print('HTTP: Saving audio recording') saved_file = recorder.save_last_n_seconds() return jsonify({'status': 'recording saved', 'file': saved_file}) @@ -29,7 +29,7 @@ def save_recording(): @recording_bp.route('/record/status', methods=['GET']) def recording_status(): - recorder = AudioRecorder() + recorder = AudioIO() # print('HTTP: Checking recording status') status = 'recording' if recorder.is_recording() else 'stopped' return jsonify({'status': status}) @@ -45,8 +45,12 @@ def recording_delete(): @recording_bp.route('/playback/start', methods=['POST']) def playback_start(): + print(f"Playing clip") # print('HTTP: Starting audio playback') + clip = request.json try: + io = AudioIO() + io.play_clip(clip) # os.remove(filename) return jsonify({'status': 'success'}) except Exception as e: diff --git a/audio-service/src/settings.py b/audio-service/src/settings.py index f385c9d..580cdc3 100644 --- a/audio-service/src/settings.py +++ b/audio-service/src/settings.py @@ -1,6 +1,6 @@ import os import json -from audio_recorder import AudioRecorder +from audio_io import AudioIO from windows_audio import WindowsAudioManager class SettingsManager: @@ -44,7 +44,6 @@ class SettingsManager: input = self.settings["input_device"] if("output_device" in self.settings): output = self.settings["output_device"] - #see if input device is in "devices", if not set to the first index if input is not None and any(d['name'] == input["name"] for d in input_devices): # print(f"Using saved input device index: {input}") @@ -64,6 +63,7 @@ class SettingsManager: if not "http_port" in self.settings: self.settings["http_port"] = 5010 + self.save_settings() @@ -88,13 +88,14 @@ class SettingsManager: json.dump(self.settings, f, indent=4) def refresh_settings(self): - recorder = AudioRecorder() + recorder = AudioIO() # Update recorder parameters based on new setting recorder.set_buffer_duration(self.get_settings('recording_length')) recorder.recordings_dir = self.get_settings('save_path') audio_manager = WindowsAudioManager() audio_manager.set_default_input_device(self.get_settings('input_device')['index']) + audio_manager.set_default_output_device(self.get_settings('output_device')['index']) - recorder.refresh_stream() + recorder.refresh_streams() diff --git a/audio-service/src/windows_audio.py b/audio-service/src/windows_audio.py index 652d779..1724d1b 100644 --- a/audio-service/src/windows_audio.py +++ b/audio-service/src/windows_audio.py @@ -81,7 +81,7 @@ class WindowsAudioManager: def set_default_input_device(self, device_index): if(device_index is None): - return self.get_current_input_device_sample_rate() + return 0 """ Set the default input audio device. @@ -95,41 +95,18 @@ class WindowsAudioManager: device_info = sd.query_devices(device_index) return device_info['default_samplerate'] - def get_current_input_device_sample_rate(self): + def set_default_output_device(self, device_index): + if(device_index is None): + return self.get_current_output_device_sample_rate() """ - Get the sample rate of the current input device. + Set the default output audio device. - :return: Sample rate of the current input device + :param device_index: Index of the audio device + :return: Sample rate of the selected device """ - device_info = sd.query_devices(self.default_input) - return device_info['default_samplerate'] - - def get_system_volume(self): - """ - Get the system master volume. + sd.default.device[1] = device_index + self.default_output = device_index - :return: Current system volume (0.0 to 1.0) - """ - devices = AudioUtilities.GetSpeakers() - interface = devices.Activate( - IAudioEndpointVolume._iid_, - CLSCTX_ALL, - None - ) - volume = interface.QueryInterface(IAudioEndpointVolume) - return volume.GetMasterVolumeLevelScalar() - - def set_system_volume(self, volume_level): - """ - Set the system master volume. - - :param volume_level: Volume level (0.0 to 1.0) - """ - devices = AudioUtilities.GetSpeakers() - interface = devices.Activate( - IAudioEndpointVolume._iid_, - CLSCTX_ALL, - None - ) - volume = interface.QueryInterface(IAudioEndpointVolume) - volume.SetMasterVolumeLevelScalar(volume_level, None) \ No newline at end of file + # Get the sample rate of the selected device + device_info = sd.query_devices(device_index) + return device_info['default_samplerate'] \ No newline at end of file diff --git a/electron-ui/src/main/service.ts b/electron-ui/src/main/service.ts index 5013be9..e79633c 100644 --- a/electron-ui/src/main/service.ts +++ b/electron-ui/src/main/service.ts @@ -43,7 +43,7 @@ export default class PythonSubprocessManager { console.log(`Python stdout: ${data.toString()}`); }); this.process.stderr.on('data', (data: Buffer) => { - console.error(`Python stderr: ${data.toString()}`); + // console.error(`Python stderr: ${data.toString()}`); const lines = data.toString().split('\n'); // eslint-disable-next-line no-restricted-syntax for (const line of lines) {