/**
 **************************************************
 *
 * @file        speechPrompt.ino
 * 
 * @brief       Records a short WAV-format voice prompt into RAM while a button is held.
 *              Once the button is released, the recording is sent to OpenAI, which returns a text response.
 * 
 *              Enter your WiFi credentials and your OpenAI API key, which you can get by going to https://platform.openai.com/api-keys
 *              You can change the microphone and button pins as well as the audio recording settings to suit your setup
 *
 * @link        Devices used in this example: solde.red/333357 : Soldered Microphone SPK0641HT4H-1
 *
 * @authors     Josip Šimun Kuči @ soldered.com
 ***************************************************/

#include <Arduino.h>
#include "Soldered-Microphone-SPK0641HT.h"
#include "Soldered-OpenAI-Library.h"
#include "WiFi.h"

// Microphone pin configuration
constexpr int PDM_CLK = 4;   // PDM clock pin connected to microphone
constexpr int PDM_DIN = 16;  // PDM data pin connected to microphone

// Button pin configuration
constexpr int BUTTON_PIN = 48; // Record button input pin

// WiFi credentials - replace the placeholders with your network's values
const char* ssid = "YOUR_SSID_HERE";
const char* password = "YOUR_PASSWORD_HERE";

// API key and model used in this example
const char* api_key = "YOUR_API_KEY_HERE";
const char* model = "gpt-4o-audio-preview";

// Audio recording parameters
constexpr int SAMPLE_RATE = 8000;      // Samples per second
constexpr int BIT_DEPTH = 16;          // Bits per PCM sample
constexpr int CHANNELS = 1;            // Mono
constexpr int MAX_DURATION_MS = 8000;  // 8 seconds max recording

// The buffer size below and the capture code both work in int16_t samples,
// so any other bit depth would silently mis-size the recording.
static_assert(BIT_DEPTH == 16, "This sketch stores samples as int16_t; BIT_DEPTH must be 16");

// Multiply in 64-bit: SAMPLE_RATE * MAX_DURATION_MS overflows a 32-bit int
// as soon as the settings are raised (e.g. 44100 Hz for 60 s).
constexpr int MAX_SAMPLES =
    static_cast<int>((static_cast<long long>(SAMPLE_RATE) * MAX_DURATION_MS) / 1000);
static_assert(MAX_SAMPLES > 0, "SAMPLE_RATE and MAX_DURATION_MS must yield a positive, int-sized sample count");

// Each sample is 2 bytes (int16_t)
constexpr size_t AUDIO_BUFFER_SIZE = static_cast<size_t>(MAX_SAMPLES) * sizeof(int16_t);

// Microphone driver object; it is configured in setup() through mic.begin()
// and polled for samples in loop() through mic.read()
Microphone mic;

// Create an instance of the LLM object using the defined api key, model and a max token cap of 600
// NOTE(review): the meaning of the third constructor argument is taken from this comment - confirm against the library
LLM ai(api_key, model, 600);

/**
 * @brief RIFF/WAVE header for uncompressed PCM audio
 *
 * sendToAI() copies this struct byte-for-byte in front of the recorded samples,
 * so the in-memory layout IS the file layout. Fields are emitted in the host's
 * native byte order, which matches the WAV format only on little-endian targets.
 */
struct WavHeader {
  char riff[4] = {'R', 'I', 'F', 'F'};                          // RIFF container tag
  uint32_t chunkSize = 36;                                      // 36 + data bytes, filled in before sending
  char wave[4] = {'W', 'A', 'V', 'E'};                          // WAVE format tag
  char fmt[4] = {'f', 'm', 't', ' '};                           // Format sub-chunk tag
  uint32_t fmtChunkSize = 16;                                   // Size of the format fields that follow
  uint16_t audioFormat = 1;                                     // PCM
  uint16_t numChannels = CHANNELS;
  uint32_t sampleRate = SAMPLE_RATE;
  uint32_t byteRate = SAMPLE_RATE * CHANNELS * (BIT_DEPTH / 8); // Bytes per second of audio
  uint16_t blockAlign = CHANNELS * (BIT_DEPTH / 8);             // Bytes per sample frame
  uint16_t bitsPerSample = BIT_DEPTH;
  char data[4] = {'d', 'a', 't', 'a'};                          // Data sub-chunk tag
  uint32_t dataChunkSize = 0;                                   // PCM payload bytes, filled in before sending
};

// The header is written with a raw memcpy, so compiler-inserted padding would corrupt the WAV file
static_assert(sizeof(WavHeader) == 44, "WavHeader must be exactly 44 bytes with no padding");

// Header instance whose size fields are updated for each recording
WavHeader wavHeader;

// Recording storage: raw PCM bytes live in a heap buffer that setup() allocates,
// and audioDataLength counts how many of those bytes are currently filled
uint8_t *audioBuffer{nullptr};
size_t audioDataLength{0};

// Recording status: whether a capture is in progress and the millis() timestamp at which it began
bool isRecording{false};
unsigned long recordStart{0};

/**
 * @brief Connect to WiFi, then initialize the button, the microphone and the audio buffer
 *
 * Blocks until the WiFi connection is established. If the audio buffer cannot
 * be allocated, an error is printed and the sketch halts inside this function.
 */
void setup() {
  Serial.begin(115200);
  delay(1000);  // Brief pause before the first message is printed
  Serial.println("\n=== ESP32 Voice Prompt to OpenAI ===");

  // Connect to WiFi, printing a dot every second until the link is up
  Serial.print("Connecting to WiFi");
  WiFi.begin(ssid, password);
  while (WiFi.status() != WL_CONNECTED) {
    delay(1000);
    Serial.print(".");
  }
  Serial.println("\nWiFi Connected!");

  // Configure button pin: with the internal pull-up the input reads LOW while pressed
  pinMode(BUTTON_PIN, INPUT_PULLUP);

  // Initialize microphone
  // NOTE(review): the last argument (512) is presumably a buffer/chunk size - confirm against the library
  Serial.println("Initializing microphone...");
  mic.begin(PDM_DIN, PDM_CLK, SAMPLE_RATE, BIT_DEPTH, 512);
  Serial.println("Mic began");
  mic.setHPF(true);
  mic.setGainDb(10.0f);
  Serial.println("Microphone ready!");

  // Allocate audio buffer
  audioBuffer = static_cast<uint8_t *>(malloc(AUDIO_BUFFER_SIZE));
  if (audioBuffer == nullptr) {
    Serial.println("ERROR: Not enough memory for audio buffer!");
    // Nothing can be recorded without the buffer, so stop here for good
    while (true) {
      delay(1000);
    }
  }

  // Print the pin from the constant so the hint stays correct when BUTTON_PIN is changed
  Serial.print("Hold button on pin ");
  Serial.print(BUTTON_PIN);
  Serial.println(" to record a voice prompt...");
}

/**
 * @brief Wrap the recorded PCM samples in a WAV container and send them to the LLM
 *
 * Builds a temporary buffer holding the WAV header followed by the first
 * audioDataLength bytes of audioBuffer, passes it to ai.askAudio() and prints
 * the returned text. Returns early, without contacting the AI, when nothing was
 * recorded or when the temporary buffer cannot be allocated.
 */
void sendToAI() {
  // A header-only WAV carries no question, so do not spend an API request on it
  if (audioDataLength == 0) {
    Serial.println("No audio recorded, nothing to send.");
    return;
  }

  Serial.println("Preparing WAV buffer for transmission...");

  // Create final WAV in memory: header + PCM samples
  const size_t totalSize = sizeof(WavHeader) + audioDataLength;
  uint8_t *wavData = static_cast<uint8_t *>(malloc(totalSize));
  if (wavData == nullptr) {
    Serial.println("ERROR: Memory allocation failed!");
    return;
  }

  // Update header sizes: the data chunk holds the samples, the outer chunk
  // additionally covers the 36 header bytes that follow the chunkSize field
  wavHeader.dataChunkSize = static_cast<uint32_t>(audioDataLength);
  wavHeader.chunkSize = static_cast<uint32_t>(36 + audioDataLength);

  // Copy header and audio data
  memcpy(wavData, &wavHeader, sizeof(WavHeader));
  memcpy(wavData + sizeof(WavHeader), audioBuffer, audioDataLength);

  // %u expects an unsigned int, so convert the size_t explicitly
  Serial.printf("Sending %u bytes of WAV data to AI...\n", static_cast<unsigned int>(totalSize));

  // Ask the AI
  String response = ai.askAudio(wavData, totalSize, "wav", "Answer the question asked in this recording");
  Serial.println("\n=== AI Response ===");
  Serial.println(response);
  Serial.println("===================");

  free(wavData);
}


/**
 * @brief End the current recording and send the captured audio to the AI
 *
 * @param message  Status line printed before the upload starts
 */
static void stopRecordingAndSend(const char *message) {
  Serial.println(message);
  isRecording = false;
  sendToAI();
}

/**
 * @brief Poll the button, capture audio while recording, and send each recording exactly once
 *
 * A press (HIGH -> LOW) starts a fresh recording. The recording ends, and is
 * sent to the AI, on whichever happens first: the button is released, the audio
 * buffer cannot take another chunk, or MAX_DURATION_MS has elapsed.
 */
void loop() {
  static bool lastButtonState = HIGH;
  const bool buttonState = digitalRead(BUTTON_PIN);

  if (buttonState == LOW && lastButtonState == HIGH) {
    // Button pressed: start recording
    Serial.println("Recording started...");
    isRecording = true;
    recordStart = millis();
    audioDataLength = 0;
  } else if (buttonState == HIGH && lastButtonState == LOW && isRecording) {
    // Button released: stop recording. The isRecording check keeps a release that
    // follows an automatic stop from sending the same audio a second time.
    stopRecordingAndSend("Recording stopped.");
  }

  lastButtonState = buttonState;

  // Capture audio if recording
  if (isRecording) {
    int16_t samples[512];
    const size_t count = mic.read(samples, 512);

    bool bufferFull = false;
    if (count > 0) {
      const size_t bytesToWrite = count * sizeof(int16_t);
      // <= so that the chunk which exactly fills the buffer is still stored
      if (audioDataLength + bytesToWrite <= AUDIO_BUFFER_SIZE) {
        memcpy(audioBuffer + audioDataLength, samples, bytesToWrite);
        audioDataLength += bytesToWrite;
      } else {
        bufferFull = true;
      }
    }

    // Exactly one stop condition may fire per pass: sendToAI() blocks long enough
    // that the duration limit would otherwise also trigger right after a
    // buffer-full upload and send the recording twice.
    if (bufferFull) {
      stopRecordingAndSend("Buffer full, stopping recording!");
    } else if (millis() - recordStart >= static_cast<unsigned long>(MAX_DURATION_MS)) {
      stopRecordingAndSend("Max duration reached!");
    }
  }

  delay(5);
}