import React, {
  useState,
  useEffect,
  useCallback,
  useRef,
  forwardRef,
  useImperativeHandle,
} from "react";
import { createModel } from "vosk-browser";
import AudioCaptureService from "./AudioCaptureService";
import AudioAnalysis from "./AudioAnalysis";
import TextSimilarity from "./TextSimilarity";
import TranscriptLogger from "./TranscriptLogging";
import { MicrophoneIcon, StopCircleIcon } from "@heroicons/react/24/outline";

const TranscriptionService = forwardRef(
  (
    {
      setupCompleted = false,
      onTranscript,
      onStateChange,
      audioSource = "tab-audio",
      label = "Audio Transcription",
      otherSourceActive = false,
      textSimilarityInstance = null,
    },
    ref
  ) => {
    // Set to true by startListening and false by handleStop; the value itself
    // is not read anywhere in this component's visible code.
    const [isListening, setIsListening] = useState(false);
    // Message shown in the dismissible error banner (null = no banner).
    const [error, setError] = useState(null);

    // Long-lived handles for the recognition pipeline. They live in refs so
    // that audio callbacks can reach them without triggering re-renders.
    const voskModelRef = useRef(null); // loaded Vosk model (created on demand)
    const audioServiceRef = useRef(null); // AudioCaptureService for the session
    const audioContextRef = useRef(null); // Web Audio context
    const scriptProcessorRef = useRef(null); // ScriptProcessorNode feeding Vosk
    const recognizerRef = useRef(null); // current KaldiRecognizer
    const sourceNodeRef = useRef(null); // MediaStreamAudioSourceNode
    const isStoppingRef = useRef(false); // true only while handleStop is running

    // Audio analysis refs
    const audioAnalyserRef = useRef(null); // basic AnalyserNode in the chain
    const lastVolumeRef = useRef(0);
    const volumeThresholdRef = useRef(0);
    const activeSpeechSegmentRef = useRef(false); // gates microphone delivery
    const silenceTimeoutRef = useRef(null); // timer that ends a speech segment
    const hasPartialResultRef = useRef(false); // a partial arrived since last final
    const audioAnalysisRef = useRef(null); // AudioAnalysis helper instance
    const textSimilarityRef = useRef(textSimilarityInstance);

    // Per-source transcript loggers. Initialised lazily: passing the object
    // straight to useRef() would construct two new TranscriptLogger instances
    // on every render and discard them after the first.
    const loggerRef = useRef(null);
    if (loggerRef.current === null) {
      loggerRef.current = {
        tab: new TranscriptLogger("[Transcription-tab-audio]"),
        mic: new TranscriptLogger("[Transcription-microphone]"),
      };
    }

    // Track the most recent non-empty TextSimilarity instance passed as a prop.
    // A falsy prop leaves the previously stored instance in place.
    useEffect(() => {
      if (!textSimilarityInstance) return;
      textSimilarityRef.current = textSimilarityInstance;
    }, [textSimilarityInstance]);

    // On mount, report (via console.error, without throwing) when no
    // TextSimilarity instance is available.
    useEffect(() => {
      const hasInstance = Boolean(textSimilarityRef.current);
      if (hasInstance) return;

      console.error(
        "TextSimilarity instance not provided to TranscriptionService"
      );
    }, []);

    // console.log wrapper that prefixes every message with the audio source.
    const log = useCallback(
      (message, ...args) => {
        const prefixed = `[Transcription-${audioSource}] ${message}`;
        console.log(prefixed, ...args);
      },
      [audioSource]
    );

    // Latest value of the `otherSourceActive` prop, readable from recognizer
    // event handlers. Those handlers are registered once per recognizer and
    // outlive the render that created them, so reading the prop directly from
    // the closure would keep using whatever value it had at that time.
    const otherSourceActiveRef = useRef(otherSourceActive);
    useEffect(() => {
      otherSourceActiveRef.current = otherSourceActive;
    }, [otherSourceActive]);

    /**
     * (Re)creates the Vosk recognizer and wires its event handlers.
     *
     * Loads the model from "/model.tar.gz" on first use, removes any existing
     * recognizer, and registers:
     *  - "result": labels the final text with the source, runs similarity and
     *    audio analysis, decides whether to deliver it through onTranscript,
     *    and records the outcome in the per-source logger history.
     *  - "partialresult": marks that speech is in progress and keeps the
     *    speech segment active.
     *
     * @returns {Promise<void>} Rejects if the model or recognizer cannot be created.
     */
    const createRecognizer = useCallback(async () => {
      if (!voskModelRef.current) {
        log("Loading Vosk model...");
        voskModelRef.current = await createModel("/model.tar.gz");
      }

      if (recognizerRef.current) {
        recognizerRef.current.remove();
      }

      recognizerRef.current = new voskModelRef.current.KaldiRecognizer(16000);

      recognizerRef.current.on("result", async ({ result }) => {
        if (isStoppingRef.current || !result.text || !result.text.trim()) {
          return;
        }

        const isTabAudio = audioSource === "tab-audio";
        const logger = loggerRef.current[isTabAudio ? "tab" : "mic"];
        log("Got result event:", result);
        log("Active speech segment status:", activeSpeechSegmentRef.current);

        // Build the labeled text before the try block so the error fallback
        // below delivers the same labeled form as the normal path.
        const speakerLabel = isTabAudio ? "[TAB AUDIO]: " : "[MICROPHONE]: ";
        const upperText = result.text.toUpperCase();
        const hasLabel =
          upperText.startsWith("[TAB AUDIO]:") ||
          upperText.startsWith("[MICROPHONE]:");
        const labeledText = hasLabel
          ? result.text
          : `${speakerLabel}${result.text}`;

        try {
          // Get the latest audio analysis results
          const audioAnalysis =
            audioAnalysisRef.current?.analyzeCharacteristics();
          log("Audio analysis:", audioAnalysis);

          // Add the current transcript to history before checking similarity
          logger.addToHistory({ text: labeledText });

          // Analyze for text similarity (async)
          const similarityAnalysis =
            await textSimilarityRef.current.analyzeTranscript(
              labeledText,
              audioSource,
              Date.now()
            );

          // Read the prop mirror after the await so the decision uses the
          // value current at decision time.
          const isOtherSourceActive = otherSourceActiveRef.current;

          // Get complete analysis log
          const analysisLog = logger.logAnalysis(
            result,
            audioAnalysis,
            similarityAnalysis
          );

          // Decision-making logic using all available data
          let shouldDeliverTranscript = true;
          let skippedReason = null;

          // Duplicate detection using similarityAnalysis
          if (similarityAnalysis.isDuplicate) {
            const { bestMatch, details } = similarityAnalysis;
            log("Detected potential duplicate:", {
              similarity: bestMatch.similarity,
              timeDiff: bestMatch.timeDiff,
              source: bestMatch.source,
              details,
            });

            const isHighSimilarity = bestMatch.similarity > 0.9;
            const isCloseInTime = Math.abs(bestMatch.timeDiff) < 2000; // 2 seconds
            const isDuplicateFromOtherSource = bestMatch.source !== audioSource;

            // Only microphone transcripts are suppressed as duplicates, and
            // only while the other source is active.
            if (audioSource === "microphone" && isOtherSourceActive) {
              if (details) {
                const { confidenceScore, matchedWords, mismatchedWords } =
                  details;

                // Strong duplicate indicators
                const isHighConfidence = confidenceScore > 0.8;
                const isHighWordMatch =
                  matchedWords > (matchedWords + mismatchedWords) * 0.85;

                if (
                  (isHighConfidence &&
                    isCloseInTime &&
                    isDuplicateFromOtherSource) ||
                  (isHighWordMatch && mismatchedWords < 3)
                ) {
                  shouldDeliverTranscript = false;
                  skippedReason = `High confidence duplicate (${(
                    confidenceScore * 100
                  ).toFixed(1)}% confidence, ${matchedWords} matched words)`;
                }
              } else if (
                isHighSimilarity &&
                isCloseInTime &&
                isDuplicateFromOtherSource
              ) {
                shouldDeliverTranscript = false;
                skippedReason = "High similarity duplicate from tab audio";
              }
            }
          }

          // Audio characteristics analysis (microphone only)
          if (
            shouldDeliverTranscript &&
            audioAnalysis &&
            audioSource === "microphone"
          ) {
            const { characteristics } = audioAnalysis;

            if (analysisLog.audioAnalysis) {
              const { confidence, isLeakedAudio } = analysisLog.audioAnalysis;

              // Strong leaked audio indicators
              if (isLeakedAudio && confidence > 0.85 && isOtherSourceActive) {
                shouldDeliverTranscript = false;
                skippedReason = `High confidence leaked audio (${(
                  confidence * 100
                ).toFixed(1)}%)`;
              }
            }

            // Consistent non-direct speech detection
            if (
              characteristics.isLeakedAudio &&
              characteristics.confidence > 0.85
            ) {
              const confidenceScore =
                audioAnalysisRef.current.getConfidenceScore();
              if (confidenceScore > 0.8) {
                shouldDeliverTranscript = false;
                skippedReason = `Consistent leaked audio detection (${(
                  confidenceScore * 100
                ).toFixed(1)}% confidence)`;
              }
            }
          }

          // Clear partial result flag since we got a final result
          hasPartialResultRef.current = false;

          if (shouldDeliverTranscript) {
            // Tab audio is always delivered; microphone audio only while a
            // speech segment is active.
            if (isTabAudio || activeSpeechSegmentRef.current) {
              onTranscript(labeledText);
              log("Delivered transcript:", labeledText, {
                analysis: analysisLog,
                audioSource,
                activeSpeechSegment: activeSpeechSegmentRef.current,
              });

              // Recreate recognizer after final result
              createRecognizer().catch((err) => {
                console.error("Error recreating recognizer:", err);
                log("Error recreating recognizer:", err);
              });
              log("Recreated recognizer after final result");

              // Always keep the speech segment active for a bit after a final result
              if (silenceTimeoutRef.current) {
                clearTimeout(silenceTimeoutRef.current);
                silenceTimeoutRef.current = null;
              }

              // Set a new timeout to potentially end the segment
              silenceTimeoutRef.current = setTimeout(() => {
                if (!hasPartialResultRef.current) {
                  activeSpeechSegmentRef.current = false;
                  log("Reset speech segment after final result timeout", {
                    finalAnalysis: analysisLog,
                  });
                }
                silenceTimeoutRef.current = null;
              }, 1000); // Give more time after a final result
            } else {
              log("Skipped transcript delivery - no active speech segment", {
                analysis: analysisLog,
                audioSource,
                activeSpeechSegment: activeSpeechSegmentRef.current,
              });
            }
          } else {
            log("Skipped transcript delivery", {
              reason: skippedReason,
              analysis: analysisLog,
              audioSource,
              activeSpeechSegment: activeSpeechSegmentRef.current,
            });
          }

          // Keep analysis history updated with the decision details
          logger.addToHistory({
            text: labeledText,
            analysis: {
              ...analysisLog,
              similarityDetails: similarityAnalysis.details,
              confidenceMetrics: similarityAnalysis.details?.confidenceScore,
            },
            delivered: shouldDeliverTranscript,
            skippedReason,
            timestamp: Date.now(),
          });
        } catch (error) {
          console.error("Error in result processing:", error);
          log("Result processing error:", error);

          // Fallback: if analysis fails, prefer delivering the transcript,
          // in the same labeled form the normal path uses.
          if (isTabAudio || activeSpeechSegmentRef.current) {
            onTranscript(labeledText);
            log("Delivered transcript (after processing error):", labeledText);

            // Still try to recreate recognizer
            createRecognizer().catch((err) => {
              console.error("Error recreating recognizer after error:", err);
              log("Error recreating recognizer after error:", err);
            });
          }
        }
      });

      recognizerRef.current.on("partialresult", ({ result }) => {
        if (!isStoppingRef.current && result.partial) {
          log("Got partial result event:", result);
          hasPartialResultRef.current = true;
          // Ensure speech segment stays active while we have partial results
          if (!activeSpeechSegmentRef.current) {
            log("Reactivating speech segment due to partial result");
            activeSpeechSegmentRef.current = true;
          }
        }
      });

      log("New recognizer created successfully");
    }, [audioSource, onTranscript, log]);

    /**
     * Ensures a recognizer exists, creating one (and loading the model) if needed.
     *
     * On failure the error banner is set and the error is re-thrown, so the
     * caller does not go on to capture audio with no recognizer to feed.
     *
     * @returns {Promise<void>}
     * @throws {Error} "Failed to load Vosk model", with the original error as `cause`.
     */
    const initializeVoskModel = useCallback(async () => {
      if (recognizerRef.current) return;

      try {
        await createRecognizer();
        log("Vosk model and recognizer loaded successfully");
      } catch (err) {
        console.error("Error loading Vosk model:", err);
        setError("Failed to load Vosk model");
        // Keep the same user-facing message so a caller that surfaces
        // err.message shows the same banner text.
        throw new Error("Failed to load Vosk model", { cause: err });
      }
    }, [createRecognizer, log]);

    // Opens or closes the speech segment based on the AudioAnalysis
    // classification of the current audio. The inputBuffer argument is accepted
    // but not used; no volume-based analysis is performed here.
    // NOTE(review): the running audio callback appears to hold the version of
    // this function from when the session started, so a later change of
    // otherSourceActive may not be seen until restart. Confirm.
    const analyzeAudioLevel = useCallback(
      (inputBuffer) => {
        const analyser = audioAnalysisRef.current;
        if (!analyser) return;

        const { characteristics } = analyser.analyzeCharacteristics();

        const isConfidentDirectSpeech =
          characteristics.isDirectSpeech && characteristics.confidence > 0.7;

        if (isConfidentDirectSpeech) {
          // Direct speech: open the segment and cancel any pending close.
          activeSpeechSegmentRef.current = true;
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
          return;
        }

        // Leaked audio only closes the segment while the other source is active.
        if (characteristics.isLeakedAudio && otherSourceActive) {
          activeSpeechSegmentRef.current = false;
        }
      },
      [audioSource, otherSourceActive, log]
    );

    /**
     * Handles one audio chunk: updates speech-segment state, then feeds the
     * first channel's samples to the recognizer.
     *
     * Does nothing when there is no recognizer or a stop is in progress. If the
     * recognizer rejects the chunk, a fresh recognizer is created.
     *
     * @param {AudioBuffer} inputBuffer Chunk from the script processor's audioprocess event.
     */
    const processAudioData = useCallback(
      (inputBuffer) => {
        if (!recognizerRef.current || isStoppingRef.current) {
          return;
        }

        try {
          // Always analyze audio levels
          analyzeAudioLevel(inputBuffer);

          // Hand the recognizer its own copy of the samples rather than the
          // array backing the event's buffer. slice() replaces the previous
          // per-chunk AudioBuffer allocation and element-by-element loop.
          // NOTE(review): the copy is presumably needed because the recognizer
          // may transfer the array to a worker. Confirm against vosk-browser.
          const samples = inputBuffer.getChannelData(0).slice();

          try {
            recognizerRef.current.acceptWaveformFloat(
              samples,
              inputBuffer.sampleRate
            );
            log("Audio chunk processed");
          } catch (e) {
            log("Error processing chunk:", e);
            // If we get an error processing audio, recreate the recognizer
            createRecognizer().catch((err) => {
              console.error(
                "Error recreating recognizer after chunk error:",
                err
              );
            });
          }
        } catch (error) {
          console.error("Error processing audio:", error);
          log("Audio processing error:", error);
        }
      },
      [log, analyzeAudioLevel, createRecognizer]
    );

    /**
     * Stops transcription and releases every resource of the current session:
     * audio nodes, capture service, recognizer, audio context, audio analysis
     * and pending timers. Then reports the stopped state via onStateChange.
     *
     * Synchronous and re-entrancy safe: a call made while another stop is
     * running returns immediately. Errors are logged and shown in the banner
     * rather than thrown.
     */
    const handleStop = useCallback(() => {
      if (isStoppingRef.current) return;

      try {
        isStoppingRef.current = true;
        log("Stopping transcription...");

        // Clean up audio nodes
        if (sourceNodeRef.current) {
          sourceNodeRef.current.disconnect();
          sourceNodeRef.current = null;
        }

        if (scriptProcessorRef.current) {
          scriptProcessorRef.current.onaudioprocess = null;
          scriptProcessorRef.current.disconnect();
          scriptProcessorRef.current = null;
        }

        if (audioAnalyserRef.current) {
          audioAnalyserRef.current.disconnect();
          audioAnalyserRef.current = null;
        }

        // Clean up audio service
        if (audioServiceRef.current) {
          audioServiceRef.current.stopCapture();
          audioServiceRef.current = null;
        }

        // Clean up recognizer
        if (recognizerRef.current) {
          recognizerRef.current.remove();
          recognizerRef.current = null;
        }

        // Clean up audio context. Detach the ref synchronously, before the
        // asynchronous close(): otherwise an immediate restart could reuse the
        // closing context, or the late completion handler could null out a
        // context created by that restart.
        const closingContext = audioContextRef.current;
        audioContextRef.current = null;
        if (closingContext && closingContext.state !== "closed") {
          closingContext.close().catch(console.error);
        }

        // Clean up audio analysis
        if (audioAnalysisRef.current) {
          audioAnalysisRef.current.disconnect?.();
          audioAnalysisRef.current = null;
        }

        // Reset state flags
        activeSpeechSegmentRef.current = false;
        hasPartialResultRef.current = false;

        // Clear any pending timeouts
        if (silenceTimeoutRef.current) {
          clearTimeout(silenceTimeoutRef.current);
          silenceTimeoutRef.current = null;
        }

        setIsListening(false);
        onStateChange?.(false);
        log("Audio capture stopped successfully");
      } catch (err) {
        console.error("Error stopping transcription:", err);
        setError(err.message);
      } finally {
        isStoppingRef.current = false;
      }
    }, [onStateChange, log]);

    /**
     * Builds the Web Audio graph for a captured stream:
     *   source -> AudioAnalysis -> AnalyserNode -> ScriptProcessor -> destination
     * and feeds every processed chunk to processAudioData. Also resets the
     * speech-segment tracking state and stops transcription when any audio
     * track of the stream ends.
     *
     * @param {MediaStream} stream Stream passed in by AudioCaptureService.
     * @throws Re-throws any setup error after recording it in the error banner.
     */
    const setupAudioProcessing = useCallback(
      (stream) => {
        try {
          log("Setting up audio processing...");

          // Create a context when there is none, or when the stored one has
          // been closed: a closed context cannot create nodes.
          if (
            !audioContextRef.current ||
            audioContextRef.current.state === "closed"
          ) {
            audioContextRef.current = new (window.AudioContext ||
              window.webkitAudioContext)({
              sampleRate: 16000,
            });
            // Any existing analysis helper was built on the previous context,
            // so have it rebuilt below for the new one.
            audioAnalysisRef.current = null;
          }

          log("Audio context created with settings:", {
            sampleRate: audioContextRef.current.sampleRate,
            state: audioContextRef.current.state,
          });

          if (audioContextRef.current.state !== "running") {
            log("Resuming audio context...");
            // resume() returns a promise; report a rejection instead of
            // leaving it unhandled.
            audioContextRef.current.resume().catch((err) => {
              log("Error resuming audio context:", err);
            });
          }

          // Create source node from input stream
          sourceNodeRef.current =
            audioContextRef.current.createMediaStreamSource(stream);
          log("Source node created from stream");

          // Initialize audio analysis
          if (!audioAnalysisRef.current) {
            audioAnalysisRef.current = new AudioAnalysis(
              audioContextRef.current,
              {
                fftSize: 2048,
                smoothingTimeConstant: 0.8,
                minDecibels: -90,
                maxDecibels: -10,
              }
            );
          }
          log("Audio analysis initialized");

          // Create and configure basic analyser node for volume detection
          audioAnalyserRef.current = audioContextRef.current.createAnalyser();
          audioAnalyserRef.current.fftSize = 256;
          audioAnalyserRef.current.smoothingTimeConstant = 0.5;
          log("Basic analyser configured for volume detection");

          // Create script processor for audio processing
          scriptProcessorRef.current =
            audioContextRef.current.createScriptProcessor(4096, 1, 1);

          // Set up audio processing chain
          scriptProcessorRef.current.onaudioprocess = (e) => {
            try {
              // Process audio for transcription
              processAudioData(e.inputBuffer);
            } catch (error) {
              log("Error in audio processing:", error);
            }
          };

          // Connect the audio processing chain:
          // 1. Source -> AudioAnalysis
          const analysisNode = audioAnalysisRef.current.connectSource(
            sourceNodeRef.current
          );

          // 2. AudioAnalysis -> Basic Analyser (for volume)
          analysisNode.connect(audioAnalyserRef.current);

          // 3. Basic Analyser -> Script Processor
          audioAnalyserRef.current.connect(scriptProcessorRef.current);

          // 4. Script Processor -> Destination
          scriptProcessorRef.current.connect(
            audioContextRef.current.destination
          );

          log("Audio processing chain setup complete:", {
            sourceConnected: !!sourceNodeRef.current,
            analysisConnected: !!audioAnalysisRef.current,
            analyserConnected: !!audioAnalyserRef.current,
            processorConnected: !!scriptProcessorRef.current,
          });

          // Initialize characteristic tracking
          lastVolumeRef.current = 0;
          volumeThresholdRef.current = 0;
          activeSpeechSegmentRef.current = false;
          hasPartialResultRef.current = false;

          // Clear any existing timeouts
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }

          // Add stream track ended handler
          stream.getAudioTracks().forEach((track) => {
            track.onended = () => {
              log("Audio track ended, cleaning up...");
              handleStop();
            };
          });
        } catch (err) {
          console.error("Error setting up audio processing:", err);
          log("Setup error:", err);
          setError(err.message);
          throw err;
        }
      },
      [processAudioData, handleStop, log]
    );

    /**
     * Starts a transcription session: ensures the recognizer exists, then
     * starts audio capture for the configured source and reports the listening
     * state via onStateChange.
     *
     * No-op until setupCompleted is true. If a capture session is already held,
     * it is stopped first so its capture service and audio nodes are not
     * leaked when the refs are overwritten. Failures are shown in the error
     * banner and followed by a full cleanup; they are not thrown to the caller.
     *
     * @returns {Promise<void>}
     */
    const startListening = useCallback(async () => {
      if (!setupCompleted) return;

      // Starting on top of a live session would orphan the previous capture
      // service and audio graph, so release them before creating new ones.
      if (audioServiceRef.current) {
        log("Existing capture found, stopping it before starting a new one");
        handleStop();
      }

      try {
        isStoppingRef.current = false;
        log("Starting transcription...");
        setError(null);
        await initializeVoskModel();

        audioServiceRef.current = new AudioCaptureService(
          setupAudioProcessing,
          (error) => setError(error.message),
          audioSource
        );

        await audioServiceRef.current.startCapture();
        setIsListening(true);
        onStateChange?.(true);
        log(`${audioSource} capture started successfully`);
      } catch (err) {
        console.error("Error starting transcription:", err);
        setError(err.message);
        handleStop();
      }
    }, [
      setupCompleted,
      initializeVoskModel,
      setupAudioProcessing,
      handleStop,
      onStateChange,
      log,
      audioSource,
    ]);

    // Imperative API exposed to the parent through the forwarded ref:
    // start, stop, and stop-then-start.
    useImperativeHandle(
      ref,
      () => {
        const restartListening = async () => {
          await handleStop();
          await startListening();
        };

        return {
          startListening: () => startListening(),
          handleStop: () => handleStop(),
          restartListening,
        };
      },
      [startListening, handleStop]
    );

    return (
      <div className="space-y-4">
        {error && (
          <div className="p-4 mb-4 text-red-800 bg-red-50 rounded-lg flex justify-between items-center">
            <div>{error}</div>
            <button
              onClick={() => setError(null)}
              className="text-red-600 hover:text-red-800"
            >
              Dismiss
            </button>
          </div>
        )}
      </div>
    );
  }
);

export default TranscriptionService;
