import stringSimilarity from 'string-similarity';

/**
 * Creates a fresh, empty comparison-details object.
 *
 * A new object is built on every call so that no two results ever share the
 * same arrays.
 *
 * @returns {{matchPercentage: number, confidenceScore: number, matches: string[],
 *   mismatches: string[], additions: string[], deletions: string[]}}
 */
function createEmptyDetails() {
  return {
    matchPercentage: 0,
    confidenceScore: 0,
    matches: [],
    mismatches: [],
    additions: [],
    deletions: []
  };
}

/**
 * Splits already-normalized text into words.
 *
 * @param {string} normalizedText - Output of TextSimilarity#normalizeText.
 * @returns {string[]} The words, or an empty array for empty text.
 */
function splitIntoWords(normalizedText) {
  // ''.split(' ') yields [''], which would be reported as a phantom empty word.
  return normalizedText === '' ? [] : normalizedText.split(' ');
}

/**
 * Detects near-duplicate transcripts coming from different sources within a
 * short time window, using ratings from the `string-similarity` package.
 */
class TextSimilarity {
  /**
   * @param {object} [options] - Overrides for the defaults below.
   * @param {number} [options.minSimilarityThreshold=0.85] - Minimum rating a
   *   buffered transcript must reach to be reported as a duplicate.
   * @param {number} [options.timeWindowMs=1000] - Maximum timestamp distance
   *   between two transcripts for them to be compared; also the age after
   *   which buffered entries are discarded.
   * @param {number} [options.processingDelayMs=200] - Delay awaited at the
   *   start of every analyzeTranscript call.
   */
  constructor(options = {}) {
    this.options = {
      minSimilarityThreshold: 0.85,
      timeWindowMs: 1000,
      processingDelayMs: 200,
      ...options
    };

    this.transcriptBuffer = [];
    this.MAX_BUFFER_SIZE = 50;
  }

  /**
   * Lower-cases the text, strips the punctuation characters . , ! ? ; : ' " ( ) [ ],
   * collapses whitespace runs to a single space and trims both ends.
   *
   * @param {string} text - Raw transcript text.
   * @returns {string} The normalized text (may be empty).
   */
  normalizeText(text) {
    return text
      .toLowerCase()
      .replace(/[.,!?;:'"()\[\]]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Appends a transcript to the buffer, then trims the buffer by size and age.
   *
   * @param {string} text - Raw transcript text.
   * @param {*} source - Identifier of the transcript's origin; only compared
   *   with `!==` against other sources.
   * @param {number} timestamp - Time of the transcript in milliseconds.
   */
  addToBuffer(text, source, timestamp) {
    this.transcriptBuffer.push({
      text: this.normalizeText(text),
      originalText: text,
      source,
      timestamp
    });

    // Size trimming runs before age trimming; swapping the two could retain a
    // different set of entries, so the order is intentional.
    const overflow = this.transcriptBuffer.length - this.MAX_BUFFER_SIZE;
    if (overflow > 0) {
      this.transcriptBuffer.splice(0, overflow);
    }

    // Drop everything older than the time window, measured from the new entry.
    const oldestAllowed = timestamp - this.options.timeWindowMs;
    this.transcriptBuffer = this.transcriptBuffer.filter(
      entry => entry.timestamp >= oldestAllowed
    );
  }

  /**
   * Compares a transcript against buffered transcripts from other sources and
   * then stores it in the buffer.
   *
   * Any error raised during analysis is logged and an empty, non-duplicate
   * result is returned instead of rejecting.
   *
   * @param {string} text - Raw transcript text.
   * @param {*} source - Identifier of the transcript's origin.
   * @param {number} [timestamp=Date.now()] - Time of the transcript in ms.
   * @returns {Promise<{isDuplicate: boolean, bestMatch: (object|null),
   *   details: object, allMatches: object[]}>} `details.matches` and
   *   `details.deletions` list words of `text` that are / are not present in
   *   the matched transcript; `details.additions` lists words only present in
   *   the matched transcript. `details.mismatches` is never populated by this
   *   implementation and is kept for result-shape compatibility.
   */
  async analyzeTranscript(text, source, timestamp = Date.now()) {
    try {
      await new Promise(resolve => setTimeout(resolve, this.options.processingDelayMs));

      const normalizedText = this.normalizeText(text);

      // Only transcripts from *other* sources inside the time window qualify.
      const candidates = this.transcriptBuffer.filter(entry =>
        entry.source !== source &&
        Math.abs(timestamp - entry.timestamp) <= this.options.timeWindowMs
      );

      let bestMatch = null;
      if (candidates.length > 0) {
        const comparison = stringSimilarity.findBestMatch(
          normalizedText,
          candidates.map(entry => entry.text)
        );

        if (comparison.bestMatch.rating >= this.options.minSimilarityThreshold) {
          const matched = candidates[comparison.bestMatchIndex];
          bestMatch = {
            text: matched.originalText,
            normalizedText: matched.text,
            similarity: comparison.bestMatch.rating,
            timeDiff: Math.abs(timestamp - matched.timestamp),
            source: matched.source
          };
        }
      }

      // The current transcript is stored only after the comparison so it can
      // never match itself.
      this.addToBuffer(text, source, timestamp);

      const details = createEmptyDetails();
      if (bestMatch) {
        details.matchPercentage = bestMatch.similarity;
        details.confidenceScore = bestMatch.similarity;

        const sourceWords = splitIntoWords(normalizedText);
        const targetWords = splitIntoWords(bestMatch.normalizedText);
        // Sets give constant-time membership checks; output order and
        // repeated words are still driven by the original word arrays.
        const sourceVocabulary = new Set(sourceWords);
        const targetVocabulary = new Set(targetWords);

        for (const word of sourceWords) {
          if (targetVocabulary.has(word)) {
            details.matches.push(word);
          } else {
            details.deletions.push(word);
          }
        }

        for (const word of targetWords) {
          if (!sourceVocabulary.has(word)) {
            details.additions.push(word);
          }
        }
      }

      return {
        isDuplicate: bestMatch !== null,
        bestMatch,
        details,
        // At most one match is produced per call; it is the same object as bestMatch.
        allMatches: bestMatch ? [bestMatch] : []
      };

    } catch (error) {
      console.error('Error in analyzeTranscript:', error);
      return {
        isDuplicate: false,
        bestMatch: null,
        details: createEmptyDetails(),
        allMatches: []
      };
    }
  }
}

export default TextSimilarity;