import { Commands } from "../components/Editor/IEditor";
import { abbreviations } from "../components/Editor/constants";
import { IV3RecievedToken, IV3RecievedTranscript } from "../components/Libraries/ILibraries";

/** Running token count that must be exceeded (exclusive bound) before a punctuation token may be followed by a paragraph break. */
const LOWER_WORD_LIMIT = 200;
/** Running token count at or above which the punctuation rule no longer applies and a break depends on the gap to the next token instead. */
const UPPER_WORD_LIMIT = 300;
/**
 * Threshold for the gap between the end of the current token and the start of the next one;
 * gaps below it do not trigger a paragraph break once UPPER_WORD_LIMIT is reached.
 * NOTE(review): the name implies seconds, while the token offsets it is compared with are
 * named in milliseconds — confirm the unit handling at the comparison site.
 */
const SECOND_LIMIT = 2;

/**
 * Tells whether `text` is made up exclusively of ASCII digits (at least one).
 *
 * @param text - Token text to inspect.
 * @returns `true` for strings such as "42"; `false` for empty or mixed strings.
 */
const isNumber = (text: string): boolean => {
    const digitsOnly = /^[0-9]+$/;
    return digitsOnly.test(text);
};
/**
 * Tells whether `text` contains a sentence-ending mark (`.`, `?` or `!`)
 * anywhere in the string; the match is not anchored to the whole text.
 *
 * @param text - Token text to inspect.
 * @returns `true` when at least one of the three characters is present.
 */
const isPunctuation = (text: string): boolean => {
    const sentenceEndMark = /[.?!]/;
    return sentenceEndMark.test(text);
};
/**
 * Tells whether `text` is one of the entries of the imported `abbreviations` list
 * (exact match against a list entry).
 *
 * @param text - Token text to look up.
 * @returns `true` when the list includes `text`.
 */
const isAbbreviation = (text: string): boolean => {
    const isKnown = abbreviations.includes(text);
    return isKnown;
};


/**
 * Result shape produced by `paragraphTokens`.
 */
interface ITokenParagrapher {
    /** Transcript carrying the output token list, including any inserted paragraph-break tokens. */
    newTranscripts: IV3RecievedTranscript;
    /** Value of the running token counter when processing finished. */
    tokenCount: number;
}

/**
 * Builds a zero-duration `Commands.NEW_PARAGRAPH` token positioned at the end of `after`
 * and carrying the same speaker code.
 */
const buildParagraphBreakToken = (after: IV3RecievedToken): IV3RecievedToken => ({
    isLeftHanded: false,
    isRightHanded: false,
    startOffsetMs: after.endOffsetMs,
    endOffsetMs: after.endOffsetMs,
    speakerCode: after.speakerCode,
    text: Commands.NEW_PARAGRAPH,
});

/**
 * Merges the tokens of all given transcripts into a single transcript and inserts
 * `Commands.NEW_PARAGRAPH` tokens to split long runs of speech into paragraphs.
 *
 * Rules applied while walking the merged tokens:
 * - The running count restarts whenever the speaker code changes.
 * - While the count is strictly between LOWER_WORD_LIMIT and UPPER_WORD_LIMIT, a break is
 *   inserted after a punctuation token unless the preceding token is a number or an abbreviation.
 * - Once the count reaches UPPER_WORD_LIMIT, a break is inserted after the current token only
 *   when the gap to the next token is at least SECOND_LIMIT seconds.
 *
 * The input transcripts and their tokens are not modified; the returned tokens are copies
 * whose offsets have been shifted by their transcript's `startTimeMs`.
 *
 * @param transcripts - Transcripts to merge, in order.
 * @param outerTokenCount - Running token count to start from (defaults to 0).
 * @param recorderMiliseconds - Used as `endTimeMs` of the empty result when `transcripts` is empty.
 * @returns The merged transcript with paragraph breaks and the final running token count.
 */
const paragraphTokens = (transcripts: IV3RecievedTranscript[], outerTokenCount?: number, recorderMiliseconds?: number): ITokenParagrapher => {
    if (transcripts.length === 0) {
        // `recorderMiliseconds` may be undefined here, hence the assertion on the result.
        return {
            newTranscripts: {
                isFinal: true,
                startTimeMs: 0,
                endTimeMs: recorderMiliseconds,
                tokens: []
            },
            tokenCount: 0
        } as ITokenParagrapher;
    }

    // Copy each token while shifting its offsets by the owning transcript's start time.
    // Copying (instead of mutating in place) keeps repeated calls on the same input
    // from shifting the offsets again and again.
    const groupedTokens: IV3RecievedToken[] = [];
    for (const transcript of transcripts) {
        const { startTimeMs } = transcript;
        for (const token of transcript.tokens) {
            groupedTokens.push({
                ...token,
                startOffsetMs: token.startOffsetMs + startTimeMs,
                endOffsetMs: token.endOffsetMs + startTimeMs,
            });
        }
    }

    // SECOND_LIMIT is expressed in seconds, token offsets in milliseconds.
    const minPauseMs = SECOND_LIMIT * 1000;

    const outputTokens: IV3RecievedToken[] = [];
    let tokenCount = outerTokenCount ? outerTokenCount : 0;
    // NOTE(review): the tracked speaker starts as undefined, so a first token that carries a
    // speaker code resets the count and `outerTokenCount` only survives when the first token
    // has no speaker code — confirm this is intended.
    let currentSpeakerCode: string | undefined = undefined;

    for (let tokenIndex = 0; tokenIndex < groupedTokens.length; tokenIndex++) {
        const currentToken = groupedTokens[tokenIndex];

        if (currentToken.speakerCode !== currentSpeakerCode) {
            tokenCount = 0;
            currentSpeakerCode = currentToken.speakerCode;
        }

        tokenCount++;
        outputTokens.push(currentToken);

        if (LOWER_WORD_LIMIT < tokenCount && tokenCount < UPPER_WORD_LIMIT) {
            // Inside the window: break only at a sentence end.
            if (tokenIndex === 0) continue;
            const previousToken = groupedTokens[tokenIndex - 1];

            // Punctuation right after a number or an abbreviation is not treated as a sentence end.
            if (!isPunctuation(currentToken.text) || isNumber(previousToken.text) || isAbbreviation(previousToken.text)) continue;

            outputTokens.push(buildParagraphBreakToken(currentToken));
            tokenCount = 0;
        } else if (tokenCount >= UPPER_WORD_LIMIT) {
            // Past the window: break only at a sufficiently long pause before the next token.
            if (tokenIndex === groupedTokens.length - 1) continue;

            const nextToken = groupedTokens[tokenIndex + 1];

            if (nextToken.startOffsetMs - currentToken.endOffsetMs < minPauseMs) continue;

            outputTokens.push(buildParagraphBreakToken(currentToken));
            tokenCount = 0;
        }
    }

    const newTranscripts: IV3RecievedTranscript = {
        isFinal: true,
        startTimeMs: transcripts[0].startTimeMs,
        endTimeMs: transcripts[transcripts.length - 1].endTimeMs,
        tokens: outputTokens
    };

    return {
        newTranscripts,
        tokenCount,
    };
}

export {
    paragraphTokens
}