import os

# Hide all GPUs from any ML library imported later (forces CPU-only execution).
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import re
import sys
import json
import email
import imaplib
import smtplib
import logging
import mimetypes
import io
import codecs
import subprocess
import tempfile
import unicodedata
from pathlib import Path
from typing import Tuple, List, Dict, Optional, Set
from email.header import decode_header
from email.message import EmailMessage
from email.utils import parseaddr
from email.mime.text import MIMEText

from dotenv import load_dotenv
from docx import Document
from docx.oxml import parse_xml

# ==============================
# Logging / environment bootstrap
# ==============================
# Log to a UTF-8 file AND stdout; .env is loaded before any config is read.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] - %(message)s",
    handlers=[logging.FileHandler("app.log", encoding="utf-8"), logging.StreamHandler(sys.stdout)],
)
load_dotenv()

# --- Optional AI / language-tool dependencies (soft imports: None if absent) ---
try:
    from openai import OpenAI
except ImportError:
    OpenAI = None  # AI review disabled when the openai package is missing

try:
    import language_tool_python
except ImportError:
    language_tool_python = None  # grammar/spell checking disabled

try:
    from spellchecker import SpellChecker
except ImportError:
    SpellChecker = None  # anglicism detection disabled

# ==============================
# .env configuration
# ==============================
# NOTE: .strip().replace('"', "") tolerates values accidentally quoted in .env.
IMAP_HOST = os.getenv("IMAP_HOST", "").strip().replace('"', "")
IMAP_USER = os.getenv("IMAP_USER", "").strip().replace('"', "")
IMAP_PASS = os.getenv("IMAP_PASS", "").strip().replace('"', "")
IMAP_LABEL = os.getenv("IMAP_LABEL", "INBOX").strip()

SMTP_HOST = os.getenv("SMTP_HOST", "").strip().replace('"', "")
SMTP_PORT = int(os.getenv("SMTP_PORT", "465"))
SMTP_USER = os.getenv("SMTP_USER", "").strip().replace('"', "")
SMTP_PASS = os.getenv("SMTP_PASS", "").strip().replace('"', "")
USE_SMTP_SSL = os.getenv("USE_SMTP_SSL", "true").lower() == "true"

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "").strip()
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini").strip()

# Discipline name used when none can be extracted from the document.
DISCIPLINA_FALLBACK = os.getenv("DISCIPLINA", "Não Identificada").strip()

# Issue budgets: exceeding any of these sends the proof back for revision.
MAX_ISSUES_TECH = int(os.getenv("MAX_ISSUES_TECH", "12"))   # technical profile: tolerant
MAX_ISSUES_LANG = int(os.getenv("MAX_ISSUES_LANG", "8"))    # language profile: stricter
MAX_FORMAT_ISSUES = int(os.getenv("MAX_FORMAT_ISSUES", "12"))

# Whether anglicisms COUNT toward the language-profile limit (else: alert only).
COUNT_ANGLICISM_IN_LANGUAGE = os.getenv("COUNT_ANGLICISM_IN_LANGUAGE", "false").lower() == "true"

# When false (default) the AI verdict can only WARN, never fail a proof.
IA_CAN_FAIL = os.getenv("IA_CAN_FAIL", "false").lower() == "true"

EMAIL_APROVADO_PARA = os.getenv("EMAIL_APROVADO_PARA", "rouget.fundora@gmail.com").strip()
EMAIL_CC_GERAL = os.getenv("EMAIL_CC_GERAL", "rouget.ruano@insutec.ao").strip()
ASSINATURA = os.getenv("ASSINATURA", "Atenciosamente,\nEquipa de Validação").strip()

# Signature image path is resolved relative to this script's directory.
SCRIPT_DIR = Path(__file__).resolve().parent
SIGN_IMAGE_PATH_FROM_ENV = os.getenv("SIGN_IMAGE_PATH", "img/assina.png").strip()
SIGN_IMAGE_PATH = str(SCRIPT_DIR / SIGN_IMAGE_PATH_FROM_ENV)
SIGN_IMAGE_WIDTH_CM = float(os.getenv("SIGN_IMAGE_WIDTH_CM", "5"))

# ==============================
# Portuguese stopwords and static technical whitelist
# ==============================
# Stopwords: excluded from the dynamic whitelist and from split-word checks.
STOPWORDS_PT = {
    "a","o","os","as","um","uma","uns","umas",
    "de","do","da","dos","das","no","na","nos","nas",
    "e","ou","mas","que","se","ao","aos","à","às",
    "por","para","com","sem","em","como","não","sim",
    "era","são","é","foi","ser","estar","vai","vem","ter"
}

# Lowercased tokens that are never counted as spelling errors.
WHITELIST_TECNICA = {
    # networking / telecom
    "osi","tcp","udp","ip","http","https","dns","dhcp","vpn","lan","wan","wlan","nat","mpls","qos","bgp","ospf",
    "router","switch","firewall","proxy","gateway","bandwidth","throughput","packet","frame",
    "lte","5g","gsm","wcdma","cdma","ofdm","qam","psk","mimo",
    # programming
    "python","java","javascript","typescript","sql","nosql","json","xml","api","rest","docker","linux","windows",
    # physics / math / engineering units and functions
    "db","dbm","hz","khz","mhz","ghz","v","a","w","va","var","ohm","omega","kpa","mpa","gpa",
    "log","ln","sen","cos","tan","cot","sec","csc","det","lim",
    # institutions / places
    "insutec","eisi","ert","luanda","angola","kz","ao",
    # ordinals
    "1ª","2ª","3ª","4ª","1º","2º","3º","4º",
}

# ==============================
# Checker initialization (best effort: any failure disables the feature)
# ==============================
LANG_TOOL_PT = None  # LanguageTool instance for pt-PT, or None when unavailable
try:
    if language_tool_python is None:
        raise RuntimeError("language_tool_python não instalado")
    LANG_TOOL_PT = language_tool_python.LanguageTool("pt-PT")
except Exception as e:
    logging.error(f"Falha LanguageTool: {e}")

ENGLISH_CHECKER = None  # English SpellChecker used to detect anglicisms, or None
if SpellChecker:
    try:
        ENGLISH_CHECKER = SpellChecker(language="en")
    except Exception:
        ENGLISH_CHECKER = None

# ==============================
# Utilitários
# ==============================
def clean_header(raw_header: Optional[str]) -> str:
    """Decode a possibly RFC 2047-encoded email header into a clean string.

    Joins all decoded fragments, falls back to cp1252 for unknown charsets,
    strips embedded CR/LF, and returns "" for a missing header. On any
    decoding failure the raw header is returned as-is.
    """
    if not raw_header:
        return ""
    try:
        pieces: List[str] = []
        for fragment, charset in decode_header(raw_header):
            if isinstance(fragment, bytes):
                try:
                    pieces.append(fragment.decode(charset if charset else "utf-8", errors="ignore"))
                except LookupError:
                    # Unknown charset name: cp1252 is a forgiving fallback.
                    pieces.append(fragment.decode("cp1252", errors="ignore"))
            else:
                pieces.append(str(fragment))
        return "".join(pieces).replace("\n", "").replace("\r", "").strip()
    except Exception:
        return str(raw_header)

def verificar_assunto_inteligente(assunto_email: str) -> bool:
    """Return True when the subject mentions 'enunciado(a)(s)', accent-insensitively."""
    if not assunto_email:
        return False
    # Strip accents via NFKD decomposition, then match in lowercase ASCII.
    ascii_subject = (
        unicodedata.normalize("NFKD", assunto_email)
        .encode("ASCII", "ignore")
        .decode("utf-8")
        .lower()
    )
    return re.search(r"enunciad[oa]s?", ascii_subject) is not None

def converter_doc_para_docx(doc_bytes: bytes) -> Optional[bytes]:
    """Convert legacy .doc bytes to .docx using a headless LibreOffice run.

    Requires the `libreoffice` binary on PATH. Returns the converted bytes,
    or None when the conversion fails or produces no output file.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir)
        input_path = temp_dir_path / "temp_input.doc"
        input_path.write_bytes(doc_bytes)
        try:
            # LibreOffice names the output after the input stem: temp_input.docx.
            cmd = ["libreoffice", "--headless", "--convert-to", "docx", str(input_path), "--outdir", str(temp_dir_path)]
            subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            output_path = temp_dir_path / "temp_input.docx"
            return output_path.read_bytes() if output_path.exists() else None
        except Exception as e:
            logging.error(f"Erro conversão .doc: {e}")
            return None

def extract_full_docx_text(bytes_content: bytes) -> str:
    """Extract plain text from a DOCX, collapsing runs of blank paragraphs.

    The output always starts with a "--- BODY ---" marker line; NBSPs become
    spaces, horizontal whitespace is collapsed, and trailing blank lines are
    dropped. Returns "" on any extraction error.
    """
    try:
        collected: List[str] = ["--- BODY ---"]
        previous_was_blank = False
        with io.BytesIO(bytes_content) as stream:
            for paragraph in Document(stream).paragraphs:
                cleaned = re.sub(r"[ \t]+", " ", (paragraph.text or "").replace("\u00a0", " ")).strip()
                if cleaned:
                    collected.append(cleaned)
                    previous_was_blank = False
                elif not previous_was_blank:
                    # Keep at most one empty line between text paragraphs.
                    collected.append("")
                    previous_was_blank = True
        while collected and not collected[-1]:
            collected.pop()
        return "\n".join(collected)
    except Exception as e:
        logging.error(f"Erro extração DOCX: {e}")
        return ""

# ==============================
# CORREÇÃO: Extrair disciplina mesmo quando está em FORMULÁRIO (Content Control / SDT)
# - lê o XML do docx (document.xml e header*.xml)
# - aguenta nomes longos e quebras de linha
# ==============================
def _extract_docx_text_via_xml(docx_bytes: bytes, max_chars: int = 40000) -> str:
    """
    Extrai texto bruto do DOCX lendo diretamente os XMLs internos.
    Isso captura texto de formulários (w:sdt) que o python-docx pode ignorar.
    """
    if not docx_bytes:
        return ""
    try:
        import zipfile
        import html

        def xml_to_text(xml: str) -> str:
            # colocar separadores antes de remover tags
            xml = re.sub(r"</w:p\s*>", "\n", xml)
            xml = re.sub(r"</w:tc\s*>", "\n", xml)
            xml = re.sub(r"<w:br\s*/>", "\n", xml)
            xml = re.sub(r"<w:cr\s*/>", "\n", xml)
            xml = re.sub(r"<w:tab\s*/>", "\t", xml)

            # remover tags
            txt = re.sub(r"<[^>]+>", "", xml)
            txt = html.unescape(txt)

            # normalizações
            txt = txt.replace("\r\n", "\n").replace("\r", "\n")
            txt = txt.replace("\u00a0", " ")
            # não destruir quebras de linha, só normalizar espaços
            txt = re.sub(r"[ \t]+", " ", txt)
            txt = re.sub(r"\n{3,}", "\n\n", txt)
            return txt.strip()

        with zipfile.ZipFile(io.BytesIO(docx_bytes)) as z:
            parts = []

            # principais: documento e cabeçalhos
            candidates = []
            for name in z.namelist():
                if name == "word/document.xml":
                    candidates.append(name)
                elif name.startswith("word/header") and name.endswith(".xml"):
                    candidates.append(name)

            # se não achar nada, tenta qualquer xml do word/
            if not candidates:
                candidates = [n for n in z.namelist() if n.startswith("word/") and n.endswith(".xml")]

            for name in candidates:
                try:
                    xml = z.read(name).decode("utf-8", errors="ignore")
                    parts.append(xml_to_text(xml))
                except Exception:
                    continue

        out = "\n".join([p for p in parts if p])
        return out[:max_chars]
    except Exception as e:
        logging.error(f"Erro _extract_docx_text_via_xml: {e}")
        return ""

def _normalize_disciplina(val: str) -> str:
    v = (val or "").strip()
    v = v.replace("\u00a0", " ")
    v = re.sub(r"[ \t]+", " ", v)
    v = re.sub(r"\s*\n\s*", " ", v).strip()
    v = v.strip(" \t\r\n:;,.")
    return v

def try_extract_discipline_from_docx_bytes(docx_bytes: bytes) -> Optional[str]:
    """Extract the 'Disciplina' value from DOCX bytes, form fields included.

    Works on the raw XML text so content-control (SDT) values are captured.
    Returns None when no discipline-like label can be found.
    """
    sample = _extract_docx_text_via_xml(docx_bytes, max_chars=40000)
    if not sample:
        return None

    # Labels that terminate the captured value (the next known form field).
    stop_labels = (
        r"Ano\s*Lec?tivo|Ano\s*Letivo|Data|Curso|Docente|Nome|Prova|Dura(?:ç|c)ão|N[ºo]|Turma|Variante|Ano"
    )

    # Strict patterns first: value may span lines; capture stops at next field.
    strict_patterns = (
        rf"(?is)\b(?:Disciplina|Cadeira|Unidade\s+Curricular|UC|U\.C\.|Curricular\s+Unit)\b\s*[:\-\.]\s*(.+?)\s*(?=\n\s*(?:{stop_labels})\b|$)",
        rf"(?is)\b(?:Assunto)\b\s*[:\-\.]\s*(.+?)\s*(?=\n\s*(?:{stop_labels})\b|$)",
    )
    for pattern in strict_patterns:
        hit = re.search(pattern, sample)
        if hit is None:
            continue
        candidate = _normalize_disciplina(hit.group(1))
        if candidate:
            return candidate

    # Loose fallback: "Disciplina Análise Matemática III ..." on a single line.
    hit = re.search(r"(?is)\bdisciplina\b\s*[:\-\.]?\s*([^\n]{3,200})", sample)
    if hit:
        candidate = _normalize_disciplina(hit.group(1))
        # Trim any trailing fields glued onto the same line.
        candidate = re.split(rf"(?i)\b(?:{stop_labels})\b", candidate)[0].strip()
        candidate = _normalize_disciplina(candidate)
        if candidate:
            return candidate

    return None

def _line_offsets(text: str) -> List[Tuple[int, str]]:
    lines = text.splitlines()
    offsets: List[Tuple[int, str]] = []
    off = 0
    for ln in lines:
        offsets.append((off, ln))
        off += len(ln) + 1
    return offsets

# ==============================
# Discipline name -> checking profile
# ==============================
# Substrings that mark a discipline as a language/communication course
# (matched against the lowercased discipline name).
LANGUAGE_KEYWORDS = {
    "língua", "lingua", "portuguesa", "português", "literatura", "gramática", "gramatica",
    "redação", "redacao", "interpretação", "interpretacao", "compreensão", "compreensao",
    "comunicação", "comunicacao", "comunicação pessoal", "comunicacao pessoal",
    "empresarial", "comunicação empresarial", "comunicacao empresarial",
    "expressão", "expressao", "texto", "produção textual", "producao textual",
}
# Substrings that mark a technical/engineering course.
TECH_KEYWORDS = {
    "telecom", "telecomun", "redes", "sistemas", "informática", "informatica",
    "física", "fisica", "matem", "cálculo", "calculo", "electr", "eletr", "circuit",
    "program", "engenharia", "química", "quimica", "estat", "econometr",
}

def try_extract_discipline_regex(text: str) -> Optional[str]:
    """Fallback: pull a discipline name from the first ~3500 chars of plain text."""
    if not text:
        return None
    header_sample = text[:3500]
    # Capture stops at the next known header field (or end of sample).
    lookahead = r"Ano\s*Lec?tivo\s*:|Data\s*:|Curso\s*:|Docente\s*:|Nome\s*:|Prova\s*:|Duração\s*:|Nº\s*:|Turma\s*:|Ano\s*:|$"
    patterns = (
        r"(?i)(?:Disciplina|Cadeira|Unidade Curricular|Curricular Unit)\s*[:\.]\s*(.+?)\s*(?=" + lookahead + r")",
        r"(?i)(?:Assunto)\s*[:\.]\s*(.+?)\s*(?=" + lookahead + r")",
    )
    for pattern in patterns:
        match = re.search(pattern, header_sample)
        if match is None:
            continue
        value = _normalize_disciplina(match.group(1).strip().rstrip(".,;"))
        return value if value else None
    return None

def classify_profile(disciplina: str, text: str) -> str:
    """Classify a proof as "language" (stricter) or "technical" (lenient).

    Priority: discipline-name keywords, then strong language markers inside
    the body text; everything else defaults to "technical".
    """
    name = (disciplina or "").lower()
    body = (text or "").lower()

    if any(keyword in name for keyword in LANGUAGE_KEYWORDS):
        return "language"

    # Body phrases that very strongly indicate a language exam.
    strong_language_markers = (
        "leitura e interpretação", "leitura e interpretacao",
        "funcionamento da língua", "funcionamento da lingua",
        "virgulação", "virgulacao",
        "produção textual", "producao textual",
        "pronominalize", "despronominaliza", "pronominalização", "pronominalizacao",
    )
    if any(marker in body for marker in strong_language_markers):
        return "language"

    if any(keyword in name for keyword in TECH_KEYWORDS):
        return "technical"

    # Unknown disciplines get the lenient technical treatment.
    return "technical"

# ==============================
# INSTRUCTIONS section (where spelling is held to a higher standard)
# ==============================
def detect_instructions_span(text: str) -> Optional[Tuple[int, int]]:
    """Locate the character span of the INSTRUÇÕES section of *text*.

    Returns (start, end) offsets. When no instructions heading is found the
    first 2000 characters are used as a stand-in; when no terminating section
    header follows the heading, the section is capped at 2500 characters.
    """
    if not text:
        return None
    offsets = _line_offsets(text)

    # BUGFIX: the previous heading regex ("instru(ç|c)oes?") required an
    # unaccented "o", so the common accented heading "INSTRUÇÕES" was never
    # matched. Accept ç/c and õ/o, plus an optional trailing ":" or ".".
    heading_re = re.compile(r"(?i)^\s*instru(?:ç|c)(?:õ|o)es?\s*[:.]?\s*$")

    inst_i = None
    for i, (_, ln) in enumerate(offsets):
        if heading_re.search(ln.strip()):
            inst_i = i
            break

    if inst_i is None:
        # No heading: treat the document head as the "instructions" area.
        return (0, min(len(text), 2000))

    start_off = offsets[inst_i][0]

    # Headers that end the section (PARTE I, TEXTO 1, QUESTÕES, ...).
    stop_hdr = re.compile(
        r"(?i)^\s*(parte\s+[ivx]+|parte\s+\d+|texto\s*(?:i|ii|iii|1|2|3)|quest(?:ões|oes)|perguntas|exerc[íi]cios)\b"
    )

    for j in range(inst_i + 1, len(offsets)):
        ln = offsets[j][1].strip()
        if stop_hdr.search(ln):
            end_off = offsets[j][0]
            if end_off > start_off + 20:  # ignore a stop header glued to the heading
                return (start_off, end_off)

    end_off = min(len(text), start_off + 2500)
    return (start_off, end_off)

def in_span(offset: int, span: Optional[Tuple[int, int]]) -> bool:
    """True when *offset* lies inside the half-open interval *span* ([start, end))."""
    if not span:
        return False
    start, end = span
    return start <= offset < end

# ==============================
# Dynamic whitelist (repeated terms)
# ==============================
def build_dynamic_tech_whitelist(text: str, min_freq: int = 2) -> Set[str]:
    """Build a per-document whitelist of terms repeating at least *min_freq* times.

    Rationale: a "misspelling" that recurs throughout a proof is most likely a
    domain-specific term, so it is whitelisted rather than counted against the
    author. Tokens are lowercased words of 3+ letters (accents included);
    Portuguese stopwords are excluded.
    """
    from collections import Counter  # local import keeps this block self-contained

    if not text:
        return set()
    counts = Counter(
        token.lower()
        for token in re.findall(r"[A-Za-zÀ-ÖØ-öø-ÿ]{3,}", text)
        if token.lower() not in STOPWORDS_PT
    )
    return {word for word, freq in counts.items() if freq >= min_freq}

# ==============================
# Technical-token / formula / symbol filters
# ==============================
# Pre-compiled detectors reused for every flagged token.
MATH_SYMBOLS_RE = re.compile(r"[=<>±×÷√∑∏∫≈≠°ΩµμπλΔαβγδθφω]")
UNITS_RE = re.compile(r"(?i)\b(khz|mhz|ghz|hz|dbm|db|mw|w|v|a|ohm|Ω|kΩ|mΩ|va|var)\b")
FORMULA_LIKE_RE = re.compile(r"(?i)\b([a-z]{1,3}\s*=\s*[^ ]+|[a-z]\([a-z]\)|[a-z]\^[0-9])\b")

def looks_like_formula_or_symbol(token: str) -> bool:
    """Heuristic: True when *token* looks like a formula, unit or math symbol.

    Such tokens are expected content in technical proofs and must never be
    reported as spelling errors.
    """
    if not token:
        return False
    mixed_alnum = any(c.isdigit() for c in token) and any(c.isalpha() for c in token)
    return bool(
        mixed_alnum
        or MATH_SYMBOLS_RE.search(token)
        or UNITS_RE.search(token)
        or FORMULA_LIKE_RE.search(token)
        or re.search(r"[_/\\@#%]", token)
    )

def is_technical_or_token(word: str, dynamic_wl: Set[str]) -> bool:
    """True when *word* should be exempt from spell-check counting.

    Exemptions: empty/short tokens (< 3 chars), the static technical
    whitelist, the per-document dynamic whitelist, and formula/symbol-like
    tokens.
    """
    if not word:
        return True
    stripped = word.strip()
    lowered = stripped.lower()
    return (
        len(stripped) < 3
        or lowered in WHITELIST_TECNICA
        or lowered in dynamic_wl
        or looks_like_formula_or_symbol(stripped)
    )

# ==============================
# Profile-aware spelling check (flexible)
# ==============================
def check_spelling_profiled(tool_pt, text: str, profile: str, instructions_span: Optional[Tuple[int, int]]) -> Dict[str, List[Dict]]:
    """Run LanguageTool over *text* and triage matches into three buckets.

    Returns {"counted": [...], "alerts": [...], "ignored": [...]} where only
    "counted" items can fail a proof. Triage rules:
      - proper nouns, technical terms and formulas/symbols -> ignored;
      - English words: ignored in "technical"; counted or alerted in
        "language" depending on COUNT_ANGLICISM_IN_LANGUAGE;
      - remaining issues count only inside *instructions_span* AND when
        LanguageTool offers replacement suggestions; otherwise they alert.

    *tool_pt* is a LanguageTool instance (None -> empty result).
    """
    out = {"counted": [], "alerts": [], "ignored": []}
    if tool_pt is None or not text:
        return out

    dynamic_wl = build_dynamic_tech_whitelist(text, min_freq=2)

    # Only the spelling rule counts; the language profile additionally
    # watches whitespace/punctuation rules.
    ALLOWED_RULES = {"MORFOLOGIK_RULE_PT_PT"}
    LANGUAGE_EXTRA = {"WHITESPACE_RULE", "DOUBLE_PUNCTUATION"}

    try:
        matches = tool_pt.check(text)
        for m in matches:
            # Filter by rule set for the active profile.
            if profile == "language":
                if m.ruleId not in (ALLOWED_RULES | LANGUAGE_EXTRA):
                    continue
            else:
                if m.ruleId not in ALLOWED_RULES:
                    continue

            raw = text[m.offset:m.offset + m.errorLength]
            w = raw.strip(".,;:?!()[]{}\"'")
            if not w:
                continue

            # 30-char context window with the flagged token marked as [[...]].
            ctx = (
                text[max(0, m.offset - 30):m.offset]
                + f"[[{text[m.offset:m.offset+m.errorLength]}]]"
                + text[m.offset + m.errorLength:m.offset + m.errorLength + 30]
            )

            # Capitalized token: assume a proper noun, always ignore.
            if w[0].isupper():
                out["ignored"].append({"message": "Ignorado (nome próprio).", "context": ctx, "rule": m.ruleId})
                continue

            # Technical terms / formulas / symbols are expected content.
            if is_technical_or_token(w, dynamic_wl):
                out["ignored"].append({"message": "Ignorado (técnico/fórmula/símbolo).", "context": ctx, "rule": m.ruleId})
                continue

            # Anglicism: the English spell checker recognizes the word.
            is_english_word = False
            if ENGLISH_CHECKER:
                try:
                    is_english_word = not ENGLISH_CHECKER.unknown([w.lower()])
                except Exception:
                    is_english_word = False

            if is_english_word:
                if profile == "technical":
                    out["ignored"].append({"message": "Ignorado (anglicismo aceitável em técnico).", "context": ctx, "rule": m.ruleId})
                    continue
                else:
                    item = {"message": "Anglicismo detectado (língua).", "context": ctx, "rule": m.ruleId}
                    if COUNT_ANGLICISM_IN_LANGUAGE:
                        out["counted"].append(item)
                    else:
                        out["alerts"].append(item)
                    continue

            has_suggestions = bool(getattr(m, "replacements", None))
            item = {
                "message": "Possível erro.",
                "context": ctx,
                "rule": m.ruleId,
                "suggestions": list(getattr(m, "replacements", []))[:5],
            }

            if profile == "technical":
                # Counts only inside INSTRUÇÕES and only with a suggestion.
                if in_span(m.offset, instructions_span) and has_suggestions:
                    out["counted"].append(item)
                else:
                    out["alerts"].append({**item, "message": "Alerta (fora de INSTRUÇÕES em técnico)."})
            else:
                # LANGUAGE profile: still lenient — counts mainly inside
                # INSTRUÇÕES (avoids penalizing quoted literary excerpts).
                if in_span(m.offset, instructions_span) and has_suggestions:
                    out["counted"].append(item)
                else:
                    out["alerts"].append({**item, "message": "Alerta (fora de INSTRUÇÕES em língua)."})
        return out

    except Exception as e:
        logging.error(f"Erro check_spelling_profiled: {e}")
        return out

# ==============================
# Formatting
# ==============================
def detect_formatting_breaks(text: str) -> List[Dict]:
    """Flag word triples that look like one word broken apart by spaces.

    Matches a 2-5 letter word followed by two 1-3 letter fragments, skipping
    any triple that contains a Portuguese stopword. Capped at 80 findings.
    """
    if not text:
        return []
    findings: List[Dict] = []
    triple_re = re.compile(r"\b([A-Za-zÀ-ÖØ-öø-ÿ]{2,5})\s+([A-Za-zÀ-ÖØ-öø-ÿ]{1,3})\s+([A-Za-zÀ-ÖØ-öø-ÿ]{1,3})\b")
    for match in triple_re.finditer(text):
        fragments = [g.lower() for g in match.groups()]
        if any(fragment in STOPWORDS_PT for fragment in fragments):
            continue
        start, end = match.start(), match.end()
        context = text[max(0, start - 25):start] + f"[[{text[start:end]}]]" + text[end:end + 25]
        findings.append({"message": "Possível palavra partida por espaços (formatação).", "context": context, "rule": "FORMAT_SPLIT3"})
    return findings[:80]

# ==============================
# AI review (advisory only, unless IA_CAN_FAIL)
# ==============================
def evaluate_proof_content(text: str, disciplina: str, profile: str) -> Tuple[bool, str, str]:
    """Ask OpenAI for a pedagogical review of the proof text.

    Returns (permissible, justification, suggestions). Fails open: missing
    configuration or any API error yields (True, <reason>, "") so the
    pipeline never blocks on the AI.
    """
    if not OPENAI_API_KEY or OpenAI is None:
        return True, "Sem IA (não reprova)", ""

    try:
        client = OpenAI(api_key=OPENAI_API_KEY, timeout=30.0)
    except Exception as e:
        return True, f"IA indisponível (cliente): {e}", ""

    # Profile-specific system prompt; both demand a strict JSON reply.
    if profile == "language":
        prompt = (
            "Aja como Coordenador Pedagógico de Língua/Comunicação. "
            "Analise clareza, coerência e se há ambiguidades. "
            "Não assuma que ambiguidades são proibidas; em línguas pode haver interpretação. "
            "Responda EM JSON: {\"permissivel\":true|false, \"justificativa\":\"...\", \"sugestoes\":\"...\"}"
        )
    else:
        prompt = (
            "Aja como Coordenador Pedagógico de Engenharia. "
            "Analise clareza, completude e avaliabilidade. "
            "Termos técnicos, símbolos e fórmulas são esperados. "
            "Responda EM JSON: {\"permissivel\":true|false, \"justificativa\":\"...\", \"sugestoes\":\"...\"}"
        )

    try:
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=[{"role": "system", "content": prompt}, {"role": "user", "content": text}],
            temperature=0.0,
            response_format={"type": "json_object"},
        )
        data = json.loads(resp.choices[0].message.content or "{}")
        # Default to permissible when the model omits the field.
        return bool(data.get("permissivel", True)), data.get("justificativa", ""), data.get("sugestoes", "")
    except Exception as e:
        return True, f"Erro IA: {e}", ""

# ==============================
# Signature stamping (DOCX)
# ==============================
def add_signature_to_doc(doc_bytes: bytes, image_path_str: str, image_width_cm: float) -> Optional[bytes]:
    """Stamp a floating signature image into the first section's page header.

    Returns the modified DOCX bytes; the ORIGINAL bytes unchanged when the
    image file is missing; or None on any processing error.
    """
    image_path = Path(image_path_str)
    if not image_path.is_file():
        return doc_bytes
    try:
        doc_stream = io.BytesIO(doc_bytes)
        doc = Document(doc_stream)

        def insert_floating_image_in_header(paragraph, image_path_str, width_cm):
            # Build the DrawingML anchor XML by hand: python-docx only
            # supports inline pictures, not floating (anchored) ones.
            run = paragraph.add_run()
            rId, image = paragraph.part.get_or_add_image(image_path_str)
            width_emu = int(width_cm * 360000)  # 1 cm = 360000 EMU
            img_size = image.px_width, image.px_height
            # Scale height to preserve the image's aspect ratio.
            height_emu = int(width_emu * img_size[1] / img_size[0])
            graphic_xml = f"""<w:drawing xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"><wp:anchor distT="0" distB="0" distL="114300" distR="114300" simplePos="0" relativeHeight="251658240" behindDoc="1" locked="0" layoutInCell="1" allowOverlap="1"><wp:simplePos x="0" y="0"/><wp:positionH relativeFrom="margin"><wp:align>right</wp:align></wp:positionH><wp:positionV relativeFrom="page"><wp:posOffset>540000</wp:posOffset></wp:positionV><wp:extent cx="{width_emu}" cy="{height_emu}"/><wp:effectExtent l="0" t="0" r="0" b="0"/><wp:wrapNone/><wp:docPr id="1" name="Signature"/><wp:cNvGraphicFramePr><a:graphicFrameLocks noChangeAspect="1"/></wp:cNvGraphicFramePr><a:graphic><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic><pic:nvPicPr><pic:cNvPr id="0" name="Sig"/><pic:cNvPicPr/></pic:nvPicPr><pic:blipFill><a:blip r:embed="{rId}" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"/><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr><a:xfrm><a:off x="0" y="0"/><a:ext cx="{width_emu}" cy="{height_emu}"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr></pic:pic></a:graphicData></a:graphic></wp:anchor></w:drawing>"""
            run._r.append(parse_xml(graphic_xml))

        # Anchor the image inside the header of the first section.
        section = doc.sections[0]
        header = section.header
        p = header.paragraphs[0] if header.paragraphs else header.add_paragraph()
        insert_floating_image_in_header(p, str(image_path), image_width_cm)

        out = io.BytesIO()
        doc.save(out)
        return out.getvalue()
    except Exception as e:
        logging.error(f"Erro assinatura DOCX: {e}")
        return None

# ==============================
# Email sending
# ==============================
def send_email(to, sub, body, atts=None, cc=None, assinatura=""):
    """Send a plain+HTML email with optional attachments via configured SMTP.

    *atts* is a list of (filename, bytes) tuples. Silently returns when the
    SMTP settings are incomplete. .txt attachments are re-encoded as UTF-8
    text parts; other files go out with their guessed MIME type.
    """
    if not all([SMTP_HOST, SMTP_USER, SMTP_PASS]):
        return

    msg = EmailMessage()
    msg["From"], msg["To"], msg["Subject"] = SMTP_USER, to, sub
    if cc:
        msg["Cc"] = cc

    # chr(10) == "\n": newlines become <br> in the HTML alternative part.
    html_sig = f"<br><br><p>{assinatura.replace(chr(10), '<br>')}</p>" if assinatura else ""
    msg.set_content(f"{body}\n\n{assinatura}", "plain", "utf-8")
    msg.add_alternative(f"<html><body><p>{body.replace(chr(10), '<br>')}</p>{html_sig}</body></html>", "html", "utf-8")

    if atts:
        # Promote the message to multipart/mixed before attaching files.
        msg.make_mixed()
        for fname, fbytes in atts:
            try:
                mt, st = (mimetypes.guess_type(fname)[0] or "application/octet-stream").split("/", 1)
                if fname.endswith(".txt"):
                    # utf-8-sig drops the BOM that the report writer prepends.
                    part = MIMEText(fbytes.decode("utf-8-sig", errors="replace"), "plain", "utf-8")
                    part.add_header("Content-Disposition", "attachment", filename=fname)
                    msg.attach(part)
                else:
                    msg.add_attachment(fbytes, maintype=mt, subtype=st, filename=fname)
            except Exception:
                # Best effort: one bad attachment must not abort the email.
                pass

    # Implicit SSL (port 465 style) or STARTTLS after connect, per config.
    with (smtplib.SMTP_SSL if USE_SMTP_SSL else smtplib.SMTP)(SMTP_HOST, SMTP_PORT) as s:
        if not USE_SMTP_SSL:
            s.starttls()
        s.login(SMTP_USER, SMTP_PASS)
        s.send_message(msg)

# ==============================
# Main pipeline
# ==============================
def process_inbox():
    """Process every unseen email in the configured IMAP folder.

    For each message whose subject mentions an "enunciado": extract attached
    .doc/.docx proofs, spell-check them according to the discipline profile,
    run the advisory AI review, and reply with either an APROVADO email
    (signed document + report) or a REVISÃO email (report + reasons).
    Processed messages are flagged \\Seen.
    """
    if not all([IMAP_HOST, IMAP_USER, IMAP_PASS]):
        logging.error("Configurações de IMAP incompletas.")
        return

    try:
        imap = imaplib.IMAP4_SSL(IMAP_HOST)
        imap.login(IMAP_USER, IMAP_PASS)
        imap.select(f'"{IMAP_LABEL}"')
    except Exception as e:
        logging.error(f"Erro ao conectar IMAP: {e}")
        return

    status, data = imap.search(None, "(UNSEEN)")
    if status != "OK" or not data[0]:
        logging.info("Nenhum email novo.")
        imap.close(); imap.logout()
        return

    for num in data[0].split():
        try:
            # BODY.PEEK leaves the \Seen flag untouched until we finish.
            res, raw = imap.fetch(num, "(BODY.PEEK[])")
            if res != "OK":
                continue

            msg = email.message_from_bytes(raw[0][1])
            subject = clean_header(msg.get("Subject", ""))
            if not verificar_assunto_inteligente(subject):
                continue

            sender = parseaddr(clean_header(msg.get("From", "")))[1]
            logging.info(f"Processando: {sender} | {subject}")

            # Collect .doc/.docx attachments; skip Word lock files ("~$...").
            docs_parts = []
            for part in msg.walk():
                raw_filename = part.get_filename()
                if not raw_filename:
                    continue
                fname = clean_header(raw_filename)
                if fname.startswith("~$"):
                    continue
                if fname.lower().endswith((".docx", ".doc")):
                    payload = part.get_payload(decode=True)
                    if payload:
                        docs_parts.append((fname, payload))

            if not docs_parts:
                imap.store(num, "+FLAGS", "\\Seen")
                continue

            for fname, fbytes in docs_parts:
                # Legacy .doc: convert via LibreOffice; ".doc" + "x" = ".docx".
                if fname.lower().endswith(".doc"):
                    converted = converter_doc_para_docx(fbytes)
                    if not converted:
                        send_email(sender, f"[ERRO] {fname}", "Falha ao converter .doc.", assinatura=ASSINATURA)
                        continue
                    fbytes = converted
                    fname = fname + "x"

                text = extract_full_docx_text(fbytes)
                if not text:
                    continue

                # ==============================
                # Discipline: DOCX XML first (captures form fields / long
                # names), then plain-text regex, then the .env fallback.
                # ==============================
                disciplina = try_extract_discipline_from_docx_bytes(fbytes) or try_extract_discipline_regex(text) or DISCIPLINA_FALLBACK

                profile = classify_profile(disciplina, text)

                instructions_span = detect_instructions_span(text)

                spell = check_spelling_profiled(LANG_TOOL_PT, text, profile, instructions_span)
                counted = len(spell["counted"])
                fmt_issues = detect_formatting_breaks(text)
                fmt_count = len(fmt_issues)

                ok_ia, just, sug = evaluate_proof_content(text, disciplina, profile)

                max_issues = MAX_ISSUES_LANG if profile == "language" else MAX_ISSUES_TECH

                # The AI verdict can only fail the proof when IA_CAN_FAIL=true.
                ia_blocks = (not ok_ia) and IA_CAN_FAIL

                aprovado = (not ia_blocks) and counted <= max_issues and fmt_count <= MAX_FORMAT_ISSUES
                status_tag = "APROVADO" if aprovado else "REVISÃO"

                def fmt_list(title: str, items: List[Dict], limit: int = 25) -> str:
                    # Render one report section: "<title>: N" + numbered items.
                    if not items:
                        return f"{title}: 0\n"
                    s = f"{title}: {len(items)}\n"
                    for i, it in enumerate(items[:limit], 1):
                        sug_txt = ""
                        if it.get("suggestions"):
                            sug_txt = " | Sug.: " + ", ".join(it["suggestions"])
                        s += f"{i}. {it.get('context','')}{sug_txt}\n"
                    if len(items) > limit:
                        s += f"... ({len(items)-limit} ocultos)\n"
                    return s

                report = []
                report.append(f"VALIDAÇÃO: {status_tag}")
                report.append(f"Disciplina: {disciplina}")
                report.append(f"Perfil: {profile} (language=rigoroso, technical=flexível)")
                report.append("")
                report.append("[IA - ALERTA] (não reprova por padrão)")
                report.append(just or "Sem observações.")
                if sug:
                    report.append(f"\n[IA - Sugestões]:\n{sug}")
                report.append("\n====================")
                report.append("ORTOGRAFIA / GRAMÁTICA")
                report.append("- Nomes próprios / fórmulas / símbolos / termos técnicos: IGNORADOS.")
                report.append("- Anglicismos: técnico ignora; língua " + ("CONTA" if COUNT_ANGLICISM_IN_LANGUAGE else "ALERTA") + ".")
                report.append("")
                report.append(fmt_list("CONTAM (podem reprovar)", spell["counted"], limit=40))
                report.append(f"TOTAL_CONTAM = {counted} | LIMITE = {max_issues}\n")
                report.append(fmt_list("ALERTAS (não reprovam)", spell["alerts"], limit=25))
                report.append(fmt_list("IGNORADOS (esperados)", spell["ignored"], limit=25))
                report.append("\n====================")
                report.append("FORMATAÇÃO")
                report.append(fmt_list("Suspeitas de palavra partida", fmt_issues, limit=25))
                report.append(f"TOTAL_FORMATAÇÃO = {fmt_count} | LIMITE = {MAX_FORMAT_ISSUES}")

                # BOM so Windows editors auto-detect UTF-8 in the .txt report.
                rep_bytes = codecs.BOM_UTF8 + ("\n".join(report)).encode("utf-8")
                att_rep = ("relatorio.txt", rep_bytes)

                if aprovado:
                    # Signed copy goes to the approval mailbox; the author
                    # receives the report plus the original file.
                    doc_assinado = add_signature_to_doc(fbytes, SIGN_IMAGE_PATH, SIGN_IMAGE_WIDTH_CM)
                    if doc_assinado:
                        send_email(
                            EMAIL_APROVADO_PARA,
                            f"[APROVADO] {disciplina}",
                            "Segue enunciado assinado e relatório.",
                            atts=[(f"ASSINADO_{fname}", doc_assinado), att_rep],
                            cc=EMAIL_CC_GERAL,
                            assinatura=ASSINATURA
                        )
                    send_email(
                        sender,
                        f"[APROVADO] {disciplina}",
                        "Aprovado. Segue relatório.",
                        atts=[att_rep, (fname, fbytes)],
                        cc=EMAIL_CC_GERAL,
                        assinatura=ASSINATURA
                    )
                else:
                    # Collect the concrete reasons for the revision request.
                    motivos = []
                    if ia_blocks:
                        motivos.append("parecer IA (IA_CAN_FAIL=true)")
                    if counted > max_issues:
                        motivos.append("ocorrências que contam acima do limite")
                    if fmt_count > MAX_FORMAT_ISSUES:
                        motivos.append("formatação")

                    body = "Necessária revisão.\n"
                    if motivos:
                        body += "Motivos: " + "; ".join(motivos) + ".\n"
                    else:
                        body += "Motivo: regra interna (ver relatório).\n"

                    send_email(
                        sender,
                        f"[REVISÃO] {disciplina}",
                        body,
                        atts=[att_rep, (fname, fbytes)],
                        cc=EMAIL_CC_GERAL,
                        assinatura=ASSINATURA
                    )

            imap.store(num, "+FLAGS", "\\Seen")
        except Exception as e:
            logging.error(f"Erro crítico ID {num}: {e}")

    imap.close(); imap.logout()

def main():
    """Entry point: run a single pass over the inbox."""
    process_inbox()

if __name__ == "__main__":
    main()
