BLEU + PDF + Work

import re

import pdfplumber
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction


def clean_pdf_text(pdf_path):
    """Return the text of every page in a PDF as one cleaned-up string.

    Each page is extracted with pdfplumber, words split by a hyphen at a
    line break are re-joined, and remaining newlines are collapsed to
    single spaces. Pages are joined with one space and the result is
    stripped of leading/trailing whitespace.

    Args:
        pdf_path: Path (or whatever ``pdfplumber.open`` accepts) of the PDF.

    Returns:
        The concatenated, cleaned text of all pages.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # A page without extractable text (e.g. a scanned image) may
            # yield None on some pdfplumber versions; treat it as empty so
            # re.sub does not raise TypeError.
            text = page.extract_text() or ""
            # Fix line-break hyphens: "transla-\ntion" -> "translation".
            text = re.sub(r'(\w+)-\n(\w+)', r'\1\2', text)
            # Replace newlines with spaces.
            text = re.sub(r'\n+', ' ', text)
            page_texts.append(text)
    # Joining once avoids repeated string concatenation; the final strip
    # matches the original's removal of the trailing separator.
    return " ".join(page_texts).strip()

Introduction

In the rapidly evolving world of machine translation (MT) and localization, three terms increasingly intersect in the daily workflow of linguists, developers, and project managers: BLEU, PDF, and Work.

# Use NLTK's "method1" smoothing for every sentence-level BLEU computation.
smoothing = SmoothingFunction().method1

# Score each candidate sentence against its single reference. Both sides are
# tokenized by whitespace. zip() stops at the shorter of the two lists, so
# any unpaired trailing sentences are not scored.
scores = [
    sentence_bleu([ref.split()], cand.split(), smoothing_function=smoothing)
    for ref, cand in zip(ref_sents, cand_sents)
]

# NOTE(review): the original text ended here with a truncated fragment,
# "import pdfplumber from nltk", which is not a complete statement. It looks
# like a cut-off repeat of the imports used by clean_pdf_text -- confirm
# against the full source before restoring anything.