
Model Training and Fine-tuning

This guide covers the complete process of training and fine-tuning machine learning models for the IRIS OCR platform, including data preparation, model architecture selection, training procedures, and evaluation metrics.

Training Architecture Overview

The training workflow moves from data preparation (collection, quality validation, and augmentation) through classification model training and OCR fine-tuning, and ends with evaluation, reporting, and deployment; each stage is detailed in the sections below.

Data Preparation

Dataset Organization

data/
├── training/
│   ├── raw_images/              # Original document images
│   │   ├── cedula_identidad/
│   │   ├── ficha_residencia/
│   │   ├── pasaporte/
│   │   └── otros/
│   ├── processed/               # Preprocessed images
│   ├── annotations/             # Label files and metadata
│   └── splits/                  # Train/validation/test splits
├── models/
│   ├── classification/          # Trained classification models
│   ├── ocr/                     # Fine-tuned OCR models
│   └── embeddings/              # Embedding models
└── evaluation/
    ├── metrics/                 # Evaluation results
    ├── confusion_matrices/      # Classification analysis
    └── reports/                 # Training reports
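
A small helper can create this skeleton before the first data-collection or training run. The snippet below is a minimal sketch; the directory names follow the layout above and the root path is assumed to be the repository's data/ folder.

from pathlib import Path

# Directory skeleton matching the layout above (sketch; adjust DATA_ROOT as needed)
DATA_ROOT = Path("data")
SUBDIRS = [
    "training/raw_images/cedula_identidad",
    "training/raw_images/ficha_residencia",
    "training/raw_images/pasaporte",
    "training/raw_images/otros",
    "training/processed",
    "training/annotations",
    "training/splits",
    "models/classification",
    "models/ocr",
    "models/embeddings",
    "evaluation/metrics",
    "evaluation/confusion_matrices",
    "evaluation/reports",
]

for subdir in SUBDIRS:
    (DATA_ROOT / subdir).mkdir(parents=True, exist_ok=True)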

Data Collection Strategy

Document Type Requirements

| Document Type | Minimum Samples | Recommended Samples | Quality Standards |
|---|---|---|---|
| Cédulas de Identidad | 500 | 2000+ | Clear, various lighting conditions |
| Fichas de Residencia | 300 | 1500+ | Handwritten and printed variants |
| Pasaportes | 200 | 1000+ | Different passport designs |
| Otros Documentos | 100 | 500+ | Diverse document types |
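
As a quick sanity check before training, the collected dataset can be compared against these minimums. The snippet below is a sketch: it assumes one subdirectory per document type under data/training/raw_images/ (as in the layout above) and mirrors the minimum counts from the table.

from pathlib import Path

# Minimum sample counts per document type, taken from the table above
MIN_SAMPLES = {
    "cedula_identidad": 500,
    "ficha_residencia": 300,
    "pasaporte": 200,
    "otros": 100,
}

raw_images = Path("data/training/raw_images")
for doc_type, minimum in MIN_SAMPLES.items():
    type_dir = raw_images / doc_type
    files = list(type_dir.glob("*")) if type_dir.is_dir() else []
    # Count common image formats only
    count = sum(1 for p in files if p.suffix.lower() in {".jpg", ".jpeg", ".png"})
    status = "OK" if count >= minimum else f"needs {minimum - count} more"
    print(f"{doc_type}: {count} samples (minimum {minimum}) -> {status}")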

Data Quality Criteria

import cv2
import numpy as np
from typing import Any, Dict, List

class DataQualityValidator:
    """Validates document images for training quality"""

    def __init__(self):
        self.quality_thresholds = {
            'min_resolution': (800, 600),    # Minimum width x height
            'max_resolution': (4000, 3000),  # Maximum to avoid memory issues
            'min_contrast': 0.3,             # Minimum contrast ratio
            'min_blur_variance': 100,        # Minimum Laplacian variance (lower = more blurred)
            'min_brightness': 50,            # Minimum average brightness
            'max_brightness': 200,           # Maximum average brightness
        }

    def validate_image_quality(self, image_path: str) -> Dict[str, Any]:
        """
        Validates image quality for training suitability

        Returns:
            quality_report: Dictionary with quality metrics and pass/fail status
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                return {'valid': False, 'error': 'Cannot read image file'}

            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            height, width = gray.shape

            # Resolution check
            resolution_valid = (
                width >= self.quality_thresholds['min_resolution'][0] and
                height >= self.quality_thresholds['min_resolution'][1] and
                width <= self.quality_thresholds['max_resolution'][0] and
                height <= self.quality_thresholds['max_resolution'][1]
            )

            # Contrast check (standard deviation of pixel values)
            contrast = np.std(gray) / 255.0
            contrast_valid = contrast >= self.quality_thresholds['min_contrast']

            # Blur detection (Laplacian variance; low variance indicates blur)
            blur_variance = cv2.Laplacian(gray, cv2.CV_64F).var()
            blur_valid = blur_variance >= self.quality_thresholds['min_blur_variance']

            # Brightness check
            brightness = np.mean(gray)
            brightness_valid = (
                brightness >= self.quality_thresholds['min_brightness'] and
                brightness <= self.quality_thresholds['max_brightness']
            )

            # Overall validity
            overall_valid = all([resolution_valid, contrast_valid, blur_valid, brightness_valid])

            return {
                'valid': overall_valid,
                'metrics': {
                    'resolution': (width, height),
                    'resolution_valid': resolution_valid,
                    'contrast': round(contrast, 3),
                    'contrast_valid': contrast_valid,
                    'blur_variance': round(blur_variance, 2),
                    'blur_valid': blur_valid,
                    'brightness': round(brightness, 2),
                    'brightness_valid': brightness_valid
                },
                'recommendations': self._generate_recommendations(
                    resolution_valid, contrast_valid, blur_valid, brightness_valid
                )
            }

        except Exception as e:
            return {'valid': False, 'error': str(e)}

    def _generate_recommendations(self, resolution_valid: bool, contrast_valid: bool,
                                  blur_valid: bool, brightness_valid: bool) -> List[str]:
        """Generate improvement recommendations for invalid images"""
        recommendations = []

        if not resolution_valid:
            recommendations.append("Adjust image resolution to be between 800x600 and 4000x3000 pixels")
        if not contrast_valid:
            recommendations.append("Improve image contrast - current image appears too flat")
        if not blur_valid:
            recommendations.append("Image appears blurred - ensure proper focus when capturing")
        if not brightness_valid:
            recommendations.append("Adjust lighting conditions - image is too dark or too bright")

        if not recommendations:
            recommendations.append("Image meets all quality standards")

        return recommendations

# Usage example
validator = DataQualityValidator()
quality_report = validator.validate_image_quality("sample_document.jpg")
print(f"Image valid: {quality_report['valid']}")
print(f"Recommendations: {quality_report['recommendations']}")

Data Augmentation Pipeline

import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from typing import Dict

class DocumentAugmentationPipeline:
    """Document-specific augmentation pipeline for training robustness"""

    def __init__(self, augmentation_strength: str = "medium"):
        self.strength = augmentation_strength
        self.pipelines = self._create_augmentation_pipelines()

    def _create_augmentation_pipelines(self) -> Dict[str, A.Compose]:
        """Create different augmentation pipelines based on strength"""

        # Base geometric augmentations
        geometric_augmentations = [
            A.Rotate(limit=(-5, 5), p=0.5),                           # Slight rotation
            A.Perspective(scale=(0.02, 0.05), p=0.3),                 # Perspective distortion
            A.ElasticTransform(alpha=50, sigma=5, p=0.2),             # Elastic deformation
            A.GridDistortion(num_steps=3, distort_limit=0.1, p=0.2)   # Grid distortion
        ]

        # Lighting and quality augmentations
        quality_augmentations = [
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.3),
            A.RandomGamma(gamma_limit=(80, 120), p=0.3),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=20, p=0.3)
        ]

        # Noise and blur augmentations
        noise_augmentations = [
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
            A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.2),
            A.Blur(blur_limit=3, p=0.2),
            A.MotionBlur(blur_limit=3, p=0.2)
        ]

        # Document-specific augmentations
        document_augmentations = [
            A.RandomShadow(shadow_roi=(0, 0, 1, 1), num_shadows_lower=1,
                           num_shadows_upper=2, shadow_dimension=5, p=0.3),
            A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3, p=0.1),
            A.OpticalDistortion(distort_limit=0.1, shift_limit=0.1, p=0.2)
        ]

        # Combine based on strength
        if self.strength == "light":
            augmentations = geometric_augmentations[:2] + quality_augmentations[:2]
        elif self.strength == "medium":
            augmentations = (geometric_augmentations[:3] + quality_augmentations[:3] +
                             noise_augmentations[:2])
        else:  # heavy
            augmentations = (geometric_augmentations + quality_augmentations +
                             noise_augmentations + document_augmentations)

        return {
            'train': A.Compose([
                *augmentations,
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ]),
            'validation': A.Compose([
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ])
        }

    def apply_augmentation(self, image: np.ndarray, split: str = "train"):
        """Apply augmentation pipeline to an image (returns a normalized tensor)"""
        pipeline = self.pipelines.get(split, self.pipelines['train'])
        augmented = pipeline(image=image)
        return augmented['image']

# Usage
augmenter = DocumentAugmentationPipeline(augmentation_strength="medium")
augmented_image = augmenter.apply_augmentation(original_image, split="train")
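
The pipeline plugs into training through a standard PyTorch Dataset. The sketch below is illustrative only: DocumentDataset and its (image_path, class_index) sample list are hypothetical names, not part of the IRIS codebase; the 224x224 resize matches the classifier input used later in this guide.

import cv2
from torch.utils.data import Dataset

class DocumentDataset(Dataset):
    """Hypothetical Dataset wrapper that applies the augmentation pipeline per sample."""

    def __init__(self, samples, augmenter: DocumentAugmentationPipeline, split: str = "train"):
        # samples: list of (image_path, class_index) tuples
        self.samples = samples
        self.augmenter = augmenter
        self.split = split

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image_path, label = self.samples[idx]
        # Albumentations expects RGB numpy arrays
        image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (224, 224))  # match the classifier input size
        tensor = self.augmenter.apply_augmentation(image, split=self.split)
        return tensor, label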

Classification Model Training

Model Architecture Selection

import torch
import torch.nn as nn
import timm
from typing import Any, Dict, List

class IRISDocumentClassifier(nn.Module):
    """Document classification model based on EfficientNet"""

    def __init__(self, num_classes: int = 5, model_name: str = "efficientnet_b0",
                 pretrained: bool = True, dropout_rate: float = 0.3):
        super().__init__()

        self.backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=0,  # Remove classifier head
            global_pool='avg'
        )

        # Get feature dimensions
        with torch.no_grad():
            dummy_input = torch.randn(1, 3, 224, 224)
            features = self.backbone(dummy_input)
            feature_dim = features.shape[1]

        # Custom classification head
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(feature_dim, feature_dim // 2),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate / 2),
            nn.Linear(feature_dim // 2, num_classes)
        )

        # Document type mapping
        self.class_names = [
            'cedula_identidad',
            'ficha_residencia',
            'pasaporte',
            'otros',
            'no_document'
        ]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        features = self.backbone(x)
        logits = self.classifier(features)
        return logits

    def predict_with_confidence(self, x: torch.Tensor) -> Dict[str, Any]:
        """Prediction with confidence scores and class names"""
        self.eval()
        with torch.no_grad():
            logits = self.forward(x)
            probabilities = torch.softmax(logits, dim=1)

            # Get top predictions
            top_probs, top_indices = torch.topk(probabilities, k=3, dim=1)

            predictions = []
            for i in range(x.shape[0]):
                pred_list = []
                for j in range(3):
                    class_idx = top_indices[i, j].item()
                    confidence = top_probs[i, j].item()
                    pred_list.append({
                        'class': self.class_names[class_idx],
                        'confidence': round(confidence, 4)
                    })
                predictions.append(pred_list)

            return {
                'predictions': predictions,
                'raw_logits': logits.cpu().numpy(),
                'probabilities': probabilities.cpu().numpy()
            }

class DocumentClassificationTrainer:
    """Training pipeline for document classification"""

    def __init__(self, model: IRISDocumentClassifier, device: str = 'cuda'):
        self.model = model.to(device)
        self.device = device
        self.training_history = []

    def train_epoch(self, train_loader, optimizer, criterion, epoch: int) -> Dict[str, float]:
        """Train for one epoch"""
        self.model.train()
        total_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(self.device), labels.to(self.device)

            # Forward pass
            optimizer.zero_grad()
            outputs = self.model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            loss.backward()
            optimizer.step()

            # Statistics
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            # Progress logging
            if batch_idx % 10 == 0:
                print(f'Epoch {epoch}, Batch {batch_idx}/{len(train_loader)}, '
                      f'Loss: {loss.item():.4f}')

        avg_loss = total_loss / len(train_loader)
        accuracy = correct_predictions / total_samples

        return {'loss': avg_loss, 'accuracy': accuracy}

    def validate_epoch(self, val_loader, criterion) -> Dict[str, float]:
        """Validate for one epoch"""
        self.model.eval()
        total_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(self.device), labels.to(self.device)

                outputs = self.model(images)
                loss = criterion(outputs, labels)

                total_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_samples += labels.size(0)
                correct_predictions += (predicted == labels).sum().item()

        avg_loss = total_loss / len(val_loader)
        accuracy = correct_predictions / total_samples

        return {'loss': avg_loss, 'accuracy': accuracy}

    def train_model(self, train_loader, val_loader, num_epochs: int = 50,
                    learning_rate: float = 0.001, weight_decay: float = 1e-4) -> Dict:
        """Complete training pipeline"""

        # Setup optimizer and scheduler
        optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=learning_rate,
            weight_decay=weight_decay
        )

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=5, verbose=True
        )

        criterion = nn.CrossEntropyLoss()

        best_val_accuracy = 0.0
        patience_counter = 0
        early_stopping_patience = 10

        print(f"Starting training for {num_epochs} epochs...")

        for epoch in range(num_epochs):
            # Training phase
            train_metrics = self.train_epoch(train_loader, optimizer, criterion, epoch)

            # Validation phase
            val_metrics = self.validate_epoch(val_loader, criterion)

            # Learning rate scheduling
            scheduler.step(val_metrics['loss'])

            # Record metrics
            epoch_metrics = {
                'epoch': epoch,
                'train_loss': train_metrics['loss'],
                'train_accuracy': train_metrics['accuracy'],
                'val_loss': val_metrics['loss'],
                'val_accuracy': val_metrics['accuracy'],
                'learning_rate': optimizer.param_groups[0]['lr']
            }
            self.training_history.append(epoch_metrics)

            # Progress logging
            print(f"Epoch {epoch}: Train Loss: {train_metrics['loss']:.4f}, "
                  f"Train Acc: {train_metrics['accuracy']:.4f}, "
                  f"Val Loss: {val_metrics['loss']:.4f}, "
                  f"Val Acc: {val_metrics['accuracy']:.4f}")

            # Model checkpointing
            if val_metrics['accuracy'] > best_val_accuracy:
                best_val_accuracy = val_metrics['accuracy']
                patience_counter = 0

                # Save best model
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'best_accuracy': best_val_accuracy,
                    'training_history': self.training_history
                }, 'data/models/classification/best_model.pth')

                print(f"New best model saved with validation accuracy: {best_val_accuracy:.4f}")
            else:
                patience_counter += 1

            # Early stopping
            if patience_counter >= early_stopping_patience:
                print(f"Early stopping triggered after {epoch + 1} epochs")
                break

        return {
            'best_accuracy': best_val_accuracy,
            'training_history': self.training_history,
            'final_epoch': epoch
        }

Training Configuration

import torch

# Training configuration
TRAINING_CONFIG = {
    'model': {
        'architecture': 'efficientnet_b0',
        'num_classes': 5,
        'pretrained': True,
        'dropout_rate': 0.3
    },
    'training': {
        'batch_size': 32,
        'num_epochs': 100,
        'learning_rate': 0.001,
        'weight_decay': 1e-4,
        'early_stopping_patience': 10
    },
    'data': {
        'image_size': (224, 224),
        'augmentation_strength': 'medium',
        'train_split': 0.8,
        'val_split': 0.15,
        'test_split': 0.05
    },
    'hardware': {
        'device': 'cuda' if torch.cuda.is_available() else 'cpu',
        'num_workers': 4,
        'pin_memory': True
    }
}
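
The training script shown later in this guide loads its settings from configs/training_config.yaml. One way to produce that file is to dump the dictionary above; the sketch below assumes that path and converts the image_size tuple to a list, since YAML's safe dumper cannot represent Python tuples.

import yaml

# Convert tuple values before dumping (safe_dump rejects Python tuples)
config_to_save = {
    **TRAINING_CONFIG,
    'data': {**TRAINING_CONFIG['data'], 'image_size': list(TRAINING_CONFIG['data']['image_size'])}
}

with open("configs/training_config.yaml", "w") as f:
    yaml.safe_dump(config_to_save, f, default_flow_style=False, sort_keys=False)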

OCR Model Fine-tuning

PaddleOCR Customization

from typing import Dict, List

from paddleocr import PaddleOCR
import numpy as np

class CustomOCRTrainer:
    """Fine-tune PaddleOCR for document-specific text recognition"""

    def __init__(self, base_language: str = 'es'):
        self.base_language = base_language
        self.custom_configs = self._create_custom_configs()

    def _create_custom_configs(self) -> Dict[str, Dict]:
        """Create custom OCR configurations for different document types"""
        return {
            'cedula_identidad': {
                'det_db_thresh': 0.2,        # Lower threshold for small text
                'det_db_box_thresh': 0.6,    # Higher box threshold for precision
                'det_db_unclip_ratio': 2.0,  # More aggressive unclipping
                'rec_batch_num': 8,          # Larger batch for efficiency
                'use_angle_cls': True,       # Handle rotated text
                'use_space_char': True,      # Preserve spaces
                'drop_score': 0.2            # Lower drop score for more text
            },
            'ficha_residencia': {
                'det_db_thresh': 0.3,
                'det_db_box_thresh': 0.5,
                'det_db_unclip_ratio': 1.6,
                'rec_batch_num': 6,
                'use_angle_cls': True,
                'use_space_char': True,
                'drop_score': 0.3
            },
            'pasaporte': {
                'det_db_thresh': 0.3,
                'det_db_box_thresh': 0.6,
                'det_db_unclip_ratio': 1.8,
                'rec_batch_num': 6,
                'use_angle_cls': True,
                'use_space_char': True,
                'drop_score': 0.3
            }
        }

    def create_custom_ocr_instance(self, document_type: str) -> PaddleOCR:
        """Create optimized OCR instance for specific document type"""
        config = self.custom_configs.get(document_type, self.custom_configs['ficha_residencia'])

        return PaddleOCR(
            lang=self.base_language,
            **config
        )

    def fine_tune_recognition_model(self, training_data: Dict, document_type: str):
        """
        Fine-tune recognition model for specific document type
        Note: This is a placeholder for the PaddleOCR fine-tuning process
        """
        print(f"Fine-tuning OCR model for {document_type}")
        print("This would involve:")
        print("1. Preparing labeled text recognition dataset")
        print("2. Converting to PaddleOCR training format")
        print("3. Running PaddleOCR training pipeline")
        print("4. Evaluating on validation set")
        print("5. Exporting fine-tuned model")

        # In practice, this would call PaddleOCR training scripts
        # paddle_command = f"python tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml"

        return f"data/models/ocr/custom_{document_type}_rec_model"

class OCRPerformanceEvaluator:
    """Evaluate OCR model performance on test datasets"""

    def __init__(self):
        self.metrics = {}

    def calculate_edit_distance(self, ground_truth: str, prediction: str) -> int:
        """Calculate Levenshtein distance between strings"""
        m, n = len(ground_truth), len(prediction)
        dp = [[0] * (n + 1) for _ in range(m + 1)]

        for i in range(m + 1):
            dp[i][0] = i
        for j in range(n + 1):
            dp[0][j] = j

        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if ground_truth[i-1] == prediction[j-1]:
                    dp[i][j] = dp[i-1][j-1]
                else:
                    dp[i][j] = 1 + min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])

        return dp[m][n]

    def evaluate_ocr_accuracy(self, test_data: List[Dict]) -> Dict[str, float]:
        """
        Evaluate OCR accuracy on test dataset

        Args:
            test_data: List of dicts with 'image_path', 'ground_truth', 'document_type'
        """
        document_metrics = {}

        for item in test_data:
            document_type = item['document_type']
            if document_type not in document_metrics:
                document_metrics[document_type] = {
                    'total_chars': 0,
                    'total_errors': 0,
                    'total_samples': 0,
                    'perfect_matches': 0
                }

            # Load OCR model for document type
            ocr_trainer = CustomOCRTrainer()
            ocr_model = ocr_trainer.create_custom_ocr_instance(document_type)

            # Perform OCR
            result = ocr_model.ocr(item['image_path'])

            # Extract predicted text
            predicted_text = ""
            if result and result[0]:
                predicted_text = " ".join([line[1][0] for line in result[0] if line[1][1] > 0.3])

            ground_truth = item['ground_truth']

            # Calculate metrics
            edit_distance = self.calculate_edit_distance(ground_truth, predicted_text)
            char_accuracy = max(0, 1 - edit_distance / max(len(ground_truth), 1))

            # Update aggregated metrics
            metrics = document_metrics[document_type]
            metrics['total_chars'] += len(ground_truth)
            metrics['total_errors'] += edit_distance
            metrics['total_samples'] += 1

            if ground_truth.strip().lower() == predicted_text.strip().lower():
                metrics['perfect_matches'] += 1

        # Calculate final metrics
        final_metrics = {}
        for doc_type, metrics in document_metrics.items():
            final_metrics[doc_type] = {
                'character_accuracy': 1 - (metrics['total_errors'] / max(metrics['total_chars'], 1)),
                'perfect_match_rate': metrics['perfect_matches'] / max(metrics['total_samples'], 1),
                'total_samples': metrics['total_samples']
            }

        return final_metrics
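
A usage sketch for the evaluator; the image path and ground-truth string below are placeholders, not real samples:

test_data = [
    {
        'image_path': 'data/training/raw_images/cedula_identidad/sample_001.jpg',  # placeholder path
        'ground_truth': 'CEDULA DE IDENTIDAD NOMBRE APELLIDO',                     # placeholder text
        'document_type': 'cedula_identidad'
    },
    # ... more labeled samples per document type
]

evaluator = OCRPerformanceEvaluator()
ocr_metrics = evaluator.evaluate_ocr_accuracy(test_data)
for doc_type, metrics in ocr_metrics.items():
    print(f"{doc_type}: char accuracy {metrics['character_accuracy']:.3f}, "
          f"perfect match rate {metrics['perfect_match_rate']:.3f} "
          f"({metrics['total_samples']} samples)")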

Model Evaluation and Validation

Comprehensive Evaluation Pipeline

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import json
import os

class ModelEvaluationSuite:
    """Comprehensive evaluation suite for IRIS models"""

    def __init__(self):
        self.evaluation_results = {}

    def evaluate_classification_model(self, model, test_loader, class_names: List[str]) -> Dict:
        """Evaluate classification model performance"""
        model.eval()
        device = next(model.parameters()).device  # run evaluation on the model's device
        all_predictions = []
        all_labels = []
        all_probabilities = []

        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images.to(device))
                probabilities = torch.softmax(outputs, dim=1)
                _, predicted = torch.max(outputs, 1)

                all_predictions.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                all_probabilities.extend(probabilities.cpu().numpy())

        # Calculate metrics
        classification_rep = classification_report(
            all_labels, all_predictions,
            target_names=class_names,
            output_dict=True
        )

        confusion_mat = confusion_matrix(all_labels, all_predictions)

        # Calculate per-class metrics
        per_class_metrics = {}
        for i, class_name in enumerate(class_names):
            class_probs = [prob[i] for prob in all_probabilities]
            class_labels = [1 if label == i else 0 for label in all_labels]

            # Calculate AUC-ROC
            try:
                auc_score = float(roc_auc_score(class_labels, class_probs))
            except ValueError:
                auc_score = 0.0  # Handle case where class doesn't appear in test set

            per_class_metrics[class_name] = {
                'precision': classification_rep[class_name]['precision'],
                'recall': classification_rep[class_name]['recall'],
                'f1_score': classification_rep[class_name]['f1-score'],
                'auc_roc': auc_score
            }

        return {
            'class_names': class_names,  # included so generate_evaluation_report can label plots
            'classification_report': classification_rep,
            'confusion_matrix': confusion_mat.tolist(),
            'per_class_metrics': per_class_metrics,
            'overall_accuracy': classification_rep['accuracy'],
            'macro_avg_f1': classification_rep['macro avg']['f1-score'],
            'weighted_avg_f1': classification_rep['weighted avg']['f1-score']
        }

    def generate_evaluation_report(self, model_results: Dict, output_dir: str = "data/evaluation"):
        """Generate comprehensive evaluation report"""

        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        # Save results to JSON
        with open(f"{output_dir}/evaluation_results.json", 'w') as f:
            json.dump(model_results, f, indent=2)

        # Generate confusion matrix visualization
        self._plot_confusion_matrix(
            model_results['confusion_matrix'],
            model_results['class_names'],
            f"{output_dir}/confusion_matrix.png"
        )

        # Generate per-class performance chart
        self._plot_per_class_metrics(
            model_results['per_class_metrics'],
            f"{output_dir}/per_class_performance.png"
        )

        # Generate evaluation summary
        self._generate_summary_report(model_results, f"{output_dir}/summary_report.md")

    def _plot_confusion_matrix(self, confusion_matrix: List[List[int]],
                               class_names: List[str], output_path: str):
        """Plot and save confusion matrix"""
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            confusion_matrix,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names
        )
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.close()

    def _plot_per_class_metrics(self, per_class_metrics: Dict, output_path: str):
        """Plot per-class performance metrics"""
        classes = list(per_class_metrics.keys())
        metrics = ['precision', 'recall', 'f1_score', 'auc_roc']

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        axes = axes.flatten()

        for i, metric in enumerate(metrics):
            values = [per_class_metrics[cls][metric] for cls in classes]
            axes[i].bar(classes, values)
            axes[i].set_title(f'{metric.replace("_", " ").title()}')
            axes[i].set_ylabel('Score')
            axes[i].tick_params(axis='x', rotation=45)

        plt.tight_layout()
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.close()

    def _generate_summary_report(self, results: Dict, output_path: str):
        """Generate markdown summary report"""
        report = f"""# IRIS Model Evaluation Report

## Overall Performance

- **Overall Accuracy**: {results['overall_accuracy']:.4f}
- **Macro Average F1**: {results['macro_avg_f1']:.4f}
- **Weighted Average F1**: {results['weighted_avg_f1']:.4f}

## Per-Class Performance

| Class | Precision | Recall | F1-Score | AUC-ROC |
|-------|-----------|--------|----------|---------|
"""

        for class_name, metrics in results['per_class_metrics'].items():
            report += f"| {class_name} | {metrics['precision']:.4f} | {metrics['recall']:.4f} | {metrics['f1_score']:.4f} | {metrics['auc_roc']:.4f} |\n"

        report += """
## Model Recommendations

"""
        # Add recommendations based on performance
        if results['overall_accuracy'] > 0.95:
            report += "- ✅ Excellent model performance - ready for production deployment\n"
        elif results['overall_accuracy'] > 0.90:
            report += "- ✅ Good model performance - suitable for production with monitoring\n"
        elif results['overall_accuracy'] > 0.85:
            report += "- ⚠️ Moderate performance - consider additional training data or model tuning\n"
        else:
            report += "- ❌ Poor performance - requires significant improvement before deployment\n"

        # Check for class imbalance issues
        f1_scores = [metrics['f1_score'] for metrics in results['per_class_metrics'].values()]
        if max(f1_scores) - min(f1_scores) > 0.2:
            report += "- ⚠️ Significant class imbalance detected - consider data balancing strategies\n"

        with open(output_path, 'w') as f:
            f.write(report)

Training Execution Scripts

Main Training Script

#!/usr/bin/env python3
# scripts/training/train_classifier.py

import argparse
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import yaml
import os
from datetime import datetime

# IRISDocumentClassifier, DocumentClassificationTrainer, and ModelEvaluationSuite are
# defined earlier in this guide (in practice, import them from the project's training
# modules). create_data_loaders is sketched after this script.

def main():
    parser = argparse.ArgumentParser(description='Train IRIS document classifier')
    parser.add_argument('--config', type=str, default='configs/training_config.yaml',
                        help='Path to training configuration file')
    parser.add_argument('--data_dir', type=str, default='data/training',
                        help='Path to training data directory')
    parser.add_argument('--output_dir', type=str, default='data/models/classification',
                        help='Path to save trained models')
    parser.add_argument('--resume', type=str, default=None,
                        help='Path to checkpoint to resume training from')

    args = parser.parse_args()

    # Load configuration
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Create data loaders
    train_loader, val_loader, test_loader = create_data_loaders(args.data_dir, config)

    # Create model
    model = IRISDocumentClassifier(
        num_classes=config['model']['num_classes'],
        model_name=config['model']['architecture'],
        pretrained=config['model']['pretrained'],
        dropout_rate=config['model']['dropout_rate']
    )

    # Create trainer
    trainer = DocumentClassificationTrainer(model, device)

    # Resume from checkpoint if specified
    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        print(f"Resumed training from {args.resume}")

    # Train model
    print("Starting training...")
    training_results = trainer.train_model(
        train_loader=train_loader,
        val_loader=val_loader,
        num_epochs=config['training']['num_epochs'],
        learning_rate=config['training']['learning_rate'],
        weight_decay=config['training']['weight_decay']
    )

    # Evaluate on test set
    print("Evaluating on test set...")
    evaluator = ModelEvaluationSuite()
    evaluation_results = evaluator.evaluate_classification_model(
        model, test_loader, model.class_names
    )

    # Generate evaluation report
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    eval_output_dir = f"{args.output_dir}/evaluation_{timestamp}"
    evaluator.generate_evaluation_report(evaluation_results, eval_output_dir)

    print(f"Training completed! Results saved to {eval_output_dir}")
    print(f"Final test accuracy: {evaluation_results['overall_accuracy']:.4f}")

if __name__ == "__main__":
    main()
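
The script above calls create_data_loaders, which is not defined in this guide. A minimal sketch of what it might look like, built on the hypothetical DocumentDataset wrapper shown earlier and the split ratios from TRAINING_CONFIG; load_labeled_samples is a hypothetical helper whose details are project-specific:

def create_data_loaders(data_dir: str, config: dict):
    """Sketch of a loader factory: builds train/val/test DataLoaders from the labeled dataset."""
    import random
    from torch.utils.data import DataLoader

    # Hypothetical helper returning a list of (image_path, class_index) pairs,
    # e.g. assembled from the annotation files under data/training/annotations/
    samples = load_labeled_samples(data_dir)
    random.shuffle(samples)

    n = len(samples)
    train_end = int(n * config['data']['train_split'])
    val_end = train_end + int(n * config['data']['val_split'])

    augmenter = DocumentAugmentationPipeline(config['data']['augmentation_strength'])
    loaders = []
    # Validation and test splits reuse the 'validation' pipeline (no augmentation)
    for split_samples, split in [(samples[:train_end], 'train'),
                                 (samples[train_end:val_end], 'validation'),
                                 (samples[val_end:], 'validation')]:
        dataset = DocumentDataset(split_samples, augmenter, split=split)
        loaders.append(DataLoader(
            dataset,
            batch_size=config['training']['batch_size'],
            shuffle=(split == 'train'),
            num_workers=config['hardware']['num_workers'],
            pin_memory=config['hardware']['pin_memory']
        ))
    return tuple(loaders)  # (train_loader, val_loader, test_loader)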

Continuous Training Pipeline

#!/bin/bash
# scripts/training/continuous_training.sh

set -eo pipefail  # abort if any step fails, including commands piped through tee

echo "IRIS Continuous Training Pipeline"
echo "================================="

# Configuration
DATA_DIR="data/training"
MODEL_DIR="data/models"
BACKUP_DIR="data/backups"
LOG_DIR="logs/training"

# Create directories
mkdir -p $LOG_DIR
mkdir -p $BACKUP_DIR

# Timestamp
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="$LOG_DIR/training_$TIMESTAMP.log"

echo "Starting training pipeline at $(date)" | tee $LOG_FILE

# Step 1: Data validation
echo "Step 1: Validating training data..." | tee -a $LOG_FILE
python scripts/training/validate_training_data.py --data_dir $DATA_DIR | tee -a $LOG_FILE

# Step 2: Data preprocessing
echo "Step 2: Preprocessing training data..." | tee -a $LOG_FILE
python scripts/training/preprocess_training_data.py --input_dir $DATA_DIR/raw_images --output_dir $DATA_DIR/processed | tee -a $LOG_FILE

# Step 3: Model training
echo "Step 3: Training classification model..." | tee -a $LOG_FILE
python scripts/training/train_classifier.py --config configs/training_config.yaml --output_dir $MODEL_DIR/classification | tee -a $LOG_FILE

# Step 4: Model evaluation
echo "Step 4: Evaluating trained model..." | tee -a $LOG_FILE
python scripts/training/evaluate_model.py --model_path $MODEL_DIR/classification/best_model.pth --test_data $DATA_DIR/test | tee -a $LOG_FILE

# Step 5: Model deployment (if evaluation passes)
echo "Step 5: Deploying model to production..." | tee -a $LOG_FILE
python scripts/deployment/deploy_model.py --model_path $MODEL_DIR/classification/best_model.pth | tee -a $LOG_FILE

echo "Training pipeline completed at $(date)" | tee -a $LOG_FILE
echo "Log file: $LOG_FILE"

This guide covers the full workflow for training, fine-tuning, and evaluating the machine learning models behind the IRIS OCR platform, from data preparation and quality control through model training, evaluation, and deployment.