Custom Models with InferX

InferX provides powerful tools to optimize and deploy your own custom models across different hardware platforms. Whether you have a PyTorch, TensorFlow, or ONNX model, InferX can help you achieve optimal performance on any device.

Optimization Pipeline

from inferx.optimize import optimize_model

# Optimize your custom model
optimized_model = optimize_model(
    model_path="path/to/your/model.pth",
    target_device="auto",  # Auto-detects your hardware
    optimization_level="aggressive"
)

# Use the optimized model
result = optimized_model.inference(input_data)

Supported Model Formats

InferX supports multiple model formats and frameworks:

PyTorch Models

import torch
from inferx.optimize import optimize_model

# Load your PyTorch model
model = torch.load("my_model.pth")

# Optimize for current hardware
optimized = optimize_model(
    model=model,
    input_shape=(1, 3, 224, 224),
    optimization_level="balanced"
)

# Deploy with automatic hardware detection
result = optimized.inference(input_tensor)

TensorFlow/Keras Models

import tensorflow as tf
from inferx.optimize import optimize_model

# Load TensorFlow model
model = tf.keras.models.load_model("my_model.h5")

# Optimize for edge deployment
optimized = optimize_model(
    model=model,
    target_device="jetson",
    precision="fp16"  # Use half precision for speed
)

ONNX Models

from inferx.optimize import optimize_model

# Optimize ONNX model
optimized = optimize_model(
    model_path="model.onnx",
    target_device="gpu",
    batch_size=4
)

Hardware-Specific Optimizations

Jetson Optimization

# Optimize specifically for Jetson devices
jetson_model = optimize_model(
    model_path="model.pth",
    target_device="jetson",
    optimization_config={
        "precision": "fp16",
        "max_workspace_size": "1GB",
        "dla_cores": 2,  # Use Deep Learning Accelerator
        "enable_tensorrt": True
    }
)

GPU Optimization

# Optimize for high-end GPUs
gpu_model = optimize_model(
    model_path="model.pth", 
    target_device="gpu",
    optimization_config={
        "precision": "mixed",  # Mixed-precision inference (this is a deployment setting, not a training one)
        "batch_size": 32,
        "enable_tensorrt": True,
        "enable_cudnn": True
    }
)

CPU Optimization

# Optimize for CPU deployment
cpu_model = optimize_model(
    model_path="model.pth",
    target_device="cpu",
    optimization_config={
        "num_threads": 8,
        "enable_mkldnn": True,
        "quantization": "int8"
    }
)

Advanced Optimization Features

Dynamic Batch Size

# Support variable batch sizes
model = optimize_model(
    model_path="model.pth",
    input_shape=[(1, 3, 224, 224), (4, 3, 224, 224), (8, 3, 224, 224)],
    dynamic_axes={'input': {0: 'batch_size'}}
)

# Use with different batch sizes
single_result = model.inference(single_image)
batch_result = model.inference(batch_images)

Quantization

# Apply quantization for smaller models
quantized_model = optimize_model(
    model_path="large_model.pth",
    quantization_config={
        "method": "dynamic",  # or "static", "qat"
        "precision": "int8",
        "calibration_dataset": calibration_data
    }
)

print(f"Model size reduced by {quantized_model.compression_ratio:.2f}x")

Pruning

# Apply model pruning to reduce parameters
pruned_model = optimize_model(
    model_path="model.pth",
    pruning_config={
        "method": "magnitude",
        "sparsity": 0.5,  # Remove 50% of parameters
        "structured": False
    }
)

Performance Monitoring

Benchmarking

from inferx.benchmark import benchmark_model

# Comprehensive performance analysis
results = benchmark_model(
    model=optimized_model,
    input_shape=(1, 3, 224, 224),
    num_runs=1000,
    warmup_runs=50
)

print(f"Average inference time: {results['avg_time']:.2f}ms")
print(f"Throughput: {results['throughput']:.1f} FPS")
print(f"Memory usage: {results['memory_mb']:.1f}MB")

Real-time Monitoring

import time
import psutil

class ModelMonitor:
    """Wrap a model and record timing and system metrics for each inference call.

    Every call to ``inference_with_monitoring`` appends one metrics dict to
    ``self.metrics``, so the list holds the full history for this monitor.
    """

    def __init__(self, model):
        """Store the model to monitor and start with an empty metrics history.

        Args:
            model: Object exposing an ``inference(input_data)`` method.
        """
        self.model = model
        self.metrics = []

    def inference_with_monitoring(self, input_data):
        """Run ``self.model.inference`` and measure the call.

        Args:
            input_data: Passed through unchanged to ``self.model.inference``.

        Returns:
            A ``(result, metrics)`` tuple. ``metrics`` contains:
            ``inference_time`` (seconds), ``cpu_usage`` (system-wide CPU
            percent over the call), ``memory_delta`` (change in system memory
            percent) and ``timestamp`` (wall-clock time the record was made).
        """
        # psutil.cpu_percent() reports utilization since the *previous* call,
        # so subtracting two readings is meaningless. Prime the counter here
        # and read it once afterwards to get utilization over the inference.
        psutil.cpu_percent(interval=None)
        memory_before = psutil.virtual_memory().percent
        # perf_counter is monotonic and high resolution; time.time() can jump
        # when the system clock is adjusted.
        start_time = time.perf_counter()

        # Run inference
        result = self.model.inference(input_data)

        elapsed = time.perf_counter() - start_time
        # NOTE: for very short calls the CPU sampling window is tiny and the
        # reading is noisy; treat it as indicative only.
        cpu_usage = psutil.cpu_percent(interval=None)
        memory_after = psutil.virtual_memory().percent

        # Log metrics
        metrics = {
            'inference_time': elapsed,
            'cpu_usage': cpu_usage,
            'memory_delta': memory_after - memory_before,
            'timestamp': time.time()
        }

        self.metrics.append(metrics)
        return result, metrics

# Usage
monitor = ModelMonitor(optimized_model)
result, perf = monitor.inference_with_monitoring(test_input)
print(f"Inference took {perf['inference_time']*1000:.2f}ms")

Example: Complete Custom Model Pipeline

1. Model Training (External)

# Your existing training code
import torch
import torch.nn as nn

class CustomClassifier(nn.Module):
    """Image classifier built from a ResNet-18 backbone with a new final layer.

    Args:
        num_classes: Number of output classes for the replaced ``fc`` layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # The surrounding snippet only imports torch and torch.nn, so the bare
        # name `torchvision` would raise NameError; import it where it is used.
        import torchvision

        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the installed version.
        self.backbone = torchvision.models.resnet18(pretrained=True)
        # Take the head's input width from the backbone instead of hard-coding
        # 512, so the layer still matches if the backbone is swapped.
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for input batch ``x``."""
        return self.backbone(x)

# Train your model
model = CustomClassifier(num_classes=10)
# ... training code ...
torch.save(model.state_dict(), "custom_classifier.pth")

2. InferX Optimization

from inferx.optimize import optimize_model

# Load and optimize the trained model
model_state = torch.load("custom_classifier.pth")
model = CustomClassifier(num_classes=10)
model.load_state_dict(model_state)

# Optimize for deployment
optimized_model = optimize_model(
    model=model,
    input_shape=(1, 3, 224, 224),
    target_device="auto",
    optimization_config={
        "precision": "fp16",
        "enable_tensorrt": True,
        "optimization_level": "aggressive"
    }
)

# Save optimized model
optimized_model.save("custom_classifier_optimized.inferx")

3. Deployment

from inferx.models import load_model
import cv2
import numpy as np

# Load optimized model
model = load_model("custom_classifier_optimized.inferx")

# Preprocess function
def preprocess_image(image_path):
    """Load an image file and turn it into a float32 batch of one in CHW layout.

    The image is read with OpenCV, resized to 224x224, divided by 255,
    transposed from HWC to CHW, and given a leading batch dimension.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 NumPy array with a leading batch axis of size 1.

    Raises:
        OSError: If OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cv2.resize.
        raise OSError(
            f"Could not read image (missing, unreadable, or unsupported format): {image_path}"
        )
    # NOTE(review): OpenCV loads channels in BGR order. If the model was
    # trained on RGB input, convert with cv2.cvtColor first - confirm against
    # the training preprocessing.
    image = cv2.resize(image, (224, 224))
    image = image.astype(np.float32) / 255.0
    image = np.transpose(image, (2, 0, 1))  # HWC to CHW
    image = np.expand_dims(image, axis=0)   # Add batch dimension
    return image

# Run inference
image = preprocess_image("test_image.jpg")
result = model.inference(image)

print(f"Predicted class: {result['predicted_class']}")
print(f"Confidence: {result['confidence']:.3f}")

Production Deployment

Docker Container

FROM inferx/runtime:latest

# Copy your optimized model
COPY custom_classifier_optimized.inferx /app/model/

# Copy application code
COPY app.py /app/

# Install dependencies
RUN pip install opencv-python numpy

# Expose port
EXPOSE 8080

# Run application
CMD ["python", "/app/app.py"]

Kubernetes Deployment

apiVersion: apps/v1
kind: Deployment
metadata:
  name: custom-model-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: custom-model
  template:
    metadata:
      labels:
        app: custom-model
    spec:
      containers:
      - name: inferx-model
        image: your-registry/custom-model:latest
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "2Gi"
            cpu: "1"
        env:
        - name: INFERX_DEVICE
          value: "auto"
        - name: INFERX_BATCH_SIZE
          value: "4"

Best Practices

1. Model Validation

# Always validate optimized model accuracy
def validate_optimized_model(original_model, optimized_model, test_data, tolerance=0.01):
    """Check that an optimized model stays close to the original model.

    Both models are run on every batch of ``test_data`` and the collected
    outputs are handed to ``compare_accuracy``. That helper is not defined in
    this snippet and must be provided by the surrounding module.

    Args:
        original_model: Callable invoked as ``original_model(batch)``.
        optimized_model: Object exposing ``inference(batch)``.
        test_data: Iterable of batches.
        tolerance: Largest acceptable absolute accuracy difference
            (default 0.01, i.e. 1%).

    Returns:
        True if the absolute accuracy difference is below ``tolerance``.
    """
    original_outputs = []
    optimized_outputs = []

    for batch in test_data:
        original_outputs.append(original_model(batch))
        optimized_outputs.append(optimized_model.inference(batch))

    # Calculate accuracy difference
    accuracy_diff = compare_accuracy(original_outputs, optimized_outputs)
    print(f"Accuracy difference: {accuracy_diff:.2%}")

    # Compare the magnitude: with a signed difference, an arbitrarily large
    # negative value would otherwise count as "within 1%".
    return abs(accuracy_diff) < tolerance

is_valid = validate_optimized_model(original, optimized, validation_data)

2. Progressive Optimization

# Start with conservative optimization
# NOTE: validate_model is not defined on this page; supply your own check
# that takes an optimized model and returns True when it is acceptable.
conservative_model = optimize_model(
    model=model,
    optimization_level="conservative"
)

if not validate_model(conservative_model):
    # Without this guard final_model would never be assigned and later code
    # would fail with a NameError far from the real cause.
    raise RuntimeError("Conservative optimization failed validation")

# Conservative result is good, so try more aggressive optimization
aggressive_model = optimize_model(
    model=model,
    optimization_level="aggressive"
)

if validate_model(aggressive_model):
    # Use aggressive optimization
    final_model = aggressive_model
else:
    # Fall back to conservative
    final_model = conservative_model

3. Hardware-Specific Testing

# Test on multiple hardware configurations
def test_across_hardware(model_path, devices=("cpu", "gpu", "jetson")):
    """Optimize and benchmark one model for each target device.

    NOTE: because the name starts with ``test_``, pytest will try to collect
    this function if it lives in a test module.

    Args:
        model_path: Path passed to ``optimize_model`` as ``model_path``.
        devices: Iterable of ``target_device`` names to try. Defaults to the
            three devices used throughout this guide.

    Returns:
        Dict mapping each device name to its benchmark result, or to ``None``
        when optimizing or benchmarking for that device raised.
    """
    results = {}

    for device in devices:
        try:
            optimized = optimize_model(
                model_path=model_path,
                target_device=device
            )
            results[device] = benchmark_model(optimized)
        except Exception as e:
            # Best-effort sweep: a device that is unavailable on this machine
            # must not stop the remaining devices from being tested.
            print(f"Failed to optimize or benchmark for {device}: {e}")
            results[device] = None

    return results

performance_results = test_across_hardware("my_model.pth")

Troubleshooting

Common Issues

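Memory Issues: Reduce batch size or use a lower precision (e.g. fp16 or int8 quantization); gradient checkpointing only reduces memory during training, not inference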
Accuracy Loss: Use less aggressive quantization or pruning
Slow Inference: Check if hardware-specific optimizations are enabled
Compatibility Issues: Verify model format and input shapes

Debug Mode

# Enable debug mode for detailed optimization logs
optimized_model = optimize_model(
    model_path="model.pth",
    debug=True,
    verbose=True
)

# Check optimization report
print(optimized_model.optimization_report)

Next Steps

Learn about specific model types supported by InferX
Explore hardware optimization guides
Check out production deployment examples
Join our community discussions for help and tips

On this page

Custom Models with InferX
Optimization Pipeline
Supported Model Formats
PyTorch Models
TensorFlow/Keras Models
ONNX Models
Hardware-Specific Optimizations
Jetson Optimization
GPU Optimization
CPU Optimization
Advanced Optimization Features
Dynamic Batch Size
Quantization
Pruning
Performance Monitoring
Benchmarking
Real-time Monitoring
Example: Complete Custom Model Pipeline
1. Model Training (External)
2. InferX Optimization
3. Deployment
Production Deployment
Docker Container
Kubernetes Deployment
Best Practices
1. Model Validation
2. Progressive Optimization
3. Hardware-Specific Testing
Troubleshooting
Common Issues
Debug Mode
Next Steps

Custom Models with InferX

Optimization Pipeline

from inferx.optimize import optimize_model

# Optimize your custom model
optimized_model = optimize_model(
    model_path="path/to/your/model.pth",
    target_device="auto",  # Auto-detects your hardware
    optimization_level="aggressive"
)

# Use the optimized model
result = optimized_model.inference(input_data)

Supported Model Formats

InferX supports multiple model formats and frameworks:

PyTorch Models

import torch
from inferx.optimize import optimize_model

# Load your PyTorch model
model = torch.load("my_model.pth")

# Optimize for current hardware
optimized = optimize_model(
    model=model,
    input_shape=(1, 3, 224, 224),
    optimization_level="balanced"
)

# Deploy with automatic hardware detection
result = optimized.inference(input_tensor)

TensorFlow/Keras Models

import tensorflow as tf
from inferx.optimize import optimize_model

# Load TensorFlow model
model = tf.keras.models.load_model("my_model.h5")

# Optimize for edge deployment
optimized = optimize_model(
    model=model,
    target_device="jetson",
    precision="fp16"  # Use half precision for speed
)

ONNX Models

from inferx.optimize import optimize_model

# Optimize ONNX model
optimized = optimize_model(
    model_path="model.onnx",
    target_device="gpu",
    batch_size=4
)

Hardware-Specific Optimizations

Jetson Optimization

# Optimize specifically for Jetson devices
jetson_model = optimize_model(
    model_path="model.pth",
    target_device="jetson",
    optimization_config={
        "precision": "fp16",
        "max_workspace_size": "1GB",
        "dla_cores": 2,  # Use Deep Learning Accelerator
        "enable_tensorrt": True
    }
)

GPU Optimization

# Optimize for high-end GPUs
gpu_model = optimize_model(
    model_path="model.pth", 
    target_device="gpu",
    optimization_config={
        "precision": "mixed",  # Mixed-precision inference (this is a deployment setting, not a training one)
        "batch_size": 32,
        "enable_tensorrt": True,
        "enable_cudnn": True
    }
)

CPU Optimization

# Optimize for CPU deployment
cpu_model = optimize_model(
    model_path="model.pth",
    target_device="cpu",
    optimization_config={
        "num_threads": 8,
        "enable_mkldnn": True,
        "quantization": "int8"
    }
)

Advanced Optimization Features

Dynamic Batch Size

# Support variable batch sizes
model = optimize_model(
    model_path="model.pth",
    input_shape=[(1, 3, 224, 224), (4, 3, 224, 224), (8, 3, 224, 224)],
    dynamic_axes={'input': {0: 'batch_size'}}
)

# Use with different batch sizes
single_result = model.inference(single_image)
batch_result = model.inference(batch_images)

Quantization

# Apply quantization for smaller models
quantized_model = optimize_model(
    model_path="large_model.pth",
    quantization_config={
        "method": "dynamic",  # or "static", "qat"
        "precision": "int8",
        "calibration_dataset": calibration_data
    }
)

print(f"Model size reduced by {quantized_model.compression_ratio:.2f}x")

Pruning

# Apply model pruning to reduce parameters
pruned_model = optimize_model(
    model_path="model.pth",
    pruning_config={
        "method": "magnitude",
        "sparsity": 0.5,  # Remove 50% of parameters
        "structured": False
    }
)

Performance Monitoring

Benchmarking

from inferx.benchmark import benchmark_model

# Comprehensive performance analysis
results = benchmark_model(
    model=optimized_model,
    input_shape=(1, 3, 224, 224),
    num_runs=1000,
    warmup_runs=50
)

print(f"Average inference time: {results['avg_time']:.2f}ms")
print(f"Throughput: {results['throughput']:.1f} FPS")
print(f"Memory usage: {results['memory_mb']:.1f}MB")

Real-time Monitoring

import time
import psutil

class ModelMonitor:
    """Wrap a model and record timing and system metrics for each inference call.

    Every call to ``inference_with_monitoring`` appends one metrics dict to
    ``self.metrics``, so the list holds the full history for this monitor.
    """

    def __init__(self, model):
        """Store the model to monitor and start with an empty metrics history.

        Args:
            model: Object exposing an ``inference(input_data)`` method.
        """
        self.model = model
        self.metrics = []

    def inference_with_monitoring(self, input_data):
        """Run ``self.model.inference`` and measure the call.

        Args:
            input_data: Passed through unchanged to ``self.model.inference``.

        Returns:
            A ``(result, metrics)`` tuple. ``metrics`` contains:
            ``inference_time`` (seconds), ``cpu_usage`` (system-wide CPU
            percent over the call), ``memory_delta`` (change in system memory
            percent) and ``timestamp`` (wall-clock time the record was made).
        """
        # psutil.cpu_percent() reports utilization since the *previous* call,
        # so subtracting two readings is meaningless. Prime the counter here
        # and read it once afterwards to get utilization over the inference.
        psutil.cpu_percent(interval=None)
        memory_before = psutil.virtual_memory().percent
        # perf_counter is monotonic and high resolution; time.time() can jump
        # when the system clock is adjusted.
        start_time = time.perf_counter()

        # Run inference
        result = self.model.inference(input_data)

        elapsed = time.perf_counter() - start_time
        # NOTE: for very short calls the CPU sampling window is tiny and the
        # reading is noisy; treat it as indicative only.
        cpu_usage = psutil.cpu_percent(interval=None)
        memory_after = psutil.virtual_memory().percent

        # Log metrics
        metrics = {
            'inference_time': elapsed,
            'cpu_usage': cpu_usage,
            'memory_delta': memory_after - memory_before,
            'timestamp': time.time()
        }

        self.metrics.append(metrics)
        return result, metrics

# Usage
monitor = ModelMonitor(optimized_model)
result, perf = monitor.inference_with_monitoring(test_input)
print(f"Inference took {perf['inference_time']*1000:.2f}ms")

Example: Complete Custom Model Pipeline

1. Model Training (External)

# Your existing training code
import torch
import torch.nn as nn

class CustomClassifier(nn.Module):
    """Image classifier built from a ResNet-18 backbone with a new final layer.

    Args:
        num_classes: Number of output classes for the replaced ``fc`` layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # The surrounding snippet only imports torch and torch.nn, so the bare
        # name `torchvision` would raise NameError; import it where it is used.
        import torchvision

        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the installed version.
        self.backbone = torchvision.models.resnet18(pretrained=True)
        # Take the head's input width from the backbone instead of hard-coding
        # 512, so the layer still matches if the backbone is swapped.
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for input batch ``x``."""
        return self.backbone(x)

# Train your model
model = CustomClassifier(num_classes=10)
# ... training code ...
torch.save(model.state_dict(), "custom_classifier.pth")

2. InferX Optimization

from inferx.optimize import optimize_model

# Load and optimize the trained model
model_state = torch.load("custom_classifier.pth")
model = CustomClassifier(num_classes=10)
model.load_state_dict(model_state)

# Optimize for deployment
optimized_model = optimize_model(
    model=model,
    input_shape=(1, 3, 224, 224),
    target_device="auto",
    optimization_config={
        "precision": "fp16",
        "enable_tensorrt": True,
        "optimization_level": "aggressive"
    }
)

# Save optimized model
optimized_model.save("custom_classifier_optimized.inferx")

3. Deployment

from inferx.models import load_model
import cv2
import numpy as np

# Load optimized model
model = load_model("custom_classifier_optimized.inferx")

# Preprocess function
def preprocess_image(image_path):
    """Load an image file and turn it into a float32 batch of one in CHW layout.

    The image is read with OpenCV, resized to 224x224, divided by 255,
    transposed from HWC to CHW, and given a leading batch dimension.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 NumPy array with a leading batch axis of size 1.

    Raises:
        OSError: If OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cv2.resize.
        raise OSError(
            f"Could not read image (missing, unreadable, or unsupported format): {image_path}"
        )
    # NOTE(review): OpenCV loads channels in BGR order. If the model was
    # trained on RGB input, convert with cv2.cvtColor first - confirm against
    # the training preprocessing.
    image = cv2.resize(image, (224, 224))
    image = image.astype(np.float32) / 255.0
    image = np.transpose(image, (2, 0, 1))  # HWC to CHW
    image = np.expand_dims(image, axis=0)   # Add batch dimension
    return image

# Run inference
image = preprocess_image("test_image.jpg")
result = model.inference(image)

print(f"Predicted class: {result['predicted_class']}")
print(f"Confidence: {result['confidence']:.3f}")

Production Deployment

Docker Container

FROM inferx/runtime:latest

# Copy your optimized model
COPY custom_classifier_optimized.inferx /app/model/

# Copy application code
COPY app.py /app/

# Install dependencies
RUN pip install opencv-python numpy

# Expose port
EXPOSE 8080

# Run application
CMD ["python", "/app/app.py"]

Kubernetes Deployment

apiVersion: apps/v1
kind: Deployment
metadata:
  name: custom-model-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: custom-model
  template:
    metadata:
      labels:
        app: custom-model
    spec:
      containers:
      - name: inferx-model
        image: your-registry/custom-model:latest
        resources:
          requests:
            memory: "512Mi"
            cpu: "250m"
          limits:
            memory: "2Gi"
            cpu: "1"
        env:
        - name: INFERX_DEVICE
          value: "auto"
        - name: INFERX_BATCH_SIZE
          value: "4"

Best Practices

1. Model Validation

# Always validate optimized model accuracy
def validate_optimized_model(original_model, optimized_model, test_data, tolerance=0.01):
    """Check that an optimized model stays close to the original model.

    Both models are run on every batch of ``test_data`` and the collected
    outputs are handed to ``compare_accuracy``. That helper is not defined in
    this snippet and must be provided by the surrounding module.

    Args:
        original_model: Callable invoked as ``original_model(batch)``.
        optimized_model: Object exposing ``inference(batch)``.
        test_data: Iterable of batches.
        tolerance: Largest acceptable absolute accuracy difference
            (default 0.01, i.e. 1%).

    Returns:
        True if the absolute accuracy difference is below ``tolerance``.
    """
    original_outputs = []
    optimized_outputs = []

    for batch in test_data:
        original_outputs.append(original_model(batch))
        optimized_outputs.append(optimized_model.inference(batch))

    # Calculate accuracy difference
    accuracy_diff = compare_accuracy(original_outputs, optimized_outputs)
    print(f"Accuracy difference: {accuracy_diff:.2%}")

    # Compare the magnitude: with a signed difference, an arbitrarily large
    # negative value would otherwise count as "within 1%".
    return abs(accuracy_diff) < tolerance

is_valid = validate_optimized_model(original, optimized, validation_data)

2. Progressive Optimization

# Start with conservative optimization
# NOTE: validate_model is not defined on this page; supply your own check
# that takes an optimized model and returns True when it is acceptable.
conservative_model = optimize_model(
    model=model,
    optimization_level="conservative"
)

if not validate_model(conservative_model):
    # Without this guard final_model would never be assigned and later code
    # would fail with a NameError far from the real cause.
    raise RuntimeError("Conservative optimization failed validation")

# Conservative result is good, so try more aggressive optimization
aggressive_model = optimize_model(
    model=model,
    optimization_level="aggressive"
)

if validate_model(aggressive_model):
    # Use aggressive optimization
    final_model = aggressive_model
else:
    # Fall back to conservative
    final_model = conservative_model

3. Hardware-Specific Testing

# Test on multiple hardware configurations
def test_across_hardware(model_path, devices=("cpu", "gpu", "jetson")):
    """Optimize and benchmark one model for each target device.

    NOTE: because the name starts with ``test_``, pytest will try to collect
    this function if it lives in a test module.

    Args:
        model_path: Path passed to ``optimize_model`` as ``model_path``.
        devices: Iterable of ``target_device`` names to try. Defaults to the
            three devices used throughout this guide.

    Returns:
        Dict mapping each device name to its benchmark result, or to ``None``
        when optimizing or benchmarking for that device raised.
    """
    results = {}

    for device in devices:
        try:
            optimized = optimize_model(
                model_path=model_path,
                target_device=device
            )
            results[device] = benchmark_model(optimized)
        except Exception as e:
            # Best-effort sweep: a device that is unavailable on this machine
            # must not stop the remaining devices from being tested.
            print(f"Failed to optimize or benchmark for {device}: {e}")
            results[device] = None

    return results

performance_results = test_across_hardware("my_model.pth")

Troubleshooting

Common Issues

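Memory Issues: Reduce batch size or use a lower precision (e.g. fp16 or int8 quantization); gradient checkpointing only reduces memory during training, not inference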
Accuracy Loss: Use less aggressive quantization or pruning
Slow Inference: Check if hardware-specific optimizations are enabled
Compatibility Issues: Verify model format and input shapes

Debug Mode

# Enable debug mode for detailed optimization logs
optimized_model = optimize_model(
    model_path="model.pth",
    debug=True,
    verbose=True
)

# Check optimization report
print(optimized_model.optimization_report)

Next Steps

Learn about specific model types supported by InferX
Explore hardware optimization guides
Check out production deployment examples
Join our community discussions for help and tips

On this page

Custom Models with InferX
Optimization Pipeline
Supported Model Formats
PyTorch Models
TensorFlow/Keras Models
ONNX Models
Hardware-Specific Optimizations
Jetson Optimization
GPU Optimization
CPU Optimization
Advanced Optimization Features
Dynamic Batch Size
Quantization
Pruning
Performance Monitoring
Benchmarking
Real-time Monitoring
Example: Complete Custom Model Pipeline
1. Model Training (External)
2. InferX Optimization
3. Deployment
Production Deployment
Docker Container
Kubernetes Deployment
Best Practices
1. Model Validation
2. Progressive Optimization
3. Hardware-Specific Testing
Troubleshooting
Common Issues
Debug Mode
Next Steps

​Custom Models with InferX

​Optimization Pipeline

​Supported Model Formats

​PyTorch Models

​TensorFlow/Keras Models

​ONNX Models

​Hardware-Specific Optimizations

​Jetson Optimization

​GPU Optimization

​CPU Optimization

​Advanced Optimization Features

​Dynamic Batch Size

​Quantization

​Pruning

​Performance Monitoring

​Benchmarking

​Real-time Monitoring

​Example: Complete Custom Model Pipeline

​1. Model Training (External)

​2. InferX Optimization

​3. Deployment

​Production Deployment

​Docker Container

​Kubernetes Deployment

​Best Practices

​1. Model Validation

​2. Progressive Optimization

​3. Hardware-Specific Testing

​Troubleshooting

​Common Issues

​Debug Mode

​Next Steps

Getting Started

Multimodal Models

Large Language Models

Computer Vision Models

Audio Models

Custom Models

Mobile SDK

​Custom Models with InferX

​Optimization Pipeline

​Supported Model Formats

​PyTorch Models

​TensorFlow/Keras Models

​ONNX Models

​Hardware-Specific Optimizations

​Jetson Optimization

​GPU Optimization

​CPU Optimization

​Advanced Optimization Features

​Dynamic Batch Size

​Quantization

​Pruning

​Performance Monitoring

​Benchmarking

​Real-time Monitoring

​Example: Complete Custom Model Pipeline

​1. Model Training (External)

​2. InferX Optimization

​3. Deployment

​Production Deployment

​Docker Container

​Kubernetes Deployment

​Best Practices

​1. Model Validation

​2. Progressive Optimization

​3. Hardware-Specific Testing

​Troubleshooting

​Common Issues

​Debug Mode

​Next Steps

Custom Models with InferX

Optimization Pipeline

Supported Model Formats

PyTorch Models

TensorFlow/Keras Models

ONNX Models

Hardware-Specific Optimizations

Jetson Optimization

GPU Optimization

CPU Optimization

Advanced Optimization Features

Dynamic Batch Size

Quantization

Pruning

Performance Monitoring

Benchmarking

Real-time Monitoring

Example: Complete Custom Model Pipeline

1. Model Training (External)

2. InferX Optimization

3. Deployment

Production Deployment

Docker Container

Kubernetes Deployment

Best Practices

1. Model Validation

2. Progressive Optimization

3. Hardware-Specific Testing

Troubleshooting

Common Issues

Debug Mode

Next Steps

Custom Models with InferX

Optimization Pipeline

Supported Model Formats

PyTorch Models

TensorFlow/Keras Models

ONNX Models

Hardware-Specific Optimizations

Jetson Optimization

GPU Optimization

CPU Optimization

Advanced Optimization Features

Dynamic Batch Size

Quantization

Pruning

Performance Monitoring

Benchmarking

Real-time Monitoring

Example: Complete Custom Model Pipeline

1. Model Training (External)

2. InferX Optimization

3. Deployment

Production Deployment

Docker Container

Kubernetes Deployment

Best Practices

1. Model Validation

2. Progressive Optimization

3. Hardware-Specific Testing

Troubleshooting

Common Issues

Debug Mode

Next Steps