- Complete transformer implementation from scratch
- Training pipeline with gradient accumulation and mixed precision
- Optimized inference with KV caching
- Multi-format data processing (PDFs, images, code, text)
- Comprehensive documentation
- Apache 2.0 license
- Example training plots included in docs/images/
36 lines · 786 B · Python
"""
|
|
SheepOp LLM - A modern language model implementation
|
|
Optimized for production RAG systems
|
|
"""
|
|
from .transformer import TransformerModel
|
|
from .attention import MultiHeadAttention, PositionalEncoding
|
|
from .blocks import TransformerBlock, FeedForward
|
|
from .optimized_attention import (
|
|
OptimizedMultiHeadAttention,
|
|
RetrievalCache,
|
|
OptimizedInference,
|
|
KVCache,
|
|
)
|
|
from .prefetching import (
|
|
PrefetchDataLoader,
|
|
LookaheadRetriever,
|
|
BatchPrefetcher,
|
|
)
|
|
|
|
__all__ = [
|
|
'TransformerModel',
|
|
'MultiHeadAttention',
|
|
'PositionalEncoding',
|
|
'TransformerBlock',
|
|
'FeedForward',
|
|
'OptimizedMultiHeadAttention',
|
|
'RetrievalCache',
|
|
'OptimizedInference',
|
|
'KVCache',
|
|
'PrefetchDataLoader',
|
|
'LookaheadRetriever',
|
|
'BatchPrefetcher',
|
|
]
|
|
|
|
|