diff --git a/.github/instructions/cython.instructions.md b/.github/instructions/cython.instructions.md new file mode 100644 index 0000000000..a68c2c9c2c --- /dev/null +++ b/.github/instructions/cython.instructions.md @@ -0,0 +1,22 @@ +--- +applyTo: + - "dpctl/**/*.pyx" + - "dpctl/**/*.pxd" + - "dpctl/**/*.pxi" +--- + +# Cython Instructions + +See `dpctl/AGENTS.md` for full conventions. + +## Required Directives (after license) +```cython +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True +``` + +## Key Rules +- `cimport` for C-level, `import` for Python-level +- Store C refs as `_*_ref`, clean up in `__dealloc__` with NULL check +- Use `with nogil:` for blocking C operations diff --git a/.github/instructions/dpctl.instructions.md b/.github/instructions/dpctl.instructions.md new file mode 100644 index 0000000000..b97eb0e0a4 --- /dev/null +++ b/.github/instructions/dpctl.instructions.md @@ -0,0 +1,26 @@ +--- +applyTo: + - "**/*.py" + - "**/*.pyx" + - "**/*.pxd" + - "**/*.cpp" + - "**/*.hpp" + - "**/*.h" +--- + +# DPCTL General Instructions + +See `AGENTS.md` at repository root for project overview and architecture. +Each major directory has its own `AGENTS.md` with specific conventions. + +## Key References + +- **Code style:** `.pre-commit-config.yaml`, `.clang-format`, `.flake8` +- **License:** Apache 2.0 with Intel copyright - match existing file headers + +## Critical Rules + +1. **Device compatibility:** Not all devices support fp64/fp16 - never assume availability +2. **Queue consistency:** Arrays in same operation must share compatible queues +3. **Resource cleanup:** Clean up C resources in `__dealloc__` with NULL check +4. **NULL checks:** Always check C API returns before use diff --git a/.github/instructions/elementwise.instructions.md b/.github/instructions/elementwise.instructions.md new file mode 100644 index 0000000000..0b8c392fe1 --- /dev/null +++ b/.github/instructions/elementwise.instructions.md @@ -0,0 +1,24 @@ +--- +applyTo: + - "dpctl/tensor/libtensor/include/kernels/elementwise_functions/**" + - "dpctl/tensor/libtensor/source/elementwise_functions/**" + - "dpctl/tensor/_elementwise_*.py" + - "dpctl/tests/elementwise/**" +--- + +# Elementwise Operations Instructions + +Full stack: C++ kernel → pybind11 → Python wrapper → tests + +## References +- C++ kernels: `dpctl/tensor/libtensor/AGENTS.md` +- Python wrappers: `dpctl/tensor/AGENTS.md` +- Tests: `dpctl/tests/AGENTS.md` + +## Adding New Operation +1. `libtensor/include/kernels/elementwise_functions/op.hpp` - functor +2. `libtensor/source/elementwise_functions/op.cpp` - dispatch tables +3. Register in `tensor_elementwise.cpp` +4. `_elementwise_funcs.py` - Python wrapper +5. Export in `__init__.py` +6. `tests/elementwise/test_op.py` - full dtype/usm coverage diff --git a/.github/instructions/libsyclinterface.instructions.md b/.github/instructions/libsyclinterface.instructions.md new file mode 100644 index 0000000000..9b9d738d71 --- /dev/null +++ b/.github/instructions/libsyclinterface.instructions.md @@ -0,0 +1,23 @@ +--- +applyTo: + - "libsyclinterface/**/*.h" + - "libsyclinterface/**/*.hpp" + - "libsyclinterface/**/*.cpp" +--- + +# C API Instructions + +See `libsyclinterface/AGENTS.md` for conventions. + +## Naming +`DPCTL_` (e.g., `DPCTLDevice_Create`) + +## Ownership annotations (see `include/syclinterface/Support/MemOwnershipAttrs.h`) +- `__dpctl_give` - caller must free +- `__dpctl_take` - function takes ownership +- `__dpctl_keep` - function only observes + +## Key Rules +- Annotate all parameters and returns +- Return NULL on failure +- Use `DPCTL_API` for exports diff --git a/.github/instructions/libtensor-cpp.instructions.md b/.github/instructions/libtensor-cpp.instructions.md new file mode 100644 index 0000000000..6629139103 --- /dev/null +++ b/.github/instructions/libtensor-cpp.instructions.md @@ -0,0 +1,16 @@ +--- +applyTo: + - "dpctl/tensor/libtensor/**/*.hpp" + - "dpctl/tensor/libtensor/**/*.cpp" +--- + +# C++ SYCL Kernel Instructions + +See `dpctl/tensor/libtensor/AGENTS.md` for patterns and directory structure. + +## Key Rules +- Kernel class names must be globally unique +- Use `if constexpr` for compile-time type branching +- Complex types don't support vectorization +- Return `nullptr` from factory for unsupported types +- Check `include/kernels/elementwise_functions/common.hpp` for base patterns diff --git a/.github/instructions/memory.instructions.md b/.github/instructions/memory.instructions.md new file mode 100644 index 0000000000..ff99ac359f --- /dev/null +++ b/.github/instructions/memory.instructions.md @@ -0,0 +1,19 @@ +--- +applyTo: + - "dpctl/memory/**" + - "**/test_sycl_usm*.py" +--- + +# USM Memory Instructions + +See `dpctl/memory/AGENTS.md` for details. + +## USM Types +- `MemoryUSMDevice` - device-only (fastest) +- `MemoryUSMShared` - host and device accessible +- `MemoryUSMHost` - host memory, device accessible + +## Lifetime Rules +1. Memory is queue-bound +2. Keep memory alive until operations complete +3. Views extend base memory lifetime diff --git a/.github/instructions/tensor-python.instructions.md b/.github/instructions/tensor-python.instructions.md new file mode 100644 index 0000000000..b7dcefe840 --- /dev/null +++ b/.github/instructions/tensor-python.instructions.md @@ -0,0 +1,19 @@ +--- +applyTo: + - "dpctl/tensor/*.py" + - "dpctl/tensor/**/*.py" +--- + +# Tensor Python Instructions + +See `dpctl/tensor/AGENTS.md` for patterns. + +## Queue validation (required) +```python +exec_q = dpctl.utils.get_execution_queue([x.sycl_queue, y.sycl_queue]) +if exec_q is None: + raise ExecutionPlacementError("...") +``` + +## Adding operations +See checklist in `dpctl/tensor/AGENTS.md`. diff --git a/.github/instructions/testing.instructions.md b/.github/instructions/testing.instructions.md new file mode 100644 index 0000000000..560ec89b80 --- /dev/null +++ b/.github/instructions/testing.instructions.md @@ -0,0 +1,23 @@ +--- +applyTo: + - "dpctl/tests/**/*.py" + - "**/test_*.py" +--- + +# Testing Instructions + +See `dpctl/tests/AGENTS.md` for patterns. + +## Essential helpers (from `helper/_helper.py`) +```python +get_queue_or_skip() # Create queue or skip +skip_if_dtype_not_supported() # Skip if device lacks dtype +``` + +## Dtype/USM lists +Import from `elementwise/utils.py` - do not hardcode. + +## Coverage +- All dtypes from `_all_dtypes` +- All USM types: device, shared, host +- Edge cases: empty, scalar, broadcast diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..e64054f200 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,59 @@ +# AGENTS.md - AI Agent Guide for DPCTL + +## Overview + +**DPCTL** (Data Parallel Control) is a Python SYCL binding library for heterogeneous computing. It provides Python wrappers for SYCL runtime objects and implements the Python Array API standard for tensor operations. + +- **License:** Apache 2.0 (see `LICENSE`) +- **Copyright:** Intel Corporation + +## Architecture + +``` +Python API → Cython Bindings → C API → SYCL Runtime + dpctl/ _sycl_*.pyx libsyclinterface/ + +dpctl.tensor → pybind11 → C++ Kernels (libtensor/) → SYCL Runtime +``` + +## Directory Guide + +| Directory | AGENTS.md | Purpose | +|-----------|-----------|---------| +| `dpctl/` | [dpctl/AGENTS.md](dpctl/AGENTS.md) | Core SYCL bindings (Device, Queue, Context) | +| `dpctl/tensor/` | [dpctl/tensor/AGENTS.md](dpctl/tensor/AGENTS.md) | Array API tensor operations | +| `dpctl/tensor/libtensor/` | [dpctl/tensor/libtensor/AGENTS.md](dpctl/tensor/libtensor/AGENTS.md) | C++ SYCL kernels | +| `dpctl/memory/` | [dpctl/memory/AGENTS.md](dpctl/memory/AGENTS.md) | USM memory management | +| `dpctl/program/` | [dpctl/program/AGENTS.md](dpctl/program/AGENTS.md) | SYCL kernel compilation | +| `dpctl/utils/` | [dpctl/utils/AGENTS.md](dpctl/utils/AGENTS.md) | Utility functions | +| `dpctl/tests/` | [dpctl/tests/AGENTS.md](dpctl/tests/AGENTS.md) | Test suite | +| `libsyclinterface/` | [libsyclinterface/AGENTS.md](libsyclinterface/AGENTS.md) | C API layer | + +## Code Style + +Configuration files (do not hardcode versions - check these files): +- **Python/Cython:** `.pre-commit-config.yaml` +- **C/C++:** `.clang-format` +- **Linting:** `.flake8` + +## License Header + +All source files require Apache 2.0 header with Intel copyright. Reference existing files for exact format. + +## Quick Reference + +```python +import dpctl +import dpctl.tensor as dpt + +q = dpctl.SyclQueue("gpu") # Create queue +x = dpt.ones((100, 100), dtype="f4", sycl_queue=q) # Create array +np_array = dpt.asnumpy(x) # Transfer to host +``` + +## Key Concepts + +- **Queue:** Execution context binding device + context +- **USM:** Unified Shared Memory (device/shared/host types) +- **Filter string:** Device selector syntax `"backend:device_type:num"` +- **Array API:** Python standard for array operations (https://data-apis.org/array-api/) diff --git a/dpctl/AGENTS.md b/dpctl/AGENTS.md new file mode 100644 index 0000000000..23fbbf2d69 --- /dev/null +++ b/dpctl/AGENTS.md @@ -0,0 +1,62 @@ +# dpctl/ - Core SYCL Bindings + +## Purpose + +Python/Cython wrappers for SYCL runtime objects: Device, Queue, Context, Event, Platform. + +## Key Files + +| File | Purpose | +|------|---------| +| `_sycl_device.pyx` | `SyclDevice` wrapping `sycl::device` | +| `_sycl_queue.pyx` | `SyclQueue` wrapping `sycl::queue` | +| `_sycl_context.pyx` | `SyclContext` wrapping `sycl::context` | +| `_sycl_event.pyx` | `SyclEvent` wrapping `sycl::event` | +| `_sycl_platform.pyx` | `SyclPlatform` wrapping `sycl::platform` | +| `_sycl_device_factory.pyx` | Device enumeration and selection | +| `_sycl_queue_manager.pyx` | Queue management utilities | +| `_backend.pxd` | C API declarations from libsyclinterface | +| `enum_types.py` | Python enums for SYCL types | + +## Cython Conventions + +### Required Directives (after license header) +```cython +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True +``` + +### Extension Type Pattern +```cython +cdef class SyclDevice: + cdef DPCTLSyclDeviceRef _device_ref # C reference + + def __dealloc__(self): + if self._device_ref is not NULL: + DPCTLDevice_Delete(self._device_ref) + + cdef DPCTLSyclDeviceRef get_device_ref(self): + return self._device_ref +``` + +### Key Rules +- Store C references as `_*_ref` attributes +- Always clean up in `__dealloc__` with NULL check +- Use `with nogil:` for blocking C calls +- Check NULL before using C API returns + +### Exceptions +- `SyclDeviceCreationError` +- `SyclQueueCreationError` +- `SyclContextCreationError` + +## cimport vs import + +```cython +# cimport - C-level declarations (compile-time) +from ._backend cimport DPCTLSyclDeviceRef, DPCTLDevice_Create + +# import - Python-level (runtime) +from . import _device_selection +``` diff --git a/dpctl/memory/AGENTS.md b/dpctl/memory/AGENTS.md new file mode 100644 index 0000000000..ee1f232402 --- /dev/null +++ b/dpctl/memory/AGENTS.md @@ -0,0 +1,41 @@ +# dpctl/memory/ - USM Memory Management + +## Purpose + +Python classes for SYCL Unified Shared Memory (USM) allocation. + +## USM Types + +| Class | USM Type | Description | +|-------|----------|-------------| +| `MemoryUSMDevice` | Device | Device-only, fastest access | +| `MemoryUSMShared` | Shared | Host and device accessible | +| `MemoryUSMHost` | Host | Host memory, device accessible | + +## __sycl_usm_array_interface__ + +All memory classes implement this protocol: +```python +{ + "data": (ptr, readonly_flag), + "shape": (nbytes,), + "strides": None, + "typestr": "|u1", + "version": 1, + "syclobj": queue +} +``` + +## Memory Lifetime Rules + +1. **Queue-bound:** Memory tied to specific queue/context +2. **Outlive operations:** Keep memory alive until operations complete +3. **Views extend lifetime:** Views keep base memory alive + +## Key Files + +| File | Purpose | +|------|---------| +| `_memory.pyx` | Memory class implementations | +| `_memory.pxd` | Cython declarations | +| `__init__.py` | Public API exports | diff --git a/dpctl/program/AGENTS.md b/dpctl/program/AGENTS.md new file mode 100644 index 0000000000..3f24023452 --- /dev/null +++ b/dpctl/program/AGENTS.md @@ -0,0 +1,39 @@ +# dpctl/program/ - SYCL Kernel Compilation + +## Purpose + +Compile and manage SYCL kernels from OpenCL C or SPIR-V source. + +## Key Files + +| File | Purpose | +|------|---------| +| `_program.pyx` | `SyclProgram`, `SyclKernel` extension types | +| `_program.pxd` | Cython declarations | +| `__init__.py` | Public API exports | + +## Classes + +- **`SyclProgram`** - Compiled SYCL program containing one or more kernels +- **`SyclKernel`** - Individual kernel extracted from a program + +## Usage Pattern + +```python +from dpctl.program import create_program_from_source + +source = """ +__kernel void add(__global float* a, __global float* b, __global float* c) { + int i = get_global_id(0); + c[i] = a[i] + b[i]; +} +""" + +program = create_program_from_source(queue, source) +kernel = program.get_sycl_kernel("add") +``` + +## Notes + +- Programs are context-bound +- Follows same Cython patterns as core dpctl (see `../AGENTS.md`) diff --git a/dpctl/tensor/AGENTS.md b/dpctl/tensor/AGENTS.md new file mode 100644 index 0000000000..594809960e --- /dev/null +++ b/dpctl/tensor/AGENTS.md @@ -0,0 +1,54 @@ +# dpctl/tensor/ - Array API Tensor Operations + +## Purpose + +Python Array API-compliant tensor operations using `usm_ndarray` on SYCL devices. + +## Key Files + +| File | Purpose | +|------|---------| +| `_usmarray.pyx` | `usm_ndarray` extension type | +| `_elementwise_funcs.py` | Elementwise operation wrappers | +| `_elementwise_common.py` | `UnaryElementwiseFunc`, `BinaryElementwiseFunc` base classes | +| `_reduction.py` | Reduction operations (sum, prod, mean, etc.) | +| `_manipulation_functions.py` | reshape, concat, stack, split, etc. | +| `_ctors.py` | Array constructors (empty, zeros, ones, arange, etc.) | +| `_type_utils.py` | Type promotion and validation | +| `_sorting.py` | sort, argsort | +| `_searchsorted.py` | searchsorted, digitize | +| `_dlpack.pyx` | DLPack interoperability | +| `_copy_utils.py` | Copy and type casting | + +See [libtensor/AGENTS.md](libtensor/AGENTS.md) for C++ kernel implementation. + +## Elementwise Wrapper Pattern + +```python +from ._elementwise_common import UnaryElementwiseFunc +import dpctl.tensor._tensor_impl as ti + +abs = UnaryElementwiseFunc( + "abs", # Operation name + ti._abs_result_type, # Type inference function + ti._abs, # Kernel implementation (from pybind11) + _abs_docstring_ +) +``` + +## Queue Validation (required for all operations) + +```python +exec_q = dpctl.utils.get_execution_queue([x.sycl_queue, y.sycl_queue]) +if exec_q is None: + raise ExecutionPlacementError("Arrays on incompatible queues") +``` + +## Adding New Operations + +1. C++ kernel header: `libtensor/include/kernels//op.hpp` +2. C++ source: `libtensor/source//op.cpp` +3. Register in appropriate `tensor_*.cpp` entry point +4. Python wrapper in appropriate `_*.py` module +5. Export in `__init__.py` +6. Tests in `../tests/` with full dtype/usm coverage diff --git a/dpctl/tensor/libtensor/AGENTS.md b/dpctl/tensor/libtensor/AGENTS.md new file mode 100644 index 0000000000..29790f7299 --- /dev/null +++ b/dpctl/tensor/libtensor/AGENTS.md @@ -0,0 +1,83 @@ +# dpctl/tensor/libtensor/ - C++ SYCL Kernels + +## Purpose + +C++ SYCL kernel implementations for tensor operations, exposed to Python via pybind11. + +## Directory Structure + +``` +libtensor/ +├── include/ +│ ├── kernels/ +│ │ ├── elementwise_functions/ # Unary/binary operations +│ │ │ ├── common.hpp # Base iteration patterns +│ │ │ ├── common_inplace.hpp # In-place patterns +│ │ │ └── .hpp # Individual operations +│ │ ├── linalg_functions/ # GEMM, dot product +│ │ ├── sorting/ # Sort, searchsorted +│ │ ├── reductions.hpp +│ │ └── accumulators.hpp +│ └── utils/ +│ ├── type_dispatch.hpp # Runtime type lookup +│ ├── type_dispatch_building.hpp # Dispatch table generation +│ ├── offset_utils.hpp # Stride/offset calculations +│ └── sycl_alloc_utils.hpp # USM allocation helpers +├── source/ +│ ├── elementwise_functions/ +│ ├── reductions/ +│ ├── sorting/ +│ ├── linalg_functions/ +│ └── tensor_*.cpp # pybind11 module definitions +└── tests/ # C++ unit tests +``` + +## Type Dispatch + +14 supported types - see `include/utils/type_dispatch_building.hpp` for `typenum_t` enum. + +## Kernel Functor Pattern + +```cpp +template +struct MyOpFunctor { + // Vectorization hints (check common.hpp for usage) + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + // Scalar operation (required) + resT operator()(const argT &x) const { + return /* computation */; + } + + // Vectorized operation (optional, when supports_vec is true) + template + sycl::vec operator()( + const sycl::vec &x) const { + return /* vectorized computation */; + } +}; +``` + +## Factory Pattern + +```cpp +template +struct MyOpContigFactory { + fnT get() { + if constexpr (/* unsupported type combination */) { + return nullptr; // Signals unsupported + } else { + return my_op_contig_impl; + } + } +}; +``` + +## Key Rules + +- Kernel class names must be globally unique +- Use `if constexpr` for compile-time type branching +- Complex types typically don't support vectorization +- Return `nullptr` from factory for unsupported type combinations +- Check `common.hpp` and existing operations for patterns diff --git a/dpctl/tests/AGENTS.md b/dpctl/tests/AGENTS.md new file mode 100644 index 0000000000..46e0a9c378 --- /dev/null +++ b/dpctl/tests/AGENTS.md @@ -0,0 +1,48 @@ +# dpctl/tests/ - Test Suite + +## Purpose + +pytest-based test suite for dpctl functionality. + +## Key Files + +| File | Purpose | +|------|---------| +| `conftest.py` | Fixtures and pytest configuration | +| `helper/_helper.py` | `get_queue_or_skip()`, `skip_if_dtype_not_supported()` | +| `elementwise/utils.py` | Dtype and USM type lists for parametrization | + +## Essential Helpers + +From `helper/_helper.py`: +```python +get_queue_or_skip() # Create queue or skip test +skip_if_dtype_not_supported() # Skip if device lacks dtype (fp64/fp16) +``` + +## Dtype/USM Lists + +**Do not hardcode** - import from `elementwise/utils.py`: +```python +from .utils import _all_dtypes, _usm_types, _no_complex_dtypes +``` + +## Test Pattern + +```python +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_operation(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.ones(100, dtype=dtype, sycl_queue=q) + result = dpt.operation(x) + # ... assertions +``` + +## Coverage Requirements + +- All supported dtypes (see `elementwise/utils.py`) +- All USM types: device, shared, host +- Memory orders: C, F where applicable +- Edge cases: empty arrays, 0-d arrays (scalars), broadcasting diff --git a/dpctl/utils/AGENTS.md b/dpctl/utils/AGENTS.md new file mode 100644 index 0000000000..8ab88fff5f --- /dev/null +++ b/dpctl/utils/AGENTS.md @@ -0,0 +1,34 @@ +# dpctl/utils/ - Utility Functions + +## Purpose + +Helper utilities for device queries, execution context management, and ordering. + +## Key Files + +| File | Purpose | +|------|---------| +| `_compute_follows_data.pyx` | `get_execution_queue()` for queue compatibility | +| `_order_manager.py` | `OrderManager` for memory layout handling | +| `_intel_device_info.py` | Intel-specific device information | +| `_onetrace_context.py` | Tracing/profiling context manager | + +## Key Functions + +### get_execution_queue() +Validates queue compatibility between arrays: +```python +from dpctl.utils import get_execution_queue + +exec_q = get_execution_queue([x.sycl_queue, y.sycl_queue]) +if exec_q is None: + raise ExecutionPlacementError("Incompatible queues") +``` + +### ExecutionPlacementError +Exception raised when arrays are on incompatible queues. + +## Notes + +- `get_execution_queue()` is critical for all tensor operations +- See usage examples in `dpctl/tensor/` modules diff --git a/libsyclinterface/AGENTS.md b/libsyclinterface/AGENTS.md new file mode 100644 index 0000000000..b724de8679 --- /dev/null +++ b/libsyclinterface/AGENTS.md @@ -0,0 +1,58 @@ +# libsyclinterface/ - C API Layer + +## Purpose + +C wrapper around SYCL C++ runtime for language interoperability (used by Cython bindings). + +## Directory Structure + +``` +libsyclinterface/ +├── include/syclinterface/ +│ ├── Support/ +│ │ ├── DllExport.h # DPCTL_API macro +│ │ ├── ExternC.h # extern "C" macros +│ │ └── MemOwnershipAttrs.h # Ownership annotations +│ └── dpctl_sycl_*.h # API headers +├── source/ # Implementation +└── tests/ # C interface tests +``` + +## Naming Convention + +``` +DPCTL_ +``` + +Examples: `DPCTLDevice_Create`, `DPCTLQueue_Submit`, `DPCTLContext_Delete` + +## Memory Ownership Annotations + +Defined in `Support/MemOwnershipAttrs.h`: + +| Annotation | Meaning | +|------------|---------| +| `__dpctl_give` | Caller receives ownership, must free | +| `__dpctl_take` | Function takes ownership, caller must not use after | +| `__dpctl_keep` | Function only observes, does not take ownership | +| `__dpctl_null` | NULL is valid value | + +## Function Pattern + +```c +DPCTL_API +__dpctl_give DPCTLSyclContextRef +DPCTLContext_Create(__dpctl_keep const DPCTLSyclDeviceRef DRef, + error_handler_callback *handler, + int properties); + +DPCTL_API +void DPCTLContext_Delete(__dpctl_take DPCTLSyclContextRef CRef); +``` + +## Key Rules + +- Always annotate ownership on parameters and returns +- Return NULL on failure +- Use `DPCTL_C_EXTERN_C_BEGIN/END` for C++ implementations +- Use `DPCTL_API` for exported functions