Skip to content

Version API

The version module provides types and functions for semantic versioning, change detection, and Git integration.

Enums

mlforge.version.ChangeType

Bases: Enum

Type of change detected between feature versions.

Used to determine semantic version bumps:

- INITIAL: First build → 1.0.0
- MAJOR: Breaking change → X+1.0.0
- MINOR: Additive change → X.Y+1.0
- PATCH: Data refresh → X.Y.Z+1

Source code in src/mlforge/version.py
class ChangeType(Enum):
    """
    Category of change observed between two builds of a feature.

    Each member maps to a semantic-version bump:

    - INITIAL: first build ever → 1.0.0
    - MAJOR: breaking change → X+1.0.0
    - MINOR: additive change → X.Y+1.0
    - PATCH: data refresh only → X.Y.Z+1
    """

    INITIAL = "initial"
    MAJOR = "major"
    MINOR = "minor"
    PATCH = "patch"

    def is_breaking(self) -> bool:
        """Return True when this change type represents a breaking change."""
        return self is ChangeType.MAJOR

is_breaking

is_breaking() -> bool

Check if this change type represents a breaking change.

Source code in src/mlforge/version.py
def is_breaking(self) -> bool:
    """Return True when this change type represents a breaking (MAJOR) change."""
    return self is ChangeType.MAJOR

Data Classes

mlforge.version.ChangeSummary dataclass

Summary of changes that triggered a version bump.

Stored in FeatureMetadata.change_summary for auditability.

Attributes:

Name Type Description
change_type ChangeType

Type of version bump applied

reason str

Human-readable reason code

details list[str]

List of specific changes (e.g., column names added/removed)

Source code in src/mlforge/version.py
@dataclass
class ChangeSummary:
    """
    Structured record of why a version bump happened.

    Persisted in FeatureMetadata.change_summary so bumps are auditable.

    Attributes:
        change_type: Type of version bump applied
        reason: Human-readable reason code
        details: List of specific changes (e.g., column names added/removed)
    """

    change_type: ChangeType
    reason: str
    details: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dictionary."""
        return {
            "bump_type": self.change_type.value,
            "reason": self.reason,
            "details": self.details,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ChangeSummary:
        """Rehydrate a summary previously produced by to_dict()."""
        bump = ChangeType(data["bump_type"])
        return cls(
            change_type=bump,
            reason=data["reason"],
            details=data.get("details", []),
        )

from_dict classmethod

from_dict(data: dict[str, Any]) -> ChangeSummary

Create from dictionary.

Source code in src/mlforge/version.py
@classmethod
def from_dict(cls, data: dict[str, Any]) -> ChangeSummary:
    """Build a ChangeSummary from a dictionary produced by to_dict()."""
    bump = ChangeType(data["bump_type"])
    return cls(bump, data["reason"], data.get("details", []))

to_dict

to_dict() -> dict[str, Any]

Convert to dictionary for JSON serialization.

Source code in src/mlforge/version.py
def to_dict(self) -> dict[str, Any]:
    """Serialize this summary to a JSON-compatible dictionary."""
    payload: dict[str, Any] = {"bump_type": self.change_type.value}
    payload["reason"] = self.reason
    payload["details"] = self.details
    return payload

Version Parsing and Bumping

mlforge.version.parse_version

parse_version(version_str: str) -> tuple[int, int, int]

Parse semantic version string to tuple.

Parameters:

Name Type Description Default
version_str str

Version string like "1.2.3"

required

Returns:

Type Description
tuple[int, int, int]

Tuple of (major, minor, patch)

Raises:

Type Description
ValueError

If version string is invalid

Example

>>> parse_version("1.2.3")
(1, 2, 3)

Source code in src/mlforge/version.py
def parse_version(version_str: str) -> tuple[int, int, int]:
    """
    Parse a semantic version string into its numeric components.

    Args:
        version_str: Version string like "1.2.3"

    Returns:
        Tuple of (major, minor, patch)

    Raises:
        ValueError: If version string is invalid

    Example:
        >>> parse_version("1.2.3")
        (1, 2, 3)
    """
    parsed = _VERSION_PATTERN.match(version_str)
    if parsed is None:
        raise ValueError(
            f"Invalid version format: '{version_str}'. Expected 'X.Y.Z' (e.g., '1.0.0')"
        )
    # Groups 1-3 are the major/minor/patch capture groups of the pattern.
    return int(parsed[1]), int(parsed[2]), int(parsed[3])

mlforge.version.format_version

format_version(major: int, minor: int, patch: int) -> str

Format version tuple to string.

Parameters:

Name Type Description Default
major int

Major version number

required
minor int

Minor version number

required
patch int

Patch version number

required

Returns:

Type Description
str

Version string like "1.2.3"

Source code in src/mlforge/version.py
def format_version(major: int, minor: int, patch: int) -> str:
    """
    Render a (major, minor, patch) triple as a dotted version string.

    Args:
        major: Major version number
        minor: Minor version number
        patch: Patch version number

    Returns:
        Version string like "1.2.3"
    """
    return ".".join(str(part) for part in (major, minor, patch))

mlforge.version.bump_version

bump_version(current: str, change_type: ChangeType) -> str

Increment version by change type.

Parameters:

Name Type Description Default
current str

Current version string (e.g., "1.2.3")

required
change_type ChangeType

Type of version increment

required

Returns:

Type Description
str

New version string

Raises:

Type Description
ValueError

If change_type is INITIAL (use "1.0.0" directly)

Example

>>> bump_version("1.2.3", ChangeType.MINOR)
"1.3.0"
>>> bump_version("1.2.3", ChangeType.MAJOR)
"2.0.0"

Source code in src/mlforge/version.py
def bump_version(current: str, change_type: ChangeType) -> str:
    """
    Compute the next version string for a given change type.

    Args:
        current: Current version string (e.g., "1.2.3")
        change_type: Type of version increment

    Returns:
        New version string

    Raises:
        ValueError: If change_type is INITIAL (use "1.0.0" directly)

    Example:
        >>> bump_version("1.2.3", ChangeType.MINOR)
        "1.3.0"
        >>> bump_version("1.2.3", ChangeType.MAJOR)
        "2.0.0"
    """
    if change_type == ChangeType.INITIAL:
        raise ValueError(
            "Cannot bump INITIAL change type. Use '1.0.0' directly."
        )

    major, minor, patch = parse_version(current)

    # MAJOR and MINOR bumps zero out all lower-order components.
    if change_type == ChangeType.MAJOR:
        return format_version(major + 1, 0, 0)
    if change_type == ChangeType.MINOR:
        return format_version(major, minor + 1, 0)
    if change_type == ChangeType.PATCH:
        return format_version(major, minor, patch + 1)
    raise ValueError(f"Unexpected change type: {change_type}")

mlforge.version.sort_versions

sort_versions(versions: list[str]) -> list[str]

Sort version strings semantically.

Parameters:

Name Type Description Default
versions list[str]

List of version strings

required

Returns:

Type Description
list[str]

Sorted list (oldest to newest)

Example

>>> sort_versions(["1.10.0", "1.2.0", "2.0.0"])
["1.2.0", "1.10.0", "2.0.0"]

Source code in src/mlforge/version.py
def sort_versions(versions: list[str]) -> list[str]:
    """
    Sort version strings semantically.

    Args:
        versions: List of version strings

    Returns:
        Sorted list (oldest to newest)

    Example:
        >>> sort_versions(["1.10.0", "1.2.0", "2.0.0"])
        ["1.2.0", "1.10.0", "2.0.0"]
    """
    # parse_version yields (major, minor, patch) int tuples, which compare
    # numerically -- avoiding the lexicographic trap ("1.10.0" < "1.2.0").
    # Pass the function directly; wrapping it in a lambda is redundant.
    return sorted(versions, key=parse_version)

mlforge.version.is_valid_version

is_valid_version(version_str: str) -> bool

Check if a string is a valid semantic version.

Parameters:

Name Type Description Default
version_str str

String to validate

required

Returns:

Type Description
bool

True if valid version format, False otherwise

Source code in src/mlforge/version.py
def is_valid_version(version_str: str) -> bool:
    """
    Check whether a string conforms to the X.Y.Z semantic version format.

    Args:
        version_str: String to validate

    Returns:
        True if valid version format, False otherwise
    """
    # A Match object is always truthy; a failed match is None (falsy).
    return bool(_VERSION_PATTERN.match(version_str))

Path Construction

mlforge.version.versioned_data_path

versioned_data_path(
    store_root: Path, feature_name: str, version: str
) -> Path

Get path to versioned feature data file.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required
version str

Semantic version string (e.g., "1.0.0")

required

Returns:

Type Description
Path

Path to data.parquet file

Example

>>> versioned_data_path(Path("./store"), "user_spend", "1.0.0")
Path("./store/user_spend/1.0.0/data.parquet")

Source code in src/mlforge/version.py
def versioned_data_path(
    store_root: Path, feature_name: str, version: str
) -> Path:
    """
    Build the path to a versioned feature's data file.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature
        version: Semantic version string (e.g., "1.0.0")

    Returns:
        Path to data.parquet file

    Example:
        >>> versioned_data_path(Path("./store"), "user_spend", "1.0.0")
        Path("./store/user_spend/1.0.0/data.parquet")
    """
    # Layout: <root>/<feature>/<version>/data.parquet
    return store_root.joinpath(feature_name, version, "data.parquet")

mlforge.version.versioned_metadata_path

versioned_metadata_path(
    store_root: Path, feature_name: str, version: str
) -> Path

Get path to versioned feature metadata file.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required
version str

Semantic version string

required

Returns:

Type Description
Path

Path to .meta.json file

Source code in src/mlforge/version.py
def versioned_metadata_path(
    store_root: Path, feature_name: str, version: str
) -> Path:
    """
    Build the path to a versioned feature's metadata file.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature
        version: Semantic version string

    Returns:
        Path to .meta.json file
    """
    # Metadata sits beside data.parquet inside the version directory.
    return store_root.joinpath(feature_name, version, ".meta.json")

mlforge.version.latest_pointer_path

latest_pointer_path(
    store_root: Path, feature_name: str
) -> Path

Get path to _latest.json pointer file.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required

Returns:

Type Description
Path

Path to _latest.json file within feature directory

Source code in src/mlforge/version.py
def latest_pointer_path(store_root: Path, feature_name: str) -> Path:
    """
    Build the path to a feature's _latest.json pointer file.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature

    Returns:
        Path to _latest.json file within feature directory
    """
    return store_root.joinpath(feature_name, "_latest.json")

mlforge.version.feature_versions_dir

feature_versions_dir(
    store_root: Path, feature_name: str
) -> Path

Get path to feature's version directory (parent of all versions).

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required

Returns:

Type Description
Path

Path to feature directory containing version subdirectories

Source code in src/mlforge/version.py
def feature_versions_dir(store_root: Path, feature_name: str) -> Path:
    """
    Build the path to a feature's directory (parent of all version subdirs).

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature

    Returns:
        Path to feature directory containing version subdirectories
    """
    return store_root.joinpath(feature_name)

Hash Computation

mlforge.version.compute_schema_hash

compute_schema_hash(columns: list[ColumnMetadata]) -> str

Compute hash of column names and dtypes for schema change detection.

Captures structural schema changes (columns added/removed, dtype changes).

Parameters:

Name Type Description Default
columns list[ColumnMetadata]

List of ColumnMetadata from feature result

required

Returns:

Type Description
str

Hex string hash (first 12 characters of SHA256)

Source code in src/mlforge/version.py
def compute_schema_hash(columns: list[ColumnMetadata]) -> str:
    """
    Hash column names and dtypes for schema change detection.

    Captures structural schema changes (columns added/removed, dtype changes).

    Args:
        columns: List of ColumnMetadata from feature result

    Returns:
        Hex string hash (first 12 characters of SHA256)
    """
    # Sort by name so the hash is independent of column order, and include
    # only name/dtype -- input/agg/window are configuration, not schema.
    ordered = sorted(columns, key=lambda col: col.name)
    schema_data = [(col.name, col.dtype) for col in ordered]
    payload = json.dumps(schema_data, sort_keys=True)
    return hashlib.sha256(payload.encode()).hexdigest()[:12]

mlforge.version.compute_config_hash

compute_config_hash(
    keys: list[str],
    timestamp: str | None,
    interval: str | None,
    metrics_config: list[dict[str, Any]] | None,
) -> str

Compute hash of feature configuration for config change detection.

Captures configuration changes that affect computation (keys, timing, metrics).

Parameters:

Name Type Description Default
keys list[str]

Entity key columns

required
timestamp str | None

Timestamp column name

required
interval str | None

Rolling interval string

required
metrics_config list[dict[str, Any]] | None

Serialized metrics configuration

required

Returns:

Type Description
str

Hex string hash (first 12 characters of SHA256)

Source code in src/mlforge/version.py
def compute_config_hash(
    keys: list[str],
    timestamp: str | None,
    interval: str | None,
    metrics_config: list[dict[str, Any]] | None,
) -> str:
    """
    Hash feature configuration for config change detection.

    Captures configuration changes that affect computation (keys, timing, metrics).

    Args:
        keys: Entity key columns
        timestamp: Timestamp column name
        interval: Rolling interval string
        metrics_config: Serialized metrics configuration

    Returns:
        Hex string hash (first 12 characters of SHA256)
    """
    # Keys are sorted and missing metrics normalized to [] so that
    # equivalent configurations always hash identically.
    payload = json.dumps(
        {
            "keys": sorted(keys),
            "timestamp": timestamp,
            "interval": interval,
            "metrics": metrics_config or [],
        },
        sort_keys=True,
    )
    return hashlib.sha256(payload.encode()).hexdigest()[:12]

mlforge.version.compute_content_hash

compute_content_hash(path: Path) -> str

Compute hash of parquet file content for data change detection.

Uses file-based hashing for efficiency with large files.

Parameters:

Name Type Description Default
path Path

Path to parquet file

required

Returns:

Type Description
str

Hex string hash (first 12 characters of SHA256)

Raises:

Type Description
FileNotFoundError

If path doesn't exist

Source code in src/mlforge/version.py
def compute_content_hash(path: Path) -> str:
    """
    Hash a parquet file's bytes for data change detection.

    Uses file-based hashing for efficiency with large files.

    Args:
        path: Path to parquet file

    Returns:
        Hex string hash (first 12 characters of SHA256)

    Raises:
        FileNotFoundError: If path doesn't exist
    """
    digest = hashlib.sha256()

    # Stream in 8 KiB chunks so large files never load fully into memory.
    with open(path, "rb") as handle:
        while chunk := handle.read(8192):
            digest.update(chunk)

    return digest.hexdigest()[:12]

mlforge.version.compute_source_hash

compute_source_hash(path: Path | str) -> str

Compute hash of source data file for reproducibility verification.

Uses file-based hashing for efficiency with large files. This hash is stored in metadata and used by mlforge sync to verify that teammates have the same source data before rebuilding.

Parameters:

Name Type Description Default
path Path | str

Path to source data file (parquet, csv, etc.)

required

Returns:

Type Description
str

Hex string hash (first 12 characters of SHA256)

Raises:

Type Description
FileNotFoundError

If path doesn't exist

Source code in src/mlforge/version.py
def compute_source_hash(path: Path | str) -> str:
    """
    Hash a source data file's bytes for reproducibility verification.

    Uses file-based hashing for efficiency with large files. This hash
    is stored in metadata and used by `mlforge sync` to verify that
    teammates have the same source data before rebuilding.

    Args:
        path: Path to source data file (parquet, csv, etc.)

    Returns:
        Hex string hash (first 12 characters of SHA256)

    Raises:
        FileNotFoundError: If path doesn't exist
    """
    # Path() accepts both str and Path, so no isinstance check is needed.
    source = Path(path)

    digest = hashlib.sha256()

    # Stream in 8 KiB chunks so large files never load fully into memory.
    with source.open("rb") as handle:
        while chunk := handle.read(8192):
            digest.update(chunk)

    return digest.hexdigest()[:12]

Change Detection

mlforge.version.detect_change_type

detect_change_type(
    previous_columns: list[str] | None,
    current_columns: list[str],
    previous_schema_hash: str | None,
    current_schema_hash: str,
    previous_config_hash: str | None,
    current_config_hash: str,
) -> ChangeType

Determine version bump type based on schema and config changes.

Change detection logic (from roadmap):

- No previous version → INITIAL (1.0.0)
- Columns removed → MAJOR (breaking)
- Dtype changed → MAJOR (breaking, detected via schema_hash)
- Columns added → MINOR (additive)
- Config changed → MINOR
- Same schema/config → PATCH (data refresh)

Parameters:

Name Type Description Default
previous_columns list[str] | None

Column names from previous version (None if first build)

required
current_columns list[str]

Column names from current build

required
previous_schema_hash str | None

Schema hash from previous version

required
current_schema_hash str

Schema hash from current build

required
previous_config_hash str | None

Config hash from previous version

required
current_config_hash str

Config hash from current build

required

Returns:

Type Description
ChangeType

ChangeType indicating required version bump

Example

>>> detect_change_type(None, ["a", "b"], None, "abc123", None, "def456")
ChangeType.INITIAL
>>> detect_change_type(
...     ["a", "b", "c"], ["a", "b"], "abc", "def", "123", "123"
... )
ChangeType.MAJOR  # Column removed

Source code in src/mlforge/version.py
def detect_change_type(
    previous_columns: list[str] | None,
    current_columns: list[str],
    previous_schema_hash: str | None,
    current_schema_hash: str,
    previous_config_hash: str | None,
    current_config_hash: str,
) -> ChangeType:
    """
    Determine version bump type based on schema and config changes.

    Change detection logic (from roadmap):
    - No previous version → INITIAL (1.0.0)
    - Columns removed → MAJOR (breaking)
    - Dtype changed → MAJOR (breaking, detected via schema_hash)
    - Columns added → MINOR (additive)
    - Config changed → MINOR
    - Same schema/config → PATCH (data refresh)

    Args:
        previous_columns: Column names from previous version (None if first build)
        current_columns: Column names from current build
        previous_schema_hash: Schema hash from previous version
        current_schema_hash: Schema hash from current build
        previous_config_hash: Config hash from previous version
        current_config_hash: Config hash from current build

    Returns:
        ChangeType indicating required version bump

    Example:
        >>> detect_change_type(None, ["a", "b"], None, "abc123", None, "def456")
        ChangeType.INITIAL
        >>> detect_change_type(
        ...     ["a", "b", "c"], ["a", "b"], "abc", "def", "123", "123"
        ... )
        ChangeType.MAJOR  # Column removed
    """
    # No prior version on record: this is the first build.
    if previous_columns is None or previous_schema_hash is None:
        return ChangeType.INITIAL

    old_cols = set(previous_columns)
    new_cols = set(current_columns)

    # Dropping columns always breaks downstream consumers.
    if old_cols - new_cols:
        return ChangeType.MAJOR

    if previous_schema_hash != current_schema_hash:
        # Nothing was removed, so a differing schema hash means either new
        # columns (additive) or a dtype change on existing ones (breaking).
        if new_cols - old_cols:
            return ChangeType.MINOR
        return ChangeType.MAJOR

    # Schema unchanged; configuration changes (interval, metrics, ...) are
    # treated as additive.
    if previous_config_hash != current_config_hash:
        return ChangeType.MINOR

    # Identical schema and config: only the data was refreshed.
    return ChangeType.PATCH

mlforge.version.build_change_summary

build_change_summary(
    change_type: ChangeType,
    previous_columns: list[str] | None,
    current_columns: list[str],
) -> ChangeSummary

Build structured change summary for metadata.

Parameters:

Name Type Description Default
change_type ChangeType

Detected change type

required
previous_columns list[str] | None

Previous version columns

required
current_columns list[str]

Current version columns

required

Returns:

Type Description
ChangeSummary

ChangeSummary with bump_type, reason, and details

Source code in src/mlforge/version.py
def build_change_summary(
    change_type: ChangeType,
    previous_columns: list[str] | None,
    current_columns: list[str],
) -> ChangeSummary:
    """
    Build structured change summary for metadata.

    Args:
        change_type: Detected change type
        previous_columns: Previous version columns
        current_columns: Current version columns

    Returns:
        ChangeSummary with bump_type, reason, and details
    """
    if change_type == ChangeType.INITIAL:
        return ChangeSummary(ChangeType.INITIAL, "first_build", [])

    old_cols = set(previous_columns or [])
    new_cols = set(current_columns)

    # Column-level differences take precedence: they name the exact
    # columns in `details`, which config/dtype changes cannot.
    dropped = sorted(old_cols - new_cols)
    if dropped:
        return ChangeSummary(ChangeType.MAJOR, "columns_removed", dropped)

    introduced = sorted(new_cols - old_cols)
    if introduced:
        return ChangeSummary(ChangeType.MINOR, "columns_added", introduced)

    # Columns are identical, so fall back to the detector's verdict.
    if change_type == ChangeType.MINOR:
        return ChangeSummary(ChangeType.MINOR, "config_changed", [])
    if change_type == ChangeType.MAJOR:
        return ChangeSummary(ChangeType.MAJOR, "dtype_changed", [])
    return ChangeSummary(ChangeType.PATCH, "data_refresh", [])

Version Discovery

mlforge.version.list_versions

list_versions(
    store_root: Path, feature_name: str
) -> list[str]

List all versions of a feature, sorted semantically.

Scans the feature directory for version subdirectories.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required

Returns:

Type Description
list[str]

Sorted list of version strings (oldest to newest), empty if none

Example

>>> list_versions(Path("./store"), "user_spend")
["1.0.0", "1.0.1", "1.1.0"]

Source code in src/mlforge/version.py
def list_versions(store_root: Path, feature_name: str) -> list[str]:
    """
    List all versions of a feature, sorted semantically.

    Scans the feature directory for version subdirectories.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature

    Returns:
        Sorted list of version strings (oldest to newest), empty if none

    Example:
        >>> list_versions(Path("./store"), "user_spend")
        ["1.0.0", "1.0.1", "1.1.0"]
    """
    feature_dir = feature_versions_dir(store_root, feature_name)

    if not feature_dir.exists():
        return []

    # Only directories whose name is a valid X.Y.Z version count; this
    # skips _latest.json, .gitignore, and any stray files.
    found = [
        entry.name
        for entry in feature_dir.iterdir()
        if entry.is_dir() and is_valid_version(entry.name)
    ]
    return sort_versions(found)

mlforge.version.get_latest_version

get_latest_version(
    store_root: Path, feature_name: str
) -> str | None

Get latest version from _latest.json pointer.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required

Returns:

Type Description
str | None

Latest version string, or None if no versions exist

Source code in src/mlforge/version.py
def get_latest_version(store_root: Path, feature_name: str) -> str | None:
    """
    Read the latest version from the _latest.json pointer file.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature

    Returns:
        Latest version string, or None if no versions exist
    """
    pointer = latest_pointer_path(store_root, feature_name)

    if not pointer.exists():
        return None

    data = json.loads(pointer.read_text())
    # .get() tolerates a pointer file missing the "version" key.
    return data.get("version")

mlforge.version.write_latest_pointer

write_latest_pointer(
    store_root: Path, feature_name: str, version: str
) -> None

Write _latest.json pointer file.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required
version str

Version to mark as latest

required
Source code in src/mlforge/version.py
def write_latest_pointer(
    store_root: Path, feature_name: str, version: str
) -> None:
    """
    Write the _latest.json pointer file for a feature.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature
        version: Version to mark as latest
    """
    pointer = latest_pointer_path(store_root, feature_name)
    # Ensure the feature directory exists before writing the pointer.
    pointer.parent.mkdir(parents=True, exist_ok=True)

    pointer.write_text(json.dumps({"version": version}, indent=2))

mlforge.version.resolve_version

resolve_version(
    store_root: Path, feature_name: str, version: str | None
) -> str | None

Resolve version string to actual version.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required
version str | None

Explicit version or None for latest

required

Returns:

Type Description
str | None

Resolved version string, or None if feature doesn't exist

Source code in src/mlforge/version.py
def resolve_version(
    store_root: Path,
    feature_name: str,
    version: str | None,
) -> str | None:
    """
    Resolve a requested version to an actual version string.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature
        version: Explicit version or None for latest

    Returns:
        Resolved version string, or None if feature doesn't exist
    """
    # An explicit version wins; otherwise fall back to the latest pointer.
    if version is None:
        return get_latest_version(store_root, feature_name)
    return version

Git Integration

mlforge.version.write_feature_gitignore

write_feature_gitignore(
    store_root: Path, feature_name: str
) -> bool

Write .gitignore to feature directory if not already present.

Creates a .gitignore file that ignores data.parquet files in version subdirectories. This allows committing metadata (.meta.json, _latest.json) while excluding large data files that can be rebuilt from source.

Parameters:

Name Type Description Default
store_root Path

Root path of the feature store

required
feature_name str

Name of the feature

required

Returns:

Type Description
bool

True if .gitignore was created, False if it already existed

Source code in src/mlforge/version.py
def write_feature_gitignore(store_root: Path, feature_name: str) -> bool:
    """
    Write .gitignore to feature directory if not already present.

    Creates a .gitignore file that ignores data.parquet files in version
    subdirectories. This allows committing metadata (.meta.json, _latest.json)
    while excluding large data files that can be rebuilt from source.

    Args:
        store_root: Root path of the feature store
        feature_name: Name of the feature

    Returns:
        True if .gitignore was created, False if it already existed
    """
    feature_dir = feature_versions_dir(store_root, feature_name)
    target = feature_dir / ".gitignore"

    # Never overwrite an existing .gitignore -- the user may have edited it.
    if target.exists():
        return False

    feature_dir.mkdir(parents=True, exist_ok=True)
    target.write_text(_GITIGNORE_CONTENT)
    return True