Skip to content

tradai.data.core.entities

OHLCV data models and related entities.

Core entities for tradai-data library.

Value objects that eliminate code duplication:

- DateRange: eliminates 6 duplicate validation sites
- SymbolList: eliminates 4 duplicate conversions
- Timeframe: eliminates 3 fragile regex parsings
- OHLCVData: validated DataFrame wrapper

All entities are immutable (frozen=True) for thread safety.

DateRange

Bases: BaseModel

Validated date range for queries.

Eliminates duplicate validation logic across 6 sites in the codebase. Provides duration calculations and range checks.

Example

>>> dr = DateRange.from_strings("2024-01-01", "2024-01-31")
>>> dr.duration_days
30
>>> dr.contains(datetime(2024, 1, 15))
True

Source code in libs/tradai-data/src/tradai/data/core/entities.py
class DateRange(BaseModel):
    """
    Frozen, validated ``start``/``end`` pair used to bound queries.

    Centralises the range validation that was previously repeated at six
    places in the codebase and adds small helpers for duration and
    membership checks.

    Example:
        >>> dr = DateRange.from_strings("2024-01-01", "2024-01-31")
        >>> dr.duration_days
        30
        >>> dr.contains(datetime(2024, 1, 15))
        True
    """

    model_config = {"frozen": True}  # fields cannot be reassigned after creation

    # ``start`` is declared before ``end`` so that it is already available in
    # ``info.data`` when the ``end`` validator runs.
    start: datetime
    end: datetime

    @field_validator("end")
    @classmethod
    def validate_range(cls, v: datetime, info: ValidationInfo) -> datetime:
        """
        Reject an ``end`` that is earlier than ``start``.

        An ``end`` equal to ``start`` is accepted. When ``start`` is not
        present in the already-validated data the check is skipped.

        Raises:
            ValueError: If ``end`` is earlier than ``start``.
        """
        lower_bound = info.data.get("start")
        if not lower_bound:
            return v
        if v < lower_bound:
            raise ValueError("end must be after start")
        return v

    @classmethod
    def from_strings(
        cls,
        start: str | datetime | pd.Timestamp | np.datetime64,
        end: str | datetime | pd.Timestamp | np.datetime64,
    ) -> "DateRange":
        """
        Build a DateRange from loosely typed date inputs.

        Both bounds are passed through ``_to_datetime`` before the model is
        constructed, so the usual range validation still applies.

        Args:
            start: Start date (ISO string, datetime, pandas Timestamp, or numpy datetime64)
            end: End date (ISO string, datetime, pandas Timestamp, or numpy datetime64)

        Returns:
            Validated DateRange instance

        Example:
            >>> DateRange.from_strings("2024-01-01", "2024-01-31")
            >>> DateRange.from_strings(pd.Timestamp("2024-01-01"), datetime(2024, 1, 31))
        """
        bounds = {"start": _to_datetime(start), "end": _to_datetime(end)}
        return cls(**bounds)

    @property
    def duration_days(self) -> int:
        """Whole days between ``start`` and ``end`` (the ``days`` part of the difference)."""
        span = self.end - self.start
        return span.days

    def contains(self, date: datetime) -> bool:
        """Return True when ``date`` lies between ``start`` and ``end``, bounds included."""
        return self.start <= date and date <= self.end

duration_days: int property

Calculate duration in days.

validate_range(v: datetime, info: ValidationInfo) -> datetime classmethod

Ensure end is not before start (an end equal to start is accepted).

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@field_validator("end")
@classmethod
def validate_range(cls, v: datetime, info: ValidationInfo) -> datetime:
    """
    Reject an ``end`` that is earlier than ``start``.

    An ``end`` equal to ``start`` is accepted. When ``start`` is not present
    in the already-validated data the check is skipped.

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    lower_bound = info.data.get("start")
    if not lower_bound:
        return v
    if v < lower_bound:
        raise ValueError("end must be after start")
    return v

from_strings(start: str | datetime | pd.Timestamp | np.datetime64, end: str | datetime | pd.Timestamp | np.datetime64) -> DateRange classmethod

Create DateRange from various date formats.

Parameters:

Name Type Description Default
start str | datetime | Timestamp | datetime64

Start date (ISO string, datetime, pandas Timestamp, or numpy datetime64)

required
end str | datetime | Timestamp | datetime64

End date (ISO string, datetime, pandas Timestamp, or numpy datetime64)

required

Returns:

Type Description
DateRange

Validated DateRange instance

Example

DateRange.from_strings("2024-01-01", "2024-01-31") DateRange.from_strings(pd.Timestamp("2024-01-01"), datetime(2024, 1, 31))

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@classmethod
def from_strings(
    cls,
    start: str | datetime | pd.Timestamp | np.datetime64,
    end: str | datetime | pd.Timestamp | np.datetime64,
) -> "DateRange":
    """
    Build a DateRange from loosely typed date inputs.

    Both bounds are passed through ``_to_datetime`` before the model is
    constructed, so the usual range validation still applies.

    Args:
        start: Start date (ISO string, datetime, pandas Timestamp, or numpy datetime64)
        end: End date (ISO string, datetime, pandas Timestamp, or numpy datetime64)

    Returns:
        Validated DateRange instance

    Example:
        >>> DateRange.from_strings("2024-01-01", "2024-01-31")
        >>> DateRange.from_strings(pd.Timestamp("2024-01-01"), datetime(2024, 1, 31))
    """
    bounds = {"start": _to_datetime(start), "end": _to_datetime(end)}
    return cls(**bounds)

contains(date: datetime) -> bool

Check if date is within range (inclusive).

Source code in libs/tradai-data/src/tradai/data/core/entities.py
def contains(self, date: datetime) -> bool:
    """Return True when ``date`` lies between ``start`` and ``end``, bounds included."""
    not_too_early = self.start <= date
    return not_too_early and date <= self.end

SymbolList

Bases: BaseModel

Validated list of trading symbols.

Eliminates duplicate symbol preparation logic across 4 sites. Handles str/list inputs, deduplication, and validation.

Example

sl = SymbolList.from_input(["BTC/USDT:USDT", "ETH/USDT:USDT"]) sl.to_list() ['BTC/USDT:USDT', 'ETH/USDT:USDT']

Source code in libs/tradai-data/src/tradai/data/core/entities.py
class SymbolList(BaseModel):
    """
    Frozen, validated collection of trading symbols.

    Replaces symbol-preparation code that was repeated at four places:
    accepts a single string or a list, removes duplicates by storing the
    symbols in a frozenset, and refuses empty input.

    Example:
        >>> sl = SymbolList.from_input(["BTC/USDT:USDT", "ETH/USDT:USDT"])
        >>> sl.to_list()
        ['BTC/USDT:USDT', 'ETH/USDT:USDT']
    """

    model_config = {"frozen": True}  # fields cannot be reassigned after creation

    symbols: frozenset[str] = Field(..., min_length=1)

    @field_validator("symbols")
    @classmethod
    def validate_non_empty(cls, v: frozenset[str]) -> frozenset[str]:
        """
        Require at least one symbol.

        Raises:
            ValueError: If the set of symbols is empty.
        """
        if v:
            return v
        raise ValueError("Must provide at least 1 symbol")

    @classmethod
    def from_input(cls, symbols: str | list[str]) -> "SymbolList":
        """
        Build a SymbolList from one symbol string or a list of symbols.

        Duplicates disappear because the symbols are stored in a frozenset.

        Args:
            symbols: Single symbol string or list of symbols

        Returns:
            Validated SymbolList instance

        Raises:
            ValidationError: If symbols is empty string or empty list

        Example:
            >>> SymbolList.from_input("BTC/USDT:USDT")
            >>> SymbolList.from_input(["BTC/USDT:USDT", "ETH/USDT:USDT"])
            >>> SymbolList.from_input(["BTC/USDT:USDT", "BTC/USDT:USDT"])  # Deduplicates
        """

        def _rejection(reason: str) -> Exception:
            # Imported only when an error actually has to be built.
            from pydantic import ValidationError

            detail = {
                "type": "value_error",
                "loc": ("symbols",),
                "input": symbols,
                "ctx": {"error": ValueError(reason)},
            }
            return ValidationError.from_exception_data("SymbolList", [detail])

        if isinstance(symbols, str):
            # A bare string is one symbol, never an iterable of characters.
            if symbols:
                return cls(symbols=frozenset([symbols]))
            raise _rejection("Symbol string cannot be empty")

        try:
            return cls(symbols=frozenset(symbols))
        except PydanticValidationError as exc:
            if symbols:
                # Non-empty input failed for another reason: surface it untouched.
                raise
            # Empty input: swap the model's error for a clearer message.
            raise _rejection("Must provide at least 1 symbol") from exc

    def to_list(self) -> list[str]:
        """Return the symbols as a sorted list, e.g. for API calls."""
        ordered = list(self.symbols)
        ordered.sort()
        return ordered

validate_non_empty(v: frozenset[str]) -> frozenset[str] classmethod

Ensure at least one symbol.

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@field_validator("symbols")
@classmethod
def validate_non_empty(cls, v: frozenset[str]) -> frozenset[str]:
    """
    Require at least one symbol.

    Raises:
        ValueError: If the set of symbols is empty.
    """
    if v:
        return v
    raise ValueError("Must provide at least 1 symbol")

from_input(symbols: str | list[str]) -> SymbolList classmethod

Create SymbolList from string or list input.

Automatically deduplicates symbols using frozenset.

Parameters:

Name Type Description Default
symbols str | list[str]

Single symbol string or list of symbols

required

Returns:

Type Description
SymbolList

Validated SymbolList instance

Raises:

Type Description
ValidationError

If symbols is empty string or empty list

Example

SymbolList.from_input("BTC/USDT:USDT") SymbolList.from_input(["BTC/USDT:USDT", "ETH/USDT:USDT"]) SymbolList.from_input(["BTC/USDT:USDT", "BTC/USDT:USDT"]) # Deduplicates

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@classmethod
def from_input(cls, symbols: str | list[str]) -> "SymbolList":
    """
    Create SymbolList from string or list input.

    Automatically deduplicates symbols using frozenset.

    Args:
        symbols: Single symbol string or list of symbols

    Returns:
        Validated SymbolList instance

    Raises:
        ValidationError: If symbols is empty string or empty list

    Example:
        >>> SymbolList.from_input("BTC/USDT:USDT")
        >>> SymbolList.from_input(["BTC/USDT:USDT", "ETH/USDT:USDT"])
        >>> SymbolList.from_input(["BTC/USDT:USDT", "BTC/USDT:USDT"])  # Deduplicates
    """
    if isinstance(symbols, str):
        if not symbols:
            from pydantic import ValidationError

            raise ValidationError.from_exception_data(
                "SymbolList",
                [
                    {
                        "type": "value_error",
                        "loc": ("symbols",),
                        "input": symbols,
                        "ctx": {"error": ValueError("Symbol string cannot be empty")},
                    }
                ],
            )
        return cls(symbols=frozenset([symbols]))

    # Wrap in try-catch to provide better error message
    try:
        return cls(symbols=frozenset(symbols))
    except PydanticValidationError as e:
        # Re-raise with clearer message
        if not symbols:
            from pydantic import ValidationError

            raise ValidationError.from_exception_data(
                "SymbolList",
                [
                    {
                        "type": "value_error",
                        "loc": ("symbols",),
                        "input": symbols,
                        "ctx": {"error": ValueError("Must provide at least 1 symbol")},
                    }
                ],
            ) from e
        raise

to_list() -> list[str]

Convert to sorted list for API calls.

Source code in libs/tradai-data/src/tradai/data/core/entities.py
def to_list(self) -> list[str]:
    """Return the symbols as a sorted list, e.g. for API calls."""
    ordered = list(self.symbols)
    ordered.sort()
    return ordered

Timeframe

Bases: BaseModel

Validated CCXT-compatible timeframe.

Uses pandas.Timedelta for flexible input parsing (handles aliases like "1 hour", "5min", etc.) but validates against CCXT-supported timeframes.

Supported timeframes: 1s, 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1M

Example

>>> tf = Timeframe.parse("1h")       # canonical CCXT string
>>> tf = Timeframe.parse("1 hour")   # alias, same result as "1h"
>>> tf.seconds
3600
>>> tf.normalized
'1h'
>>> Timeframe.parse("4 hours").normalized   # alias for "4h"
'4h'

Source code in libs/tradai-data/src/tradai/data/core/entities.py
class Timeframe(BaseModel):
    """
    Validated CCXT-compatible timeframe.

    Canonical CCXT strings are resolved directly from the lookup table;
    anything else is parsed with pandas.Timedelta (which understands aliases
    like "1 hour" or "5min") and must then match a CCXT timeframe exactly.

    Supported timeframes: 1s, 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1M

    Example:
        >>> tf = Timeframe.parse("1 hour")  # Alias for "1h"
        >>> tf.seconds
        3600
        >>> tf.normalized
        '1h'
        >>> Timeframe.parse("4 hours").normalized
        '4h'
        >>> Timeframe.parse("1M").seconds   # month, counted as 30 days
        2592000
    """

    value: str
    seconds: int

    model_config = {"frozen": True}  # Immutable

    # CCXT-supported timeframes: seconds -> canonical value
    _SECONDS_TO_CCXT: ClassVar[dict[int, str]] = {
        1: "1s",
        60: "1m",
        180: "3m",
        300: "5m",
        900: "15m",
        1800: "30m",
        3600: "1h",
        7200: "2h",
        14400: "4h",
        21600: "6h",
        28800: "8h",
        43200: "12h",
        86400: "1d",
        259200: "3d",
        604800: "1w",
        2592000: "1M",
    }

    @classmethod
    def parse(cls, timeframe: str) -> "Timeframe":
        """
        Parse timeframe string with alias support, validated against CCXT.

        Canonical CCXT strings (case-sensitive, surrounding whitespace
        ignored) are looked up directly. Other inputs are parsed with
        pandas.Timedelta and accepted only when the duration is a whole
        number of seconds that equals a CCXT-supported timeframe.

        Args:
            timeframe: Timeframe string (e.g., "1h", "1 hour", "5min", "4 hours")

        Returns:
            Validated Timeframe instance

        Raises:
            ValidationError: If timeframe is invalid or not CCXT-supported

        Example:
            >>> Timeframe.parse("1h")       # -> Timeframe(value='1h', seconds=3600)
            >>> Timeframe.parse("1 hour")   # -> Timeframe(value='1h', seconds=3600)
            >>> Timeframe.parse("1M")       # -> Timeframe(value='1M', seconds=2592000)
            >>> Timeframe.parse("1h 30m")   # Raises: not a valid CCXT timeframe
            >>> Timeframe.parse("1500ms")   # Raises: not a whole CCXT timeframe
        """
        # Resolve canonical CCXT strings without pandas. This is what makes
        # "1M" (month) usable: pandas.Timedelta does not read the "M" unit as
        # a month (depending on the pandas version it is rejected or treated
        # as minutes), so the documented "1M" could never be parsed before.
        if isinstance(timeframe, str):
            wanted = timeframe.strip()
            for known_seconds, known_value in cls._SECONDS_TO_CCXT.items():
                if known_value == wanted:
                    return cls(value=known_value, seconds=known_seconds)

        try:
            td = pd.Timedelta(timeframe)
            total_seconds = int(td.total_seconds())
        except ValueError as e:
            from pydantic import ValidationError

            raise ValidationError.from_exception_data(
                "Timeframe",
                [
                    {
                        "type": "value_error",
                        "loc": ("timeframe",),
                        "input": timeframe,
                        "ctx": {"error": ValueError(f"Invalid timeframe format: {timeframe}")},
                    }
                ],
            ) from e

        # int() truncates, so a duration such as 1.5s would otherwise be
        # silently accepted as "1s". Require an exact whole-second match.
        is_whole = td == pd.Timedelta(seconds=total_seconds)
        if not is_whole or total_seconds not in cls._SECONDS_TO_CCXT:
            valid_list = list(cls._SECONDS_TO_CCXT.values())
            # Report the untruncated duration when it is fractional.
            shown_seconds = total_seconds if is_whole else td.total_seconds()
            from pydantic import ValidationError

            raise ValidationError.from_exception_data(
                "Timeframe",
                [
                    {
                        "type": "value_error",
                        "loc": ("timeframe",),
                        "input": timeframe,
                        "ctx": {
                            "error": ValueError(
                                f"Timeframe '{timeframe}' ({shown_seconds}s) is not supported by CCXT. "
                                f"Valid timeframes: {', '.join(valid_list)}"
                            )
                        },
                    }
                ],
            )

        ccxt_value = cls._SECONDS_TO_CCXT[total_seconds]
        return cls(value=ccxt_value, seconds=total_seconds)

    @property
    def minutes(self) -> int:
        """Get timeframe in whole minutes (floor division, so "1s" yields 0)."""
        return self.seconds // 60

    @property
    def normalized(self) -> str:
        """Get normalized CCXT timeframe string for API calls."""
        return self.value

minutes: int property

Get timeframe in minutes.

normalized: str property

Get normalized CCXT timeframe string for API calls.

parse(timeframe: str) -> Timeframe classmethod

Parse timeframe string with alias support, validated against CCXT.

Uses pandas.Timedelta for flexible parsing, then validates that the resulting duration matches a CCXT-supported timeframe exactly.

Parameters:

Name Type Description Default
timeframe str

Timeframe string (e.g., "1h", "1 hour", "5min", "4 hours")

required

Returns:

Type Description
Timeframe

Validated Timeframe instance

Raises:

Type Description
ValidationError

If timeframe is invalid or not CCXT-supported

Example

Timeframe.parse("1h") # -> Timeframe(value='1h', seconds=3600) Timeframe.parse("1 hour") # -> Timeframe(value='1h', seconds=3600) Timeframe.parse("1h 30m") # Raises: not a valid CCXT timeframe

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@classmethod
def parse(cls, timeframe: str) -> "Timeframe":
    """
    Parse timeframe string with alias support, validated against CCXT.

    Canonical CCXT strings (case-sensitive, surrounding whitespace ignored)
    are looked up directly. Other inputs are parsed with pandas.Timedelta
    and accepted only when the duration is a whole number of seconds that
    equals a CCXT-supported timeframe.

    Args:
        timeframe: Timeframe string (e.g., "1h", "1 hour", "5min", "4 hours")

    Returns:
        Validated Timeframe instance

    Raises:
        ValidationError: If timeframe is invalid or not CCXT-supported

    Example:
        >>> Timeframe.parse("1h")       # -> Timeframe(value='1h', seconds=3600)
        >>> Timeframe.parse("1 hour")   # -> Timeframe(value='1h', seconds=3600)
        >>> Timeframe.parse("1M")       # -> Timeframe(value='1M', seconds=2592000)
        >>> Timeframe.parse("1h 30m")   # Raises: not a valid CCXT timeframe
        >>> Timeframe.parse("1500ms")   # Raises: not a whole CCXT timeframe
    """
    # Resolve canonical CCXT strings without pandas. This is what makes "1M"
    # (month) usable: pandas.Timedelta does not read the "M" unit as a month
    # (depending on the pandas version it is rejected or treated as minutes),
    # so the documented "1M" could never be parsed before.
    if isinstance(timeframe, str):
        wanted = timeframe.strip()
        for known_seconds, known_value in cls._SECONDS_TO_CCXT.items():
            if known_value == wanted:
                return cls(value=known_value, seconds=known_seconds)

    try:
        td = pd.Timedelta(timeframe)
        total_seconds = int(td.total_seconds())
    except ValueError as e:
        from pydantic import ValidationError

        raise ValidationError.from_exception_data(
            "Timeframe",
            [
                {
                    "type": "value_error",
                    "loc": ("timeframe",),
                    "input": timeframe,
                    "ctx": {"error": ValueError(f"Invalid timeframe format: {timeframe}")},
                }
            ],
        ) from e

    # int() truncates, so a duration such as 1.5s would otherwise be silently
    # accepted as "1s". Require an exact whole-second match.
    is_whole = td == pd.Timedelta(seconds=total_seconds)
    if not is_whole or total_seconds not in cls._SECONDS_TO_CCXT:
        valid_list = list(cls._SECONDS_TO_CCXT.values())
        # Report the untruncated duration when it is fractional.
        shown_seconds = total_seconds if is_whole else td.total_seconds()
        from pydantic import ValidationError

        raise ValidationError.from_exception_data(
            "Timeframe",
            [
                {
                    "type": "value_error",
                    "loc": ("timeframe",),
                    "input": timeframe,
                    "ctx": {
                        "error": ValueError(
                            f"Timeframe '{timeframe}' ({shown_seconds}s) is not supported by CCXT. "
                            f"Valid timeframes: {', '.join(valid_list)}"
                        )
                    },
                }
            ],
        )

    ccxt_value = cls._SECONDS_TO_CCXT[total_seconds]
    return cls(value=ccxt_value, seconds=total_seconds)

OHLCVData

Bases: BaseModel

Validated OHLCV market data container.

Wraps pandas DataFrame with validation and immutability guarantees. Required columns: symbol, date, open, high, low, close, volume.

Example

df = pd.DataFrame({...}) data = OHLCVData.from_dataframe(df) data.symbols {'BTC/USDT:USDT', 'ETH/USDT:USDT'} data.row_count 100

Source code in libs/tradai-data/src/tradai/data/core/entities.py
class OHLCVData(BaseModel):
    """
    Validated container for OHLCV market data.

    Holds a private copy of a pandas DataFrame together with two summary
    fields (``symbols`` and ``row_count``). The model is frozen, and the
    DataFrame is copied on the way in and on the way out.
    Required columns: symbol, date, open, high, low, close, volume.

    Example:
        >>> df = pd.DataFrame({...})
        >>> data = OHLCVData.from_dataframe(df)
        >>> data.symbols
        {'BTC/USDT:USDT', 'ETH/USDT:USDT'}
        >>> data.row_count
        100
    """

    model_config = {"frozen": True, "arbitrary_types_allowed": True}

    symbols: frozenset[str]
    row_count: int
    # Backing DataFrame; assigned by the factory classmethods after construction.
    _df: pd.DataFrame = PrivateAttr()

    @classmethod
    def from_dataframe(cls, df: pd.DataFrame) -> "OHLCVData":
        """
        Wrap a pandas DataFrame after checking it is usable.

        The DataFrame must be non-empty and contain every required column;
        the instance keeps its own copy of it.

        Args:
            df: DataFrame with required columns

        Returns:
            Validated OHLCVData instance

        Raises:
            ValidationError: If DataFrame is empty or missing required columns

        Example:
            >>> df = pd.DataFrame({
            ...     "symbol": ["BTC/USDT:USDT"],
            ...     "date": [datetime(2024, 1, 1)],
            ...     "open": [45000.0],
            ...     "high": [45200.0],
            ...     "low": [44900.0],
            ...     "close": [45100.0],
            ...     "volume": [100.0],
            ... })
            >>> data = OHLCVData.from_dataframe(df)
        """

        def _invalid(offending_input, reason: str) -> Exception:
            # Imported only when an error actually has to be built.
            from pydantic import ValidationError

            detail = {
                "type": "value_error",
                "loc": ("df",),
                "input": offending_input,
                "ctx": {"error": ValueError(reason)},
            }
            return ValidationError.from_exception_data("OHLCVData", [detail])

        if df.empty:
            raise _invalid("empty", "DataFrame cannot be empty")

        required_cols = {"symbol", "date", "open", "high", "low", "close", "volume"}
        missing = required_cols - set(df.columns)
        if missing:
            raise _invalid(
                list(df.columns), f"DataFrame missing required columns: {missing}"
            )

        wrapped = cls(symbols=frozenset(df["symbol"].unique()), row_count=len(df))
        # Private copy so later changes to the caller's frame do not leak in.
        wrapped._df = df.copy()
        return wrapped

    def to_dataframe(self) -> pd.DataFrame:
        """Return a copy of the stored DataFrame so callers cannot alter the original."""
        return self._df.copy(deep=True)

    def get_symbol_data(self, symbol: str) -> pd.DataFrame:
        """Return a copy of the rows whose ``symbol`` column equals ``symbol``."""
        matches = self._df["symbol"] == symbol
        return self._df[matches].copy()

    @classmethod
    def empty(cls) -> "OHLCVData":
        """
        Build an instance that holds no rows.

        Handy as a return value when nothing was collected. The backing
        DataFrame has the required columns and zero rows.

        Returns:
            Empty OHLCVData with no rows

        Example:
            >>> empty_data = OHLCVData.empty()
            >>> empty_data.row_count
            0
            >>> empty_data.is_empty
            True
        """
        column_names = ["symbol", "date", "open", "high", "low", "close", "volume"]
        blank = cls(symbols=frozenset(), row_count=0)
        blank._df = pd.DataFrame(columns=column_names)
        return blank

    @classmethod
    def merge(cls, datasets: list["OHLCVData"]) -> "OHLCVData":
        """
        Combine several OHLCVData instances into one.

        Empty instances are ignored. Rows sharing the same symbol and date
        are collapsed to the last occurrence, so later datasets win.

        Args:
            datasets: List of OHLCVData to merge

        Returns:
            Combined OHLCVData with all data. When ``datasets`` is empty or
            holds only empty instances, an empty OHLCVData is returned
            (no exception is raised for that case).

        Example:
            >>> btc_data = OHLCVData.from_dataframe(btc_df)
            >>> eth_data = OHLCVData.from_dataframe(eth_df)
            >>> combined = OHLCVData.merge([btc_data, eth_data])
        """
        if not datasets:
            return cls.empty()

        populated = [item for item in datasets if item.row_count > 0]
        if not populated:
            return cls.empty()

        frames = [item.to_dataframe() for item in populated]
        stacked = pd.concat(frames, ignore_index=True)
        # One row per (symbol, date); the latest occurrence is kept.
        deduplicated = stacked.drop_duplicates(subset=["symbol", "date"], keep="last")
        return cls.from_dataframe(deduplicated)

    @property
    def is_empty(self) -> bool:
        """True when the instance reports zero rows."""
        return self.row_count == 0

    @property
    def date_range(self) -> DateRange:
        """Return the earliest and latest values of the ``date`` column as a DateRange."""
        earliest = self._df["date"].min()
        latest = self._df["date"].max()

        # pandas Timestamps are converted to plain datetimes first.
        bounds = [
            value.to_pydatetime() if isinstance(value, pd.Timestamp) else value
            for value in (earliest, latest)
        ]
        return DateRange(start=bounds[0], end=bounds[1])

is_empty: bool property

Check if data is empty.

date_range: DateRange property

Get actual date range from data.

from_dataframe(df: pd.DataFrame) -> OHLCVData classmethod

Create OHLCVData from pandas DataFrame.

Parameters:

Name Type Description Default
df DataFrame

DataFrame with required columns

required

Returns:

Type Description
OHLCVData

Validated OHLCVData instance

Raises:

Type Description
ValidationError

If DataFrame is empty or missing required columns

Example

df = pd.DataFrame({ ... "symbol": ["BTC/USDT:USDT"], ... "date": [datetime(2024, 1, 1)], ... "open": [45000.0], ... "high": [45200.0], ... "low": [44900.0], ... "close": [45100.0], ... "volume": [100.0], ... }) data = OHLCVData.from_dataframe(df)

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@classmethod
def from_dataframe(cls, df: pd.DataFrame) -> "OHLCVData":
    """
    Wrap a pandas DataFrame after checking it is usable.

    The DataFrame must be non-empty and contain every required column; the
    instance keeps its own copy of it.

    Args:
        df: DataFrame with required columns

    Returns:
        Validated OHLCVData instance

    Raises:
        ValidationError: If DataFrame is empty or missing required columns

    Example:
        >>> df = pd.DataFrame({
        ...     "symbol": ["BTC/USDT:USDT"],
        ...     "date": [datetime(2024, 1, 1)],
        ...     "open": [45000.0],
        ...     "high": [45200.0],
        ...     "low": [44900.0],
        ...     "close": [45100.0],
        ...     "volume": [100.0],
        ... })
        >>> data = OHLCVData.from_dataframe(df)
    """

    def _invalid(offending_input, reason: str) -> Exception:
        # Imported only when an error actually has to be built.
        from pydantic import ValidationError

        detail = {
            "type": "value_error",
            "loc": ("df",),
            "input": offending_input,
            "ctx": {"error": ValueError(reason)},
        }
        return ValidationError.from_exception_data("OHLCVData", [detail])

    if df.empty:
        raise _invalid("empty", "DataFrame cannot be empty")

    required_cols = {"symbol", "date", "open", "high", "low", "close", "volume"}
    missing = required_cols - set(df.columns)
    if missing:
        raise _invalid(
            list(df.columns), f"DataFrame missing required columns: {missing}"
        )

    wrapped = cls(symbols=frozenset(df["symbol"].unique()), row_count=len(df))
    # Private copy so later changes to the caller's frame do not leak in.
    wrapped._df = df.copy()
    return wrapped

to_dataframe() -> pd.DataFrame

Return copy of internal DataFrame to prevent external modification.

Source code in libs/tradai-data/src/tradai/data/core/entities.py
def to_dataframe(self) -> pd.DataFrame:
    """Return a copy of the stored DataFrame so callers cannot alter the original."""
    return self._df.copy(deep=True)

get_symbol_data(symbol: str) -> pd.DataFrame

Filter data by symbol.

Source code in libs/tradai-data/src/tradai/data/core/entities.py
def get_symbol_data(self, symbol: str) -> pd.DataFrame:
    """Return a copy of the rows whose ``symbol`` column equals ``symbol``."""
    matches = self._df["symbol"] == symbol
    return self._df[matches].copy()

empty() -> OHLCVData classmethod

Create an empty OHLCVData instance.

Useful for returning when no data is collected.

Returns:

Type Description
OHLCVData

Empty OHLCVData with no rows

Example

empty_data = OHLCVData.empty() empty_data.row_count 0 empty_data.is_empty True

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@classmethod
def empty(cls) -> "OHLCVData":
    """
    Build an instance that holds no rows.

    Handy as a return value when nothing was collected. The backing
    DataFrame has the required columns and zero rows.

    Returns:
        Empty OHLCVData with no rows

    Example:
        >>> empty_data = OHLCVData.empty()
        >>> empty_data.row_count
        0
        >>> empty_data.is_empty
        True
    """
    column_names = ["symbol", "date", "open", "high", "low", "close", "volume"]
    blank = cls(symbols=frozenset(), row_count=0)
    blank._df = pd.DataFrame(columns=column_names)
    return blank

merge(datasets: list[OHLCVData]) -> OHLCVData classmethod

Merge multiple OHLCVData instances into one.

Parameters:

Name Type Description Default
datasets list[OHLCVData]

List of OHLCVData to merge

required

Returns:

Type Description
OHLCVData

Combined OHLCVData with all data

Raises:

Type Description
ValueError

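Documented for an empty datasets list, but the implementation shown below does not raise in that case: it returns OHLCVData.empty().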

Example

btc_data = OHLCVData.from_dataframe(btc_df) eth_data = OHLCVData.from_dataframe(eth_df) combined = OHLCVData.merge([btc_data, eth_data])

Source code in libs/tradai-data/src/tradai/data/core/entities.py
@classmethod
def merge(cls, datasets: list["OHLCVData"]) -> "OHLCVData":
    """
    Combine several OHLCVData instances into one.

    Empty instances are ignored. Rows sharing the same symbol and date are
    collapsed to the last occurrence, so later datasets win.

    Args:
        datasets: List of OHLCVData to merge

    Returns:
        Combined OHLCVData with all data. When ``datasets`` is empty or
        holds only empty instances, an empty OHLCVData is returned
        (no exception is raised for that case).

    Example:
        >>> btc_data = OHLCVData.from_dataframe(btc_df)
        >>> eth_data = OHLCVData.from_dataframe(eth_df)
        >>> combined = OHLCVData.merge([btc_data, eth_data])
    """
    if not datasets:
        return cls.empty()

    populated = [item for item in datasets if item.row_count > 0]
    if not populated:
        return cls.empty()

    frames = [item.to_dataframe() for item in populated]
    stacked = pd.concat(frames, ignore_index=True)
    # One row per (symbol, date); the latest occurrence is kept.
    deduplicated = stacked.drop_duplicates(subset=["symbol", "date"], keep="last")
    return cls.from_dataframe(deduplicated)