Skip to content

Group By

"""
Group By example for aaiclick.

This example demonstrates how to use group_by operations on Objects
to aggregate data per group, including single/multi-key grouping,
various aggregation methods, multi-agg, HAVING filters, and View support.
"""

import asyncio

from aaiclick import create_object_from_value
from aaiclick.data import Agg
from aaiclick.data.data_context import data_context


async def example():
    """Run all group_by examples."""
    # Example 1: Basic group_by with sum
    print("Example 1: Basic group_by with sum")
    print("-" * 50)

    sales = await create_object_from_value(
        {
            "category": ["Electronics", "Electronics", "Clothing", "Clothing", "Food"],
            "amount": [500, 300, 150, 200, 80],
        }
    )
    result = await sales.group_by("category").sum("amount")
    data = await result.data()
    print("Sales by category (sum):")
    for cat, amt in sorted(zip(data["category"], data["amount"], strict=False)):
        print(f"  {cat}: ${amt}")

    # Example 2: Count per group
    print("\n" + "=" * 50)
    print("Example 2: Count per group")
    print("-" * 50)

    result = await sales.group_by("category").count()
    data = await result.data()
    print("Transaction count by category:")
    for cat, cnt in sorted(zip(data["category"], data["_count"], strict=False)):
        print(f"  {cat}: {cnt} transactions")

    # Example 3: Multiple group keys
    print("\n" + "=" * 50)
    print("Example 3: Multiple group keys")
    print("-" * 50)

    orders = await create_object_from_value(
        {
            "region": ["East", "East", "West", "West", "East", "West"],
            "category": ["A", "B", "A", "B", "A", "A"],
            "revenue": [100, 200, 150, 250, 120, 180],
        }
    )
    result = await orders.group_by("region", "category").sum("revenue")
    data = await result.data()
    print("Revenue by region + category:")
    triples = sorted(zip(data["region"], data["category"], data["revenue"], strict=False))
    for region, cat, rev in triples:
        print(f"  {region} / {cat}: ${rev}")

    # Example 4: Multi-aggregation with agg()
    print("\n" + "=" * 50)
    print("Example 4: Multi-aggregation with agg()")
    print("-" * 50)

    products = await create_object_from_value(
        {
            "category": ["Electronics", "Electronics", "Clothing", "Clothing"],
            "price": [999.99, 499.99, 59.99, 89.99],
            "quantity": [10, 25, 100, 75],
        }
    )
    result = await products.group_by("category").agg(
        {
            "price": "mean",
            "quantity": "sum",
        }
    )
    data = await result.data()
    print("Product stats by category:")
    for i, cat in enumerate(data["category"]):
        print(f"  {cat}: avg price=${data['price'][i]:.2f}, total qty={data['quantity'][i]}")

    # Example 5: Multiple aggregations on the same column
    print("\n" + "=" * 50)
    print("Example 5: Multiple aggregations on the same column")
    print("-" * 50)

    result = await products.group_by("category").agg(
        {
            "price": [Agg("min", "price_min"), Agg("max", "price_max"), Agg("mean", "price_avg")],
            "quantity": Agg("sum", "total_qty"),
        }
    )
    data = await result.data()
    print("Product price range + total qty by category:")
    for i, cat in enumerate(data["category"]):
        print(
            f"  {cat}: "
            f"${data['price_min'][i]:.2f} – ${data['price_max'][i]:.2f} "
            f"(avg ${data['price_avg'][i]:.2f}), "
            f"total qty={data['total_qty'][i]}"
        )

    # Example 6: Statistical aggregations
    print("\n" + "=" * 50)
    print("Example 6: Statistical aggregations")
    print("-" * 50)

    scores = await create_object_from_value(
        {
            "class": ["A", "A", "A", "A", "B", "B", "B", "B"],
            "score": [85, 90, 78, 92, 70, 95, 60, 88],
        }
    )
    result = await scores.group_by("class").agg(
        {
            "score": "mean",
        }
    )
    data = await result.data()
    print("Mean score by class:")
    for cls, score in sorted(zip(data["class"], data["score"], strict=False)):
        print(f"  Class {cls}: {score:.1f}")

    std_result = await scores.group_by("class").std("score")
    std_data = await std_result.data()
    print("Score std deviation by class:")
    for cls, std in sorted(zip(std_data["class"], std_data["score"], strict=False)):
        print(f"  Class {cls}: {std:.2f}")

    # Example 7: HAVING — filter groups after aggregation
    print("\n" + "=" * 50)
    print("Example 7: HAVING — filter groups after aggregation")
    print("-" * 50)

    transactions = await create_object_from_value(
        {
            "store": ["NYC", "NYC", "NYC", "LA", "LA", "Chicago"],
            "amount": [500, 300, 200, 100, 50, 800],
        }
    )
    print("All stores:")
    all_result = await transactions.group_by("store").sum("amount")
    all_data = await all_result.data()
    for store, amt in sorted(zip(all_data["store"], all_data["amount"], strict=False)):
        print(f"  {store}: ${amt}")

    print("\nStores with total > $200 (HAVING):")
    filtered = await transactions.group_by("store").having("sum(amount) > 200").sum("amount")
    fdata = await filtered.data()
    for store, amt in sorted(zip(fdata["store"], fdata["amount"], strict=False)):
        print(f"  {store}: ${amt}")

    # Example 8: WHERE + HAVING combined
    print("\n" + "=" * 50)
    print("Example 8: WHERE + HAVING combined")
    print("-" * 50)

    print("Filter rows WHERE amount >= 100, then HAVING count() >= 2:")
    view = transactions.view(where="amount >= 100")
    result = await view.group_by("store").having("count() >= 2").count()
    data = await result.data()
    for store, cnt in sorted(zip(data["store"], data["_count"], strict=False)):
        print(f"  {store}: {cnt} large transactions")

    # Example 9: Chained HAVING with AND
    print("\n" + "=" * 50)
    print("Example 9: Chained HAVING with AND")
    print("-" * 50)

    print("Stores with total > $200 AND at least 2 transactions:")
    result = await transactions.group_by("store").having("sum(amount) > 200").having("count() >= 2").sum("amount")
    data = await result.data()
    for store, amt in sorted(zip(data["store"], data["amount"], strict=False)):
        print(f"  {store}: ${amt}")

    # Example 10: OR HAVING
    print("\n" + "=" * 50)
    print("Example 10: OR HAVING")
    print("-" * 50)

    print("Stores with total > $700 OR only 1 transaction:")
    result = await transactions.group_by("store").having("sum(amount) > 700").or_having("count() = 1").sum("amount")
    data = await result.data()
    for store, amt in sorted(zip(data["store"], data["amount"], strict=False)):
        print(f"  {store}: ${amt}")

    # Example 11: Array value_counts pattern
    print("\n" + "=" * 50)
    print("Example 11: Array value_counts pattern")
    print("-" * 50)

    colors = await create_object_from_value(["red", "blue", "red", "green", "blue", "red"])
    counts = await colors.group_by("value").count()
    data = await counts.data()
    print("Color frequencies:")
    for val, cnt in sorted(zip(data["value"], data["_count"], strict=False), key=lambda x: -x[1]):
        print(f"  {val}: {cnt}")

    # Example 12: Working with group_by results
    print("\n" + "=" * 50)
    print("Example 12: Working with group_by results")
    print("-" * 50)

    result = await sales.group_by("category").sum("amount")
    print("Group by result is a normal dict Object")

    # Field selection on result
    amounts = result["amount"]
    print(f"  Select 'amount' column: {await amounts.data()}")

    # Further aggregation on result
    total = await amounts.sum()
    print(f"  Total across all groups: ${await total.data()}")  # → $1230

    # Orient as records
    records = await result.data(orient="records")
    print(f"  As records: {records}")

    print("\n" + "=" * 50)
    print("Cleanup: All context-created objects will be cleaned up automatically")
    print("-" * 50)


async def amain():
    """Main entry point that creates data_context() and calls example."""
    async with data_context():
        await example()


if __name__ == "__main__":
    print("=" * 50)
    print("aaiclick Group By Example")
    print("=" * 50)
    print("\nNote: This example requires a running ClickHouse server")
    print("      on localhost:8123\n")
    asyncio.run(amain())