python Utilities

CSV Data Processor

Flexible CSV processor with support for filtering, transforming, aggregating, and exporting data using pandas.

Apex Logic 0 copies
python
import pandas as pd
from pathlib import Path
from typing import Dict, List, Optional, Callable

class CSVProcessor:
    """Chainable wrapper around a pandas DataFrame loaded from a CSV file.

    The cleaning methods (``filter_rows``, ``rename_columns``,
    ``drop_duplicates``, ``fill_missing``) update ``self.df`` and return the
    processor itself so calls can be chained. ``aggregate`` returns a new
    DataFrame, while ``export`` and ``summary`` return ``None``.
    """

    def __init__(self, filepath: str, encoding: str = "utf-8"):
        """Load ``filepath`` into ``self.df`` and report the loaded size.

        Args:
            filepath: Location of the CSV file, handed to ``pd.read_csv``.
            encoding: Text encoding used to decode the file.
        """
        self.filepath = Path(filepath)
        # The raw argument (not the Path) goes to pandas so whatever the
        # caller supplied is interpreted by read_csv exactly as given.
        self.df = pd.read_csv(filepath, encoding=encoding)
        # Shape at load time, kept for reference after later transformations.
        self.original_shape = self.df.shape
        print(f"Loaded {self.original_shape[0]} rows, {self.original_shape[1]} columns")

    def filter_rows(self, column: str, condition: Callable) -> "CSVProcessor":
        """Keep only the rows whose ``column`` value satisfies ``condition``.

        Args:
            column: Name of the column whose values are tested.
            condition: Callable applied to each value of ``column``; it is
                expected to return a boolean.

        Returns:
            This processor, to allow chaining.
        """
        mask = self.df[column].apply(condition)
        if mask.empty:
            # With no rows, ``apply`` hands back an empty Series carrying the
            # column's original dtype. ``self.df[mask]`` would then interpret
            # it as an (empty) list of column labels and discard every
            # column. There is nothing to filter, so keep the frame as is.
            return self
        self.df = self.df[mask]
        return self

    def rename_columns(self, mapping: Dict[str, str]) -> "CSVProcessor":
        """Rename columns according to ``mapping`` (old name -> new name).

        Returns:
            This processor, to allow chaining.
        """
        self.df = self.df.rename(columns=mapping)
        return self

    def drop_duplicates(self, subset: Optional[List[str]] = None) -> "CSVProcessor":
        """Drop duplicate rows and print how many were removed.

        Args:
            subset: Columns used to identify duplicates; ``None`` is passed
                through to ``DataFrame.drop_duplicates`` unchanged.

        Returns:
            This processor, to allow chaining.
        """
        before = len(self.df)
        self.df = self.df.drop_duplicates(subset=subset)
        print(f"Removed {before - len(self.df)} duplicates")
        return self

    def fill_missing(self, column: str, value) -> "CSVProcessor":
        """Replace missing values in ``column`` with ``value``.

        Args:
            column: Name of the column to fill.
            value: Replacement passed to ``Series.fillna``.

        Returns:
            This processor, to allow chaining.
        """
        self.df[column] = self.df[column].fillna(value)
        return self

    def aggregate(self, group_by: str, agg_dict: Dict[str, str]) -> pd.DataFrame:
        """Group by ``group_by`` and aggregate columns as ``agg_dict`` describes.

        ``self.df`` is not modified.

        Args:
            group_by: Column to group on.
            agg_dict: Mapping passed to ``GroupBy.agg`` (column -> aggregation).

        Returns:
            A new DataFrame with the group key restored as a regular column.
        """
        return self.df.groupby(group_by).agg(agg_dict).reset_index()

    def export(self, output: str, index: bool = False) -> None:
        """Write the current frame to ``output`` as CSV and print a summary line.

        Args:
            output: Destination path passed to ``DataFrame.to_csv``.
            index: Whether to write the DataFrame index as well.
        """
        self.df.to_csv(output, index=index)
        print(f"Exported {len(self.df)} rows to {output}")

    def summary(self) -> None:
        """Print the frame's shape, column names and per-column null counts."""
        print(f"\nShape: {self.df.shape}")
        print(f"Columns: {list(self.df.columns)}")
        print(f"Missing values:\n{self.df.isnull().sum()}")

if __name__ == "__main__":
    # Example pipeline: load the sales data, drop repeated orders, keep only
    # positive amounts, label missing regions, then write the cleaned file.
    processor = CSVProcessor("data/sales.csv")
    (
        processor
        .drop_duplicates(["order_id"])
        .filter_rows("amount", lambda amount: amount > 0)
        .fill_missing("region", "Unknown")
        .export("data/cleaned_sales.csv")
    )

Tags

csv pandas data-processing etl

Related Snippets

javascript

File Upload Handler with Validation

python

Web Scraper with BeautifulSoup

python

REST API Client with Retry

python

Email Sender with Templates