Csv Handler
/install csv-handler
\r
CSV Handler for Construction Data\r
\r
Overview\r
CSV is the universal exchange format in construction - from scheduling exports to cost databases. This skill handles encoding issues, delimiter detection, and data cleaning.\r \r
Python Implementation\r
\r
import pandas as pd\r
import csv\r
from typing import Dict, Any, List, Optional, Tuple\r
from pathlib import Path\r
from dataclasses import dataclass\r
import chardet\r
\r
\r
@dataclass\r
class CSVProfile:\r
"""Profile of CSV file."""\r
encoding: str\r
delimiter: str\r
has_header: bool\r
row_count: int\r
column_count: int\r
columns: List[str]\r
\r
\r
class ConstructionCSVHandler:\r
"""Handle CSV files from construction software."""\r
\r
COMMON_DELIMITERS = [',', ';', ' ', '|']\r
COMMON_ENCODINGS = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']\r
\r
def __init__(self):\r
self.last_profile: Optional[CSVProfile] = None\r
\r
def detect_encoding(self, file_path: str) -> str:\r
"""Detect file encoding."""\r
with open(file_path, 'rb') as f:\r
raw = f.read(10000)\r
result = chardet.detect(raw)\r
return result.get('encoding', 'utf-8') or 'utf-8'\r
\r
def detect_delimiter(self, file_path: str, encoding: str) -> str:\r
"""Detect CSV delimiter."""\r
with open(file_path, 'r', encoding=encoding, errors='replace') as f:\r
sample = f.read(5000)\r
\r
# Count occurrences\r
counts = {d: sample.count(d) for d in self.COMMON_DELIMITERS}\r
\r
# Return most common that appears consistently\r
if counts:\r
return max(counts, key=counts.get)\r
return ','\r
\r
def profile_csv(self, file_path: str) -> CSVProfile:\r
"""Profile CSV file."""\r
encoding = self.detect_encoding(file_path)\r
delimiter = self.detect_delimiter(file_path, encoding)\r
\r
# Read sample\r
df = pd.read_csv(file_path, encoding=encoding, delimiter=delimiter,\r
nrows=10, on_bad_lines='skip')\r
\r
has_header = not df.columns[0].replace('.', '').replace('-', '').isdigit()\r
\r
# Full row count\r
with open(file_path, 'r', encoding=encoding, errors='replace') as f:\r
row_count = sum(1 for _ in f) - (1 if has_header else 0)\r
\r
profile = CSVProfile(\r
encoding=encoding,\r
delimiter=delimiter,\r
has_header=has_header,\r
row_count=row_count,\r
column_count=len(df.columns),\r
columns=list(df.columns)\r
)\r
self.last_profile = profile\r
return profile\r
\r
def read_csv(self, file_path: str,\r
encoding: Optional[str] = None,\r
delimiter: Optional[str] = None,\r
clean: bool = True) -> pd.DataFrame:\r
"""Read CSV with auto-detection."""\r
\r
# Auto-detect if not provided\r
if encoding is None:\r
encoding = self.detect_encoding(file_path)\r
if delimiter is None:\r
delimiter = self.detect_delimiter(file_path, encoding)\r
\r
# Read with error handling\r
df = pd.read_csv(\r
file_path,\r
encoding=encoding,\r
delimiter=delimiter,\r
on_bad_lines='skip',\r
low_memory=False\r
)\r
\r
if clean:\r
df = self.clean_dataframe(df)\r
\r
return df\r
\r
def clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:\r
"""Clean construction CSV data."""\r
# Clean column names\r
df.columns = [self._clean_column_name(c) for c in df.columns]\r
\r
# Remove empty rows and columns\r
df = df.dropna(how='all')\r
df = df.dropna(axis=1, how='all')\r
\r
# Strip whitespace from strings\r
for col in df.select_dtypes(include=['object']):\r
df[col] = df[col].str.strip() if df[col].dtype == 'object' else df[col]\r
\r
return df\r
\r
def _clean_column_name(self, name: str) -> str:\r
"""Clean column name."""\r
if not isinstance(name, str):\r
return str(name)\r
\r
# Remove special characters, replace spaces\r
clean = name.strip().lower()\r
clean = clean.replace(' ', '_').replace('-', '_')\r
clean = ''.join(c for c in clean if c.isalnum() or c == '_')\r
return clean\r
\r
def merge_csvs(self, file_paths: List[str],\r
on_column: Optional[str] = None) -> pd.DataFrame:\r
"""Merge multiple CSV files."""\r
dfs = []\r
for path in file_paths:\r
df = self.read_csv(path)\r
df['_source_file'] = Path(path).name\r
dfs.append(df)\r
\r
if not dfs:\r
return pd.DataFrame()\r
\r
if on_column and on_column in dfs[0].columns:\r
result = dfs[0]\r
for df in dfs[1:]:\r
result = pd.merge(result, df, on=on_column, how='outer')\r
return result\r
\r
return pd.concat(dfs, ignore_index=True)\r
\r
def split_csv(self, df: pd.DataFrame,\r
group_column: str,\r
output_dir: str) -> List[str]:\r
"""Split CSV by column values."""\r
output_path = Path(output_dir)\r
output_path.mkdir(parents=True, exist_ok=True)\r
\r
files = []\r
for value in df[group_column].unique():\r
subset = df[df[group_column] == value]\r
filename = f"{group_column}_{value}.csv"\r
filepath = output_path / filename\r
subset.to_csv(filepath, index=False)\r
files.append(str(filepath))\r
\r
return files\r
\r
def convert_types(self, df: pd.DataFrame,\r
type_map: Dict[str, str] = None) -> pd.DataFrame:\r
"""Convert column types intelligently."""\r
df = df.copy()\r
\r
if type_map:\r
for col, dtype in type_map.items():\r
if col in df.columns:\r
try:\r
df[col] = df[col].astype(dtype)\r
except:\r
pass\r
else:\r
# Auto-convert\r
for col in df.columns:\r
# Try numeric\r
try:\r
df[col] = pd.to_numeric(df[col])\r
continue\r
except:\r
pass\r
\r
# Try datetime\r
try:\r
df[col] = pd.to_datetime(df[col])\r
except:\r
pass\r
\r
return df\r
\r
def export_csv(self, df: pd.DataFrame,\r
file_path: str,\r
encoding: str = 'utf-8-sig',\r
delimiter: str = ',') -> str:\r
"""Export DataFrame to CSV."""\r
df.to_csv(file_path, encoding=encoding, sep=delimiter, index=False)\r
return file_path\r
\r
\r
# Specialized handlers\r
class ScheduleCSVHandler(ConstructionCSVHandler):\r
"""Handler for project schedule CSVs."""\r
\r
SCHEDULE_COLUMNS = ['task_id', 'task_name', 'start_date', 'end_date',\r
'duration', 'predecessors', 'resources']\r
\r
def parse_schedule(self, file_path: str) -> pd.DataFrame:\r
"""Parse schedule CSV."""\r
df = self.read_csv(file_path)\r
\r
# Convert date columns\r
for col in df.columns:\r
if 'date' in col.lower() or 'start' in col.lower() or 'end' in col.lower():\r
try:\r
df[col] = pd.to_datetime(df[col])\r
except:\r
pass\r
\r
return df\r
\r
\r
class CostCSVHandler(ConstructionCSVHandler):\r
"""Handler for cost/estimate CSVs."""\r
\r
def parse_costs(self, file_path: str) -> pd.DataFrame:\r
"""Parse cost CSV."""\r
df = self.read_csv(file_path)\r
\r
# Find and convert numeric columns\r
for col in df.columns:\r
if any(word in col.lower() for word in ['cost', 'price', 'amount', 'total', 'qty', 'quantity']):\r
df[col] = pd.to_numeric(df[col].replace(r'[\$,]', '', regex=True), errors='coerce')\r
\r
return df\r
```\r
\r
## Quick Start\r
\r
```python\r
handler = ConstructionCSVHandler()\r
\r
# Profile CSV first\r
profile = handler.profile_csv("export.csv")\r
print(f"Encoding: {profile.encoding}, Delimiter: '{profile.delimiter}'")\r
\r
# Read with auto-detection\r
df = handler.read_csv("export.csv")\r
print(f"Loaded {len(df)} rows, {len(df.columns)} columns")\r
```\r
\r
## Common Use Cases\r
\r
### 1. Merge Multiple Exports\r
```python\r
files = ["jan_export.csv", "feb_export.csv", "mar_export.csv"]\r
merged = handler.merge_csvs(files)\r
```\r
\r
### 2. Split by Category\r
```python\r
handler.split_csv(df, group_column='category', output_dir='./split_files')\r
```\r
\r
### 3. Schedule Import\r
```python\r
schedule_handler = ScheduleCSVHandler()\r
schedule = schedule_handler.parse_schedule("p6_export.csv")\r
```\r
\r
## Resources\r
- **DDC Book**: Chapter 2.1 - Structured Data\r
- Make sure OpenClaw is installed (local or Docker)
- Run the install command in chat:
/install csv-handler - After installation, invoke the skill by name or use
/csv-handler - Provide required inputs per the skill's parameter spec and get structured output
What is Csv Handler?
Handle CSV files from construction software exports. Auto-detect delimiters, encodings, and clean messy data. It is an AI Agent Skill for Claude Code / OpenClaw, with 2280 downloads so far.
How do I install Csv Handler?
Run "/install csv-handler" in the OpenClaw or Claude Code chat to install it in one step — no extra setup required.
Is Csv Handler free?
Yes, Csv Handler is completely free (open-source). You can download, install and use it at no cost.
Which platforms does Csv Handler support?
Csv Handler is cross-platform and runs anywhere OpenClaw / Claude Code is available (darwin, linux, win32).
Who created Csv Handler?
It is built and maintained by datadrivenconstruction (@datadrivenconstruction); the current version is v2.1.0.