Image To Data
/install image-to-data
\r \r
Image To Data\r
\r
Overview\r
Based on the DDC methodology (Chapter 2.4), this skill extracts structured data from construction images — site photos, scanned documents, and drawings — using computer vision, OCR, and AI models.

Book Reference: "Преобразование данных в структурированную форму" / "Data Transformation to Structured Form"

Quick Start

```python
from dataclasses import dataclass, field\r
from enum import Enum\r
from typing import List, Dict, Optional, Any, Tuple\r
from datetime import datetime\r
import json\r
import base64\r
\r
class ImageType(Enum):
    """Kinds of construction image the analyzer can be told it is looking at.

    Every member's value is the lowercase form of its own name.
    """

    # General site photography
    SITE_PHOTO = "site_photo"

    # Documents and drawings
    SCANNED_DOCUMENT = "scanned_document"
    FLOOR_PLAN = "floor_plan"
    ELEVATION = "elevation"
    DETAIL_DRAWING = "detail_drawing"

    # Purpose-specific photographs
    PROGRESS_PHOTO = "progress_photo"
    SAFETY_PHOTO = "safety_photo"
    DEFECT_PHOTO = "defect_photo"
    MATERIAL_PHOTO = "material_photo"
    EQUIPMENT_PHOTO = "equipment_photo"
\r
class ExtractionType(Enum):
    """Kinds of extraction that can be requested for an image.

    Every member's value is the lowercase form of its own name.
    """

    # Text and tabular content
    OCR_TEXT = "ocr_text"
    TABLE = "table"

    # Visual analysis
    OBJECT_DETECTION = "object_detection"
    MEASUREMENT = "measurement"
    CLASSIFICATION = "classification"
    PROGRESS = "progress"
\r
@dataclass
class BoundingBox:
    """Rectangular region of an image, with the confidence of its detection."""

    # Position of the box — presumably the top-left corner in pixels; confirm
    # against whichever OCR/detection backend populates it.
    x: int
    y: int
    # Extent of the box along each axis.
    width: int
    height: int
    # Detection confidence; defaults to full confidence when not supplied.
    confidence: float = field(default=1.0)
\r
@dataclass
class TextRegion:
    """One piece of text recognised in an image, with where it was found."""

    # The recognised string.
    text: str
    # Location of the text within the image.
    bbox: BoundingBox
    # Recognition confidence reported for this region.
    confidence: float
    # Language code the text was read as; English unless stated otherwise.
    language: str = field(default="en")
\r
@dataclass
class DetectedObject:
    """One object found in an image by the detector."""

    # Class label of the object (e.g. "worker", "crane").
    label: str
    # Location of the object within the image.
    bbox: BoundingBox
    # Detection confidence reported for this object.
    confidence: float
    # Free-form extra information about the object; a fresh dict per instance.
    attributes: Dict[str, Any] = field(default_factory=dict)
\r
@dataclass
class ExtractedTable:
    """A table recovered from an image: header row, body rows and location."""

    # Column titles, in column order.
    headers: List[str]
    # Body rows; each row is a list of cell strings.
    rows: List[List[str]]
    # Location of the whole table within the image.
    bbox: BoundingBox
    # Extraction confidence reported for this table.
    confidence: float
\r
@dataclass
class ProgressMeasurement:
    """Progress figures derived from an image for one kind of element."""

    # Which kind of element the figures refer to.
    element_type: str
    # Number of elements expected and number judged complete.
    total_count: int
    completed_count: int
    # Completion percentage; stored as given, not derived from the counts here.
    percent_complete: float
    # Optional quantities — presumably square/cubic feet, going by the names.
    area_sqft: Optional[float] = field(default=None)
    volume_cuft: Optional[float] = field(default=None)
\r
@dataclass
class ImageAnalysisResult:
    """Everything extracted from a single image in one analysis run."""

    # Caller-supplied identifier and declared type of the image.
    image_id: str
    image_type: ImageType
    # Extraction outputs; a list is empty when that extraction yielded nothing
    # or was not requested.
    text_regions: List[TextRegion]
    detected_objects: List[DetectedObject]
    tables: List[ExtractedTable]
    # Progress figures, when progress analysis was performed.
    progress: Optional[ProgressMeasurement] = None
    # Free-form extra information about the run; a fresh dict per instance.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Elapsed analysis time.
    processing_time: float = field(default=0.0)
\r
\r
class OCREngine:
    """OCR engine for text extraction.

    NOTE: ``extract_text`` is a simulation that always returns the same three
    title-block regions. Replace it with a real backend (pytesseract, EasyOCR
    or a cloud OCR service) for production use.
    """

    def __init__(self, engine: str = "tesseract"):
        """Remember the backend name and the language codes it is meant to support."""
        self.engine = engine
        self.supported_languages = ["en", "ru", "de", "fr", "es"]

    def extract_text(
        self,
        image_data: bytes,
        language: str = "en"
    ) -> List[TextRegion]:
        """Extract text regions from an image (simulated).

        Args:
            image_data: Raw image bytes. Ignored by the simulation.
            language: Language code stamped onto every returned region.

        Returns:
            Three fixed regions imitating the title block of a drawing.
        """
        # (text, (x, y, width, height), confidence) for the simulated title block.
        simulated = (
            ("PROJECT: OFFICE BUILDING", (100, 50, 300, 30), 0.95),
            ("DRAWING: A-101", (100, 90, 200, 25), 0.92),
            ("SCALE: 1:100", (100, 120, 150, 20), 0.88),
        )

        return [
            TextRegion(
                text=text,
                bbox=BoundingBox(
                    x=x, y=y, width=width, height=height, confidence=conf
                ),
                confidence=conf,
                language=language,
            )
            for text, (x, y, width, height), conf in simulated
        ]

    def extract_structured_text(
        self,
        image_data: bytes,
        template: Optional[Dict] = None
    ) -> Dict[str, str]:
        """Extract named fields from the recognised text.

        Args:
            image_data: Raw image bytes, forwarded to ``extract_text``.
            template: Optional mapping ``field_name -> {"keyword": str}``. When
                given, each field receives the full text of the first region
                containing its keyword (case-insensitive). Fields without a
                keyword are skipped. When omitted or empty, the built-in
                ``PROJECT:`` / ``DRAWING:`` / ``SCALE:`` labels are used and
                each field receives the text that follows its label.

        Returns:
            Mapping of field name to extracted text; unmatched fields are absent.
        """
        regions = self.extract_text(image_data)
        structured: Dict[str, str] = {}

        if template:
            for field_name, field_config in template.items():
                keyword = field_config.get("keyword")
                if keyword is None:
                    # A field without a keyword cannot match anything; the
                    # previous `None in str` test raised TypeError here.
                    continue
                # Region text is lowercased for the comparison, so the keyword
                # must be too or upper-case keywords could never match.
                needle = str(keyword).lower()
                for region in regions:
                    if needle in region.text.lower():
                        structured[field_name] = region.text
                        break
            return structured

        # Default title-block labels, checked in this order for every region.
        default_labels = (
            ("PROJECT:", "project_name"),
            ("DRAWING:", "drawing_number"),
            ("SCALE:", "scale"),
        )
        for region in regions:
            for label, key in default_labels:
                if label in region.text:
                    # Keep everything after the label. Splitting on every ":"
                    # and taking the last piece turned "SCALE: 1:100" into "100".
                    structured[key] = region.text.split(label, 1)[1].strip()
                    break

        return structured
\r
\r
class ObjectDetector:
    """Object detection for construction images.

    NOTE: ``detect`` is a simulation that always proposes the same five
    detections. Replace it with a real model (YOLO, Faster R-CNN, ...) for
    production use.
    """

    def __init__(self, model: str = "yolov8"):
        """Remember the model name and load the construction class table."""
        self.model = model
        self.construction_classes = self._load_construction_classes()

    def _load_construction_classes(self) -> Dict[str, Dict]:
        """Return the table of known labels and their per-class attributes."""
        return {
            # Equipment
            "excavator": {"category": "equipment", "safety_zone": 20},
            "crane": {"category": "equipment", "safety_zone": 30},
            "forklift": {"category": "equipment", "safety_zone": 10},
            "concrete_mixer": {"category": "equipment", "safety_zone": 5},
            "scaffolding": {"category": "equipment", "safety_zone": 5},

            # Safety
            "hard_hat": {"category": "ppe", "required": True},
            "safety_vest": {"category": "ppe", "required": True},
            "safety_glasses": {"category": "ppe", "required": False},
            "harness": {"category": "ppe", "required": False},

            # Materials
            "rebar_bundle": {"category": "material", "unit": "bundle"},
            "concrete_block": {"category": "material", "unit": "pallet"},
            "lumber_stack": {"category": "material", "unit": "bundle"},
            "pipe_stack": {"category": "material", "unit": "bundle"},

            # Workers
            "worker": {"category": "person", "track": True},

            # Building elements
            "column": {"category": "structure"},
            "beam": {"category": "structure"},
            "slab": {"category": "structure"},
            "wall": {"category": "structure"},
        }

    def detect(
        self,
        image_data: bytes,
        confidence_threshold: float = 0.5
    ) -> List[DetectedObject]:
        """Detect objects in an image (simulated).

        Args:
            image_data: Raw image bytes. Ignored by the simulation.
            confidence_threshold: Detections scoring below this are dropped.

        Returns:
            The simulated detections at or above the threshold, each carrying
            its own copy of the class-table attributes for its label.
        """
        # (label, confidence, box) for the simulated scene.
        sample_detections = [
            ("worker", 0.92, BoundingBox(200, 300, 80, 180, 0.92)),
            ("hard_hat", 0.88, BoundingBox(210, 300, 30, 25, 0.88)),
            ("safety_vest", 0.85, BoundingBox(210, 340, 60, 80, 0.85)),
            ("scaffolding", 0.78, BoundingBox(400, 100, 200, 400, 0.78)),
            ("concrete_block", 0.72, BoundingBox(50, 450, 100, 50, 0.72)),
        ]

        return [
            DetectedObject(
                label=label,
                bbox=bbox,
                confidence=conf,
                # Copy the entry: handing out the shared class-table dict would
                # let a caller editing one object's attributes silently change
                # the table (and every later detection of that label).
                attributes=dict(self.construction_classes.get(label, {})),
            )
            for label, conf, bbox in sample_detections
            if conf >= confidence_threshold
        ]

    def detect_safety_compliance(
        self,
        image_data: bytes
    ) -> Dict:
        """Summarise PPE compliance for the workers detected in an image.

        Compliance is judged by counts only (hard hats and vests versus
        workers); items are not matched to individual workers.

        Args:
            image_data: Raw image bytes, forwarded to ``detect``.

        Returns:
            Dict with the detected counts, ``hard_hat_compliance`` and
            ``vest_compliance`` ratios (capped at 1.0; 1.0 when no workers are
            present), ``overall_compliance`` ("compliant" or "non-compliant")
            and a ``violations`` list of ``{"type", "count"}`` entries.
        """
        objects = self.detect(image_data)

        worker_count = sum(1 for obj in objects if obj.label == "worker")
        hard_hat_count = sum(1 for obj in objects if obj.label == "hard_hat")
        vest_count = sum(1 for obj in objects if obj.label == "safety_vest")

        def coverage(item_count: int) -> float:
            # Spare PPE in the frame must not push the ratio above 100 %.
            if not worker_count:
                return 1.0
            return min(item_count / worker_count, 1.0)

        # Both hard hats and vests are marked required in the class table, so
        # a shortfall in either one is a violation.
        violations = []
        if hard_hat_count < worker_count:
            violations.append({
                "type": "missing_hard_hat",
                "count": worker_count - hard_hat_count
            })
        if vest_count < worker_count:
            violations.append({
                "type": "missing_safety_vest",
                "count": worker_count - vest_count
            })

        return {
            "workers_detected": worker_count,
            "hard_hats_detected": hard_hat_count,
            "vests_detected": vest_count,
            "hard_hat_compliance": coverage(hard_hat_count),
            "vest_compliance": coverage(vest_count),
            "overall_compliance": "non-compliant" if violations else "compliant",
            "violations": violations
        }
\r
\r
class TableExtractor:
    """Pull tables out of images and reshape them for downstream use.

    NOTE: extraction is simulated; a production version would use Camelot,
    Tabula or a custom CNN.
    """

    def extract_tables(
        self,
        image_data: bytes,
        detect_headers: bool = True
    ) -> List[ExtractedTable]:
        """Return the tables found in an image (simulated).

        Neither argument influences the simulated output: a single fixed
        schedule table is always returned.
        """
        schedule_rows = [
            ["Foundation", "2024-01-01", "2024-01-15", "14 days"],
            ["Framing", "2024-01-16", "2024-02-28", "44 days"],
            ["MEP Rough-in", "2024-03-01", "2024-03-31", "31 days"]
        ]
        schedule = ExtractedTable(
            headers=["Activity", "Start", "End", "Duration"],
            rows=schedule_rows,
            bbox=BoundingBox(50, 200, 500, 200, 0.85),
            confidence=0.85
        )
        return [schedule]

    def table_to_dataframe(self, table: ExtractedTable) -> Dict:
        """Convert a table into a DataFrame-like dictionary.

        Returns:
            ``columns`` (the headers), ``data`` (the rows as given) and
            ``records`` (one header-to-cell dict per row).
        """
        column_names = table.headers
        records = []
        for row in table.rows:
            # Pair each header with the cell in the same position.
            records.append(dict(zip(column_names, row)))

        return {
            "columns": column_names,
            "data": table.rows,
            "records": records
        }
\r
\r
class ProgressAnalyzer:
    """Estimate construction progress from images.

    NOTE: both methods are simulations returning fixed figures; a production
    version would use semantic segmentation and image comparison.
    """

    def __init__(self):
        """Start with an empty store of reference models."""
        self.reference_models = {}

    def analyze_progress(
        self,
        current_image: bytes,
        reference_image: Optional[bytes] = None,
        element_type: str = "general"
    ) -> ProgressMeasurement:
        """Return a progress measurement for ``element_type`` (simulated).

        The images are not inspected; only ``element_type`` is reflected in
        the otherwise fixed result.
        """
        simulated = ProgressMeasurement(
            element_type=element_type,
            total_count=100,
            completed_count=65,
            percent_complete=65.0,
            area_sqft=15000.0,
            volume_cuft=None
        )
        return simulated

    def compare_with_plan(
        self,
        site_photo: bytes,
        plan_image: bytes
    ) -> Dict:
        """Compare a site photo with a plan image (simulated, fixed result)."""
        comparison = {}
        comparison["match_score"] = 0.78
        comparison["deviations"] = []
        comparison["completion_estimate"] = 65.0
        comparison["areas_of_concern"] = []
        return comparison
\r
\r
class ConstructionImageAnalyzer:
    """
    Main class for construction image analysis.
    Based on DDC methodology Chapter 2.4.

    Combines the OCR engine, object detector, table extractor and progress
    analyzer behind one interface.
    """

    def __init__(self):
        """Create one instance of each underlying engine."""
        self.ocr = OCREngine()
        self.detector = ObjectDetector()
        self.table_extractor = TableExtractor()
        self.progress_analyzer = ProgressAnalyzer()

    def analyze_image(
        self,
        image_data: bytes,
        image_type: ImageType,
        image_id: str = "img_001",
        extract_types: Optional[List[ExtractionType]] = None
    ) -> ImageAnalysisResult:
        """
        Analyze a construction image.

        Only OCR_TEXT, OBJECT_DETECTION, TABLE and PROGRESS are acted on;
        other extraction types are recorded in the metadata but produce no
        output.

        Args:
            image_data: Image data as bytes
            image_type: Type of image
            image_id: Unique image identifier
            extract_types: Types of extraction to perform. ``None`` means OCR
                plus object detection; an empty list means no extraction.

        Returns:
            Complete analysis result, with ``processing_time`` in seconds
        """
        # Local import keeps this block self-contained. A monotonic clock is
        # used because wall-clock time can jump (NTP, DST) and give negative
        # or inflated durations.
        import time

        started = time.perf_counter()

        if extract_types is None:
            extract_types = [ExtractionType.OCR_TEXT, ExtractionType.OBJECT_DETECTION]

        text_regions = []
        detected_objects = []
        tables = []
        progress = None

        # OCR extraction
        if ExtractionType.OCR_TEXT in extract_types:
            text_regions = self.ocr.extract_text(image_data)

        # Object detection
        if ExtractionType.OBJECT_DETECTION in extract_types:
            detected_objects = self.detector.detect(image_data)

        # Table extraction
        if ExtractionType.TABLE in extract_types:
            tables = self.table_extractor.extract_tables(image_data)

        # Progress analysis
        if ExtractionType.PROGRESS in extract_types:
            progress = self.progress_analyzer.analyze_progress(image_data)

        processing_time = time.perf_counter() - started

        return ImageAnalysisResult(
            image_id=image_id,
            image_type=image_type,
            text_regions=text_regions,
            detected_objects=detected_objects,
            tables=tables,
            progress=progress,
            metadata={"extraction_types": [e.value for e in extract_types]},
            processing_time=processing_time
        )

    def analyze_site_photo(
        self,
        image_data: bytes,
        image_id: str = "site_001"
    ) -> Dict:
        """Analyze a site photo for progress and safety.

        Returns:
            Dict with the image id, the number of detected objects, the
            ``ProgressMeasurement`` object, the safety-compliance summary and
            the labels of detected equipment and materials.
        """
        result = self.analyze_image(
            image_data,
            ImageType.SITE_PHOTO,
            image_id,
            [ExtractionType.OBJECT_DETECTION, ExtractionType.PROGRESS]
        )

        safety = self.detector.detect_safety_compliance(image_data)

        def labels_in(category: str) -> List[str]:
            # Labels of the detected objects whose class category matches.
            return [
                obj.label
                for obj in result.detected_objects
                if obj.attributes.get("category") == category
            ]

        return {
            "image_id": result.image_id,
            "objects_detected": len(result.detected_objects),
            "progress": result.progress,
            "safety_compliance": safety,
            "equipment": labels_in("equipment"),
            "materials": labels_in("material")
        }

    def extract_drawing_data(
        self,
        image_data: bytes,
        image_id: str = "dwg_001"
    ) -> Dict:
        """Extract data from a scanned drawing.

        Returns:
            Dict with the image id, the structured title-block fields, the
            number of text regions, every table in DataFrame-like form and the
            raw text of every region.
        """
        result = self.analyze_image(
            image_data,
            ImageType.FLOOR_PLAN,
            image_id,
            [ExtractionType.OCR_TEXT, ExtractionType.TABLE]
        )

        # Extract title block info
        title_block = self.ocr.extract_structured_text(image_data)

        return {
            "image_id": result.image_id,
            "title_block": title_block,
            "text_regions": len(result.text_regions),
            "tables": [
                self.table_extractor.table_to_dataframe(t)
                for t in result.tables
            ],
            "all_text": [r.text for r in result.text_regions]
        }

    def batch_analyze(
        self,
        images: List[Tuple[bytes, ImageType, str]]
    ) -> List[ImageAnalysisResult]:
        """Analyze several ``(image_data, image_type, image_id)`` tuples in order."""
        return [
            self.analyze_image(image_data, image_type, image_id)
            for image_data, image_type, image_id in images
        ]

    def export_results(
        self,
        result: ImageAnalysisResult,
        format: str = "json"
    ) -> str:
        """Export analysis results as a string.

        Args:
            result: The analysis result to serialise.
            format: Output format; only ``"json"`` is supported.

        Returns:
            Indented JSON with counts, texts, objects, processing time, table
            contents and progress figures (``null`` when none were measured).

        Raises:
            ValueError: If ``format`` is anything other than ``"json"``.
        """
        # Reject unsupported formats before building the payload.
        if format != "json":
            raise ValueError(f"Unsupported format: {format}")

        progress = result.progress
        data = {
            "image_id": result.image_id,
            "image_type": result.image_type.value,
            "text_count": len(result.text_regions),
            "object_count": len(result.detected_objects),
            "table_count": len(result.tables),
            "texts": [
                {"text": r.text, "confidence": r.confidence}
                for r in result.text_regions
            ],
            "objects": [
                {"label": o.label, "confidence": o.confidence}
                for o in result.detected_objects
            ],
            "processing_time": result.processing_time,
            # The export used to report table_count yet drop the tables and
            # the progress figures; they are appended after the existing keys.
            "tables": [
                {"headers": t.headers, "rows": t.rows, "confidence": t.confidence}
                for t in result.tables
            ],
            "progress": None if progress is None else {
                "element_type": progress.element_type,
                "total_count": progress.total_count,
                "completed_count": progress.completed_count,
                "percent_complete": progress.percent_complete,
                "area_sqft": progress.area_sqft,
                "volume_cuft": progress.volume_cuft,
            },
        }

        return json.dumps(data, indent=2)
```\r
\r
## Common Use Cases\r
\r
### Analyze Site Photo\r
\r
```python\r
analyzer = ConstructionImageAnalyzer()

# The analyzer takes raw bytes, so read the photo in binary mode first
with open("site_photo.jpg", "rb") as photo_file:
    image_data = photo_file.read()

result = analyzer.analyze_site_photo(image_data)

# Pull out the nested values once, then report them
safety_status = result['safety_compliance']['overall_compliance']
progress_percent = result['progress'].percent_complete

print(f"Objects detected: {result['objects_detected']}")
print(f"Safety compliance: {safety_status}")
print(f"Progress: {progress_percent}%")
```\r
\r
### Extract Drawing Data\r
\r
```python\r
# Create the analyzer here so this example runs on its own
analyzer = ConstructionImageAnalyzer()

# Read the scanned drawing as raw bytes
with open("floor_plan.png", "rb") as f:
    drawing_data = f.read()

data = analyzer.extract_drawing_data(drawing_data)

# Title-block fields are optional, so use .get() rather than indexing
print(f"Drawing: {data['title_block'].get('drawing_number')}")
print(f"Project: {data['title_block'].get('project_name')}")
for table in data['tables']:
    print(f"Table with {len(table['records'])} rows")
```\r
\r
### Detect Safety Violations\r
\r
```python\r
detector = ObjectDetector()

# Read the site photo as raw bytes
with open("site_photo.jpg", "rb") as photo_file:
    image_data = photo_file.read()

safety = detector.detect_safety_compliance(image_data)

# Only list violations when the photo failed the compliance check
is_compliant = safety['overall_compliance'] != 'non-compliant'
if not is_compliant:
    for violation in safety['violations']:
        violation_type = violation['type']
        violation_count = violation['count']
        print(f"Violation: {violation_type} - Count: {violation_count}")
```\r
\r
## Quick Reference\r
\r
| Component | Purpose |\r
|-----------|---------|\r
| `ConstructionImageAnalyzer` | Main analysis engine |\r
| `OCREngine` | Text extraction |\r
| `ObjectDetector` | Object detection |\r
| `TableExtractor` | Table extraction |\r
| `ProgressAnalyzer` | Progress analysis |\r
| `ImageAnalysisResult` | Complete analysis result |\r
\r
## Resources\r
\r
- **Book**: "Data-Driven Construction" by Artem Boiko, Chapter 2.4\r
- **Website**: https://datadrivenconstruction.io\r
\r
## Next Steps\r
\r
- Use [cad-to-data](../cad-to-data/SKILL.md) for CAD/BIM extraction\r
- Use [defect-detection-ai](../../../DDC_Innovative/defect-detection-ai/SKILL.md) for defects\r
- Use [safety-compliance-checker](../../../DDC_Innovative/safety-compliance-checker/SKILL.md) for safety\r
- Make sure OpenClaw is installed (local or Docker)
- Run the install command in chat: `/install image-to-data`
- After installation, invoke the skill by name or use `/image-to-data`
- Provide required inputs per the skill's parameter spec and get structured output
What is Image To Data?
Extract data from construction images using AI Vision. Analyze site photos, scanned documents, drawings. It is an AI Agent Skill for Claude Code / OpenClaw, with 1693 downloads so far.
How do I install Image To Data?
Run "/install image-to-data" in the OpenClaw or Claude Code chat to install it in one step — no extra setup required.
Is Image To Data free?
Yes, Image To Data is completely free (open-source). You can download, install and use it at no cost.
Which platforms does Image To Data support?
Image To Data is cross-platform: it runs anywhere OpenClaw / Claude Code is available.
Who created Image To Data?
It is built and maintained by datadrivenconstruction (@datadrivenconstruction); the current version is v2.0.0.