Chapter 4

Data Types Reference

MySQL Data Types Complete Reference

Understanding MySQL data types is fundamental to database design. Choosing the right data type affects storage, performance, memory usage, and query execution. This comprehensive guide covers all MySQL data types with examples, performance considerations, and best practices.

1. Numeric Data Types

1.1 Integer Types

Integer types store whole numbers without decimal points.

Type Storage (bytes) Signed Range Unsigned Range Use Case
TINYINT 1 -128 to 127 0 to 255 Boolean flags, status codes
SMALLINT 2 -32,768 to 32,767 0 to 65,535 Small quantities, ages
MEDIUMINT 3 -8,388,608 to 8,388,607 0 to 16,777,215 File sizes, pixel counts
INT/INTEGER 4 -2.1B to 2.1B 0 to 4.3B Standard integer type, IDs
BIGINT 8 -9.2E18 to 9.2E18 0 to 1.8E19 Large IDs, timestamps, counts
      **Examples:**

-- Define table with integer types
CREATE TABLE user_metrics (
  id BIGINT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
  user_id INT UNSIGNED NOT NULL,
  age TINYINT UNSIGNED,
  status TINYINT DEFAULT 1,
  post_count INT UNSIGNED DEFAULT 0,
  total_views BIGINT UNSIGNED DEFAULT 0,
  rank_position SMALLINT UNSIGNED,
  file_size_bytes MEDIUMINT UNSIGNED
);

-- Insert examples
INSERT INTO user_metrics (user_id, age, post_count)
VALUES (1, 25, 100);

-- Query with range check
SELECT * FROM user_metrics
WHERE age BETWEEN 18 AND 65 AND post_count > 10;

-- Aggregate with numeric types
SELECT
  age,
  COUNT(*) as count,
  AVG(post_count) as avg_posts,
  MAX(total_views) as max_views
FROM user_metrics
GROUP BY age
ORDER BY avg_posts DESC;

      **Performance Considerations:**

1.2 Decimal/Fixed-Point Types

For exact decimal arithmetic without rounding errors.

      **DECIMAL vs FLOAT/DOUBLE:**

-- DECIMAL(precision, scale)
-- Stores exact values, important for financial data
CREATE TABLE products (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  price DECIMAL(10, 2),           -- 8 digits before decimal, 2 after
  tax_rate DECIMAL(5, 4),         -- 1 digit before decimal, 4 after (e.g., 0.1999)
  cost DECIMAL(8, 2)
);

-- Insert examples
INSERT INTO products VALUES
  (1, 'Laptop', 999.99, 0.1999, 500.00),
  (2, 'Mouse', 25.50, 0.1999, 5.00);

-- Arithmetic precision
SELECT
  id,
  name,
  price,
  tax_rate,
  ROUND(price * tax_rate, 2) as tax,
  ROUND(price * (1 + tax_rate), 2) as total
FROM products;

-- Comparison: DECIMAL vs FLOAT
CREATE TABLE precision_test (
  id INT,
  decimal_val DECIMAL(10, 8),
  float_val FLOAT,
  double_val DOUBLE
);

INSERT INTO precision_test VALUES (1, 0.12345678, 0.12345678, 0.12345678);

SELECT
  id,
  decimal_val,
  float_val,
  double_val,
  decimal_val = 0.12345678 as decimal_match,
  float_val = 0.12345678 as float_match,
  double_val = 0.12345678 as double_match
FROM precision_test;
-- Output: decimal_match=true, float_match=false, double_match=false

      **Guidelines:**

1.3 Floating-Point Types


-- FLOAT (4 bytes, ~7 significant digits)
-- DOUBLE (8 bytes, ~15 significant digits)
CREATE TABLE scientific_data (
  id INT PRIMARY KEY,
  sensor_reading FLOAT,
  temperature DOUBLE,
  confidence FLOAT
);

-- Insert with scientific notation
INSERT INTO scientific_data VALUES
  (1, 1.23456789, 1.234567890123456, 0.95),
  (2, 9.87654321, 9.876543210123456, 0.87);

-- Note: Floating point has rounding errors
SELECT
  sensor_reading,
  sensor_reading * 10 as multiplied,
  ROUND(temperature, 5) as rounded_temp
FROM scientific_data;

-- Avoid equality comparison with floats
SELECT * FROM scientific_data
WHERE ABS(confidence - 0.95) < 0.001;  -- Use margin of error

2. String Data Types

2.1 VARCHAR vs CHAR

VARCHAR stores variable-length strings; CHAR stores fixed-length strings.

      **Performance Comparison:**

-- CHAR(50) — Always allocates 50 bytes (padded with spaces)
-- VARCHAR(50) — Allocates only needed space + 1-2 length bytes
CREATE TABLE user_profiles (
  id INT PRIMARY KEY,
  username VARCHAR(30),           -- Optimal for names/usernames
  gender CHAR(1),                 -- Optimal for fixed single char
  country CHAR(2),                -- Optimal for ISO country codes
  bio VARCHAR(500),               -- Variable length content
  email VARCHAR(100),
  phone CHAR(15)                  -- International phone format
);

-- Storage example:
-- username 'alice' (5 chars): stores as 5 + 2 bytes = 7 bytes (VARCHAR)
-- gender 'M': stores as 1 byte + padding (CHAR) = 1 byte
-- bio 'Hello': stores as 5 + 2 bytes = 7 bytes (VARCHAR)

INSERT INTO user_profiles VALUES
  (1, 'alice', 'F', 'US', 'Software engineer from Silicon Valley', '[email protected]', '+1-408-555-0123'),
  (2, 'bob123', 'M', 'UK', 'Data scientist', '[email protected]', '+44-20-7946-0958');

-- Query with string functions
SELECT
  id,
  username,
  UPPER(username) as username_upper,
  LENGTH(username) as username_length,
  SUBSTRING(email, 1, POSITION('@' IN email) - 1) as email_user
FROM user_profiles;

      **When to use each:**

2.2 Text Types


-- TINYTEXT, TEXT, MEDIUMTEXT, LONGTEXT
-- Stored separately from main table (off-page storage)
CREATE TABLE articles (
  id INT PRIMARY KEY,
  title VARCHAR(255) NOT NULL,
  summary VARCHAR(1000),
  content LONGTEXT,               -- For very long content (4GB max)
  tags TEXT,                      -- For comma-separated or JSON
  metadata MEDIUMTEXT
);

-- Storage by type:
-- TINYTEXT: 2^8 - 1 = 255 bytes
-- TEXT: 2^16 - 1 = 64KB
-- MEDIUMTEXT: 2^24 - 1 = 16MB
-- LONGTEXT: 2^32 - 1 = 4GB

INSERT INTO articles VALUES
  (1,
   'Database Performance',
   'Tips for optimizing MySQL performance',
   'Long article content here...',
   'mysql,performance,optimization',
   '{"category":"database","difficulty":"intermediate"}');

-- Search in TEXT fields
SELECT id, title
FROM articles
WHERE content LIKE '%index%'
OR FIND_IN_SET('performance', tags) > 0;

-- Extract JSON from TEXT
SELECT
  id,
  title,
  JSON_EXTRACT(metadata, '$.category') as category,
  JSON_EXTRACT(metadata, '$.difficulty') as difficulty
FROM articles;

2.3 Binary String Types


-- BINARY, VARBINARY, BLOB
-- For storing binary data (images, files, encrypted values)
CREATE TABLE file_uploads (
  id INT PRIMARY KEY,
  filename VARCHAR(255),
  file_hash BINARY(32),           -- SHA-256 hash (256 bits = 32 bytes)
  thumbnail VARBINARY(100000),    -- Compressed image
  file_data LONGBLOB,             -- Large binary data
  uploaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Insert binary data
INSERT INTO file_uploads (filename, file_hash, thumbnail)
VALUES
  ('document.pdf', 0xA1B2C3D4E5F6..., 0xFFD8FFE000...),
  ('image.jpg', 0x1A2B3C4D5E6F..., 0x89504E470D...);

-- Use hexadecimal for reading binary
SELECT
  filename,
  HEX(file_hash) as hash_hex,
  LENGTH(thumbnail) as thumbnail_size
FROM file_uploads;

-- Compare binary values
SELECT * FROM file_uploads
WHERE file_hash = UNHEX('A1B2C3D4E5F6...');

3. Date and Time Types

3.1 DATE, TIME, DATETIME


CREATE TABLE events (
  id INT PRIMARY KEY,
  event_name VARCHAR(100),
  event_date DATE,                -- Date only (YYYY-MM-DD)
  event_time TIME,                -- Time only (HH:MM:SS)
  event_datetime DATETIME,        -- Date + time (YYYY-MM-DD HH:MM:SS)
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);

-- Insert with various formats
INSERT INTO events VALUES
  (1, 'Conference', '2024-06-15', '09:00:00', '2024-06-15 09:00:00', NOW(), NOW()),
  (2, 'Meeting', '2024-06-16', '14:30:00', '2024-06-16 14:30:00', NOW(), NOW());

-- Date arithmetic
SELECT
  id,
  event_name,
  event_date,
  DATE_ADD(event_date, INTERVAL 7 DAY) as next_week,
  DATE_SUB(event_date, INTERVAL 1 MONTH) as one_month_ago,
  DATEDIFF(event_date, CURDATE()) as days_until_event,
  DAY(event_date) as day_of_month,
  MONTH(event_date) as month,
  YEAR(event_date) as year,
  QUARTER(event_date) as quarter,
  DAYNAME(event_date) as day_name,
  MONTHNAME(event_date) as month_name
FROM events;

-- Time calculations
SELECT
  event_time,
  TIME_ADD(event_time, INTERVAL 30 MINUTE) as thirty_mins_later,
  SEC_TO_TIME(TIME_TO_SEC(event_time) + 3600) as one_hour_later,
  TIMEDIFF('17:00:00', event_time) as time_difference
FROM events;

-- Datetime range queries
SELECT * FROM events
WHERE event_datetime BETWEEN '2024-06-01' AND '2024-06-30'
AND HOUR(event_datetime) BETWEEN 8 AND 18;  -- Business hours

      **Type Comparison:**
Type Storage Format Range
DATE 3 bytes YYYY-MM-DD '1000-01-01' to '9999-12-31'
TIME 3 bytes HH:MM:SS '-838:59:59' to '838:59:59'
DATETIME 8 bytes YYYY-MM-DD HH:MM:SS '1000-01-01 00:00:00' to '9999-12-31 23:59:59'
TIMESTAMP 4 bytes YYYY-MM-DD HH:MM:SS '1970-01-01' to '2038-01-19' (Y2038 problem)
YEAR 1 byte YYYY 1901 to 2155

3.2 TIMESTAMP Special Features


-- TIMESTAMP: Stores as UTC internally, displays in timezone
-- Automatically updated on modification
CREATE TABLE user_activity (
  id INT PRIMARY KEY,
  user_id INT,
  action VARCHAR(50),
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  last_login TIMESTAMP DEFAULT '0000-00-00 00:00:00'
);

INSERT INTO user_activity (user_id, action) VALUES (1, 'login');
INSERT INTO user_activity (user_id, action) VALUES (2, 'post_created');

-- Timestamp automatically updated
UPDATE user_activity SET action = 'updated' WHERE id = 1;
-- updated_at changes to current time automatically

SELECT * FROM user_activity;

-- Timezone consideration
SET time_zone = '+05:00';
SELECT created_at, @@session.time_zone FROM user_activity;

-- DATETIME doesn't change with timezone
-- Use TIMESTAMP for automatic current_timestamp tracking
-- Use DATETIME for fixed values

4. Special Data Types

4.1 JSON Type


-- JSON: Stores JSON documents with validation and functions
CREATE TABLE products (
  id INT PRIMARY KEY,
  name VARCHAR(100),
  attributes JSON,                -- Validated JSON storage
  metadata JSON
);

-- Insert JSON data
INSERT INTO products VALUES
  (1, 'Laptop',
   '{"brand":"Dell","cpu":"Intel i7","ram":"16GB","storage":"512GB SSD"}',
   '{"weight":1.8,"color":"Silver","warranty_months":24}'),
  (2, 'Monitor',
   '{"brand":"LG","resolution":"4K","refresh_rate":"60Hz","size":"27\""}',
   '{"power_consumption":"50W","warranty_months":36}');

-- Query JSON fields
SELECT
  id,
  name,
  JSON_EXTRACT(attributes, '$.brand') as brand,
  JSON_EXTRACT(attributes, '$.cpu') as cpu,
  JSON_EXTRACT(attributes, '$.ram') as ram,
  JSON_UNQUOTE(JSON_EXTRACT(attributes, '$.cpu')) as cpu_unquoted
FROM products;

-- JSON search and comparison
SELECT * FROM products
WHERE JSON_EXTRACT(attributes, '$.ram') = '"16GB"';

-- Update JSON fields
UPDATE products
SET attributes = JSON_SET(attributes, '$.warranty', '2 years')
WHERE id = 1;

-- Extract array elements
INSERT INTO products VALUES
  (3, 'Phone',
   '{"brand":"Apple","models":["iPhone 14","iPhone 14 Pro","iPhone 14 Max"]}',
   '{}');

SELECT
  id,
  JSON_ARRAY_LENGTH(JSON_EXTRACT(attributes, '$.models')) as model_count
FROM products WHERE id = 3;

-- JSON aggregation
SELECT
  JSON_OBJECTAGG(name, JSON_EXTRACT(attributes, '$.brand')) as products_by_brand
FROM products;

4.2 ENUM Type


-- ENUM: Efficient storage of predefined values
CREATE TABLE orders (
  id INT PRIMARY KEY,
  order_number VARCHAR(20),
  status ENUM('pending','processing','shipped','delivered','cancelled'),
  priority ENUM('low','medium','high','urgent'),
  payment_method ENUM('credit_card','debit_card','paypal','bank_transfer'),
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Insert with ENUM values
INSERT INTO orders VALUES
  (1, 'ORD-001', 'pending', 'high', 'credit_card', NOW()),
  (2, 'ORD-002', 'shipped', 'medium', 'paypal', NOW()),
  (3, 'ORD-003', 'delivered', 'low', 'bank_transfer', NOW());

-- ENUM internally stored as small integers (1, 2, 3...)
-- Storage: 1 byte for up to 255 values, 2 bytes for up to 65535 values

SELECT * FROM orders WHERE status = 'shipped';

-- Get enum values
SELECT COLUMN_TYPE FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'orders' AND COLUMN_NAME = 'status';

-- Enumerate values (1='pending', 2='processing', 3='shipped'...)
SELECT
  id,
  status,
  CAST(status AS UNSIGNED) as status_code
FROM orders;

-- Change ENUM values (requires ALTER TABLE)
ALTER TABLE orders MODIFY status ENUM('pending','processing','shipped','delivered','cancelled','refunded');

      **When to use ENUM:**

4.3 SET Type


-- SET: Stores zero or more values from a predefined list
CREATE TABLE user_permissions (
  id INT PRIMARY KEY,
  username VARCHAR(50),
  permissions SET('read','write','delete','admin','user_manage')
);

-- Insert with multiple values
INSERT INTO user_permissions VALUES
  (1, 'alice', 'read,write'),
  (2, 'bob', 'read,write,delete,admin'),
  (3, 'charlie', 'read'),
  (4, 'diana', '');  -- No permissions

-- Query with set membership
SELECT * FROM user_permissions
WHERE FIND_IN_SET('admin', permissions) > 0;

-- Add permission
UPDATE user_permissions
SET permissions = CONCAT(permissions, IF(permissions='','',',' ), 'write')
WHERE username = 'charlie';

-- Or use SET operations
UPDATE user_permissions
SET permissions = CONCAT_WS(',', permissions, 'write')
WHERE username = 'charlie';

-- Remove permission
UPDATE user_permissions
SET permissions = TRIM(BOTH ',' FROM REPLACE(CONCAT(',',permissions,','), ',write,', ','))
WHERE username = 'bob' AND FIND_IN_SET('write', permissions) > 0;

-- Get all permissions as array
SELECT
  username,
  permissions,
  IF(FIND_IN_SET('read', permissions) > 0, 'YES', 'NO') as can_read,
  IF(FIND_IN_SET('write', permissions) > 0, 'YES', 'NO') as can_write,
  IF(FIND_IN_SET('delete', permissions) > 0, 'YES', 'NO') as can_delete,
  IF(FIND_IN_SET('admin', permissions) > 0, 'YES', 'NO') as is_admin
FROM user_permissions;

5. Data Type Selection Guidelines

Purpose Recommended Type Why?
User ID / Primary Key BIGINT UNSIGNED AUTO_INCREMENT Supports distributed systems, future scaling
Foreign Key (references INT) INT UNSIGNED Match referenced column type exactly
Email Address VARCHAR(255) Variable length, RFC 5321 max is 254 chars
URL VARCHAR(2083) Most browsers support up to 2083 chars
Price / Money DECIMAL(19,2) Exact calculation, no floating point errors
Percentage/Ratio DECIMAL(5,4) 0.9999 (99.99%) = 4 decimal places
Boolean Flag TINYINT(1) or BOOLEAN 1 byte storage, TRUE/FALSE alias to 1/0
Status/Category ENUM or VARCHAR ENUM if fixed set, VARCHAR if dynamic
Country Code CHAR(2) Fixed 2-char ISO code
Timestamp TIMESTAMP or DATETIME TIMESTAMP for auto-update, DATETIME for fixed
Phone Number VARCHAR(20) Variable lengths, special characters
IP Address INT UNSIGNED or VARCHAR(15) INT for v4 (4 bytes), VARCHAR for v6
Long Text Content LONGTEXT or BLOB Stored off-page, efficient for large data
Structured Data JSON Native JSON support with functions

6. Performance Optimization Tips

6.1 Index Considerations


-- Indexes on numeric columns are faster than string columns
CREATE TABLE users (
  id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT,
  email VARCHAR(255) UNIQUE,      -- Can be indexed
  age TINYINT UNSIGNED,           -- Small size helps with caching
  user_type ENUM('free','premium','admin') -- Very efficient for filtering
);

-- Index on INT is 4 bytes, on VARCHAR(255) is variable
CREATE INDEX idx_age ON users(age);      -- Very small index
CREATE INDEX idx_email ON users(email);  -- Much larger index

-- Composite index strategy
CREATE INDEX idx_type_age ON users(user_type, age);

-- Query using indexes efficiently
SELECT * FROM users WHERE user_type = 'premium' AND age > 18;  -- Uses composite index

-- NULL handling
CREATE TABLE orders (
  id INT PRIMARY KEY,
  user_id INT,
  deleted_at DATETIME,  -- NULL if not deleted
  cancelled_at DATETIME  -- NULL if not cancelled
);

-- Queries using indexes with NULL
SELECT * FROM orders WHERE deleted_at IS NULL;
SELECT * FROM orders WHERE cancelled_at IS NOT NULL;

6.2 Storage Efficiency


-- Good: Optimized types reduce storage and improve performance
CREATE TABLE page_visits (
  id BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT,
  page_id INT UNSIGNED NOT NULL,
  user_id INT UNSIGNED,
  visit_date DATE NOT NULL,
  visit_time TIME,
  referer VARCHAR(2083),
  is_bot TINYINT(1) DEFAULT 0,  -- 1 byte for boolean
  session_id CHAR(32)             -- Fixed-length hash
) ENGINE=InnoDB ROW_FORMAT=COMPACT;

-- Check table size
SELECT
  TABLE_NAME,
  ROUND((DATA_LENGTH + INDEX_LENGTH) / 1024 / 1024, 2) as size_mb
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = 'myapp';

-- Analyze storage per column
SELECT
  SUM(STAT_VALUE) as total_rows,
  AVG(STAT_VALUE) as avg_value
FROM mysql.innodb_table_stats
WHERE TABLE_NAME = 'page_visits';

7. Common Mistakes and Solutions

Mistake 1: Using VARCHAR(255) for everything


-- Bad: Wastes space for short strings
CREATE TABLE users (
  id INT,
  title VARCHAR(255),       -- Usually <50 chars
  status VARCHAR(255),      -- Usually <20 chars
  name VARCHAR(255)         -- Usually <100 chars
);

-- Better: Use appropriate sizes
CREATE TABLE users (
  id INT,
  title VARCHAR(50),
  status VARCHAR(20),
  name VARCHAR(100)
);

Mistake 2: Using FLOAT for currency


-- Bad: Floating point rounding errors
CREATE TABLE transactions (
  amount FLOAT  -- WRONG! Loses precision
);

INSERT INTO transactions VALUES (0.1), (0.2), (0.3);
SELECT SUM(amount) FROM transactions;  -- May show 0.5999999... instead of 0.6

-- Better: Use DECIMAL
CREATE TABLE transactions (
  amount DECIMAL(10, 2)  -- Exact decimal storage
);

Mistake 3: Choosing wrong text type


-- Bad: Using TEXT for short values
CREATE TABLE articles (
  id INT,
  content TEXT  -- Overkill for short content
);

-- Better: Use VARCHAR for most cases
CREATE TABLE articles (
  id INT,
  title VARCHAR(200),
  excerpt VARCHAR(500),      -- Short description
  content LONGTEXT           -- Only for very long content
);

8. FAQ and Best Practices

Q: Should I use TIMESTAMP or DATETIME?

A: Use TIMESTAMP when you want automatic tracking:


-- TIMESTAMP: auto-updates, stores UTC, uses 4 bytes
CREATE TABLE audit_log (
  id INT,
  action VARCHAR(50),
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);

-- DATETIME: fixed values, uses 8 bytes, stores as-is
CREATE TABLE events (
  event_date DATETIME,  -- Doesn't auto-update
  scheduled_for DATETIME
);

Q: Is JSON type worth it vs storing as VARCHAR?

A: Yes, if you query JSON structure frequently:


-- JSON type benefits:
-- 1. Validation: Invalid JSON rejected
SELECT JSON_EXTRACT(data, '$.name') -- Works only if valid JSON

-- 2. Functions: JSON-specific operations
SELECT JSON_KEYS(data) as all_keys FROM table;

-- 3. Index support (MySQL 5.7.9+)
CREATE INDEX idx_brand ON products((JSON_EXTRACT(attrs, '$.brand')));

-- For simple key-value data you'll always query fully, TEXT is fine
-- For complex nested structures with partial queries, use JSON

Q: What's the difference between INT and INTEGER?

A: No difference. INTEGER is an alias for INT.

Q: Can I store very large numbers?

A: Use DECIMAL for arbitrary precision:


-- BIGINT maxes out at 9,223,372,036,854,775,807
-- For larger numbers, use DECIMAL or VARCHAR
CREATE TABLE high_precision (
  big_number DECIMAL(65, 0),  -- 65 digits, no decimals
  scientific VARCHAR(100)      -- String representation
);

Conclusion

Choosing the right MySQL data types is crucial for optimal database performance. Remember these key principles:

Rate this chapter
4.5  / 5  (88 ratings)

💬 Comments