Pydantic 是 FastAPI 的数据层基石。本文深入 Pydantic v2 的模型定义、字段验证、自定义验证器、嵌套模型、序列化控制和配置管理,帮你掌握类型安全的数据处理能力。
3.1 Pydantic 是什么
Pydantic 是一个基于 Python 类型提示的数据验证和序列化库。它的核心理念:用类型提示定义数据结构,运行时自动验证和转换。
from pydantic import BaseModel, EmailStr, ValidationError


class User(BaseModel):
    """Minimal user model: required name and email, optional age."""

    name: str
    email: EmailStr  # EmailStr needs the optional `email-validator` package
    age: int | None = None


# Automatic validation plus type coercion.
user = User(name="Alice", email="alice@example.com", age="25")
print(user.age)  # 25 (the string "25" was converted to int)
print(type(user.age))  # <class 'int'>

# Validation failure: catch the error so the rest of the script keeps running.
try:
    User(name="Bob", email="not-an-email")
except ValidationError as exc:
    print(exc)  # reports that `email` is not a valid email address
Pydantic v2 相比 v1 的主要改进:
| 维度 | v1 | v2 |
|---|---|---|
| 核心实现 | 纯 Python | Rust(pydantic-core) |
| 验证速度 | 基准 | 快 5-50 倍 |
| 序列化 | dict() / json() | model_dump() / model_dump_json() |
| 配置 | class Config | model_config |
| 验证器 | @validator | @field_validator |
| 字段元信息 | __fields__ | model_fields |
3.2 模型定义基础
3.2.1 基本模型
from datetime import datetime, timezone
from uuid import UUID, uuid4

from pydantic import BaseModel, Field


class Product(BaseModel):
    """Product record with an auto-generated id and creation timestamp."""

    id: UUID = Field(default_factory=uuid4)
    name: str
    price: float
    description: str = ""
    is_active: bool = True
    # Timezone-aware UTC timestamp; a naive datetime.now() would be ambiguous
    # once the value leaves the machine that produced it.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )


# Create an instance; id and created_at come from their default factories.
product = Product(name="Laptop", price=999.99)

# Convert to a dict.
product.model_dump()
# {'id': UUID('...'), 'name': 'Laptop', 'price': 999.99, ...}

# Convert to a JSON string.
product.model_dump_json()
# '{"id":"...","name":"Laptop","price":999.99,...}'
3.2.2 字段类型支持
Pydantic 支持绝大多数 Python 内置类型和大量常用类型(其中 EmailStr 依赖额外的 email-validator 包,可通过 pip install "pydantic[email]" 安装):
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from enum import Enum
from pathlib import Path
from uuid import UUID

from pydantic import BaseModel, EmailStr, HttpUrl, IPvAnyAddress


class Status(str, Enum):
    """String-valued enum used by the `status` field of Comprehensive."""

    ACTIVE = "active"
    INACTIVE = "inactive"


class Comprehensive(BaseModel):
    """Showcase model with one field for each commonly used field type."""

    # Primitive types
    name: str
    count: int
    ratio: float
    flag: bool

    # Exact decimal arithmetic
    price: Decimal = Decimal("0.00")

    # Dates and times
    created_at: datetime
    birth_date: date
    alarm_time: time
    duration: timedelta

    # Identifiers
    id: UUID
    email: EmailStr
    website: HttpUrl
    ip_address: IPvAnyAddress

    # Filesystem paths
    file_path: Path

    # Enumerations
    status: Status

    # Containers
    tags: list[str]
    scores: set[int]
    metadata: dict[str, str]
    matrix: list[list[int]]
3.2.3 可选字段与默认值
from datetime import datetime, timezone

from pydantic import BaseModel, Field


class Config(BaseModel):
    """Demonstrates required fields, plain defaults, nullable fields and default factories."""

    # Required field (no default).
    name: str

    # Optional fields (have a default).
    debug: bool = False
    port: int = 8000

    # Optional field that also accepts None.
    # str | None = None -> value is None when omitted
    # str = ""          -> value is the empty string when omitted
    description: str | None = None

    # Default factories build a fresh value for every instance.
    tags: list[str] = Field(default_factory=list)
    # Timezone-aware UTC timestamp (a naive datetime.now() is ambiguous).
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
3.3 字段验证
3.3.1 数值验证
from pydantic import BaseModel, Field


class OrderItem(BaseModel):
    """Order line whose numeric fields carry range constraints."""

    product_id: int = Field(gt=0)  # strictly greater than 0
    quantity: int = Field(ge=1, le=999)  # between 1 and 999 inclusive
    unit_price: float = Field(gt=0)  # strictly greater than 0
    discount: float = Field(default=0.0, ge=0.0, lt=1.0)  # 0 inclusive to 1 exclusive

    # multiple_of: the value must be a multiple of the given number.
    pack_size: int = Field(ge=1, multiple_of=6)  # must be a multiple of 6
3.3.2 字符串验证
from pydantic import BaseModel, Field


class UserProfile(BaseModel):
    """Profile whose string fields carry length and pattern constraints."""

    # 3-20 characters; letters, digits and underscore only.
    username: str = Field(
        min_length=3,
        max_length=20,
        pattern=r"^[a-zA-Z0-9_]+$",
    )

    # 1-50 characters, any content.
    display_name: str = Field(min_length=1, max_length=50)

    # May be empty; at most 500 characters.
    bio: str = Field(default="", max_length=500)

    # E.164-style phone number.
    phone: str = Field(pattern=r"^\+?[1-9]\d{1,14}$")
3.3.3 集合验证
from pydantic import BaseModel, Field


class Survey(BaseModel):
    """Survey payload demonstrating constraints on collection fields."""

    # List length must be between 1 and 10 elements.
    answers: list[int] = Field(min_length=1, max_length=10)

    # Uniqueness comes from the set type itself.
    unique_tags: set[str] = Field(default_factory=set)

    # Keys and values are checked only against their annotated types
    # (str and float); no extra constraints are declared here.
    scores: dict[str, float] = Field(default_factory=dict)
3.4 自定义验证器
3.4.1 字段验证器(field_validator)
from pydantic import BaseModel, field_validator


class User(BaseModel):
    """User model whose string fields are cleaned up by per-field validators."""

    name: str
    email: str
    username: str

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Strip the name, reject it if nothing is left, then title-case it."""
        trimmed = v.strip()
        if trimmed:
            return trimmed.title()
        raise ValueError("姓名不能为空")

    @field_validator("email")
    @classmethod
    def email_normalize(cls, v: str) -> str:
        """Lower-case the address and strip surrounding whitespace."""
        lowered = v.lower()
        return lowered.strip()

    @field_validator("username")
    @classmethod
    def username_alphanumeric(cls, v: str) -> str:
        """Accept the username only when str.isalnum() holds for it."""
        if v.isalnum():
            return v
        raise ValueError("用户名只能包含字母和数字")
3.4.2 模型验证器(model_validator)
跨字段验证,用于字段之间有依赖关系的场景。
from datetime import date

from pydantic import BaseModel, model_validator


class DateRange(BaseModel):
    """Date interval whose end must fall strictly after its start."""

    start_date: date
    end_date: date

    @model_validator(mode="after")
    def validate_date_range(self) -> "DateRange":
        """Reject ranges where the start is on or after the end."""
        if self.start_date >= self.end_date:
            raise ValueError("结束日期必须晚于开始日期")
        return self


class PasswordForm(BaseModel):
    """Password form that requires both entries to be identical."""

    password: str
    confirm_password: str

    @model_validator(mode="after")
    def passwords_match(self) -> "PasswordForm":
        """Return the form unchanged when both password fields are equal."""
        if self.password == self.confirm_password:
            return self
        raise ValueError("两次密码输入不一致")
3.4.3 验证模式
Pydantic v2 的 field_validator 支持 before、after、wrap、plain 四种验证模式,下面演示最常用的前三种(plain 会完全替代标准验证):
# mode="before" -- 在标准验证之前执行,接收原始输入
@field_validator("phone", mode="before")
@classmethod
def clean_phone(cls, v):
    """Remove spaces and hyphens from string input; pass other types through."""
    if not isinstance(v, str):
        return v
    without_spaces = v.replace(" ", "")
    return without_spaces.replace("-", "")
# mode="after" -- 在标准验证之后执行,接收已验证的值
@field_validator("email", mode="after")
@classmethod
def check_email_domain(cls, v: str) -> str:
    """Accept the address only when its domain is on the allow-list.

    Raises:
        ValueError: if the value has no "@" or its domain is not allowed.
    """
    allowed_domains = ["example.com", "company.org"]
    # rpartition never raises: a missing "@" yields an empty separator, and
    # with several "@" only the part after the last one is the domain.
    _, separator, domain = v.rpartition("@")
    # Domain names are case-insensitive, so compare in lower case.
    if not separator or domain.lower() not in allowed_domains:
        raise ValueError(f"邮箱域名必须是 {allowed_domains} 之一")
    return v
# mode="wrap" -- 包裹标准验证,可自定义前后行为
@field_validator("name", mode="wrap")
@classmethod
def wrap_name_validation(cls, v, handler):
    """Strip the raw input, run the standard validation, then title-case the result."""
    # Pre-processing: the raw input may be of any type, so only strip strings
    # and let the standard validation reject everything else properly.
    if isinstance(v, str):
        v = v.strip()
    # Run the standard validation.
    result = handler(v)
    # Post-processing on the validated value.
    return result.title()
3.5 嵌套模型与复杂结构
3.5.1 嵌套模型
from pydantic import BaseModel, Field


class Address(BaseModel):
    """Postal address; zip_code must be exactly six digits."""

    street: str
    city: str
    province: str
    zip_code: str = Field(..., pattern=r"^\d{6}$")
    country: str = "CN"


class Education(BaseModel):
    """One education record."""

    school: str
    degree: str  # bachelor / master / doctor
    year: int = Field(..., ge=1950, le=2030)


class UserProfile(BaseModel):
    """User profile composed of nested models."""

    name: str
    email: str
    address: Address  # nested model
    # List of models; a default factory instead of a literal [] keeps this in
    # line with the default_factory pattern recommended for mutable defaults.
    education: list[Education] = Field(default_factory=list)
    emergency_contact: dict[str, str] | None = None


# Usage: nested dicts are validated into Address / Education instances.
user = UserProfile(
    name="Alice",
    email="alice@example.com",
    address={
        "street": "123 Main St",
        "city": "Shanghai",
        "province": "Shanghai",
        "zip_code": "200000",
    },
    education=[
        {"school": "Fudan University", "degree": "bachelor", "year": 2018},
    ],
)
3.5.2 联合类型(Union Types)
from typing import Annotated, Literal
from typing import Union  # `X | Y` below is the Python 3.10+ spelling of Union[X, Y]

from pydantic import BaseModel, Discriminator
from pydantic import Tag  # only needed together with a callable Discriminator


# Plain union
class TextInput(BaseModel):
    """Text message body."""

    type: str = "text"
    content: str


class ImageInput(BaseModel):
    """Image message body."""

    type: str = "image"
    url: str
    alt: str = ""


class Message(BaseModel):
    """Message whose body is an undiscriminated union."""

    # Pydantic v2 validates unions in "smart" mode by default: it looks for
    # the best-matching member instead of stopping at the first one that
    # validates. Use Field(union_mode="left_to_right") for strict ordering.
    body: TextInput | ImageInput  # Python 3.10+ syntax


# Discriminated union: the `type` field selects the member directly,
# which is faster and produces clearer validation errors.
class TextInputV2(BaseModel):
    """Text body tagged with type == "text"."""

    type: Literal["text"] = "text"
    content: str


class ImageInputV2(BaseModel):
    """Image body tagged with type == "image"."""

    type: Literal["image"] = "image"
    url: str


class MessageV2(BaseModel):
    """Message whose body model is chosen by the `type` discriminator."""

    body: Annotated[
        TextInputV2 | ImageInputV2,
        Discriminator("type"),  # pick the model from the `type` field
    ]


# Usage
msg = MessageV2(body={"type": "text", "content": "Hello!"})
3.5.3 继承
from datetime import datetime  # required by UserInDB.created_at

from pydantic import BaseModel, Field


class BaseUser(BaseModel):
    """Fields shared by the user models that inherit from it."""

    name: str
    email: str
    is_active: bool = True


class CreateUserData(BaseUser):
    """Creation payload: the shared fields plus a password of at least 8 characters."""

    password: str = Field(..., min_length=8)


class UpdateUserData(BaseModel):
    """Partial-update payload; deliberately not derived from BaseUser so every field is optional."""

    name: str | None = None
    email: str | None = None


class UserInDB(BaseUser):
    """Stored user: the shared fields plus id, password hash and creation time."""

    id: int
    hashed_password: str
    created_at: datetime
3.6 序列化控制
3.6.1 字段别名
from pydantic import BaseModel, Field


class ExternalAPI(BaseModel):
    """Maps the camelCase keys of an external API onto snake_case fields."""

    user_id: int = Field(alias="userId")
    display_name: str = Field(alias="displayName")
    created_at: str = Field(alias="createdAt")


# Build the model from external data, which is keyed by alias.
data = {"userId": 1, "displayName": "Alice", "createdAt": "2026-01-01"}
user = ExternalAPI.model_validate(data)

# Serialization uses the Python field names by default.
user.model_dump()  # {"user_id": 1, "display_name": "Alice", ...}

# Pass by_alias=True to emit the aliases instead.
user.model_dump(by_alias=True)  # {"userId": 1, "displayName": "Alice", ...}
3.6.2 模型配置
from pydantic import BaseModel, ConfigDict


class User(BaseModel):
    """Model showing commonly used ConfigDict options."""

    model_config = ConfigDict(
        # Input side: accept either the field name or its alias. This does
        # not affect serialization; use model_dump(by_alias=True) for that.
        populate_by_name=True,
        from_attributes=True,  # allow building from object attributes (ORM objects)
        str_strip_whitespace=True,  # strip leading/trailing whitespace from strings
        str_min_length=0,  # minimum length applied to all string fields
        use_enum_values=True,  # store the enum's value rather than the enum member
        validate_default=True,  # run validation on default values as well
        json_schema_extra={  # extra information merged into the JSON Schema
            "examples": [{"name": "Alice", "email": "alice@example.com"}]
        },
    )

    name: str
    email: str
3.6.3 排除字段
from pydantic import BaseModel


class User(BaseModel):
    """User record that includes a field which must not be exposed."""

    id: int
    name: str
    email: str
    password_hash: str


user = User(
    id=1,
    name="Alice",
    email="alice@example.com",
    password_hash="xxx",
)

# Leave specific fields out of the dump.
user.model_dump(exclude={"password_hash"})
# {"id": 1, "name": "Alice", "email": "alice@example.com"}

# Keep only specific fields.
user.model_dump(include={"id", "name"})
# {"id": 1, "name": "Alice"}


# Dropping unset fields (PATCH scenario).
class UserUpdate(BaseModel):
    """Partial-update payload in which every field is optional."""

    name: str | None = None
    email: str | None = None


update = UserUpdate(name="Bob")
update.model_dump(exclude_unset=True)
# {"name": "Bob"} -- email was never set, so it is omitted
3.6.4 自定义序列化器
from datetime import datetime

from pydantic import BaseModel, field_serializer


class Event(BaseModel):
    """Event whose start time and price are rendered as formatted strings."""

    name: str
    start_time: datetime
    price: float

    @field_serializer("start_time")
    @classmethod
    def serialize_datetime(cls, v: datetime) -> str:
        """Render the datetime as "YYYY-MM-DD HH:MM:SS"."""
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        return v.strftime(timestamp_format)

    @field_serializer("price")
    @classmethod
    def serialize_price(cls, v: float) -> str:
        """Render the price with a yen/yuan sign and two decimal places."""
        formatted = f"¥{v:.2f}"
        return formatted


event = Event(
    name="Conference",
    start_time=datetime(2026, 6, 1, 9, 0),
    price=199.5,
)
event.model_dump()
# {"name": "Conference", "start_time": "2026-06-01 09:00:00", "price": "¥199.50"}
3.7 配置管理(Settings)
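pydantic-settings 提供的 BaseSettings 是管理应用配置的最佳方式,支持环境变量、.env 文件。注意:在 Pydantic v2 中 BaseSettings 已拆分到独立的 pydantic-settings 包,需要单独安装(pip install pydantic-settings)。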
3.7.1 基础配置
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings read from environment variables and the .env file."""

    # Application
    APP_NAME: str = "My API"
    DEBUG: bool = False
    VERSION: str = "0.1.0"

    # Server
    HOST: str = "0.0.0.0"
    PORT: int = 8000

    # Database
    DATABASE_URL: str = "sqlite+aiosqlite:///./app.db"

    # Security
    # NOTE: placeholder default only; always override SECRET_KEY through the
    # environment in production.
    SECRET_KEY: str = "change-me-in-production"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 30
    ALGORITHM: str = "HS256"

    # CORS
    CORS_ORIGINS: list[str] = ["http://localhost:3000"]

    # SettingsConfigDict is the typed form of the settings config, so
    # misspelled keys are caught by type checkers.
    model_config = SettingsConfigDict(
        env_file=".env",  # also read values from the .env file
        env_file_encoding="utf-8",
        case_sensitive=False,  # environment variable names are case-insensitive
    )


# Usage
settings = Settings()
print(settings.APP_NAME)
print(settings.DATABASE_URL)
3.7.2 .env 文件
# .env
APP_NAME=Production API
DEBUG=false
DATABASE_URL=postgresql+asyncpg://user:pass@localhost:5432/mydb
SECRET_KEY=your-super-secret-key-here
CORS_ORIGINS=["https://example.com","https://admin.example.com"]
3.7.3 嵌套配置
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class DatabaseConfig(BaseModel):
    """Database connection options."""

    url: str = "sqlite+aiosqlite:///./app.db"
    pool_size: int = 5
    max_overflow: int = 10
    echo: bool = False


class RedisConfig(BaseModel):
    """Redis connection options."""

    url: str = "redis://localhost:6379/0"
    max_connections: int = 10


class Settings(BaseSettings):
    """Top-level settings that group related options into nested models."""

    app_name: str = "My API"
    debug: bool = False
    database: DatabaseConfig = DatabaseConfig()
    redis: RedisConfig = RedisConfig()

    # SettingsConfigDict is the typed form of the settings config, so
    # misspelled keys are caught by type checkers.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_nested_delimiter="__",  # DATABASE__URL -> database.url
    )


# Environment variables:
# DATABASE__URL=postgresql+asyncpg://...
# DATABASE__POOL_SIZE=10
# REDIS__URL=redis://...
3.8 FastAPI 中的 Pydantic 实践
3.8.1 请求/响应模型分离模式
from datetime import datetime

from pydantic import BaseModel, EmailStr, Field


class UserBase(BaseModel):
    """Fields shared by the create, storage and response models."""

    name: str = Field(min_length=2, max_length=50)
    email: EmailStr


class UserCreate(UserBase):
    """Creation request: the shared fields plus a password."""

    password: str = Field(min_length=8)


class UserUpdate(BaseModel):
    """Update request in which every field is optional."""

    name: str | None = Field(default=None, min_length=2, max_length=50)
    email: EmailStr | None = None


class UserInDB(UserBase):
    """Database-side representation, including the password hash."""

    id: int
    hashed_password: str
    is_active: bool = True
    created_at: datetime
    updated_at: datetime | None = None


class UserResponse(UserBase):
    """API response; carries no password-related field."""

    id: int
    is_active: bool
    created_at: datetime
    updated_at: datetime | None = None

    model_config = {"from_attributes": True}


class UserSummary(BaseModel):
    """Trimmed-down representation for list responses."""

    id: int
    name: str
    email: str

    model_config = {"from_attributes": True}
3.8.2 通用响应包装
from typing import Generic, TypeVar

from pydantic import BaseModel, Field

T = TypeVar("T")


class ApiResponse(BaseModel, Generic[T]):
    """Generic API response envelope."""

    code: int = 200
    message: str = "success"
    data: T | None = None


class PaginatedData(BaseModel, Generic[T]):
    """One page of items plus paging metadata."""

    items: list[T]
    total: int
    page: int
    page_size: int
    total_pages: int


class PaginatedResponse(BaseModel, Generic[T]):
    """Response envelope whose payload is a page of items."""

    code: int = 200
    message: str = "success"
    data: PaginatedData[T] | None = None


# Usage (`app` and `UserSummary` are defined outside this snippet).
@app.get("/users/", response_model=PaginatedResponse[UserSummary])
async def list_users():
    ...
3.9 小结
| 知识点 | 关键 API |
|---|---|
| 模型定义 | class MyModel(BaseModel) |
| 字段验证 | Field(gt=0, min_length=1, pattern=r"...") |
| 自定义验证 | @field_validator / @model_validator |
| 模型配置 | model_config = ConfigDict(...) |
| 序列化控制 | model_dump(exclude=..., by_alias=True) |
| 配置管理 | BaseSettings + .env |
| 泛型模型 | BaseModel, Generic[T] |
| 鉴别联合 | Discriminator("type") |

