Python案例如何实现关联查询?

wen python案例 28

本文目录导读:

Python案例如何实现关联查询?

  1. 使用SQL数据库(SQLite/MySQL/PostgreSQL)
  2. 使用Pandas处理数据关联
  3. 使用字典和列表手动关联
  4. 使用itertools实现复杂关联
  5. 使用MongoDB进行文档关联
  6. 性能优化建议
  7. 完整案例:电商订单系统关联查询

在Python中实现关联查询通常涉及数据库操作或数据处理,以下是几种常见场景的实现方法:

使用SQL数据库(SQLite/MySQL/PostgreSQL)

使用SQLAlchemy ORM

from sqlalchemy import create_engine, Column, Integer, String, ForeignKey
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
# 定义模型
Base = declarative_base()
class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    posts = relationship("Post", back_populates="user")
class Post(Base):
    __tablename__ = 'posts'
    id = Column(Integer, primary_key=True)= Column(String)
    user_id = Column(Integer, ForeignKey('users.id'))
    user = relationship("User", back_populates="posts")
# 创建连接和会话
engine = create_engine('sqlite:///example.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
# 关联查询
results = session.query(User).join(User.posts).filter(Post.title.like('%Python%')).all()
for user in results:
    print(f"User: {user.name}")
    for post in user.posts:
        print(f"  Post: {post.title}")

使用PyMySQL直接查询

import pymysql
connection = pymysql.connect(
    host='localhost',
    user='root',
    password='password',
    database='mydb'
)
try:
    with connection.cursor() as cursor:
        # INNER JOIN
        sql = """
        SELECT u.name, o.order_date, o.total
        FROM users u
        INNER JOIN orders o ON u.id = o.user_id
        WHERE u.name = %s
        """
        cursor.execute(sql, ('张三',))
        results = cursor.fetchall()
        for row in results:
            print(f"用户: {row[0]}, 订单日期: {row[1]}, 金额: {row[2]}")
finally:
    connection.close()

使用Pandas处理数据关联

DataFrame合并操作

import pandas as pd
# 创建示例数据
users_df = pd.DataFrame({
    'user_id': [1, 2, 3, 4],
    'name': ['张三', '李四', '王五', '赵六'],
    'age': [25, 30, 28, 35]
})
orders_df = pd.DataFrame({
    'order_id': [101, 102, 103, 104],
    'user_id': [1, 2, 2, 3],
    'product': ['手机', '电脑', '平板', '耳机'],
    'amount': [5000, 8000, 3000, 2000]
})
# INNER JOIN
inner_join = pd.merge(users_df, orders_df, on='user_id', how='inner')
print("内连接结果:")
print(inner_join)
# LEFT JOIN
left_join = pd.merge(users_df, orders_df, on='user_id', how='left')
print("\n左连接结果:")
print(left_join)
# 多列关联
another_df = pd.DataFrame({
    'user_id': [1, 2],
    'product': ['手机', '电脑'],
    'quantity': [2, 1]
})
multi_join = pd.merge(users_df, another_df, on=['user_id', 'product'], how='inner')

使用字典和列表手动关联

# 模拟两个数据集
users = [
    {'id': 1, 'name': '张三', 'dept_id': 101},
    {'id': 2, 'name': '李四', 'dept_id': 102},
    {'id': 3, 'name': '王五', 'dept_id': 101}
]
departments = [
    {'id': 101, 'name': '技术部'},
    {'id': 102, 'name': '市场部'}
]
# 创建部门索引
dept_index = {d['id']: d for d in departments}
# 关联查询
results = []
for user in users:
    dept = dept_index.get(user['dept_id'])
    if dept:  # 相当于INNER JOIN
        results.append({
            **user,
            'dept_name': dept['name']
        })
print("手动关联结果:")
for r in results:
    print(f"用户: {r['name']}, 部门: {r['dept_name']}")

使用itertools实现复杂关联

from itertools import product
# 三个数据集
students = ['张三', '李四', '王五']
courses = ['数学', '英语', '编程']
scores = {
    ('张三', '数学'): 90,
    ('张三', '英语'): 85,
    ('李四', '数学'): 88,
    ('王五', '编程'): 92
}
# 实现类似LEFT JOIN
for student, course in product(students, courses):
    score = scores.get((student, course), None)
    print(f"{student} - {course}: {score if score else '未选课'}")

使用MongoDB进行文档关联

from pymongo import MongoClient
client = MongoClient('localhost', 27017)
db = client.mydb
# 插入关联数据
db.users.insert_many([
    {'_id': 1, 'name': '张三', 'email': 'zhang@example.com'},
    {'_id': 2, 'name': '李四', 'email': 'li@example.com'}
])
db.posts.insert_many([
    {'title': 'Python教程', 'author_id': 1},
    {'title': '数据库入门', 'author_id': 2}
])
# 使用$lookup进行关联查询
pipeline = [
    {
        '$lookup': {
            'from': 'users',
            'localField': 'author_id',
            'foreignField': '_id',
            'as': 'author_info'
        }
    },
    {'$unwind': '$author_info'}
]
results = list(db.posts.aggregate(pipeline))
for post in results:
    print(f"文章: {post['title']}, 作者: {post['author_info']['name']}")

性能优化建议

# 1. 使用索引加速关联
CREATE INDEX idx_user_id ON orders(user_id);
# 2. 批量查询避免N+1问题
from sqlalchemy.orm import joinedload
# 预加载关联对象
users = session.query(User).options(joinedload(User.posts)).all()
# 3. 使用子查询优化
subquery = session.query(Post.user_id, func.count(Post.id).label('post_count')).group_by(Post.user_id).subquery()
results = session.query(User, subquery.c.post_count).outerjoin(subquery, User.id == subquery.c.user_id).all()

完整案例:电商订单系统关联查询

# 创建增强版示例
import pandas as pd
from datetime import datetime
# 用户数据
users = pd.DataFrame({
    'user_id': [1, 2, 3],
    'name': ['张三', '李四', '王五'],
    'city': ['北京', '上海', '广州']
})
# 订单数据
orders = pd.DataFrame({
    'order_id': [101, 102, 103, 104],
    'user_id': [1, 2, 2, 3],
    'product': ['手机', '电脑', '平板', '耳机'],
    'amount': [5000, 8000, 3000, 2000],
    'order_date': pd.to_datetime(['2023-01-01', '2023-02-15', '2023-03-10', '2023-04-20'])
})
# 复杂关联查询
result = (users
    .merge(orders, on='user_id', how='inner')
    .groupby(['name', 'city'])
    .agg(
        total_orders=('order_id', 'count'),
        total_amount=('amount', 'sum'),
        avg_amount=('amount', 'mean')
    )
    .reset_index()
    .sort_values('total_amount', ascending=False)
)
print("用户订单统计:")
print(result)

选择哪种方法取决于你的具体需求:

  • 数据库操作:使用ORM(SQLAlchemy)或原生SQL
  • 数据分析:使用Pandas的merge功能
  • 小型数据:手动使用字典和列表
  • NoSQL数据库:使用聚合管道进行关联

建议根据项目规模和数据量选择合适的方案,并考虑性能优化。

抱歉,评论功能暂时关闭!