Python案例怎么节省内存占用？

wen python案例 2026-06-07 82

本文目录导读：

Python案例怎么节省内存占用？

使用生成器替代列表
使用__slots__限制实例属性
使用array模块存储数字
使用struct模块压缩数据结构
使用collections模块的命名元组
使用numpy进行数值计算
使用数组索引替代字典
及时删除不再使用的对象
使用lru_cache进行记忆化
实战案例：处理百万级数据
内存优化的最佳实践总结

在Python中节省内存占用有很多实用技巧,我来分享一些经过验证的案例和方法：

使用生成器替代列表

❌ 内存密集型

# Eager version: materialises every line of the file in one list.
def read_large_file(filename):
    """Return all lines of *filename* as a list, each stripped of surrounding whitespace."""
    with open(filename, 'r') as handle:
        return [raw.strip() for raw in handle]
# Memory use grows with the file: every stripped line stays alive in the list.

✅ 内存友好型

# Lazy version: a generator hands out one line at a time.
def read_large_file_efficient(filename):
    """Yield each line of *filename* with surrounding whitespace stripped.

    The file is opened when iteration starts and is closed once the
    generator is exhausted or closed.
    """
    with open(filename, 'r') as handle:
        yield from map(str.strip, handle)
# Only the current line is held by this function, whatever the file size.

使用__slots__限制实例属性

❌ 传统类定义

class Point:
    """2-D point stored as ordinary instance attributes (no ``__slots__``)."""

    def __init__(self, x, y):
        self.x, self.y = x, y
# No __slots__ here, so each instance keeps its attributes in its own
# __dict__; exact byte counts depend on the CPython version.

✅ 使用__slots__

class Point:
    """2-D point whose attributes live in fixed slots instead of a ``__dict__``."""

    # Only these two attribute names may ever be set on an instance.
    __slots__ = ('x', 'y')

    def __init__(self, x, y):
        self.x, self.y = x, y
# Without a per-instance __dict__ each object is smaller; the exact saving
# depends on the CPython version.

实际测试对比：

import sys
class WithoutSlots:
    """Baseline class: attributes are kept in the regular per-instance ``__dict__``."""

    def __init__(self, x, y):
        self.x, self.y = x, y
class WithSlots:
    """Same fields as the baseline class, but declared through ``__slots__``."""

    __slots__ = ('x', 'y')

    def __init__(self, x, y):
        self.x, self.y = x, y
# Build one million instances of each class while tracemalloc records the
# allocations.  sys.getsizeof() is shallow: it does not count a separate
# __dict__ that a class without __slots__ may allocate per instance, so it
# can report near-identical sizes for both classes and hide the real saving.
import tracemalloc


def _build_and_measure(cls, count=1000000):
    """Create *count* instances of *cls* and measure what they cost.

    Returns:
        A ``(instances, bytes_per_instance)`` tuple.  The figure is the
        traced allocation growth divided by *count*, so it also includes
        the list's own pointer for each element.
    """
    tracemalloc.start()
    baseline, _ = tracemalloc.get_traced_memory()
    instances = [cls(1, 2) for _ in range(count)]
    current, _ = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    return instances, (current - baseline) / count


points_without, bytes_without = _build_and_measure(WithoutSlots)
points_with, bytes_with = _build_and_measure(WithSlots)
print(f"Without slots: {bytes_without:.1f} bytes")
print(f"With slots: {bytes_with:.1f} bytes")

使用array模块存储数字

❌ 使用列表存储大量数字

numbers = list(range(1000000))
# On 64-bit CPython this costs about 8 MB of list pointers plus a separate
# int object (roughly 28 bytes each) for nearly every element.

✅ 使用array模块

from array import array
# Typecode 'i' stores every value as an unboxed signed C int.
numbers = array('i', range(1_000_000))
# Footprint is len(numbers) * numbers.itemsize bytes: about 4 MB where a
# C int is 4 bytes wide.

使用struct模块压缩数据结构

import struct
# Baseline: one (id1, id2, value) tuple per record, all kept in a list.
users = [(12345, 67890, 100.5)] * 100000
# NOTE: every list slot refers to the same tuple object, so this sample
# understates what 100000 distinct records would cost.
# Packed variant: keep the three fields of a record in one bytes object.
class User:
    """Record of two unsigned ints and one float packed into a single ``bytes`` value."""

    __slots__ = ('data',)
    # Two unsigned C ints followed by a C double.  'd' (not 'f') is used
    # because Python floats are doubles: a 4-byte 'f' would silently round
    # values such as 0.1 when they are packed.
    FORMAT = 'IId'

    def __init__(self, id1, id2, value):
        """Pack *id1*, *id2* and *value* according to ``FORMAT``.

        Raises:
            struct.error: if a field does not fit its C type.
        """
        self.data = struct.pack(self.FORMAT, id1, id2, value)

    def get_values(self):
        """Return the stored fields as an ``(id1, id2, value)`` tuple."""
        return struct.unpack(self.FORMAT, self.data)
# Each record's payload is struct.calcsize(User.FORMAT) bytes, in addition to
# the bytes object and the User instance that hold it.

使用collections模块的命名元组

from collections import namedtuple
import sys
# Hand-written class that declares its fields through __slots__.
class Student:
    """Student record with fixed ``name``, ``age`` and ``score`` slots."""

    __slots__ = ('name', 'age', 'score')

    def __init__(self, name, age, score):
        self.name, self.age, self.score = name, age, score
# The same three fields expressed as a named tuple.
StudentTuple = namedtuple('StudentTuple', 'name age score')
# Print the shallow size (sys.getsizeof) of one instance of each kind.
s1 = Student('Alice', 20, 95)
s2 = StudentTuple('Bob', 22, 88)
print(f"Class with __slots__: {sys.getsizeof(s1)} bytes")
print(f"Named tuple: {sys.getsizeof(s2)} bytes")

使用numpy进行数值计算

import numpy as np
# Plain Python list: one pointer per element plus a separate int object for
# nearly every value.
py_list = list(range(1000000))
# NumPy array: the same values stored contiguously as 4-byte integers
# (np_array.nbytes == 4_000_000).
np_array = np.arange(1_000_000, dtype=np.int32)

使用数组索引替代字典

# ❌ Dictionary approach (memory-hungry)
# Maps each integer id in range(1000000) to its own 10-entry dict whose keys
# are 'key_0' .. 'key_9' and whose values are 0 .. 9.
transactions = {}
for i in range(1000000):
    # A brand-new inner dict is built on every iteration.
    transactions[i] = {f'key_{j}': j for j in range(10)}
# Footprint: one separate dict object per transaction, on top of the outer dict.
# ✅ 数组方式（内存友好）
import numpy as np
# One row per transaction, one float64 column per field.
transactions = np.zeros((1000000, 10), dtype=np.float64)
# Fixed footprint: 1000000 * 10 * 8 bytes = 80 MB.


# Dict-style lookup: the transaction id is simply the row index.
def get_transaction(transaction_id):
    """Return the row of ``transactions`` stored for *transaction_id*.

    Raises:
        IndexError: if *transaction_id* is negative or past the last row.
    """
    # NumPy treats a negative integer as an offset from the end and would
    # silently hand back another transaction's row, so reject it explicitly.
    if isinstance(transaction_id, (int, np.integer)) and transaction_id < 0:
        raise IndexError(f"transaction id out of range: {transaction_id}")
    return transactions[transaction_id]

及时删除不再使用的对象

# 手动触发垃圾回收
import gc
def process_large_data():
    """Load the dataset, process it, and release the raw data before returning.

    Returns:
        Whatever ``process`` returns for the loaded dataset.
    """
    dataset = load_large_dataset()
    outcome = process(dataset)
    # Drop this function's reference to the raw data before returning.
    del dataset
    # Also force a collection pass; calling this frequently is discouraged.
    gc.collect()
    return outcome

使用lru_cache进行记忆化

from functools import lru_cache
# Plain recursion, no memoisation.
def fibonacci(n):
    """Return the n-th Fibonacci number; any *n* below 2 is returned unchanged."""
    return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)
# Memoised variant: the cache keeps up to 128 results, trading a bounded
# amount of memory for far fewer recursive calls.
@lru_cache(maxsize=128)
def fibonacci_cached(n):
    """Return the n-th Fibonacci number; any *n* below 2 is returned unchanged."""
    return n if n < 2 else fibonacci_cached(n - 1) + fibonacci_cached(n - 2)

实战案例：处理百万级数据

import sys
from array import array
from collections import defaultdict
class MemoryEfficientProcessor:
    """Accumulate processed values in a compact ``array('d')`` and spill them to disk.

    Args:
        flush_threshold: Once the in-memory buffer holds more than this many
            values it is appended to the backup file and emptied.
        backup_path: File that receives the flushed values as raw doubles.
            It is opened in append mode, so earlier contents are kept.

    Values still below the threshold when ``process_data`` returns stay in
    ``self.values``; they are not written out automatically.
    """

    def __init__(self, flush_threshold=1000000, backup_path='data_backup.bin'):
        self.flush_threshold = flush_threshold
        self.backup_path = backup_path
        # Unboxed double-precision floats instead of a list of float objects.
        self.values = array('d')

    def process_data(self, data_stream):
        """Process every item of *data_stream*, buffering the results.

        *data_stream* may be any iterable; it is consumed one item at a time.
        """
        for item in data_stream:
            self.values.append(self._process_item(item))
            # Keep the buffer bounded by spilling it once it grows too large.
            if len(self.values) > self.flush_threshold:
                self._flush_to_disk()

    def _process_item(self, item):
        """Return the processed form of a single item (here: the item doubled)."""
        return item * 2

    def _flush_to_disk(self):
        """Append the buffered values to ``backup_path`` and start a fresh buffer."""
        with open(self.backup_path, 'ab') as f:
            self.values.tofile(f)
        self.values = array('d')
# Example run: stream ten million integers through the processor.
# With the default settings, flushed values are appended to
# 'data_backup.bin' in the current working directory.
data = (number for number in range(10_000_000))  # lazy generator, nothing is materialised
processor = MemoryEfficientProcessor()
processor.process_data(data)