本文目录导读:

我来详细介绍断点续传的实现方法,主要从 HTTP 协议层面和文件操作层面来讲解。
核心原理
断点续传的核心是 HTTP Range 请求头,它允许客户端请求文件的某一部分而不是整个文件。
服务端支持
首先确保服务端支持 Range 请求:
# Python Flask 示例
from flask import Flask, Response, request
app = Flask(__name__)
@app.route('/download/<filename>')
def download_file(filename):
    """Serve a file, honouring an HTTP ``Range`` request header when present.

    With a ``Range`` header the response is ``206 Partial Content`` carrying
    only the requested bytes; without one the whole file is returned with
    ``200``. Both responses advertise ``Accept-Ranges: bytes`` so clients
    can tell that resuming is possible. A file that does not exist yields
    ``404`` instead of an unhandled exception.

    NOTE(review): ``parse_range``, ``read_file_range`` and ``read_file`` are
    not defined in this snippet and must be provided by the application.
    Presumably ``parse_range`` returns an inclusive ``(start, end)`` pair;
    the ``Content-Length`` arithmetic below relies on that. Confirm.

    Args:
        filename: Name of the file, taken from the URL path.

    Returns:
        A Flask ``Response`` with status 200, 206 or 404.
    """
    # Imported locally so the snippet does not depend on an import that
    # lives in a different part of the document.
    import os

    file_path = f'/path/to/{filename}'
    if not os.path.isfile(file_path):
        return Response('File not found', 404)
    file_size = os.path.getsize(file_path)

    range_header = request.headers.get('Range', None)
    if range_header:
        # Example header value: "bytes=0-100".
        start, end = parse_range(range_header, file_size)
        data = read_file_range(file_path, start, end)
        return Response(
            data,
            206,  # Partial Content
            headers={
                'Content-Range': f'bytes {start}-{end}/{file_size}',
                'Content-Length': str(end - start + 1),
                'Accept-Ranges': 'bytes',
            }
        )

    # No Range header: send the complete file.
    return Response(
        read_file(file_path),
        200,
        headers={
            'Content-Length': str(file_size),
            'Accept-Ranges': 'bytes',
        }
    )
Python 客户端实现
import os
import requests
from typing import Optional
class ResumeDownloader:
    """Download a URL to disk, resuming from a partial ``.tmp`` file.

    Data is written to ``save_path + '.tmp'`` and moved onto ``save_path``
    only once the byte count matches the size reported by the server, so an
    interrupted run leaves a temp file that the next run continues from.
    """

    def __init__(self, url: str, save_path: str):
        self.url = url
        self.save_path = save_path
        self.temp_path = save_path + '.tmp'  # partial data lives here
        self.downloaded_size = 0

    def get_file_size(self) -> Optional[int]:
        """Return the remote ``Content-Length`` as an int, or ``None``.

        ``None`` is returned when the request fails, the header is absent,
        or its value is not an integer.
        """
        try:
            # HEAD does not follow redirects by default while the GET in
            # download() does; follow them here so both see the same resource.
            response = requests.head(
                self.url, allow_redirects=True, timeout=30
            )
            content_length = response.headers.get('Content-Length')
            return int(content_length) if content_length else None
        except (requests.RequestException, ValueError):
            return None

    def get_local_size(self) -> int:
        """Return the size in bytes of the temp file, or 0 if it is absent."""
        if os.path.exists(self.temp_path):
            return os.path.getsize(self.temp_path)
        return 0

    def _finalize(self) -> None:
        """Move the finished temp file onto ``save_path``.

        ``os.replace`` overwrites an existing destination on every platform,
        whereas ``os.rename`` fails on Windows when the target exists.
        """
        os.replace(self.temp_path, self.save_path)

    def download(self):
        """Download the file, continuing from the temp file when possible.

        Returns:
            ``True`` when ``save_path`` holds the complete file afterwards,
            ``False`` when the size is unknown, the server answers with an
            unexpected status, or the transfer ends short of the full size.
        """
        total_size = self.get_file_size()
        if not total_size:
            print("无法获取文件大小")
            return False

        self.downloaded_size = self.get_local_size()
        if self.downloaded_size == total_size:
            # A previous run fetched everything but never got to the rename.
            print("文件已完全下载")
            self._finalize()
            return True
        if self.downloaded_size > total_size:
            # The temp file cannot belong to this resource; start over.
            os.remove(self.temp_path)
            self.downloaded_size = 0

        target_dir = os.path.dirname(self.temp_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        headers = {
            'Range': f'bytes={self.downloaded_size}-'
        }
        # The context manager releases the streamed connection on every path.
        with requests.get(
            self.url,
            headers=headers,
            stream=True,
            timeout=30
        ) as response:
            if response.status_code == 206:  # Partial Content
                print(f"开始断点续传,从 {self.downloaded_size} 字节继续")
                mode = 'ab' if self.downloaded_size > 0 else 'wb'
            elif response.status_code == 200:
                # The server ignored Range and is sending the whole file,
                # so any partial data must be overwritten, not appended to.
                self.downloaded_size = 0
                mode = 'wb'
            else:
                print(f"服务器不支持断点续传,状态码: {response.status_code}")
                return False

            with open(self.temp_path, mode) as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        self.downloaded_size += len(chunk)
                        progress = (self.downloaded_size / total_size) * 100
                        print(f"\r下载进度: {progress:.2f}%", end='')

        if self.downloaded_size < total_size:
            # Keep the temp file so the next call can resume from here.
            print(f"\n下载未完成: {self.downloaded_size}/{total_size} 字节")
            return False

        print("\n下载完成!")
        self._finalize()
        return True
# Usage example. Guarded so that importing this module does not start a
# network download as a side effect.
if __name__ == "__main__":
    downloader = ResumeDownloader(
        url='https://example.com/large-file.zip',
        save_path='./downloads/large-file.zip'
    )
    downloader.download()
JavaScript 前端实现
/**
 * Browser-side resumable download built on fetch() and the Range header.
 *
 * Received chunks are kept on the instance, so calling download() again on
 * the same object after a failure continues where the stream stopped. The
 * byte count is also written to localStorage, but the bytes themselves are
 * not persisted: a stored count with no matching in-memory data is
 * discarded and the download restarts, rather than producing a Blob that
 * is missing the beginning of the file.
 */
class ResumeDownload {
  constructor(url, savePath) {
    this.url = url;
    this.savePath = savePath;
    this.db = null; // placeholder for IndexedDB-backed state; unused here
    this.chunks = []; // Uint8Array pieces received so far
  }

  /** Return the Content-Length reported by a HEAD request (NaN if absent). */
  async getFileSize() {
    const response = await fetch(this.url, { method: 'HEAD' });
    return parseInt(response.headers.get('content-length'), 10);
  }

  /** Return the byte count recorded in localStorage for this URL, or 0. */
  async getDownloadedSize() {
    const saved = localStorage.getItem(`download_${this.url}`);
    return saved ? parseInt(saved, 10) : 0;
  }

  /**
   * Download the file, resuming when bytes are already buffered.
   *
   * @returns {Promise<Blob|undefined>} The assembled Blob, or undefined
   *   when the server answers with an unexpected status or when the stored
   *   progress says the file is complete but no data is buffered.
   */
  async download() {
    const storageKey = `download_${this.url}`;
    const totalSize = await this.getFileSize();
    // The size is unknown when the server sends no Content-Length.
    const hasTotal = Number.isFinite(totalSize) && totalSize > 0;

    let downloadedSize = await this.getDownloadedSize();
    if (hasTotal && downloadedSize >= totalSize) {
      console.log('文件已完全下载');
      return this.chunks.length > 0 ? new Blob(this.chunks) : undefined;
    }

    // Only resume from bytes this instance actually holds.
    const bufferedSize = this.chunks.reduce((sum, part) => sum + part.length, 0);
    if (downloadedSize !== bufferedSize) {
      this.chunks = [];
      downloadedSize = 0;
      localStorage.removeItem(storageKey);
    }

    const response = await fetch(this.url, {
      headers: {
        'Range': `bytes=${downloadedSize}-`
      }
    });

    if (response.status === 200) {
      // The server ignored Range and is sending the whole file.
      this.chunks = [];
      downloadedSize = 0;
    } else if (response.status !== 206) {
      console.error(`服务器不支持断点续传,状态码: ${response.status}`);
      return undefined;
    }

    const reader = response.body.getReader();
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      this.chunks.push(value);
      downloadedSize += value.length;
      // Record progress after every chunk.
      localStorage.setItem(storageKey, downloadedSize.toString());
      if (hasTotal) {
        const progress = (downloadedSize / totalSize) * 100;
        console.log(`进度: ${progress.toFixed(2)}%`);
      } else {
        console.log(`进度: ${downloadedSize} bytes`);
      }
    }

    // Hand the assembled data back so the caller can save or upload it.
    const blob = new Blob(this.chunks);
    console.log('下载完成!');
    return blob;
  }
}
多线程断点续传(Python 进阶版)
import threading
import requests
import os
class MultiThreadResumeDownload:
    """Download one URL in parallel by fetching byte ranges on threads.

    The target file is created at its final size up front and every worker
    writes its own range at the matching offset. ``download()`` always
    starts from an empty file; it does not continue a previous run.
    """

    def __init__(self, url, save_path, num_threads=4):
        self.url = url
        self.save_path = save_path
        self.num_threads = num_threads
        self.lock = threading.Lock()  # guards progress, errors and printing
        self.progress = 0
        self.file_size = 0  # cached so progress output needs no HTTP call
        self.errors = []  # (thread_id, exception) pairs from failed workers

    def get_file_size(self):
        """Return the remote ``Content-Length`` as an int (0 if absent)."""
        response = requests.head(self.url, allow_redirects=True, timeout=30)
        return int(response.headers.get('Content-Length', 0))

    @staticmethod
    def _split_ranges(file_size, num_threads):
        """Split ``file_size`` bytes into inclusive ``(start, end)`` ranges.

        At most ``num_threads`` ranges are produced and never more than
        there are bytes; the last range absorbs the remainder.
        """
        parts = max(1, min(num_threads, file_size))
        part_size = file_size // parts
        ranges = []
        for index in range(parts):
            start = index * part_size
            end = file_size - 1 if index == parts - 1 else start + part_size - 1
            ranges.append((start, end))
        return ranges

    def download_part(self, start_byte, end_byte, thread_id):
        """Fetch bytes ``start_byte``..``end_byte`` and write them in place.

        Raises:
            RuntimeError: If the server does not answer ``206``. A ``200``
                would carry the whole file and corrupt the output if it
                were written at this range's offset.
        """
        headers = {'Range': f'bytes={start_byte}-{end_byte}'}
        # Look the total up once, not once per chunk.
        total = self.file_size or self.get_file_size()
        with requests.get(
            self.url, headers=headers, stream=True, timeout=30
        ) as response:
            if response.status_code != 206:
                raise RuntimeError(
                    f"expected HTTP 206 for range {start_byte}-{end_byte}, "
                    f"got {response.status_code}"
                )
            # Each worker has its own handle and offset, so writes need no
            # lock; only the shared counter and console output do.
            with open(self.save_path, 'r+b') as f:
                f.seek(start_byte)
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    with self.lock:
                        self.progress += len(chunk)
                        percent = self.progress / total * 100 if total else 0.0
                        print(f"\r线程 {thread_id}: 进度 {percent:.2f}%", end='')

    def _run_part(self, start_byte, end_byte, thread_id):
        """Thread target: run ``download_part`` and record any failure."""
        try:
            self.download_part(start_byte, end_byte, thread_id)
        except Exception as exc:
            with self.lock:
                self.errors.append((thread_id, exc))

    def download(self):
        """Download the whole file using up to ``num_threads`` workers.

        Returns:
            ``True`` when every range was written, ``False`` when the size
            could not be determined or any worker failed.
        """
        file_size = self.get_file_size()
        if file_size <= 0:
            print("无法获取文件大小")
            return False
        self.file_size = file_size
        self.progress = 0
        self.errors = []

        target_dir = os.path.dirname(self.save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Create the file at its final size so workers can seek into it.
        with open(self.save_path, 'wb') as f:
            f.truncate(file_size)

        threads = []
        ranges = self._split_ranges(file_size, self.num_threads)
        for thread_id, (start, end) in enumerate(ranges):
            thread = threading.Thread(
                target=self._run_part,
                args=(start, end, thread_id)
            )
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        if self.errors:
            for thread_id, exc in self.errors:
                print(f"\n线程 {thread_id}: 下载错误: {exc}")
            return False

        print("\n多线程下载完成!")
        return True
注意事项
服务器支持
- 检查 `Accept-Ranges: bytes` 响应头
- 服务器返回 `206 Partial Content` 状态码
文件完整性
- 下载完成后验证文件哈希值(MD5/SHA256)
- 使用临时文件,完成后重命名
错误处理
def safe_download(self):
    """Call ``self.download()``, retrying on connection errors.

    Up to three attempts are made, pausing two seconds between them.

    Returns:
        Whatever ``self.download()`` returned when it completed without
        raising; ``False`` when every attempt hit a connection error or
        another exception was raised.
    """
    # Imported locally: this snippet has no import section of its own.
    import time

    max_retries = 3
    for attempt in range(1, max_retries + 1):
        try:
            return self.download()
        except requests.exceptions.ConnectionError:
            print(f"连接失败,重试 {attempt}/{max_retries}")
            # No point waiting after the final attempt.
            if attempt < max_retries:
                time.sleep(2)
        except Exception as e:
            print(f"下载错误: {e}")
            return False
    return False
缓存策略
- 使用 `If-Range` 头验证文件是否变更
- 记录下载元数据(URL、已下载大小、时间戳)
这个实现涵盖了断点续传的核心功能,可以根据具体需求进行调整和扩展。