前端⽂件上传的分析
原理概述
前端文件上传这个可以说是一个老生常谈的话题了,其中主要用的是全栈的思维,和对http协议 、node⽂件处理的深入了解。
在这里使用vue+element+nodejs来演示这个。
1.上传功能初步实现
formData
<input type="file" @change="handleFileChange" />
<el-button type="primary" @click="handleUpload">上传</el-button>
handleFileChange(e) {
const [file] = e.target.files;
if (!file) return;
form.append("filename", this.container.file.name);
form.append("file", this.container.file);
request({
url: '/upload',
data: form,
})
}
node
const http = require("http")
const path = require('path')
const Controller = require('./controller')
const schedule = require('./schedule')
const server = http.createServer()
const UPLOAD_DIR = path.resolve(__dirname, "..", "target"); // ⼤⽂件存储⽬录
const ctrl = new Controller(UPLOAD_DIR)
server.on("request", async (req, res)
=> {
res.setHeader("Access-Control-AllowOrigin", "*")
res.setHeader("Access-Control-AllowHeaders", "*")
if (req.method === "OPTIONS") {
res.status = 200
res.end()
return
}
if (req.method === "POST") {
if (req.url == '/upload') {
await ctrl.handleUpload(req, res)
return
}
}
})
server.listen(3000, () =>
console.log("正在监听 3000 端⼝"))
Controller.js
async handleUpload(req, res) {
const multipart = new multiparty.Form()
multipart.parse(req, async (err, field, file) => {
if (err) {
console.log(err)
return
}
const [chunk] = file.file
const [filename] = field.filename
const filePath = path.resolve(this.UPLOAD_DIR, `${fileHash}${extractExt(filename)}`)
const chunkDir = path.resolve(this.UPLOAD_DIR, fileHash)
// ⽂件存在直接返回
if (fse.existsSync(filePath)) {
res.end("file exist")
return
}
if (!fse.existsSync(chunkDir)) {
await fse.mkdirs(chunkDir)
}
await fse.move(chunk.path, `${chunkDir}/${hash}`)
res.end("received file chunk")
})
}
技术点汇总
FormData 对象的使用:
1.用一些键值对来模拟一系列表单控件:即把form中所有表单元素的name与value组装成一个queryString
2. 异步上传二进制文件。
httpserver:开启本地服务
fs⽂件处理
multiparty解析post数据
2.增加拖拽,粘贴功能
技术点:拖拽事件drop,clipboardData
<div class="drop-box" id="drop-box">
box.addEventListener("drop", function (e) {
e.preventDefault(); //取消浏览器默认拖拽效果
var fileList = e.dataTransfer.files; //获取拖拽中的⽂件对象
var len = fileList.length;//⽤来获取⽂件的⻓度(其实是获得⽂件数量)
const [file] = e.target.files;
if (!file) return;
...上传
}, false);
粘贴
box.addEventListener('paste', function (event) {
var data = (event.clipboardData)
....
});
3.大文件上传
技术点:blob.slice分片
const chunks = this.createFileChunk(this.container.file);
createFileChunk(file, size = SIZE)
{
// ⽣成⽂件块
const chunks = [];
let cur = 0;
while (cur < file.size) {
chunks.push({
file:
file.slice(cur, cur + size)
});
cur += size;
}
return chunks;
}
- 所有切⽚挨个发请求,然后merge
async handleMerge(req, res) {
const data = await resolvePost(req)
const { fileHash, filename, size } = data
const ext = extractExt(filename)
const filePath = path.resolve(this.UPLOAD_DIR, `${fileHash}${ext}`)
await this.mergeFileChunk(filePath, fileHash, size)
res.end(
JSON.stringify({
code: 0,
message: "file merged success"
})
)
}
4.断点续传+秒传
技术点:
md5计算,缓存思想 ⽂件⽤md5计算⼀个指纹,上传之前,先问后端,这个⽂件的hash在不在,在的话就不⽤传了,就是所谓的断点续传,如果整个⽂件都存在了就是秒传
async handleVerify(req, res) {
const data = await resolvePost(req)
const { filename, hash } = data
const ext = extractExt(filename)
const filePath = path.resolve(this.UPLOAD_DIR, `${hash}${ext}`)
// ⽂件是否存在
let uploaded = false
let uploadedList = []
if (fse.existsSync(filePath)) {
uploaded = true
} else {
// ⽂件没有完全上传完毕,但是可能存在部分切⽚上传完毕了
uploadedList = await
getUploadedList(path.resolve(this.UPLOAD_DIR, hash))
}
res.end(
JSON.stringify({
uploaded,
uploadedList // 过滤诡异的隐藏⽂件
})
)
}
5.计算hash优化
技术点:web-worker
⼤⽂件的md5太慢了,启⽤webworker计算
// web-worker
self.importScripts('spark-md5.min.js')
self.onmessage = e => {
// 接受主线程的通知
const { chunks } = e.data
const spark = new self.SparkMD5.ArrayBuffer()
let progress = 0
let count = 0
const loadNext = index => {
const reader = new FileReader()
reader.readAsArrayBuffer(chunks[index].file)
reader.onload = e => {
// 累加器 不能依赖index,
count++
// 增量计算md5
spark.append(e.target.result)
if (count === chunks.length) {
// 通知主线程,计算结束
self.postMessage({
progress: 100,
hash: spark.end()
})
} else {
// 每个区块计算结束,通知进度即可
progress += 100 / chunks.length
self.postMessage({
progress
})
// 计算下⼀个
loadNext(count)
}
}
}
// 启动
loadNext(0)
}
6.time-slice
技术点:react fiber架构学习,利⽤浏览器空闲时间
requestIdleCallback
requestIdelCallback(myNonEssentialWork);
function myNonEssentialWork(deadline) {
// deadline.timeRemaining()可以获取
到当前帧剩余时间
// 当前帧还有时间 并且任务队列不为空
while (deadline.timeRemaining() > 0 && tasks.length > 0) {
doWorkIfNeeded();
}
if (tasks.length > 0) {
requestIdleCallback(myNonEssentialWork
);
}
}
async calculateHashIdle(chunks) {
return new Promise(resolve => {
const spark = new SparkMD5.ArrayBuffer();
let count = 0;
// 根据⽂件内容追加计算
const appendToSpark = async file => {
return new Promise(resolve => {
const reader = new FileReader();
reader.readAsArrayBuffer(file);
reader.onload = e => {
spark.append(e.target.result);
resolve();
};
});
};
const workLoop = async deadline => {
// 有任务,并且当前帧还没结束
while (count < chunks.length
&& deadline.timeRemaining() > 1) {
await
appendToSpark(chunks[count].file);
count++;
// 没有了 计算完毕
if (count < chunks.length) {
// 计算中
this.hashProgress =
Number(((100 * count) / chunks.length).toFixed(2));
console.log(this.hashProgress)
} else {
// 计算完毕
this.hashProgress = 100;
resolve(spark.end());
}
}
window.requestIdleCallback(workLoop);
};
window.requestIdleCallback(workLoop);
});
}
7.抽样hash
布隆过滤器思想
async calculateHashSample() {
return new Promise(resolve => {
const spark = new SparkMD5.ArrayBuffer();
const reader = new FileReader();
const file = this.container.file;
// ⽂件⼤⼩
const size = this.container.file.size;
let offset = 2 * 1024 * 1024;
let chunks = [file.slice(0, offset)];
// 前⾯100K
let cur = offset;
while (cur < size) {
// 最后⼀块全部加进来
if (cur + offset >= size) {
chunks.push(file.slice(cur, cur + offset));
} else {
// 中间的 前中后去两个字节
const mid = cur + offset / 2;
const end = cur + offset;
chunks.push(file.slice(cur, cur + 2));
chunks.push(file.slice(mid, mid + 2));
chunks.push(file.slice(end - 2, end));
}
// 前取两个字节
cur += offset;
}
// 拼接
reader.readAsArrayBuffer(new Blob(chunks));
reader.onload = e => {
spark.append(e.target.result);
resolve(spark.end());
};
});
}
8.请求并发数控制和重试
async sendRequest(forms, max = 4) {
return new Promise(resolve => {
const len = forms.length;
let idx = 0;
let counter = 0;
const start = async () => {
// 有请求,有通道
while (idx < len && max > 0) {
max--; // 占⽤通道
console.log(idx, "start");
const form = forms[idx].form;
const index = forms[idx].index;
idx++
request({
url: '/upload',
data: form,
onProgress: this.createProgresshandler(this.chunks[index]),
requestList: this.requestList
}).then(() => {
max++; // 释放通道
counter++;
if (counter === len) {
resolve();
} else {
start();
}
});
}
}
start();
});
}
async uploadChunks(uploadedList = []) {
// 这⾥⼀起上传,碰⻅⼤⽂件就是灾难
// 没被hash计算打到,被⼀次性的tcp链接把浏览器稿挂了
// 异步并发控制策略
// ⽐如并发量控制成4
const list = this.chunks
.filter(chunk =>
uploadedList.indexOf(chunk.hash) == -1)
.map(({ chunk, hash, index }, i) => {
const form = new FormData();
form.append("chunk", chunk);
form.append("hash", hash);
form.append("filename", this.container.file.name);
form.append("fileHash", this.container.hash);
return { form, index };
})
.map(({ form, index }) =>
request({
url: "/upload",
data: form,
onProgress: this.createProgresshandler(this.chunks[index]),
requestList: this.requestList
})
);
// 直接全量并发
await Promise.all(list);
// 控制并发
const ret = await
this.sendRequest(list, 4)
if (uploadedList.length + list.length === this.chunks.length) {
// 上传和已经存在之和 等于全部的再合并
await this.mergeRequest();
}
}
9.慢启动策略
TCP拥塞控制的问题 其实就是根据当前⽹络情况,动态调整切⽚的⼤⼩
- chunk中带上size值,不过进度条数量不确定了,修改createFileChunk, 请求加上时间统计
- ⽐如我们理想是30秒传递⼀个
- 初始⼤⼩定为1M,如果上传花了10秒,那下⼀个区块⼤⼩变成3M
- 如果上传花了60秒,那下⼀个区块⼤⼩变成500KB以此类推
- 并发+慢启动的逻辑有些复杂,所以先⼀次只传⼀个切⽚,来演示这个逻辑,新建⼀个handleUpload1函数
async handleUpload1(){
// @todo数据缩放的⽐率 可以更平缓
// @todo 并发+慢启动
// 慢启动上传逻辑
const file = this.container.file
if (!file) return;
this.status = Status.uploading;
const fileSize = file.size
let offset = 1024 * 1024
let cur = 0
let count = 0
this.container.hash = await
this.calculateHashSample();
while (cur < fileSize) {
// 切割offfset⼤⼩
const chunk = file.slice(cur, cur + offset)
cur += offset
const chunkName = this.container.hash + "-" + count;
const form = new FormData();
form.append("chunk", chunk);
form.append("hash", chunkName);
form.append("filename", file.name);
form.append("fileHash", this.container.hash);
form.append("size", chunk.size);
let start = new Date().getTime()
await request({
url: '/upload', data: form
})
const now = new Date().getTime()
const time = ((now - start) / 1000).toFixed(4)
let rate = time / 30
// 速率有最⼤2和最⼩0.5
if (rate < 0.5) rate = 0.5
if (rate > 2) rate = 2
// 新的切⽚⼤⼩等⽐变化
console.log(`切⽚${count}⼤⼩是${this.format(offset)},耗时${time}秒,是30秒的${rate}倍,修正⼤⼩为${this.format(offset / rate)}`)
// 动态调整offset
offset = parseInt(offset / rate)
// if(time)
count++
}
}
切⽚0⼤⼩是1024.00KB,耗时13.2770秒,是30秒的0.5倍,修正⼤⼩为2.00MB
切⽚1⼤⼩是2.00MB,耗时25.4130秒,是30秒的0.8471倍,修正⼤⼩为2.36MB
切⽚2⼤⼩是2.36MB,耗时14.1260秒,是30秒的0.5倍,修正⼤⼩为4.72MB
10.碎片清理
// 为了⽅便测试,我改成每5秒扫⼀次, 过期1分钟的删除做演示
const fse = require('fs-extra')
const path = require('path')
const schedule = require('nodeschedule')
// 空⽬录删除
function remove(file, stats) {
const now = new Date().getTime()
const offset = now - stats.ctimeMs
if (offset > 1000 * 60) {
// ⼤于60秒的碎⽚
console.log(file, '过期了,浪费空间,删除')
fse.unlinkSync(file)
}
}
async function scan(dir, callback) {
const files = fse.readdirSync(dir)
files.forEach(filename => {
const fileDir = path.resolve(dir, filename)
const stats = fse.statSync(fileDir)
if (stats.isDirectory()) {
return scan(fileDir, remove)
}
if (callback) {
callback(fileDir, stats)
}
})
}
let start = function (UPLOAD_DIR) {
// 每5秒
schedule.scheduleJob("*/5 * * * * * ", function () {
console.log('开始扫描')
scan(UPLOAD_DIR)
})
}
exports.start = start
开始扫描
/upload/target/625c…/625c…-0 过期了,删除
/upload/target/625c…/625c…-1 过期了,删除
/upload/target/625c…/625c…-10 过期了,删除
/upload/target/625c…/625c…-11 过期了,删除
/upload/target/625c…/625c…-12 过期了,删除
本文地址:https://blog.csdn.net/qq_40665861/article/details/109616869
上一篇: js中常见的算法问题
推荐阅读
-
APP渗透测试 对文件上传功能的安全检测与webshell分析
-
laravel5.5框架的上传图片功能实例分析【仅传到服务器端】
-
Node.js + express实现上传大文件的方法分析【图片、文本文件】
-
可兼容php5与php7的cURL文件上传功能实例分析
-
jQuery实现移动端图片上传预览组件的方法分析
-
大数据你不懂爱——数据分析的七件做不到之事
-
利用 Chrome Dev Tools 进行页面性能分析的步骤说明(前端性能优化)
-
HTML5时代的纯前端上传图片预览及严格图片格式验证函数
-
TP3.2.3框架使用CKeditor编辑器在页面中上传图片的方法分析
-
基于JS实现前端压缩上传图片的实例代码