飞书的云文档只支持单个文件下载;如果选择文件夹下载,则只会保留文件目录结构,其中的文件会被下载成 .url 快捷方式文件,格式如下:
[InternetShortcut]
URL=https://www.feishu.cn/${type}/${token}
Object=${token}
如果需要批量下载飞书云文档,则需要使用飞书的云文档API,https://open.feishu.cn/document/server-docs/docs/drive-v1/export_task/export-user-guide
其中的三个接口:
- 创建导出任务:
https://open.feishu.cn/open-apis/drive/v1/export_tasks
- 查询导出任务结果:
https://open.feishu.cn/open-apis/drive/v1/export_tasks/${ticket}?token=${token}
- 下载导出文件:
https://open.feishu.cn/open-apis/drive/v1/export_tasks/file/${file_token}/download
想要合理地下载飞书文档库中的文档,这里需要在飞书后台建立一个机器人应用,使用应用中的user_access_token,
如果有其他的办法能拿到user_access_token也是可以的。
首先,下载文件夹
然后,遍历文件夹中的文件,提取url后缀的文件,读取其中的token和类型
URL=https://www.feishu.cn/${type}/${token}
然后,再逐步调用导出任务,查询结果,下载文件即可实现批量下载
下面是用nodejs实现的一段批量下载代码,其中只支持了docx和docs(导出时类型为doc)两种文档类型的下载
另外:不知道为什么,每次创建导出任务之后,如果在5秒内就查询结果,获取到的结果是空的,所以代码中延迟了5秒再查询
const axios = require('axios');
const process = require('process');
const path = require('path')
const fs = require('fs')
// Access token sent as `Bearer ${Authorization}` in the request headers built by
// the config helpers in this file. The value here is a placeholder and must be
// replaced with a real token before running the script.
// NOTE(review): presumably a Feishu user_access_token, per the notes at the top
// of this file — confirm.
let Authorization = "u-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
/**
 * Performs an HTTP request through axios and unwraps the response body.
 *
 * Errors are not propagated: a failed request is logged with console.error
 * and the function resolves to `undefined`, so callers must check the result.
 *
 * @param {object} config - axios request configuration.
 * @returns {Promise<*>} The `data` property of the axios response, or
 *   `undefined` when the request failed.
 */
async function httpRequest(config) {
  let body;
  try {
    const reply = await axios(config);
    body = reply.data;
  } catch (err) {
    console.error(`Error making request: ${err}`);
  }
  return body;
}
/**
 * Builds the axios request config for creating an export task
 * (POST https://open.feishu.cn/open-apis/drive/v1/export_tasks).
 *
 * @param {string} token - Token of the document to export.
 * @param {string} fileExt - Target file extension, sent as `file_extension`.
 * @param {string} type - Document type, sent as `type`.
 * @param {string} [sub_id] - Optional sub-resource id. It is only added to the
 *   request body when provided, so calls without it produce the same payload
 *   as before.
 * @returns {object} axios config with a JSON-encoded body and bearer auth header.
 */
function getExportURL(token, fileExt, type, sub_id) {
  const payload = {
    file_extension: fileExt,
    token: token,
    type: type,
  };
  // Previously this argument was accepted but silently dropped.
  if (sub_id !== undefined && sub_id !== null) {
    payload.sub_id = sub_id;
  }

  return {
    method: 'POST',
    url: 'https://open.feishu.cn/open-apis/drive/v1/export_tasks',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${Authorization}`,
    },
    data: JSON.stringify(payload),
  };
}
/**
 * Builds the axios request config for querying the result of an export task
 * (GET .../drive/v1/export_tasks/{ticket}?token={token}).
 *
 * Both values are percent-encoded before being placed in the URL so that a
 * value containing characters such as `/`, `?` or `&` cannot change the path
 * or inject extra query parameters. Plain alphanumeric values are unaffected.
 *
 * @param {string} ticket - Ticket returned when the export task was created.
 * @param {string} token - Token of the document being exported.
 * @returns {object} axios config with bearer auth header.
 */
function getTicketURL(ticket, token) {
  const ticketSegment = encodeURIComponent(ticket);
  const tokenParam = encodeURIComponent(token);

  return {
    method: 'GET',
    url: `https://open.feishu.cn/open-apis/drive/v1/export_tasks/${ticketSegment}?token=${tokenParam}`,
    headers: {
      'Authorization': `Bearer ${Authorization}`,
    },
  };
}
/**
 * Builds the axios request config for downloading an exported file
 * (GET .../drive/v1/export_tasks/file/{file_token}/download).
 *
 * The response is requested as a stream so the caller can pipe it to disk.
 *
 * @param {string} file_token - Token of the exported file.
 * @returns {object} axios config with bearer auth header and `responseType: 'stream'`.
 */
function getDownloadURL(file_token) {
  const authHeaders = { 'Authorization': `Bearer ${Authorization}` };
  const downloadUrl = `https://open.feishu.cn/open-apis/drive/v1/export_tasks/file/${file_token}/download`;

  return {
    method: 'GET',
    url: downloadUrl,
    headers: authHeaders,
    responseType: 'stream',
  };
}
/**
 * Streams an exported file to a local path.
 *
 * Request failures, download-stream failures and write failures are all
 * logged and reported through the return value instead of being thrown.
 *
 * @param {string} file_token - Token of the exported file to download.
 * @param {string} localPath - Destination file path; its directory must exist.
 * @returns {Promise<boolean>} `true` once the file has been fully written,
 *   `false` if the request or the write failed.
 */
async function downloadFile(file_token, localPath) {
  try {
    const response = await axios(getDownloadURL(file_token));
    const writer = fs.createWriteStream(localPath);
    // Awaited inside the try block so that stream failures reach the catch
    // below rather than escaping as an unhandled rejection.
    await new Promise((resolve, reject) => {
      writer.on('finish', resolve);
      writer.on('error', reject);
      // pipe() does not forward errors from the source stream, so without
      // this handler an interrupted download would never settle.
      response.data.on('error', (error) => {
        writer.destroy();
        reject(error);
      });
      response.data.pipe(writer);
    });
    return true;
  } catch (error) {
    console.error(`Error downloading file: ${error}`);
    return false;
  }
}

/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Exports one cloud document as a .docx file and saves it into the
 * `download` directory under the current working directory.
 *
 * Steps: create the export task, poll for its result, then download the
 * exported file. The outcome is logged and also returned.
 *
 * @param {string} token - Token of the document to export.
 * @param {string} type - Document type passed to the export API
 *   (callers in this file pass 'docx' or 'doc').
 * @returns {Promise<boolean>} `true` if the file was downloaded, otherwise `false`.
 */
async function downloadDocx(token, type) {
  // The author observed that querying sooner than 5 seconds after creating
  // the task returns an empty result, hence the wait before every query.
  const pollIntervalMs = 5000;
  const maxPolls = 3;

  const created = await httpRequest(getExportURL(token, "docx", type));
  const ticket = created?.data?.ticket;
  if (!ticket) {
    console.error(`创建导出任务失败, token: ${token}`);
    return false;
  }
  console.log(`请求ticket完成, ticket: ${ticket}`);

  let result;
  for (let attempt = 1; attempt <= maxPolls; attempt += 1) {
    await sleep(pollIntervalMs);
    const queried = await httpRequest(getTicketURL(ticket, token));
    if (!queried) {
      // The request itself failed (already logged by httpRequest); retrying is pointless.
      break;
    }
    result = queried.data?.result;
    if (result?.file_token) {
      break;
    }
    // Per the export-task API documentation, job_status 1 and 2 mean the task
    // is still initializing/processing; any other reported status is final.
    const status = result?.job_status;
    if (status !== undefined && status !== 1 && status !== 2) {
      break;
    }
  }

  const file_token = result?.file_token;
  if (!file_token) {
    console.error(`查询导出结果失败, ticket: ${ticket}`);
    return false;
  }
  console.log(`请求file_token完成, file_token: ${file_token}`);

  // Replace every path separator, not just the first one, so the document
  // title can never be interpreted as nested directories.
  const safeName = String(result.file_name).replace(/[\\/]/g, "-");
  const file_name = `${safeName}.${result.file_extension}`;

  const downloadDir = path.join(process.cwd(), "download");
  // The write stream fails with ENOENT if the target directory is missing.
  fs.mkdirSync(downloadDir, { recursive: true });
  const localPath = path.join(downloadDir, file_name);

  const downloaded = await downloadFile(file_token, localPath);
  if (downloaded) {
    console.log(`下载${file_name}完成,保存在${localPath}`);
  } else {
    console.error(`下载${file_name}失败`);
  }
  return downloaded;
}
/**
 * Lists every entry under a directory (searched recursively) whose extension
 * is exactly `.url`.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {string[]} Matching entries as returned by `fs.readdirSync`,
 *   i.e. paths relative to `dirPath`.
 */
function readAllUrlFiles(dirPath) {
  const entries = fs.readdirSync(dirPath, { recursive: true });
  return entries.filter((entry) => path.extname(entry) === '.url');
}
// Root of the folder downloaded from Feishu, given as the first CLI argument.
const dirPath = process.argv[2];
if (!dirPath) {
  // Without this check the script would crash inside readdirSync with an
  // unhelpful TypeError.
  console.error('用法: node <script> <飞书下载的文件夹路径>');
  process.exitCode = 1;
}

// Queue of .url files (relative to dirPath) still to be processed.
const files = dirPath ? readAllUrlFiles(dirPath) : [];

// Extracts the document token from the "Object=<token>" line.
const tokenRegex = /Object=(.+)/;
// Extracts the document type, i.e. the first path segment of the
// "URL=https://www.feishu.cn/<type>/<token>" line. Dots are escaped and the
// capture stops at the first slash so later slashes cannot leak into the type.
const typeRegex = /URL=https:\/\/www\.feishu\.cn\/([^\/\s]+)\//;

// Maps the type found in the shortcut URL to the type sent to the export API.
const exportTypeByUrlType = new Map([
  ['docx', 'docx'],
  ['docs', 'doc'],
  ['doc', 'doc'],
]);

/**
 * Processes every queued .url file in order: reads the shortcut, extracts the
 * document token and type, and exports the document when the type is
 * supported. A failure on one file is logged and does not stop the rest.
 *
 * @returns {Promise<void>} Resolves once the queue is empty.
 */
async function downloadByUrl() {
  while (files.length > 0) {
    const docPath = files.shift();
    try {
      const content = fs.readFileSync(path.join(dirPath, docPath), 'utf-8');
      const tokenMatch = content.match(tokenRegex);
      const typeMatch = content.match(typeRegex);
      if (!tokenMatch || !typeMatch) {
        // Not a Feishu shortcut in the expected format; skip it instead of
        // dereferencing a null match.
        console.log(`${docPath} 中未找到token或类型, 已跳过`);
        continue;
      }

      const token = tokenMatch[1].trim();
      const urlType = typeMatch[1];
      const exportType = exportTypeByUrlType.get(urlType);
      if (exportType === undefined) {
        console.log(`${docPath} 的类型是 ${urlType}, 暂时不支持`);
        continue;
      }

      console.log(`分析 ${docPath} 完成,token:${token}, 剩余文件数:${files.length}`);
      await downloadDocx(token, exportType);
    } catch (error) {
      console.error(`处理 ${docPath} 失败: ${error}`);
    }
  }
}

downloadByUrl().catch((error) => {
  console.error(`批量下载失败: ${error}`);
  process.exitCode = 1;
});