ScopeSentry/api/export.py

363 lines
13 KiB
Python
Raw Normal View History

2024-06-16 12:30:35 +00:00
# -------------------------------------
# @file : export.py
# @author : Autumn
# @contact : rainy-autumn@outlook.com
# @time : 2024/6/16 16:11
# -------------------------------------------
import os
from bson import ObjectId
from fastapi import APIRouter, Depends, BackgroundTasks
2024-06-16 15:27:55 +00:00
from openpyxl.utils.exceptions import IllegalCharacterError
2024-06-17 13:36:34 +00:00
from starlette.responses import FileResponse
2024-06-16 15:27:55 +00:00
2024-06-16 12:30:35 +00:00
from api.users import verify_token
from motor.motor_asyncio import AsyncIOMotorCursor
from core.db import get_mongo_db
import pandas as pd
from core.util import *
2024-06-16 15:27:55 +00:00
from pymongo import ASCENDING, DESCENDING, results
2024-06-16 12:30:35 +00:00
from loguru import logger
2024-07-06 11:32:03 +00:00
from openpyxl import Workbook
2024-06-16 12:30:35 +00:00
# Router instance that the export endpoints below attach to via @router decorators.
router = APIRouter()
# Search-keyword tables, one per exportable data type.
# export_data() passes keywords[index] to search_to_mongodb() together with the
# user's search expression. Each entry maps a keyword accepted in the search
# expression to a field name, or a list of field names
# (presumably the document fields that keyword is matched against -- confirm in search_to_mongodb).
keywords = {
    "asset": {
        "app": "",
        "body": "responsebody",
        "header": "rawheaders",
        "project": "project",
        "title": "title",
        "statuscode": "statuscode",
        "icon": "faviconmmh3",
        "ip": ["host", "ip"],
        "domain": ["host", "url", "domain"],
        "port": "port",
        "protocol": ["protocol", "type"],
        "banner": "raw",
    },
    "subdomain": {
        "domain": "host",
        "ip": "ip",
        "type": "type",
        "project": "project",
        "value": "value",
    },
    "SubdoaminTakerResult": {
        "domain": "input",
        "value": "value",
        "type": "cname",
        "response": "response",
        "project": "project",
    },
    "UrlScan": {
        "url": "output",
        "project": "project",
        "input": "input",
        "source": "source",
        "type": "outputtype",
    },
    "crawler": {
        "url": "url",
        "method": "method",
        "body": "body",
        "project": "project",
    },
    "SensitiveResult": {
        "url": "url",
        "sname": "sid",
        "body": "body",
        "info": "match",
        "project": "project",
        "md5": "md5",
    },
    "DirScanResult": {
        "project": "project",
        "statuscode": "status",
        "url": "url",
        "redirect": "msg",
    },
    "vulnerability": {
        "url": "url",
        "vulname": "vulname",
        "project": "project",
        "matched": "matched",
        "request": "request",
        "response": "response",
        "level": "level",
    },
    "PageMonitoring": {
        "url": "url",
        "project": "project",
        "hash": "hash",
        "diff": "diff",
        "response": "response",
    },
}
@router.post("/export")
async def export_data(request_data: dict, db=Depends(get_mongo_db), _: dict = Depends(verify_token),
                      background_tasks: BackgroundTasks = BackgroundTasks()):
    """Register an export task and schedule the job that writes the export file.

    Reads from ``request_data``:
      * ``index``    -- data type to export; must be one of the keys of ``keywords``.
      * ``quantity`` -- maximum number of documents to export; must be a positive integer.
      * ``type``     -- export mode; ``"search"`` turns ``search`` into a MongoDB filter,
                        any other non-empty value exports with an empty filter.
      * ``search``   -- search expression, only used when ``type`` is ``"search"``.

    Returns a ``{"code", "message"}`` dict: code 200 once the task record has been
    inserted and the background job queued, code 500 for invalid input, an
    unparsable search expression, or a failed insert.
    """
    index = request_data.get("index", "")
    export_type = request_data.get("type", "")
    search_query = request_data.get("search", "")
    try:
        quantity = int(request_data.get("quantity", 0))
    except (TypeError, ValueError):
        # A non-numeric quantity is reported like a missing one instead of raising.
        quantity = 0
    # quantity feeds a $limit stage later on, so zero and negative values are rejected here.
    if index == "" or quantity <= 0 or export_type == "":
        return {"code": 500, "message": "get index, quantity, export_type null"}
    # index is later used both as a key of `keywords` and as the MongoDB collection
    # name, so only the known export types are accepted.
    if not isinstance(index, str) or index not in keywords:
        return {"code": 500, "message": "Unsupported export index"}

    query = {}
    if export_type == "search":
        query = await search_to_mongodb(search_query, keywords[index])
        # An empty string / None / empty list all mean there is no usable filter.
        if not query:
            return {"message": "Search condition parsing error", "code": 500}
        query = query[0]
    if index == "PageMonitoring":
        # Only export monitored pages whose diff list is not empty.
        query["diff"] = {"$ne": []}

    file_name = generate_random_string(16)
    result = await db.export.insert_one({
        "file_name": file_name,
        "create_time": get_now_time(),
        "quantity": quantity,
        "data_type": index,
        "state": 0,
        "end_time": "",
        "file_size": ""
    })
    if result.inserted_id:
        background_tasks.add_task(export_data_from_mongodb, quantity, query, file_name, index)
        return {"message": "Successfully added data export task", "code": 200}
    return {"message": "Failed to export data", "code": 500}
async def fetch_data(db, collection, query, quantity, project_list):
    """Build the export aggregation for ``collection`` and return its cursor.

    The pipeline matches ``query``, keeps at most ``quantity`` documents,
    rewrites the ``project`` field and drops ``_id`` and ``vulnid``.

    ``project_list`` is a mapping whose keys are the replacement values and
    whose values are the stored ``project`` values they replace. Documents
    whose ``project`` matches none of them keep their original value.

    Returns whatever ``db[collection].aggregate(pipeline)`` returns; nothing
    is awaited here.
    """
    # One $switch branch per known project: stored value -> replacement value.
    branches = [
        {"case": {"$eq": ["$project", original_value]}, "then": new_value}
        for new_value, original_value in project_list.items()
    ]
    pipeline = [
        {"$match": query},
        {"$limit": quantity},
    ]
    # MongoDB rejects a $switch whose `branches` array is empty, so the
    # rewrite stage is only added when there is something to rewrite.
    if branches:
        pipeline.append({"$addFields": {
            "project": {
                "$switch": {
                    "branches": branches,
                    "default": "$project"
                }
            }
        }})
    pipeline.append({"$project": {"_id": 0, "vulnid": 0}})
    return db[collection].aggregate(pipeline)
2024-07-06 11:32:03 +00:00
def flatten_dict(d):
    """Return a copy of ``d`` whose values can each be written to one spreadsheet cell.

    * dict values become their ``str()`` representation;
    * the ``webfinger`` list is translated entry by entry through the
      module-level ``APP`` lookup and joined with ``","``; an entry that is
      missing from ``APP`` is kept as-is instead of aborting the export;
    * any other list is joined with ``", "``;
    * every other value is passed through unchanged.

    Key order of ``d`` is preserved.
    """
    flattened = {}
    for key, value in d.items():
        if isinstance(value, dict):
            flattened[key] = str(value)
        elif isinstance(value, list):
            if key == "webfinger":
                names = []
                for entry in value:
                    try:
                        names.append(str(APP[entry]))
                    except KeyError:
                        # Unknown fingerprint id: fall back to the raw value.
                        names.append(str(entry))
                flattened[key] = ",".join(names).strip(",")
            else:
                flattened[key] = ', '.join(map(str, value))
        else:
            flattened[key] = value
    return flattened
2024-07-04 15:28:11 +00:00
# Code points that openpyxl refuses to store in a cell (it raises
# IllegalCharacterError for them): 0x00-0x08, 0x0B, 0x0C and 0x0E-0x1F.
_EXCEL_ILLEGAL_CHARS = dict.fromkeys([*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20)])

# Document field -> column header for the two sheets of an "asset" export.
_ASSET_HTTP_COLUMNS = {
    "timestamp": "时间",
    "tlsdata": "TLS_Data",
    "hashes": "Hash",
    "cdnname": "Cdn_Name",
    "port": "端口",
    "url": "url",
    "title": "标题",
    "type": "类型",
    "error": "错误",
    "responsebody": "响应体",
    "host": "IP",
    "faviconmmh3": "图标Hash",
    "faviconpath": "faviconpath",
    "rawheaders": "响应头",
    "jarm": "jarm",
    "technologies": "technologies",
    "statuscode": "响应码",
    "contentlength": "contentlength",
    "cdn": "cdn",
    "webcheck": "webcheck",
    "project": "项目",
    "webfinger": "指纹",
    "iconcontent": "图标",
    "domain": "域名"
}
_ASSET_OTHER_COLUMNS = {
    "timestamp": "时间",
    "host": "域名",
    "ip": "IP",
    "port": "端口",
    "protocol": "协议",
    "tls": "TLS",
    "transport": "transport",
    "version": "版本",
    "raw": "banner",
    "project": "项目",
    "type": "类型"
}

# Document field -> column header for every single-sheet export type.
_EXPORT_COLUMNS = {
    "subdomain": {
        'host': '域名', 'type': '解析类型', 'value': '解析值', 'ip': '解析IP', 'project': '项目',
        'time': '时间'
    },
    "SubdoaminTakerResult": {
        'input': '源域名', 'value': '解析值', 'cname': '接管类型', 'response': '响应体', 'project': '项目'
    },
    "UrlScan": {
        'input': '输入', 'source': '来源', 'outputtype': '输出类型', 'output': '输出',
        'statuscode': 'statuscode', 'length': 'length', 'time': '时间', 'project': '项目'
    },
    "crawler": {
        'url': 'URL', 'method': 'Method', 'body': 'Body', 'project': '项目'
    },
    "SensitiveResult": {
        'url': 'URL', 'sid': '规则名称', 'match': '匹配内容', 'project': '项目', 'body': '响应体',
        'color': '等级', 'time': '时间', 'md5': '响应体MD5'
    },
    "DirScanResult": {
        'url': 'URL', 'status': '响应码', 'msg': '跳转', 'project': '项目'
    },
    "vulnerability": {
        'url': 'URL', 'vulname': '漏洞', 'matched': '匹配', 'project': '项目', 'level': '危害等级',
        'time': '时间', 'request': '请求', 'response': '响应'
    },
    "PageMonitoring": {
        'url': 'URL', 'content': '响应体', 'hash': '响应体Hash', 'diff': 'Diff',
        'state': '状态', 'project': '项目', 'time': '时间'
    },
}


def _build_export_row(doc, column_keys):
    """Return the cell values of ``doc`` for ``column_keys``, safe to append to a worksheet."""
    flattened_doc = flatten_dict(doc)
    row = []
    for col in column_keys:
        value = flattened_doc.get(col, "")
        if isinstance(value, str):
            # Strip control characters up front; otherwise a single one of them
            # makes ws.append() raise and the whole export is lost.
            value = value.translate(_EXCEL_ILLEGAL_CHARS)
        row.append(value)
    return row


async def export_data_from_mongodb(quantity, query, file_name, index):
    """Export up to ``quantity`` documents of ``index`` matching ``query`` to an xlsx file.

    The workbook is written to ``file/<file_name>.xlsx`` under the current
    working directory. "asset" exports get two sheets ('HTTP Data' and
    'Other Data', split on the document's ``type`` field); every other index
    gets a single sheet named after the index.

    The matching record in ``db.export`` is updated when the job ends:
    state 1 with ``end_time`` and ``file_size`` (in MiB, as a string) on
    success, state 2 when any exception occurred. Nothing is returned and
    exceptions are logged, not raised.
    """
    logger.info("导出开始")
    async for db in get_mongo_db():
        try:
            cursor = await fetch_data(db, index, query, quantity, Project_List)
            result = await cursor.to_list(length=None)
            relative_path = f'file/{file_name}.xlsx'
            file_path = os.path.join(os.getcwd(), relative_path)
            # Make sure the target directory exists before saving.
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            wb = Workbook()
            if index == "asset":
                # Two worksheets, each starting with its header row.
                http_ws = wb.active
                http_ws.title = 'HTTP Data'
                other_ws = wb.create_sheet(title='Other Data')
                http_ws.append(list(_ASSET_HTTP_COLUMNS.values()))
                other_ws.append(list(_ASSET_OTHER_COLUMNS.values()))
                for doc in result:
                    # Documents without a "type" field are treated as HTTP assets.
                    if doc.get("type") == "other":
                        other_ws.append(_build_export_row(doc, _ASSET_OTHER_COLUMNS))
                    else:
                        http_ws.append(_build_export_row(doc, _ASSET_HTTP_COLUMNS))
            else:
                # An index without a column table yields a sheet with empty rows,
                # as before.
                columns = _EXPORT_COLUMNS.get(index, {})
                ws = wb.active
                ws.title = index
                ws.append(list(columns.values()))
                for doc in result:
                    ws.append(_build_export_row(doc, columns))
            wb.save(file_path)
            logger.info(f"Data saved to {file_path} successfully.")
            file_size = os.path.getsize(file_path) / (1024 * 1024)  # bytes -> MiB
            update_document = {
                "$set": {
                    "state": 1,
                    "end_time": get_now_time(),
                    "file_size": str(round(file_size, 2))
                }
            }
            await db.export.update_one({"file_name": file_name}, update_document)
        except Exception as e:
            # Top-level boundary of the background job: log and mark the task as failed.
            logger.error(str(e))
            update_document = {
                "$set": {
                    "state": 2,
                }
            }
            await db.export.update_one({"file_name": file_name}, update_document)
    logger.info("导出结束")
2024-06-16 12:30:35 +00:00
2024-06-17 13:36:34 +00:00
@router.get("/export/record")
async def get_export_record(db=Depends(get_mongo_db), _: dict = Depends(verify_token)):
    """List every export task record, ordered by ``create_time`` descending.

    Each record carries ``id`` (the document ``_id`` rendered as a string),
    ``file_name``, ``end_time``, ``create_time``, ``data_type``, ``state``
    and ``file_size``. The response has the shape
    ``{"code": 200, "data": {"list": [...]}}``.
    """
    projection = {
        "_id": 0,
        "id": {"$toString": "$_id"},
        "file_name": 1,
        "end_time": 1,
        "create_time": 1,
        "data_type": 1,
        "state": 1,
        'file_size': 1,
    }
    cursor: AsyncIOMotorCursor = db.export.find({}, projection).sort([("create_time", DESCENDING)])
    records = await cursor.to_list(length=None)
    payload = {'list': records}
    return {"code": 200, "data": payload}
@router.post("/export/delete")
async def delete_export(request_data: dict, db=Depends(get_mongo_db), _: dict = Depends(verify_token)):
    """Delete export files and their task records.

    ``request_data["ids"]`` is a list of export file names. Only entries that
    are 16-character strings accepted by ``is_valid_string`` are processed;
    anything else is skipped. For each accepted name the file
    ``file/<name>.xlsx`` is removed if it exists, and the matching
    ``db.export`` records are deleted afterwards.

    Returns code 200 when at least one record was deleted, code 404 when no
    id was usable or no record matched, and code 500 on an unexpected error.
    """
    try:
        export_ids = request_data.get("ids", []) or []
        delete_filename = []
        for export_id in export_ids:
            # The name becomes part of a filesystem path, so it must pass validation first.
            if not (isinstance(export_id, str) and len(export_id) == 16 and is_valid_string(export_id)):
                continue
            file_path = os.path.join(os.getcwd(), f'file/{export_id}.xlsx')
            try:
                os.remove(file_path)
            except FileNotFoundError:
                # No file on disk (for example the export failed before saving);
                # the record itself must still be removable.
                pass
            delete_filename.append(export_id)
        if not delete_filename:
            return {"code": 404, "message": "Export file not found"}
        result = await db.export.delete_many({"file_name": {"$in": delete_filename}})
        if result.deleted_count > 0:
            return {"code": 200, "message": "Export file deleted successfully"}
        return {"code": 404, "message": "Export file not found"}
    except Exception as e:
        logger.error(str(e))
        # Endpoint boundary: report a generic error to the client.
        return {"message": "error", "code": 500}
@router.get("/export/download")
async def download_export(file_name: str):
    """Send ``file/<file_name>.xlsx`` from the working directory as a download.

    ``file_name`` must be exactly 16 characters long and accepted by
    ``is_valid_string``. If it is not, or the file does not exist, the
    ``{"message": "file not found", "code": 500}`` payload is returned.
    """
    # Reject malformed names before they are turned into a path.
    if len(file_name) != 16 or not is_valid_string(file_name):
        return {"message": "file not found", "code": 500}

    file_path = os.path.join(os.getcwd(), f'file/{file_name}.xlsx')
    if not os.path.exists(file_path):
        return {"message": "file not found", "code": 500}

    return FileResponse(path=file_path, filename=file_name + '.xlsx')