This commit is contained in:
Autumn.home 2024-07-03 23:36:02 +08:00
parent b31181bd50
commit 0eb0f122a2
2 changed files with 150 additions and 8 deletions

View File

@@ -168,14 +168,60 @@ async def save_deduplication_config(request_data: dict, _: dict = Depends(verify
async def do_asset_deduplication():
async for db in get_mongo_db():
result = await db.config.find_one({"name": "deduplication"})
print(result)
result.pop("_id")
result.pop("name")
result.pop("hour")
result.pop("flag")
f_g_k = {
"DirScanResult": {
"filters": [],
"groups": ["url", "status", "msg"]
},
"PageMonitoring": {
"filters": [],
"groups": ["url"]
},
"SensitiveResult": {
"filters": [],
"groups": ["url"]
},
"SubdoaminTakerResult": {
"filters": [],
"groups": ["input", "value"]
},
"UrlScan": {
"filters": [],
"groups": ["output"]
},
"asset": {
"filters": [],
"groups": [""]
},
"crawler": {
"filters": [],
"groups": ["url", "body"]
},
"subdomain": {
"filters": [],
"groups": ["host", "type", "ip"]
},
"vulnerability": {
"filters": [],
"groups": ["url", "vulnid", "matched"]
}
}
for r in result:
if result[r]:
await asset_data_dedup(db, r, f_g_k[r]["filters"], f_g_k[r]["groups"])
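The deduplication switches come from the `deduplication` config document loaded at the top of this function. A minimal sketch of what that document might look like, assuming per-collection boolean flags (only the popped keys and the flag-per-collection pattern are implied by the code above; the values are illustrative):

```python
# Hypothetical "deduplication" config document (illustrative values).
# Only the popped keys (_id, name, hour, flag) and the per-collection
# boolean switches read by the loop above are implied by the code.
config_doc = {
    "name": "deduplication",
    "hour": 3,            # scheduling field, popped before the loop
    "flag": True,         # global on/off switch, popped before the loop
    "asset": True,        # remaining keys are collection names matching f_g_k
    "subdomain": True,
    "UrlScan": False,
}
```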
async def asset_data_dedup(db, filters, groups):
async def asset_data_dedup(db, collection_name, filters, groups):
# db[].update_many({}, {'$set': {'process_flag': timestamp}})
# Deduplicate HTTP assets
logger.info(f"{collection_name} dedup started")
collection = db[collection_name]
timestamp = datetime.datetime.now()
db['asset'].update_many({}, {'$set': {'process_flag': timestamp}})
await collection.update_many({}, {'$set': {'process_flag': timestamp}})
filter = {
"process_flag": timestamp
}
@@ -183,7 +229,7 @@ async def asset_data_dedup(db, filters, groups):
filter[f] = filters[f]
group = {}
for g in groups:
group[g] = "$" + groups[g]
group[g.replace(".", "_")] = "$" + g  # "." is not allowed in $group keys
pipeline = [
{
@@ -203,5 +249,12 @@ async def asset_data_dedup(db, filters, groups):
}
]
latest_ids = []
for doc in db['asset'].aggregate(pipeline):
latest_ids.append(doc['latestId'])
async for doc in collection.aggregate(pipeline):
latest_ids.append(doc['latestId'])
await collection.update_many({'_id': {'$in': latest_ids}}, {'$set': {'latest': True}})
await collection.delete_many({'process_flag': timestamp, 'latest': {'$ne': True}})
await collection.update_many({'process_flag': timestamp}, {'$unset': {'process_flag': "", 'latest': ""}})
timestamp2 = datetime.datetime.now()
time_difference = timestamp2 - timestamp
time_difference_in_seconds = time_difference.total_seconds()
logger.info(f"{collection_name} 去重消耗时间: {time_difference_in_seconds}")

View File

@@ -137,8 +137,10 @@ async def delete_sensitive_rules(request_data: dict, db=Depends(get_mongo_db), _
rule_ids = request_data.get("ids", [])
# Convert the provided rule_ids to ObjectId
obj_ids = [ObjectId(rule_id) for rule_id in rule_ids]
obj_ids = []
for rule_id in rule_ids:
if rule_id is not None and rule_id != "":
obj_ids.append(ObjectId(rule_id))
# Delete the SensitiveRule documents based on the provided IDs
result = await db.SensitiveRule.delete_many({"_id": {"$in": obj_ids}})
@@ -175,6 +177,14 @@ async def get_sensitive_result_rules(request_data: dict, db=Depends(get_mongo_db
if query == "" or query is None:
return {"message": "Search condition parsing error", "code": 500}
query = query[0]
filter = request_data.get("filter", {})
if filter:
query["$and"] = []
for f in filter:
tmp_or = []
for v in filter[f]:
tmp_or.append({f: v})
query["$and"].append({"$or": tmp_or})
total_count = await db['SensitiveResult'].count_documents(query)
cursor: AsyncIOMotorCursor = ((db['SensitiveResult'].find(query, {"_id": 0,
"id": {"$toString": "$_id"},
@@ -212,6 +222,85 @@ async def get_sensitive_result_rules(request_data: dict, db=Depends(get_mongo_db
return {"message": "error","code":500}
@router.post("/sensitive/result/data2")
async def get_sensitive_result_rules(request_data: dict, db=Depends(get_mongo_db), _: dict = Depends(verify_token)):
try:
search_query = request_data.get("search", "")
page_index = request_data.get("pageIndex", 1)
page_size = request_data.get("pageSize", 10)
keyword = {
'url': 'url',
'sname': 'sid',
"body": "body",
"info": "match",
'project': 'project',
'md5': 'md5'
}
query = await search_to_mongodb(search_query, keyword)
if query == "" or query is None:
return {"message": "Search condition parsing error", "code": 500}
query = query[0]
total_count = await db['SensitiveResult'].count_documents(query)
pipeline = [
{
"$match": query # 增加搜索条件
},
{
"$project": {
"_id": 1,
"url": 1,
"time": 1,
"sid": 1,
"match": 1,
"color": 1
}
},
{
"$sort": {"_id": DESCENDING} # 按时间降序排序
},
{
"$group": {
"_id": "$url",
"time": {"$first": "$time"}, # 记录相同url下最早插入数据的时间
"url": {"$first": "$url"},
"body_id": {"$last": {"$toString": "$_id"}}, # 记录相同url下最早插入数据的_id
"children": {
"$push": {
"id": {"$toString": "$_id"},
"name": "$sid",
"color": "$color",
"match": "$match",
"time": "$time"
}
}
}
},
{
"$sort": {"time": DESCENDING} # 按每组的最新时间降序排序
},
{
"$skip": (page_index - 1) * page_size # 跳过前面的URL用于分页
},
{
"$limit": page_size # 获取当前页的URL
}
]
# Run the aggregation query
result = await db['SensitiveResult'].aggregate(pipeline).to_list(None)
return {
"code": 200,
"data": {
'list': result,
'total': total_count
}
}
except Exception as e:
logger.error(str(e))
# Handle exceptions as needed
return {"message": "error","code":500}
@router.post("/sensitive/result/body")
async def get_sensitive_result_body_rules(request_data: dict, db=Depends(get_mongo_db), _: dict = Depends(verify_token)):
try: