diff --git a/api/configuration.py b/api/configuration.py
index b9bb0fe..801f39d 100644
--- a/api/configuration.py
+++ b/api/configuration.py
@@ -168,14 +168,60 @@ async def save_deduplication_config(request_data: dict, _: dict = Depends(verify
 async def do_asset_deduplication():
     async for db in get_mongo_db():
         result = await db.config.find_one({"name": "deduplication"})
-        print(result)
+        result.pop("_id")
+        result.pop("name")
+        result.pop("hour")
+        result.pop("flag")
+        f_g_k = {  # per-collection dedup settings: extra filters and the fields to group on
+            "DirScanResult": {
+                "filters": [],
+                "groups": ["url", "status", "msg"]
+            },
+            "PageMonitoring": {
+                "filters": [],
+                "groups": ["url"]
+            },
+            "SensitiveResult": {
+                "filters": [],
+                "groups": ["url"]
+            },
+            "SubdoaminTakerResult": {
+                "filters": [],
+                "groups": ["input", "value"]
+            },
+            "UrlScan": {
+                "filters": [],
+                "groups": ["output"]
+            },
+            "asset": {
+                "filters": [],
+                "groups": [""]
+            },
+            "crawler": {
+                "filters": [],
+                "groups": ["url", "body"]
+            },
+            "subdomain": {
+                "filters": [],
+                "groups": ["host", "type", "ip"]
+            },
+            "vulnerability": {
+                "filters": [],
+                "groups": ["url", "vulnid", "matched"]
+            }
+        }
+        for r in result:  # run dedup only for the collections enabled in the config
+            if result[r] and r in f_g_k:
+                await asset_data_dedup(db, r, f_g_k[r]["filters"], f_g_k[r]["groups"])


-async def asset_data_dedup(db, filters, groups):
+async def asset_data_dedup(db, collection_name, filters, groups):
     # db[].update_many({}, {'$set': {'process_flag': timestamp}})
     # Deduplicate http assets
+    logger.info(f"{collection_name} dedup started")
+    collection = db[collection_name]
     timestamp = datetime.datetime.now()
-    db['asset'].update_many({}, {'$set': {'process_flag': timestamp}})
+    await collection.update_many({}, {'$set': {'process_flag': timestamp}})
     filter = {
         "process_flag": timestamp
     }
@@ -183,7 +229,7 @@ async def asset_data_dedup(db, filters, groups):
         filter[f] = filters[f]
     group = {}
     for g in groups:
-        group[g] = "$" + groups[g]
+        group[g.replace(".", "_")] = "$" + g  # MongoDB group keys cannot contain dots

     pipeline = [
         {
@@ -203,5 +249,12 @@ async def asset_data_dedup(db, filters, groups):
         }
     ]
     latest_ids = []
-    for doc in db['asset'].aggregate(pipeline):
-        latest_ids.append(doc['latestId'])
\ No newline at end of file
+    async for doc in collection.aggregate(pipeline):
+        latest_ids.append(doc['latestId'])
+    await collection.update_many({'_id': {'$in': latest_ids}}, {'$set': {'latest': True}})  # mark the newest document in every duplicate group
+    await collection.delete_many({'process_flag': timestamp, 'latest': {'$ne': True}})  # remove the older duplicates
+    await collection.update_many({'process_flag': timestamp}, {'$unset': {'process_flag': "", 'latest': ""}})  # clear the helper fields
+    timestamp2 = datetime.datetime.now()
+    time_difference = timestamp2 - timestamp
+    time_difference_in_seconds = time_difference.total_seconds()
+    logger.info(f"{collection_name} dedup took {time_difference_in_seconds}s")
\ No newline at end of file
diff --git a/api/sensitive.py b/api/sensitive.py
index 1f2b47b..2d98955 100644
--- a/api/sensitive.py
+++ b/api/sensitive.py
@@ -137,8 +137,10 @@ async def delete_sensitive_rules(request_data: dict, db=Depends(get_mongo_db), _
     rule_ids = request_data.get("ids", [])

     # Convert the provided rule_ids to ObjectId
-    obj_ids = [ObjectId(rule_id) for rule_id in rule_ids]
-
+    obj_ids = []
+    for rule_id in rule_ids:
+        if rule_id:  # skip None and empty ids
+            obj_ids.append(ObjectId(rule_id))
     # Delete the SensitiveRule documents based on the provided IDs
     result = await db.SensitiveRule.delete_many({"_id": {"$in": obj_ids}})

@@ -175,6 +177,14 @@ async def get_sensitive_result_rules(request_data: dict, db=Depends(get_mongo_db
         if query == "" or query is None:
             return {"message": "Search condition parsing error", "code": 500}
         query = query[0]
+        filter = request_data.get("filter", {})
+        if filter:  # AND across filtered fields, OR across each field's values
+            query["$and"] = []
+            for f in filter:
+                tmp_or = []
+                for v in filter[f]:
+                    tmp_or.append({f: v})
+                query["$and"].append({"$or": tmp_or})
         total_count = await db['SensitiveResult'].count_documents(query)
         cursor: AsyncIOMotorCursor = ((db['SensitiveResult'].find(query, {"_id": 0,
                                                                     "id": {"$toString": "$_id"},
@@ -212,6 +222,85 @@ async def get_sensitive_result_rules(request_data: dict, db=Depends(get_mongo_db
         return {"message": "error","code":500}


+@router.post("/sensitive/result/data2")
+async def get_sensitive_result_data2(request_data: dict, db=Depends(get_mongo_db), _: dict = Depends(verify_token)):
+    try:
+        search_query = request_data.get("search", "")
+        page_index = request_data.get("pageIndex", 1)
+        page_size = request_data.get("pageSize", 10)
+        keyword = {
+            'url': 'url',
+            'sname': 'sid',
+            "body": "body",
+            "info": "match",
+            'project': 'project',
+            'md5': 'md5'
+        }
+        query = await search_to_mongodb(search_query, keyword)
+        if query == "" or query is None:
+            return {"message": "Search condition parsing error", "code": 500}
+        query = query[0]
+        total_count = await db['SensitiveResult'].count_documents(query)
+        pipeline = [
+            {
+                "$match": query  # apply the parsed search conditions
+            },
+            {
+                "$project": {
+                    "_id": 1,
+                    "url": 1,
+                    "time": 1,
+                    "sid": 1,
+                    "match": 1,
+                    "color": 1
+                }
+            },
+            {
+                "$sort": {"_id": DESCENDING}  # newest documents first
+            },
+            {
+                "$group": {
+                    "_id": "$url",
+                    "time": {"$first": "$time"},  # time of the most recent document for this url
+                    "url": {"$first": "$url"},
+                    "body_id": {"$last": {"$toString": "$_id"}},  # _id of the oldest document for this url
+                    "children": {
+                        "$push": {
+                            "id": {"$toString": "$_id"},
+                            "name": "$sid",
+                            "color": "$color",
+                            "match": "$match",
+                            "time": "$time"
+                        }
+                    }
+                }
+            },
+            {
+                "$sort": {"time": DESCENDING}  # order groups by their most recent time
+            },
+            {
+                "$skip": (page_index - 1) * page_size  # skip the URLs of earlier pages
+            },
+            {
+                "$limit": page_size  # take the URLs of the current page
+            }
+        ]
+        # Run the aggregation
+        result = await db['SensitiveResult'].aggregate(pipeline).to_list(None)
+        return {
+            "code": 200,
+            "data": {
+                'list': result,
+                'total': total_count
+            }
+        }
+    except Exception as e:
+        logger.error(str(e))
+        # Handle exceptions as needed
+        return {"message": "error","code":500}
+
+
+
 @router.post("/sensitive/result/body")
 async def get_sensitive_result_body_rules(request_data: dict, db=Depends(get_mongo_db), _: dict = Depends(verify_token)):
     try:
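
Note (not part of the diff): the aggregation pipeline that asset_data_dedup in api/configuration.py runs is unchanged context and therefore not visible above; only the `group` mapping and the `latestId` field it yields appear in the hunks. Below is a minimal sketch of a pipeline of that shape, assuming those two names; the helper find_latest_ids and its signature are hypothetical and only illustrate how the newest _id of every duplicate group could be collected with motor.

import datetime
from motor.motor_asyncio import AsyncIOMotorDatabase


async def find_latest_ids(db: AsyncIOMotorDatabase, collection_name: str, groups: list) -> list:
    """Illustrative sketch only: return the newest _id per group of duplicate documents."""
    timestamp = datetime.datetime.now()
    # In the diff, every document is first stamped with this run's process_flag via update_many.
    await db[collection_name].update_many({}, {'$set': {'process_flag': timestamp}})
    group = {g.replace(".", "_"): "$" + g for g in groups}  # same key sanitising as asset_data_dedup
    pipeline = [
        {"$match": {"process_flag": timestamp}},   # only documents stamped by this dedup run
        {"$sort": {"_id": 1}},                     # oldest first, so $last picks the newest
        {"$group": {"_id": group, "latestId": {"$last": "$_id"}}},
    ]
    return [doc["latestId"] async for doc in db[collection_name].aggregate(pipeline)]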