@router.get("/system/deduplication/config")
async def get_deduplication_config(_: dict = Depends(verify_token), db=Depends(get_mongo_db)):
    """Return the stored deduplication configuration.

    Looks up the single ``config`` document whose ``name`` is
    ``"deduplication"`` and returns it as ``{name: value}`` under ``data``.
    When no such document exists, ``data`` is an empty dict.

    Returns:
        ``{"code": 200, "data": {...}}`` on success, or
        ``{"message": "error", "code": 500}`` if the lookup fails.
    """
    try:
        # find_one resolves to one document (or None), not a cursor, so it
        # must not be iterated with ``async for``.
        document = await db.config.find_one({"name": "deduplication"})
        deduplication_data = {}
        if document is not None:
            # NOTE(review): assumes the document carries a "value" field, as
            # the original loop body did - confirm against the config schema.
            deduplication_data[document["name"]] = document["value"]
        return {
            "code": 200,
            "data": deduplication_data
        }
    except Exception as e:
        logger.error(str(e))
        # Report a generic failure to the client; details stay in the log.
        return {"message": "error", "code": 500}
async def get_node_all(redis_con):
    """Return the names of all registered nodes whose state is not ``'2'``.

    Args:
        redis_con: Async context manager yielding a Redis client that
            provides awaitable ``keys`` and ``hgetall`` methods.

    Returns:
        A list of node names taken from the ``node:<name>`` keys. An empty
        list is returned when the lookup fails (best effort).
    """
    try:
        result = []
        async with redis_con as redis:
            # Every registered node is stored as a hash under "node:<name>".
            keys = await redis.keys("node:*")
            for key in keys:
                name = key.split(":")[1]
                hash_data = await redis.hgetall(key)
                if hash_data.get('state') != '2':
                    result.append(name)

        return result
    except Exception:
        # Best effort: callers treat "no nodes" and "lookup failed" alike.
        # Only Exception is caught so task cancellation and interpreter
        # shutdown signals (BaseException) still propagate.
        return []
async def update_project(root_domain, project_id, change=False):
    """Re-evaluate project membership for every asset collection.

    For each collection listed below, the matching handler is awaited with
    the collection's searchable fields.

    Args:
        root_domain: Root domains forwarded unchanged to the handler.
        project_id: Identifier of the project being (re)assigned.
        change: When truthy, ``asset_update_project`` is used for every
            collection; otherwise ``asset_add_project`` is used.
    """
    # Collection name -> document fields that are matched against the domains.
    fields_by_collection = {
        'asset': ["url", "host", "ip"],
        'subdomain': ["host", "ip"],
        'DirScanResult': ["url"],
        'vulnerability': ["url"],
        'SubdoaminTakerResult': ["input"],
        'PageMonitoring': ["url"],
        'SensitiveResult': ["url"],
        'UrlScan': ["input"],
        'crawler': ["url"],
    }
    # The mode is fixed for the whole call, so pick the handler once.
    handler = asset_update_project if change else asset_add_project
    async for db in get_mongo_db():
        for collection_name, fields in fields_by_collection.items():
            await handler(root_domain, fields, collection_name, db, project_id)
async def asset_add_project(root_domain, db_key, doc_name, db, project_id):
    """Attach unassigned documents that mention a root domain to a project.

    Documents in ``db[doc_name]`` whose ``project`` field is the empty string
    and where at least one field in ``db_key`` contains one of the root
    domains (case-insensitive substring match) get ``project`` set to
    ``project_id``.

    Args:
        root_domain: Iterable of root-domain strings; empty entries are ignored.
        db_key: Names of the document fields to search.
        doc_name: Name of the collection to update.
        db: Database handle; ``db[doc_name].update_many`` must be awaitable.
        project_id: Value written into the ``project`` field.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # Escape each domain so "." and other metacharacters match literally.
    # An unanchored $regex already matches substrings, so no ".*" padding.
    escaped_domains = [re.escape(domain) for domain in root_domain if domain]
    if not escaped_domains:
        # An empty pattern would match every value and attach every
        # unassigned document to this project, so do nothing instead.
        logger.info(f"Updated {doc_name} 0 documents")
        return

    pattern = "|".join(escaped_domains)
    # Match: (any searched field mentions a domain) AND (no project yet).
    query = {
        "$and": [
            {
                "$or": [
                    {key: {"$regex": pattern, "$options": "i"}} for key in db_key
                ]
            },
            {"project": {"$exists": True, "$eq": ""}}
        ]
    }
    update_query = {
        "$set": {
            "project": project_id
        }
    }
    result = await db[doc_name].update_many(query, update_query)
    # Log how many documents were attached.
    logger.info(f"Updated {doc_name} {result.modified_count} documents")
in result: - url = r['host'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['subdomain'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { - "project": "", - } - } - await db['subdomain'].update_one({"_id": ObjectId(r['id'])}, update_document) - except Exception as e: - logger.error(f"add_subdomain_project error:{e}") - - -async def add_url_project(db, domain, project_id, updata=False): - try: - if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['UrlScan'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "input": 1, - "project": 1 - }))) - result = await cursor.to_list(length=None) - logger.debug(f"url project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = r['input'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['UrlScan'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { - "project": "", - } - } - await db['UrlScan'].update_one({"_id": ObjectId(r['id'])}, update_document) - except Exception as e: - logger.error(f"add_url_project error:{e}") - - -async def add_crawler_project(db, domain, project_id, updata=False): - try: - if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['crawler'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "url": 1, - "project": 1 - }))) - result = await 
cursor.to_list(length=None) - logger.debug(f"crawler project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = r['url'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['crawler'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { - "project": "", - } - } - await db['crawler'].update_one({"_id": ObjectId(r['id'])}, update_document) - except Exception as e: - logger.error(f"add_crawler_project error:{e}") - - -async def add_sensitive_project(db, domain, project_id, updata=False): - try: - if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['SensitiveResult'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "url": 1, - "project": 1 - }))) - result = await cursor.to_list(length=None) - logger.debug(f"sensitive project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = r['url'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['SensitiveResult'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { - "project": "", - } - } - await db['SensitiveResult'].update_one({"_id": ObjectId(r['id'])}, update_document) - except Exception as e: - logger.error(f"add_sensitive_project error:{e}") - - -async def add_dir_project(db, domain, project_id, updata=False): - try: 
- if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['DirScanResult'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "url": 1, - "project": 1 - }))) - result = await cursor.to_list(length=None) - logger.debug(f"dir project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = r['url'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['DirScanResult'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { - "project": "", - } - } - await db['DirScanResult'].update_one({"_id": ObjectId(r['id'])}, update_document) - except Exception as e: - logger.error(f"add_dir_project error:{e}") - - -async def add_vul_project(db, domain, project_id, updata=False): - try: - if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['vulnerability'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "url": 1, - "project": 1 - }))) - result = await cursor.to_list(length=None) - logger.debug(f"vul project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = r['url'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['vulnerability'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { 
- "project": "", - } - } - await db['vulnerability'].update_one({"_id": ObjectId(r['id'])}, update_document) - - except Exception as e: - logger.error(f"add_vul_project error:{e}") - - -async def add_PageMonitoring_project(db, domain, project_id, updata=False): - try: - if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['PageMonitoring'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "url": 1, - "project": 1 - }))) - result = await cursor.to_list(length=None) - logger.debug(f"PageMonitoring project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = r['url'] - if url != "": - targer_url = get_root_domain(url) - if targer_url in domain_root_list: - update_document = { - "$set": { - "project": project_id, - } - } - await db['PageMonitoring'].update_one({"_id": ObjectId(r['id'])}, update_document) - else: - if r["project"] != "": - update_document = { - "$set": { - "project": "", - } - } - await db['PageMonitoring'].update_one({"_id": ObjectId(r['id'])}, update_document) - except Exception as e: - logger.error(f"add_PageMonitoring_project error:{e}") - - -async def add_SubTaker_project(db, domain, project_id, updata=False): - try: - if updata: - query = {"$or": [{"project": ""}, {"project": project_id}]} - else: - query = {"project": {"$eq": ""}} - cursor: AsyncIOMotorCursor = ((db['SubdoaminTakerResult'].find(query, { - "_id": 0, "id": {"$toString": "$_id"}, - "Input": 1, - "project": 1 - }))) - result = await cursor.to_list(length=None) - logger.debug(f"SubTaker project null number is {len(result)}") - if len(result) != 0: - domain_root_list = [] - for d in domain.split("\n"): - u = get_root_domain(d) - if u not in domain_root_list: - domain_root_list.append(u) - for r in result: - url = 
async def asset_update_project(root_domain, db_key, doc_name, db, project_id):
    """Re-sync one collection after a project's root domains changed.

    First detaches (sets ``project`` to ``""``) every document of
    ``project_id`` in ``db[doc_name]`` where none of the ``db_key`` fields
    mentions any of the root domains, then calls ``asset_add_project`` to
    attach unassigned documents that match.

    Args:
        root_domain: Iterable of root-domain strings; empty entries are ignored.
        db_key: Names of the document fields to search.
        doc_name: Name of the collection to update.
        db: Database handle; ``db[doc_name].update_many`` must be awaitable.
        project_id: Identifier of the project being re-synced.
    """
    # Function-scope import keeps this block self-contained.
    import re

    domains = [domain for domain in root_domain if domain]
    if domains:
        # Escape each domain so "." and other metacharacters match literally.
        pattern = "|".join(re.escape(domain) for domain in domains)
        # Match: belongs to this project AND no searched field mentions a domain.
        query = {
            "$and": [
                {"project": project_id},
                {
                    "$nor": [
                        {key: {"$regex": pattern, "$options": "i"}} for key in db_key
                    ]
                }
            ]
        }
    else:
        # With no domains left nothing can match, so every document of the
        # project is detached. An empty regex would instead match everything
        # and keep (then re-attach) all documents.
        query = {"project": project_id}

    update_query = {
        "$set": {
            "project": ""
        }
    }
    result = await db[doc_name].update_many(query, update_query)
    # Log how many documents were detached.
    logger.info(f"Updated {doc_name} {result.modified_count} documents to null ")
    if domains:
        await asset_add_project(domains, db_key, doc_name, db, project_id)
# Two-label public suffixes under which the registrable domain has three labels.
_COMPOUND_SUFFIXES = frozenset({
    'com.cn', 'net.cn', 'org.cn', 'gov.cn', 'edu.cn', 'ac.cn', 'mil.cn',
    'co.uk', 'org.uk', 'net.uk', 'gov.uk', 'ac.uk', 'sch.uk',
    'co.jp', 'ne.jp', 'or.jp', 'go.jp', 'ac.jp', 'ad.jp',
    'com.de', 'org.de', 'net.de', 'gov.de',
    'com.ca', 'net.ca', 'org.ca', 'gov.ca',
    'com.au', 'net.au', 'org.au', 'gov.au', 'edu.au',
    'com.fr', 'net.fr', 'org.fr', 'gov.fr',
    'com.br', 'com.mx', 'com.ar', 'com.ru',
    'co.in', 'co.za',
    'co.kr', 'com.tw',
})


def _has_scheme(url):
    """Return True when *url* starts with a syntactically valid ``scheme://``."""
    scheme, separator, _rest = url.partition("://")
    return (
        bool(separator)
        and scheme[:1].isalpha()
        and all(ch.isalnum() or ch in "+-." for ch in scheme)
    )


def get_root_domain(url):
    """Return the root (registrable) domain of a URL or bare host.

    The host is extracted without port, credentials or a trailing dot and is
    lower-cased. IP addresses are returned unchanged. For hosts ending in one
    of the known two-label suffixes (e.g. ``co.uk``) the last three labels
    are returned, otherwise the last two.

    Args:
        url: A URL, ``host[:port]`` string, or IP address. A missing scheme
            is tolerated.

    Returns:
        The root domain or IP address, or ``""`` when no host can be
        extracted (empty, ``None`` or unparsable input).
    """
    candidate = (url or "").strip()
    if not candidate:
        return ""

    # urlparse only fills in the host when a scheme is present.
    if not _has_scheme(candidate):
        candidate = 'http://' + candidate

    try:
        # .hostname drops the port, userinfo and IPv6 brackets and lower-cases.
        host = urlparse(candidate).hostname
    except ValueError:
        # Raised for malformed bracketed (IPv6-style) hosts.
        return ""

    host = (host or "").rstrip(".")
    if not host:
        return ""

    # IP addresses have no root domain; return them unchanged.
    try:
        from ipaddress import ip_address
        ip_address(host)
        return host
    except ValueError:
        pass

    labels = host.split('.')
    label_count = 3 if '.'.join(labels[-2:]) in _COMPOUND_SUFFIXES else 2
    return '.'.join(labels[-label_count:])