1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
| import subprocess import os import pandas as pd import time from utils.db_config import CIC_dfm
def unsupervise_extract(mode_id: int, pcap_list: list, csv_save_folder: str):
    """Extract flow features from pcap files for the unsupervised algorithm.

    Every pcap is handed to the external extractor command ``CIC_dfm``. The
    per-pcap CSV it writes into ``csv_save_folder`` is loaded and then
    deleted; all loaded tables are merged and saved as one timestamped CSV.

    :param mode_id: 1 = training; 2 = offline test; 3 = online test.
    :param pcap_list: paths of the pcap files to process (must contain
        exactly one path for the online test, at least one otherwise).
    :param csv_save_folder: directory the resulting CSV is written to
        (created if it does not exist).
    :return: path of the merged CSV file, or 0 if the arguments are invalid.
    """
    time_now = time.strftime("%Y%m%d-%H%M%S", time.localtime())
    os.makedirs(csv_save_folder, exist_ok=True)

    # Explicit checks rather than ``assert``: asserts are stripped when Python
    # runs with -O, which would silently disable the validation.
    if mode_id not in (1, 2, 3):
        print('mode_id出错')
        return 0
    if mode_id == 3 and (not pcap_list or len(pcap_list) != 1):
        print('在线测试时pcap_list中只能包含一个pcap包')
        return 0
    if not pcap_list:
        # Without any input there is nothing to merge or save.
        print('pcap_list不能为空')
        return 0

    frames = []
    for pcap_path in pcap_list:
        pcap_name = os.path.basename(pcap_path).replace('.pcap', '')
        # NOTE(review): the command is a shell string, so paths containing
        # spaces or shell metacharacters are not safe here. Kept as-is because
        # CIC_dfm may itself be a multi-word command — confirm before changing.
        cmd = '%s %s %s' % (CIC_dfm, pcap_path, csv_save_folder)
        return_code = subprocess.run(cmd, shell=True).returncode
        if return_code != 0:
            # Surface the failure; a missing CSV will still raise below.
            print('特征提取命令返回非零状态码 %s: %s' % (return_code, cmd))
        csv_path = os.path.join(csv_save_folder, pcap_name + '.pcap_Flow.csv')
        frames.append(pd.read_csv(csv_path, skipinitialspace=True))
        os.remove(csv_path)

    # Collect first, concatenate once: a path that appears twice in pcap_list
    # no longer resets the accumulated table, and the merge is not quadratic.
    df = pd.concat(frames, ignore_index=True)

    csv_save_path = os.path.join(csv_save_folder, '%s.csv' % time_now)
    df.to_csv(csv_save_path, index=0)
    return csv_save_path
def _extract_flows(pcap_paths, csv_save_folder):
    """Run the extractor on each pcap and return all flows as one DataFrame.

    The intermediate ``<name>.pcap_Flow.csv`` written into ``csv_save_folder``
    for each pcap is deleted once it has been loaded.
    """
    frames = []
    for pcap_path in pcap_paths:
        pcap_name = os.path.basename(pcap_path).replace('.pcap', '')
        # NOTE(review): the command is a shell string, so paths containing
        # spaces or shell metacharacters are not safe here. Kept as-is because
        # CIC_dfm may itself be a multi-word command — confirm before changing.
        cmd = '%s %s %s' % (CIC_dfm, pcap_path, csv_save_folder)
        return_code = subprocess.run(cmd, shell=True).returncode
        if return_code != 0:
            # Surface the failure; a missing CSV will still raise below.
            print('特征提取命令返回非零状态码 %s: %s' % (return_code, cmd))
        csv_path = os.path.join(csv_save_folder, pcap_name + '.pcap_Flow.csv')
        frames.append(pd.read_csv(csv_path, skipinitialspace=True))
        os.remove(csv_path)
    # One concat at the end, so a repeated path cannot reset the result.
    return pd.concat(frames, ignore_index=True)


def supervise_extract(mode_id: int, pcap_list: list, norm_pcap_list: list, abnorm_pcap_list: list, csv_save_folder: str):
    """Extract flow features from pcap files for the supervised algorithm.

    In training / offline-test mode the normal and abnormal pcaps are
    extracted separately, given a ``Label`` column (0 = normal, 1 = abnormal)
    and merged with the abnormal rows first. In online-test mode the single
    pcap in ``pcap_list`` is extracted without a label being assigned.
    Intermediate per-pcap CSV files are removed in every mode.

    :param mode_id: 1 = training; 2 = offline test; 3 = online test.
    :param pcap_list: pcap paths for the online test (exactly one path).
    :param norm_pcap_list: all-normal traffic pcap paths for training and
        offline test (at least one).
    :param abnorm_pcap_list: all-abnormal traffic pcap paths for training and
        offline test (at least one).
    :param csv_save_folder: directory the resulting CSV is written to
        (created if it does not exist).
    :return: path of the merged CSV file, or 0 if the arguments are invalid.
    """
    time_now = time.strftime("%Y%m%d-%H%M%S", time.localtime())
    os.makedirs(csv_save_folder, exist_ok=True)

    # Explicit checks rather than ``assert``: asserts are stripped when Python
    # runs with -O, which would silently disable the validation.
    if mode_id not in (1, 2, 3):
        print('mode_id出错')
        return 0
    if mode_id == 3:
        if not pcap_list or len(pcap_list) != 1:
            print('在线测试时pcap_list中只能包含一个pcap包')
            return 0
    elif not norm_pcap_list or not abnorm_pcap_list:
        print('需要同时包含正常和异常pcap包')
        return 0

    if mode_id == 3:
        df = _extract_flows([pcap_list[0]], csv_save_folder)
    else:
        df_norm = _extract_flows(norm_pcap_list, csv_save_folder)
        df_norm['Label'] = 0
        df_abnorm = _extract_flows(abnorm_pcap_list, csv_save_folder)
        df_abnorm['Label'] = 1
        df = pd.concat([df_abnorm, df_norm], ignore_index=True)

    csv_save_path = os.path.join(csv_save_folder, '%s.csv' % time_now)
    df.to_csv(csv_save_path, index=0)
    return csv_save_path
if __name__ == '__main__':
    # Demo invocation of both extractors.
    # NOTE(review): the '/home/*' paths look like placeholders — replace them
    # with real locations before running.
    output_dir = '/home/*'

    # Unsupervised extraction in training mode; the returned path is unused.
    unsupervise_extract(
        1,
        ['/home/*/test1.pcap', '/home/*/test2.pcap', '/home/*/test3.pcap'],
        output_dir,
    )

    # Supervised extraction in offline-test mode; print the merged CSV path.
    merged_csv = supervise_extract(
        2,
        ['/home/*/test1.pcap'],
        ['/home/*/test1.pcap', '/home/*/test3.pcap'],
        ['/home/*/test2.pcap'],
        output_dir,
    )
    print(merged_csv)
|