"""Build sliding-window samples from CSV data and export them as C arrays."""

import os

import numpy as np
import pandas as pd

from loader import BaseLoader

__all__ = ["DataLoader"]


class DataLoader(BaseLoader):
    """Split raw rows into flattened sliding windows and dump them as C source.

    After construction the instance exposes three 2-D arrays, each of shape
    ``(count, features)``:

    * ``have`` -- first 90 % of the shuffled windows that contain a ``1``
    * ``test`` -- remaining 10 % of those windows
    * ``none`` -- shuffled windows that contain no ``1``
    """

    def __init__(self, path: "str" = "csv/dc", win: "int" = 50):
        """Initialise the loader and split the data into windows.

        Args:
            path: Forwarded unchanged to ``BaseLoader.__init__``.
            win: Number of consecutive rows per window.

        Raises:
            ValueError: If ``win`` is smaller than 1.
        """
        super().__init__(path)
        # NOTE(review): ``_raw`` is (re)assigned *after* the base initialiser
        # ran. If ``BaseLoader.__init__`` is what invokes ``_load``, this line
        # discards the loaded rows -- confirm against BaseLoader.
        self._raw: "pd.DataFrame" = pd.DataFrame()
        # 28359  (NOTE(review): presumably the expected row count -- confirm)
        self._split(win)

    def _load(self, path: "str") -> "None":
        """Append every CSV file found in ``path`` to ``self._raw``.

        Files are read without a header row. Names are processed in sorted
        order because ``os.listdir`` returns them in arbitrary order and the
        windows built later depend on row order.

        Args:
            path: Directory whose entries are all read with ``pd.read_csv``.
        """
        frames = [
            pd.read_csv(os.path.join(path, name), header=None)
            for name in sorted(os.listdir(path))
        ]
        if frames:
            # One concat instead of one per file avoids repeated copying.
            self._raw = pd.concat([self._raw, *frames])

    @staticmethod
    def _as_matrix(rows: "list", width: "int", dtype) -> "np.ndarray":
        """Stack ``rows`` into a ``(len(rows), width)`` array, even when empty."""
        return np.array(rows, dtype=dtype).reshape(len(rows), width)

    def _split(self, win: "int") -> "None":
        """Cut ``self._raw`` into windows and populate ``have``/``test``/``none``.

        Each window is ``win`` consecutive rows with the last column dropped,
        flattened to one dimension. A window goes to the positive class when
        any of its values equals ``1``; otherwise to the negative class. Both
        classes are shuffled in place with ``np.random.shuffle``.

        Args:
            win: Number of rows per window.

        Raises:
            ValueError: If ``win`` is smaller than 1.
        """
        if win < 1:
            raise ValueError(f"win must be >= 1, got {win}")

        # Convert once and drop the last column once, instead of per window.
        values = self._raw.to_numpy()[:, :-1]
        width = win * values.shape[1]

        positive, negative = [], []
        # NOTE(review): the upper bound excludes ``idx == len(rows)``, so the
        # final row never appears in any window -- confirm this is intended.
        for idx in range(win, values.shape[0]):
            window = values[idx - win:idx]
            target = positive if (window == 1).any() else negative
            target.append(window.flatten())

        # Force a 2-D shape so ``shape[1]`` is valid even for an empty class.
        positive = self._as_matrix(positive, width, values.dtype)
        negative = self._as_matrix(negative, width, values.dtype)
        np.random.shuffle(positive)
        np.random.shuffle(negative)

        count = int(positive.shape[0] * 0.9)
        self.have = positive[:count]
        self.test = positive[count:]
        self.none = negative

    def toh(self, file: "str") -> "None":
        """Placeholder; currently does nothing with ``file``."""
        pass

    @staticmethod
    def _num2str(arr: "list") -> "str":
        """Format ``arr`` as a tab-indented C initializer row.

        Every value is rendered with six decimals and the values are
        comma-separated inside braces, e.g. ``"\\t{1.000000,2.500000}"``.
        An empty ``arr`` yields ``"\\t{}"``.
        """
        return "\t{" + ",".join(f"{value:.6f}" for value in arr) + "}"

    def toc(self, file: "str") -> "None":
        """Write ``have``, ``test`` and ``none`` to ``file`` as C source text.

        The output is wrapped in a ``LOCAL_DATA_H`` include guard, defines the
        row counts and the feature count as macros, and declares one
        ``static double`` 2-D array per data set.

        Args:
            file: Destination path; opened for writing as UTF-8.
        """
        temp = (
            "#ifndef LOCAL_DATA_H\n"
            "#define LOCAL_DATA_H\n"
            "\n"
            "#define HaveCount {haveCount}\n"
            "#define TestCount {testCount}\n"
            "#define NoneCount {noneCount}\n"
            "#define Features {features}\n"
            "\n"
            "static double Have[HaveCount][Features] = {{\n"
            "{have}\n"
            "}};\n"
            "\n"
            "static double Test[TestCount][Features] = {{\n"
            "{test}\n"
            "}};\n"
            "\n"
            "static double None[NoneCount][Features] = {{\n"
            "{none}\n"
            "}};\n"
            "\n"
            "#endif"
        )
        have = ",\n".join(self._num2str(line) for line in self.have)
        test = ",\n".join(self._num2str(line) for line in self.test)
        none = ",\n".join(self._num2str(line) for line in self.none)
        content = temp.format(
            haveCount=self.have.shape[0],
            testCount=self.test.shape[0],
            noneCount=self.none.shape[0],
            features=self.have.shape[1],
            have=have,
            test=test,
            none=none,
        )
        with open(file, "w", encoding="utf-8") as fp:
            fp.write(content)