# loader_dc.py (2.6 KB)
  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. from loader import BaseLoader
  5. __all__ = ["DataLoader"]
  6. class DataLoader(BaseLoader):
  7. def __init__(self, path: "str" = "csv/dc", win: "int" = 50):
  8. super().__init__(path)
  9. self._raw: "pd.DataFrame" = pd.DataFrame() # 28359
  10. self._split(win)
  11. def _load(self, path: "str") -> "None":
  12. for name in os.listdir(path):
  13. data = pd.read_csv(f"{path}/{name}", header=None)
  14. self._raw = pd.concat([self._raw, data])
  15. def _split(self, win: "int") -> "None":
  16. Yes, Not = [], []
  17. for idx in range(win, self._raw.shape[0]):
  18. tmp = self._raw[idx - win:idx].to_numpy()
  19. cond = (tmp[:, :-1] == 1).any()
  20. if cond:
  21. Yes.append(tmp[:, :-1].flatten())
  22. else:
  23. Not.append(tmp[:, :-1].flatten())
  24. Yes, Not = np.array(Yes), np.array(Not)
  25. np.random.shuffle(Yes)
  26. np.random.shuffle(Not)
  27. count = int(Yes.shape[0] * 0.9)
  28. self.have = Yes[:count]
  29. self.test = Yes[count:]
  30. self.none = Not
  31. def toh(self, file: "str") -> "None":
  32. pass
  33. @staticmethod
  34. def _num2str(arr: "list") -> "str":
  35. res = "\t{"
  36. for itr in arr:
  37. res += f"{itr:.6f},"
  38. return res[:-1] + "}"
  39. def toc(self, file: "str"):
  40. temp = (
  41. "#ifndef LOCAL_DATA_H\n"
  42. "#define LOCAL_DATA_H\n"
  43. "\n"
  44. "#define HaveCount {haveCount}\n"
  45. "#define TestCount {testCount}\n"
  46. "#define NoneCount {noneCount}\n"
  47. "#define Features {features}\n"
  48. "\n"
  49. "static double Have[HaveCount][Features] = {{\n"
  50. "{have}\n"
  51. "}};\n"
  52. "\n"
  53. "static double Test[TestCount][Features] = {{\n"
  54. "{test}\n"
  55. "}};\n"
  56. "\n"
  57. "static double None[NoneCount][Features] = {{\n"
  58. "{none}\n"
  59. "}};\n"
  60. "\n"
  61. "#endif"
  62. )
  63. have = ",\n".join([self._num2str(line) for line in self.have])
  64. test = ",\n".join([self._num2str(line) for line in self.test])
  65. none = ",\n".join([self._num2str(line) for line in self.none])
  66. content = temp.format(
  67. haveCount=self.have.shape[0],
  68. testCount=self.test.shape[0],
  69. noneCount=self.none.shape[0],
  70. features=self.have.shape[1],
  71. have=have, test=test, none=none
  72. )
  73. with open(file, "w", encoding="utf-8") as fp:
  74. fp.write(content)