Browse Source

Clears easy baseline.

Nicolas Hafner 1 year ago
parent
commit
5b7def7136
Signed by: Shinmera <[email protected]> GPG Key ID: E12B14478BE4C922
2 changed files with 8189 additions and 19 deletions
  1. 8138
    0
      task3/output.csv
  2. 51
    19
      task3/solver.py

+ 8138
- 0
task3/output.csv
File diff suppressed because it is too large
View File


+ 51
- 19
task3/solver.py View File

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# CPU was chosen deliberately for this small per-sample workload;
# re-enable the commented line to pick a GPU when one is available.
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# Labelled training data and unlabelled test data from HDF5 fixtures.
# NOTE(review): assumes train column 0 is the label and the remaining
# columns are features — confirm against the dataset description.
train = pd.read_hdf("train.h5", "train")
test = pd.read_hdf("test.h5", "test")

input_size = len(test.columns)  # number of feature columns
output_size = 5  # Fixed number of classes.
learning_rate = 0.001

# Fully-connected classifier with ELU activations, tapering 256 -> 16.
# FIX: the normalization layer was hard-coded as nn.LayerNorm(100) while
# every Linear layer used input_size; tie it to input_size so the model
# always matches the data's feature count.
net = nn.Sequential(
    nn.LayerNorm(input_size),
    nn.Linear(input_size, 256),
    nn.ELU(),
    nn.Linear(256, 128),
    nn.ELU(),
    nn.Linear(128, 64),
    nn.ELU(),
    nn.Linear(64, 32),
    nn.ELU(),
    nn.Linear(32, 16),
    nn.ELU(),
    nn.Linear(16, output_size),
).to(device)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
# MSE against one-hot targets (see y_to_floats).
loss_fun = nn.MSELoss()
 def y_to_floats(y):
25 35
     out = [0.0]*output_size
26 36
     out[y] = 1.0
27
-    return out        
37
+    return list(out)        
28 38
 
29 39
 def train_nn():
30
-    for i in range(0, len(train.index), batch_size):
40
+    for i in range(0, len(train.index)):
31 41
         # Extract data
32
-        y = map(y_to_floats, train.iloc[i:i+batch_size, 0].values)
33
-        y = torch.tensor(list(y))
34
-        x = torch.tensor(train.iloc[i:i+batch_size, 1:].values)
42
+        y = torch.tensor(y_to_floats(train.iloc[i, 0]), device=device)
43
+        x = torch.tensor(train.iloc[i, 1:].values, device=device)
44
+        y.unsqueeze_(0)
45
+        x.unsqueeze_(0)
35 46
         # Run NN
36 47
         yy = net(x)
37
-        loss = loss_fun(yy, y)
38 48
         # Optimize
49
+        loss = loss_fun(yy, y)
39 50
         optimizer.zero_grad()
40 51
         loss.backward()
41 52
         optimizer.step()
42 53
 
54
def confirm_nn():
    """Count training samples the network currently classifies correctly.

    A sample counts as correct when the argmax of the network output
    matches the argmax of the one-hot target.

    Returns:
        Number of correctly classified rows in the training set.
    """
    correct = 0
    # FIX: this is pure inference, but the original tracked gradients and
    # built an autograd graph for every sample; disable autograd instead.
    with torch.no_grad():
        for i in range(len(train.index)):
            # Extract data: column 0 is the label, the rest are features.
            y = torch.tensor(y_to_floats(train.iloc[i, 0]), device=device)
            x = torch.tensor(train.iloc[i, 1:].values, device=device)
            y.unsqueeze_(0)
            x.unsqueeze_(0)
            # Run NN
            yy = net(x)
            # Stats
            if yy.argmax().item() == y.argmax().item():
                correct += 1
    return correct
68
+
43 69
 def test_nn():
44 70
     with open("output.csv", 'w') as output:
45 71
         output.write("Id,y\n")
46 72
         for i in test.index:
47
-            x = torch.tensor(test.loc[i:i+1, :].values)
73
+            x = torch.tensor(test.loc[i, :].values, device=device)
74
+            x.unsqueeze_(0)
48 75
             yy = net(x)
49
-            for y in yy:
50
-                output.write("{},{}\n".format(i, y.argmax().item()))
51
-                break;
76
+            output.write("{},{}\n".format(i, yy.argmax().item()))
52 77
 
53 78
 if __name__ == '__main__':
79
+    print("Running on", device);
80
+    print("Training ...");
81
+    train_nn()
82
+    train_nn()
54 83
     train_nn()
84
+    print("Confirming ...");
85
+    print("Correct:", confirm_nn() / len(train.index) * 100, "%")
86
+    print("Testing ...");
55 87
     test_nn()