1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,93 @@ |
1 |
+import pandas as pd |
|
2 |
+import tensorflow as tf |
|
3 |
+ |
|
4 |
# Remote locations of the iris training and test CSV files.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Column names applied to the CSV files, in file order; the last column
# ('Species') is the one used as the label by default.
CSV_COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

# Human-readable species names.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]
|
10 |
+ |
|
11 |
def maybe_download():
    """Fetch the iris training and test CSV files and return their paths.

    Each file is requested through `tf.keras.utils.get_file`, using the last
    path component of its URL as the local file name.

    Returns:
        A `(train_path, test_path)` tuple holding the values returned by
        `get_file` for `TRAIN_URL` and `TEST_URL` respectively.
    """
    # The training file is requested first, then the test file.
    train_path, test_path = [
        tf.keras.utils.get_file(url.split('/')[-1], url)
        for url in (TRAIN_URL, TEST_URL)
    ]
    return train_path, test_path
|
16 |
+ |
|
17 |
def load_data(y_name='Species'):
    """Load the iris dataset as `(train_x, train_y), (test_x, test_y)`.

    Args:
        y_name: Name of the column to split off as the label.

    Returns:
        Two `(features, labels)` pairs, training first and test second.
        `features` is the DataFrame with the `y_name` column removed and
        `labels` is that removed column.
    """
    splits = []
    for csv_path in maybe_download():
        # `header=0` together with `names=` replaces the file's own first
        # row with CSV_COLUMN_NAMES.
        frame = pd.read_csv(csv_path, names=CSV_COLUMN_NAMES, header=0)
        # `pop` removes the label column from `frame` in place.
        labels = frame.pop(y_name)
        splits.append((frame, labels))

    train_split, test_split = splits
    return train_split, test_split
|
28 |
+ |
|
29 |
+ |
|
30 |
def train_input_fn(features, labels, batch_size):
    """Build the input pipeline used for training.

    Args:
        features: Mapping-like object convertible with `dict(...)`.
        labels: Labels sliced in lockstep with `features`.
        batch_size: Number of examples per batch.

    Returns:
        A `tf.data.Dataset` of `(features, labels)` batches that has been
        shuffled and repeated.
    """
    # Slice the (feature dict, labels) pair into individual examples.
    examples = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle, then repeat, then batch.
    shuffled = examples.shuffle(1000)
    repeated = shuffled.repeat()
    return repeated.batch(batch_size)
|
40 |
+ |
|
41 |
+ |
|
42 |
def eval_input_fn(features, labels, batch_size):
    """Build the input pipeline used for evaluation or prediction.

    Args:
        features: Mapping-like object convertible with `dict(...)`.
        labels: Labels sliced in lockstep with `features`, or None to build
            a features-only dataset (e.g. for prediction).
        batch_size: Number of examples per batch. Must not be None.

    Returns:
        A batched `tf.data.Dataset`. Its elements are `(features, labels)`
        when `labels` is given and `features` alone otherwise.

    Raises:
        ValueError: If `batch_size` is None.
    """
    # Validate up front with a real exception: an `assert` is stripped under
    # `python -O`, and checking first avoids building a dataset that would
    # only be thrown away.
    if batch_size is None:
        raise ValueError("batch_size must not be None")

    features = dict(features)
    # Without labels, slice the feature dict alone.
    inputs = features if labels is None else (features, labels)

    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    return dataset.batch(batch_size)
|
60 |
+ |
|
61 |
+ |
|
62 |
+# The remainder of this file contains a simple example of a csv parser, |
|
63 |
+# implemented using the `Dataset` class. |
|
64 |
+ |
|
65 |
+# `tf.decode_csv` sets the types of the outputs to match the examples given in |
|
66 |
+# the `record_defaults` argument. |
|
67 |
# Per-column defaults passed as `record_defaults` when decoding a CSV line:
# one single-element list per entry of CSV_COLUMN_NAMES.
CSV_TYPES = [
    [0.0],  # SepalLength
    [0.0],  # SepalWidth
    [0.0],  # PetalLength
    [0.0],  # PetalWidth
    [0],    # Species
]
|
68 |
+ |
|
69 |
def _parse_line(line):
    """Decode one CSV text line into a `(features, label)` pair.

    Args:
        line: A single CSV record, as passed to `tf.decode_csv`.

    Returns:
        A tuple `(features, label)`: `features` maps each name in
        CSV_COLUMN_NAMES except 'Species' to its decoded field, and `label`
        is the decoded 'Species' field.
    """
    # Decode the fields using CSV_TYPES as defaults, then key each field by
    # its column name.
    columns = tf.decode_csv(line, record_defaults=CSV_TYPES)
    named = {name: value for name, value in zip(CSV_COLUMN_NAMES, columns)}

    # Pull the label out so that only the features remain in the dict.
    species = named.pop('Species')
    return named, species
|
80 |
+ |
|
81 |
+ |
|
82 |
def csv_input_fn(csv_path, batch_size):
    """Build a training-style input pipeline straight from a CSV file.

    Args:
        csv_path: Path of the CSV file, passed to `tf.data.TextLineDataset`.
        batch_size: Number of examples per batch.

    Returns:
        A `tf.data.Dataset` of parsed `(features, label)` batches that has
        been shuffled and repeated.
    """
    # Read the file line by line, dropping the first line.
    text_lines = tf.data.TextLineDataset(csv_path)
    records = text_lines.skip(1)

    # Turn each remaining line into a (features, label) pair.
    examples = records.map(_parse_line)

    # Shuffle, then repeat, then batch.
    return examples.shuffle(1000).repeat().batch(batch_size)