change to absolute coord
HiKapok committed Aug 5, 2018
1 parent 88bfb31 commit 1ace507
Showing 14 changed files with 646 additions and 700 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -4,7 +4,7 @@ This repository contains codes of the reimplementation of [SSD: Single Shot Mult

 There are already some TensorFlow-based SSD reimplementations on GitHub; the main special features of this repo include:

-- state-of-the-art performance (77.8% mAP) when training from the VGG-16 pre-trained model (SSD300-VGG16).
+- state-of-the-art performance (77.4% mAP) when training from the VGG-16 pre-trained model (SSD300-VGG16).
 - the model is trained using the TensorFlow high-level API [tf.estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator); a sketch of this pattern follows the hunk below. Although TensorFlow provides many APIs, the Estimator API is highly recommended for building scalable, high-performance models.
 - all code was written in pure TensorFlow ops (no numpy operations) to ensure performance and portability.
 - uses the SSD augmentation pipeline described in the original paper.
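The tf.estimator workflow referenced in the list above is TensorFlow 1.x's high-level training loop: a `model_fn` builds the graph for each mode, and the `Estimator` object owns sessions, checkpoints, and summaries. A minimal sketch, assuming TF 1.x; the toy classification head and paths are illustrative stand-ins, not this repo's actual SSD `model_fn`:

```python
import tensorflow as tf

def model_fn(features, labels, mode):
    # Toy classification head standing in for the real SSD network (train mode only).
    logits = tf.layers.dense(tf.layers.flatten(features), 21)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(
        loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

# The Estimator manages sessions, checkpointing, and summaries itself.
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='./logs')
# estimator.train(input_fn=some_input_fn, max_steps=120000)  # some_input_fn: your input pipeline
```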
@@ -65,15 +65,15 @@ All the codes was tested under TensorFlow 1.6, Python 3.5, Ubuntu 16.04 with CUD

 ## Results (VOC07 Metric)

-This implementation (SSD300-VGG16) yields **mAP 77.8%** on the PASCAL VOC 2007 test dataset (the original paper reports 77.2% mAP); the details are as follows:
+This implementation (SSD300-VGG16) yields **mAP 77.4%** on the PASCAL VOC 2007 test dataset (the original paper reports 77.2% mAP); the details are as follows:

 | sofa | bird | pottedplant | bus | diningtable | cow | bottle | horse | aeroplane | motorbike |
 |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|
-| 79.6 | 76.0 | 52.8 | 85.9 | 76.9 | 83.5 | 49.9 | 86.0 | 82.9 | 81.0 |
+| 78.8 | 76.3 | 53.3 | 86.2 | 77.7 | 83.0 | 52.7 | 85.5 | 82.3 | 82.2 |
 | **sheep** | **train** | **boat** | **bicycle** | **chair** | **cat** | **tvmonitor** | **person** | **car** | **dog** |
-| 81.6 | 86.2 | 71.8 | 84.2 | 60.2 | 87.8 | 76.7 | 80.5 | 85.5 | 86.2 |
+| 77.2 | 87.3 | 69.7 | 83.3 | 59.0 | 88.2 | 74.6 | 79.6 | 84.8 | 85.1 |

-You can download the trained model (VOC07+12 Train) from [GoogleDrive](https://drive.google.com/open?id=1yeYcfcOURcZ4DaElEn9C2xY1NymGzG5W) for further research.
+You can download the trained model (VOC07+12 Train) from [GoogleDrive](https://drive.google.com/open?id=1sr3khWzrXZtcS5mmkQDL00y07Rj7erW5) for further research.

 For Chinese friends, you can also download both the trained model and the pre-trained VGG-16 weights from [BaiduYun Drive](https://pan.baidu.com/s/1kRhZd4p-N46JFpVkMgU3fg), access code: **tg64**.

14 changes: 7 additions & 7 deletions dataset/convert_tfrecords.py
@@ -43,13 +43,13 @@
 | |->Annotations/
 | |->...
 '''
-tf.app.flags.DEFINE_string('dataset_directory', '/media/rs/7A0EE8880EE83EAF/Detections/PASCAL/VOC',
+tf.app.flags.DEFINE_string('dataset_directory', './dataset/VOC',
                            'All datas directory')
 tf.app.flags.DEFINE_string('train_splits', 'VOC2007, VOC2012',
                            'Comma-separated list of the training data sub-directory')
 tf.app.flags.DEFINE_string('validation_splits', 'VOC2007TEST',
                            'Comma-separated list of the validation data sub-directory')
-tf.app.flags.DEFINE_string('output_directory', '/media/rs/7A0EE8880EE83EAF/Detections/SSD/dataset/tfrecords',
+tf.app.flags.DEFINE_string('output_directory', './dataset/tfrecords',
                            'Output data directory')
 tf.app.flags.DEFINE_integer('train_shards', 16,
                             'Number of shards in training TFRecord files.')
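With this change the flag defaults become repo-relative paths instead of machine-specific absolute ones; either way, they can still be overridden on the command line. A minimal sketch of the `tf.app.flags` pattern these definitions rely on, assuming TF 1.x (the `main` body here is illustrative):

```python
import tensorflow as tf

tf.app.flags.DEFINE_string('dataset_directory', './dataset/VOC',
                           'All datas directory')
FLAGS = tf.app.flags.FLAGS

def main(_):
    # Flags are parsed from argv before main() runs, so defaults or
    # command-line overrides are both visible here, e.g.:
    #   python dataset/convert_tfrecords.py --dataset_directory=/data/VOC
    print('reading from:', FLAGS.dataset_directory)

if __name__ == '__main__':
    tf.app.run()  # parses flags, then calls main()
```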
@@ -228,7 +228,7 @@ def _find_image_bounding_boxes(directory, cur_record):
     difficult = []
     truncated = []
     for obj in root.findall('object'):
-        label = obj.find('name').text
+        label = obj.find('name').text.strip()
         labels.append(int(dataset_common.VOC_LABELS[label][0]))
         labels_text.append(label.encode('ascii'))
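The added `.strip()` presumably guards against stray whitespace inside `<name>` tags, which some VOC annotation files contain; without it, a label such as `'\n\t\tdog\n\t'` would fail the `VOC_LABELS` lookup with a `KeyError` instead of resolving to `'dog'`.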

@@ -245,10 +245,10 @@ def _find_image_bounding_boxes(directory, cur_record):
             truncated.append(0)

         bbox = obj.find('bndbox')
-        bboxes.append((float(bbox.find('ymin').text) / shape[0],
-                       float(bbox.find('xmin').text) / shape[1],
-                       float(bbox.find('ymax').text) / shape[0],
-                       float(bbox.find('xmax').text) / shape[1]
+        bboxes.append((float(bbox.find('ymin').text) - 1.,
+                       float(bbox.find('xmin').text) - 1.,
+                       float(bbox.find('ymax').text) - 1.,
+                       float(bbox.find('xmax').text) - 1.
                        ))
     return bboxes, labels, labels_text, difficult, truncated

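This hunk is the point of the commit: bounding boxes are now written to the TFRecords as absolute pixel coordinates (PASCAL VOC annotations are 1-based, so subtracting 1 makes them 0-based) rather than pre-normalized by the image shape, deferring normalization to the preprocessing stage. A sketch contrasting the two encodings; both helper names are hypothetical, not part of the repository:

```python
def voc_to_absolute(ymin, xmin, ymax, xmax):
    """Shift PASCAL VOC's 1-based pixel coordinates to 0-based (the new encoding)."""
    return (ymin - 1., xmin - 1., ymax - 1., xmax - 1.)

def absolute_to_normalized(box, height, width):
    """Scale absolute coordinates into [0, 1] (how the old records stored boxes)."""
    ymin, xmin, ymax, xmax = box
    return (ymin / height, xmin / width, ymax / height, xmax / width)

# A box spanning an entire 300x300 image, annotated as (1, 1, 300, 300) in VOC,
# becomes (0., 0., 299., 299.) absolute, or ~(0., 0., 0.997, 0.997) normalized.
```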
14 changes: 7 additions & 7 deletions dataset/dataset_common.py
@@ -221,17 +221,17 @@ def slim_get_batch(num_classes, batch_size, split_name, file_pattern, num_reader
     gbboxes_raw = tf.boolean_mask(gbboxes_raw, isdifficult_mask)

     # Pre-processing image, labels and bboxes.
-
+    tensors_to_batch = []
     if is_training:
         image, glabels, gbboxes = image_preprocessing_fn(org_image, glabels_raw, gbboxes_raw)
         gt_targets, gt_labels, gt_scores = anchor_encoder(glabels, gbboxes)
+        tensors_to_batch = [image, filename, shape, gt_targets, gt_labels, gt_scores]
     else:
-        image = image_preprocessing_fn(org_image, glabels_raw, gbboxes_raw)
-        glabels, gbboxes = glabels_raw, gbboxes_raw
-        gt_targets, gt_labels, gt_scores = anchor_encoder(glabels, gbboxes)
+        image, output_shape = image_preprocessing_fn(org_image, glabels_raw, gbboxes_raw)
+        tensors_to_batch = [image, filename, shape, output_shape]

-    return tf.train.batch([image, filename, shape, gt_targets, gt_labels, gt_scores],
-                          dynamic_pad=False,
+    return tf.train.batch(tensors_to_batch,
+                          dynamic_pad=(not is_training),
                           batch_size=batch_size,
                           allow_smaller_final_batch=(not is_training),
                           num_threads=num_preprocessing_threads,
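Two things change in `slim_get_batch`: the list of batched tensors now differs between training (image plus encoded anchor targets) and evaluation (image plus its preprocessed output shape), and evaluation batches are dynamically padded because eval-time tensors can vary in size. A minimal sketch of the queue-based batching call, assuming the TF 1.x API; names other than `tf.train.batch` and its arguments are illustrative:

```python
import tensorflow as tf  # TF 1.x queue-based input pipeline

def batch_for_mode(tensors_to_batch, batch_size, is_training, num_threads=8):
    # Training tensors have fixed static shapes, so dense batching suffices;
    # at eval time, pad variable-sized tensors per batch and keep the smaller
    # final batch so no evaluation example is silently dropped.
    return tf.train.batch(tensors_to_batch,
                          batch_size=batch_size,
                          dynamic_pad=(not is_training),
                          allow_smaller_final_batch=(not is_training),
                          num_threads=num_threads,
                          capacity=64 * batch_size)
```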
4 changes: 2 additions & 2 deletions dataset/dataset_inspect.py
@@ -31,5 +31,5 @@ def count_split_examples(split_path, file_prefix='.tfrecord'):
     return num_samples

 if __name__ == '__main__':
-    print('train:', count_split_examples('/media/rs/7A0EE8880EE83EAF/Detections/SSD/dataset/tfrecords', 'train-?????-of-?????'))
-    print('val:', count_split_examples('/media/rs/7A0EE8880EE83EAF/Detections/SSD/dataset/tfrecords', 'val-?????-of-?????'))
+    print('train:', count_split_examples('./dataset/tfrecords', 'train-?????-of-?????'))
+    print('val:', count_split_examples('./dataset/tfrecords', 'val-?????-of-?????'))
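Only the tail of `count_split_examples` appears in this hunk; below is a hedged reconstruction of what such a shard counter typically looks like under TF 1.x (the repo's actual body may differ). The pattern `train-?????-of-?????` matches shard filenames such as `train-00000-of-00016`:

```python
import os
import tensorflow as tf  # TF 1.x

def count_split_examples(split_path, file_prefix='.tfrecord'):
    """Count serialized examples across every TFRecord shard matching the pattern."""
    num_samples = 0
    for shard in tf.gfile.Glob(os.path.join(split_path, file_prefix)):
        # Iterate every serialized tf.train.Example in this shard.
        for _ in tf.python_io.tf_record_iterator(shard):
            num_samples += 1
    return num_samples
```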