franciszzj committed on
Commit
a72d826
•
1 Parent(s): 6d0a20b
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +1 -0
  2. app.py +139 -44
  3. examples/04181_00_garment.jpg +0 -0
  4. examples/14092_00_person.jpg +0 -0
  5. examples/14684_00_garment.jpg +0 -0
  6. examples/14684_00_person.jpg +0 -0
  7. leffa/inference.py +1 -6
  8. leffa/pipeline.py +17 -9
  9. {utils → leffa_utils}/densepose_for_mask.py +0 -0
  10. {utils → leffa_utils}/densepose_predictor.py +0 -0
  11. {utils → leffa_utils}/garment_agnostic_mask_predictor.py +1 -1
  12. leffa_utils/utils.py +376 -0
  13. preprocess/humanparsing/datasets/__init__.py +0 -0
  14. preprocess/humanparsing/datasets/datasets.py +201 -0
  15. preprocess/humanparsing/datasets/simple_extractor_dataset.py +89 -0
  16. preprocess/humanparsing/datasets/target_generation.py +40 -0
  17. preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py +166 -0
  18. preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py +114 -0
  19. preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py +74 -0
  20. preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml +179 -0
  21. preprocess/humanparsing/mhp_extension/detectron2/.clang-format +85 -0
  22. preprocess/humanparsing/mhp_extension/detectron2/.flake8 +9 -0
  23. preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md +5 -0
  24. preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md +49 -0
  25. preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg +1 -0
  26. preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md +5 -0
  27. preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md +36 -0
  28. preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml +9 -0
  29. preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md +31 -0
  30. preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md +26 -0
  31. preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md +45 -0
  32. preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md +9 -0
  33. preprocess/humanparsing/mhp_extension/detectron2/.gitignore +46 -0
  34. preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md +79 -0
  35. preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md +184 -0
  36. preprocess/humanparsing/mhp_extension/detectron2/LICENSE +201 -0
  37. preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md +903 -0
  38. preprocess/humanparsing/mhp_extension/detectron2/README.md +56 -0
  39. preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml +18 -0
  40. preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml +31 -0
  41. preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml +42 -0
  42. preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml +24 -0
  43. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml +17 -0
  44. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml +9 -0
  45. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml +9 -0
  46. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml +9 -0
  47. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml +6 -0
  48. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml +9 -0
  49. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml +6 -0
  50. preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml +9 -0
README.md CHANGED
@@ -17,6 +17,7 @@ license: mit
17
  Star ⭐ us if you like it!
18
 
19
  ## News
 
20
  - 18/Dec/2024, thanks to @[StartHua](https://github.com/StartHua) for integrating Leffa into ComfyUI! Here is the [repo](https://github.com/StartHua/Comfyui_leffa)!
21
  - 16/Dec/2024, the virtual try-on [model](https://huggingface.co/franciszzj/Leffa/blob/main/virtual_tryon_dc.pth) trained on DressCode is released.
22
  - 12/Dec/2024, the HuggingFace [demo](https://huggingface.co/spaces/franciszzj/Leffa) and [models](https://huggingface.co/franciszzj/Leffa) (virtual try-on model trained on VITON-HD and pose transfer model trained on DeepFashion) are released.
 
17
  Star ⭐ us if you like it!
18
 
19
  ## News
20
+ - 02/Jan/2025, Update the mask generator to improve results. Add ref unet acceleration, boosting prediction speed by 30%. Include more controls in Advanced Options to enhance user experience. Enable intermediate result output for easier development. Enjoy using it!
21
  - 18/Dec/2024, thanks to @[StartHua](https://github.com/StartHua) for integrating Leffa into ComfyUI! Here is the [repo](https://github.com/StartHua/Comfyui_leffa)!
22
  - 16/Dec/2024, the virtual try-on [model](https://huggingface.co/franciszzj/Leffa/blob/main/virtual_tryon_dc.pth) trained on DressCode is released.
23
  - 12/Dec/2024, the HuggingFace [demo](https://huggingface.co/spaces/franciszzj/Leffa) and [models](https://huggingface.co/franciszzj/Leffa) (virtual try-on model trained on VITON-HD and pose transfer model trained on DeepFashion) are released.
app.py CHANGED
@@ -4,9 +4,11 @@ from huggingface_hub import snapshot_download
4
  from leffa.transform import LeffaTransform
5
  from leffa.model import LeffaModel
6
  from leffa.inference import LeffaInference
7
- from utils.garment_agnostic_mask_predictor import AutoMasker
8
- from utils.densepose_predictor import DensePosePredictor
9
- from utils.utils import resize_and_center, list_dir
 
 
10
 
11
  import gradio as gr
12
 
@@ -26,12 +28,26 @@ class LeffaPredictor(object):
26
  weights_path="./ckpts/densepose/model_final_162be9.pkl",
27
  )
28
 
29
- vt_model = LeffaModel(
 
 
 
 
 
 
 
 
 
30
  pretrained_model_name_or_path="./ckpts/stable-diffusion-inpainting",
31
  pretrained_model="./ckpts/virtual_tryon.pth",
32
  )
33
- self.vt_inference = LeffaInference(model=vt_model)
34
- self.vt_model_type = "viton_hd"
 
 
 
 
 
35
 
36
  pt_model = LeffaModel(
37
  pretrained_model_name_or_path="./ckpts/stable-diffusion-xl-1.0-inpainting-0.1",
@@ -39,21 +55,19 @@ class LeffaPredictor(object):
39
  )
40
  self.pt_inference = LeffaInference(model=pt_model)
41
 
42
- def change_vt_model(self, vt_model_type):
43
- if vt_model_type == self.vt_model_type:
44
- return
45
- if vt_model_type == "viton_hd":
46
- pretrained_model = "./ckpts/virtual_tryon.pth"
47
- elif vt_model_type == "dress_code":
48
- pretrained_model = "./ckpts/virtual_tryon_dc.pth"
49
- vt_model = LeffaModel(
50
- pretrained_model_name_or_path="./ckpts/stable-diffusion-inpainting",
51
- pretrained_model=pretrained_model,
52
- )
53
- self.vt_inference = LeffaInference(model=vt_model)
54
- self.vt_model_type = vt_model_type
55
-
56
- def leffa_predict(self, src_image_path, ref_image_path, control_type, step=50, scale=2.5, seed=42):
57
  assert control_type in [
58
  "virtual_tryon", "pose_transfer"], "Invalid control type: {}".format(control_type)
59
  src_image = Image.open(src_image_path)
@@ -66,19 +80,39 @@ class LeffaPredictor(object):
66
  # Mask
67
  if control_type == "virtual_tryon":
68
  src_image = src_image.convert("RGB")
69
- mask = self.mask_predictor(src_image, "upper")["mask"]
 
 
 
 
 
 
 
 
 
 
 
70
  elif control_type == "pose_transfer":
71
  mask = Image.fromarray(np.ones_like(src_image_array) * 255)
72
 
73
  # DensePose
74
  if control_type == "virtual_tryon":
75
- src_image_seg_array = self.densepose_predictor.predict_seg(
76
- src_image_array)
77
- src_image_seg = Image.fromarray(src_image_seg_array)
78
- densepose = src_image_seg
 
 
 
 
 
 
 
 
 
79
  elif control_type == "pose_transfer":
80
  src_image_iuv_array = self.densepose_predictor.predict_iuv(
81
- src_image_array)
82
  src_image_iuv = Image.fromarray(src_image_iuv_array)
83
  densepose = src_image_iuv
84
 
@@ -93,23 +127,28 @@ class LeffaPredictor(object):
93
  }
94
  data = transform(data)
95
  if control_type == "virtual_tryon":
96
- inference = self.vt_inference
 
 
 
97
  elif control_type == "pose_transfer":
98
  inference = self.pt_inference
99
  output = inference(
100
  data,
 
101
  num_inference_steps=step,
102
  guidance_scale=scale,
103
- seed=seed,)
 
104
  gen_image = output["generated_image"][0]
105
  # gen_image.save("gen_image.png")
106
- return np.array(gen_image)
107
 
108
- def leffa_predict_vt(self, src_image_path, ref_image_path, step, scale, seed):
109
- return self.leffa_predict(src_image_path, ref_image_path, "virtual_tryon", step, scale, seed)
110
 
111
- def leffa_predict_pt(self, src_image_path, ref_image_path, step, scale, seed):
112
- return self.leffa_predict(src_image_path, ref_image_path, "pose_transfer", step, scale, seed)
113
 
114
 
115
  if __name__ == "__main__":
@@ -121,14 +160,10 @@ if __name__ == "__main__":
121
  garment_images = list_dir(f"{example_dir}/garment")
122
 
123
  title = "## Leffa: Learning Flow Fields in Attention for Controllable Person Image Generation"
124
- link = """[📚 Paper](https://arxiv.org/abs/2412.08486) - [🤖 Code](https://github.com/franciszzj/Leffa) - [🔥 Demo](https://huggingface.co/spaces/franciszzj/Leffa) - [🤗 Model](https://huggingface.co/franciszzj/Leffa)
125
- Star ⭐ us if you like it!
126
- """
127
  news = """## News
128
- - 18/Dec/2024, thanks to @[StartHua](https://github.com/StartHua) for integrating Leffa into ComfyUI! Here is the [repo](https://github.com/StartHua/Comfyui_leffa)!
129
- - 16/Dec/2024, the virtual try-on [model](https://huggingface.co/franciszzj/Leffa/blob/main/virtual_tryon_dc.pth) trained on DressCode is released.
130
- - 12/Dec/2024, the HuggingFace [demo](https://huggingface.co/spaces/franciszzj/Leffa) and [models](https://huggingface.co/franciszzj/Leffa) (virtual try-on model trained on VITON-HD and pose transfer model trained on DeepFashion) are released.
131
- - 11/Dec/2024, the [arXiv](https://arxiv.org/abs/2412.08486) version of the paper is released.
132
  """
133
  description = "Leffa is a unified framework for controllable person image generation that enables precise manipulation of both appearance (i.e., virtual try-on) and pose (i.e., pose transfer)."
134
  note = "Note: The models used in the demo are trained solely on academic datasets. Virtual try-on uses VITON-HD/DressCode, and pose transfer uses DeepFashion."
@@ -185,6 +220,33 @@ if __name__ == "__main__":
185
  vt_gen_button = gr.Button("Generate")
186
 
187
  with gr.Accordion("Advanced Options", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  vt_step = gr.Number(
189
  label="Inference Steps", minimum=30, maximum=100, step=1, value=50)
190
 
@@ -194,8 +256,21 @@ if __name__ == "__main__":
194
  vt_seed = gr.Number(
195
  label="Random Seed", minimum=-1, maximum=2147483647, step=1, value=42)
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  vt_gen_button.click(fn=leffa_predictor.leffa_predict_vt, inputs=[
198
- vt_src_image, vt_ref_image, vt_step, vt_scale, vt_seed], outputs=[vt_gen_image])
199
 
200
  with gr.Tab("Control Pose (Pose Transfer)"):
201
  with gr.Row():
@@ -243,6 +318,12 @@ if __name__ == "__main__":
243
  pose_transfer_gen_button = gr.Button("Generate")
244
 
245
  with gr.Accordion("Advanced Options", open=False):
 
 
 
 
 
 
246
  pt_step = gr.Number(
247
  label="Inference Steps", minimum=30, maximum=100, step=1, value=50)
248
 
@@ -252,9 +333,23 @@ if __name__ == "__main__":
252
  pt_seed = gr.Number(
253
  label="Random Seed", minimum=-1, maximum=2147483647, step=1, value=42)
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  pose_transfer_gen_button.click(fn=leffa_predictor.leffa_predict_pt, inputs=[
256
- pt_src_image, pt_ref_image, pt_step, pt_scale, pt_seed], outputs=[pt_gen_image])
257
 
258
  gr.Markdown(note)
259
 
260
- demo.launch(share=True, server_port=7860)
 
 
4
  from leffa.transform import LeffaTransform
5
  from leffa.model import LeffaModel
6
  from leffa.inference import LeffaInference
7
+ from leffa_utils.garment_agnostic_mask_predictor import AutoMasker
8
+ from leffa_utils.densepose_predictor import DensePosePredictor
9
+ from leffa_utils.utils import resize_and_center, list_dir, get_agnostic_mask_hd, get_agnostic_mask_dc
10
+ from preprocess.humanparsing.run_parsing import Parsing
11
+ from preprocess.openpose.run_openpose import OpenPose
12
 
13
  import gradio as gr
14
 
 
28
  weights_path="./ckpts/densepose/model_final_162be9.pkl",
29
  )
30
 
31
+ self.parsing = Parsing(
32
+ atr_path="./ckpts/humanparsing/parsing_atr.onnx",
33
+ lip_path="./ckpts/humanparsing/parsing_lip.onnx",
34
+ )
35
+
36
+ self.openpose = OpenPose(
37
+ body_model_path="./ckpts/openpose/body_pose_model.pth",
38
+ )
39
+
40
+ vt_model_hd = LeffaModel(
41
  pretrained_model_name_or_path="./ckpts/stable-diffusion-inpainting",
42
  pretrained_model="./ckpts/virtual_tryon.pth",
43
  )
44
+ self.vt_inference_hd = LeffaInference(model=vt_model_hd)
45
+
46
+ vt_model_dc = LeffaModel(
47
+ pretrained_model_name_or_path="./ckpts/stable-diffusion-inpainting",
48
+ pretrained_model="./ckpts/virtual_tryon_dc.pth",
49
+ )
50
+ self.vt_inference_dc = LeffaInference(model=vt_model_dc)
51
 
52
  pt_model = LeffaModel(
53
  pretrained_model_name_or_path="./ckpts/stable-diffusion-xl-1.0-inpainting-0.1",
 
55
  )
56
  self.pt_inference = LeffaInference(model=pt_model)
57
 
58
+ def leffa_predict(
59
+ self,
60
+ src_image_path,
61
+ ref_image_path,
62
+ control_type,
63
+ ref_acceleration=True,
64
+ step=50,
65
+ scale=2.5,
66
+ seed=42,
67
+ vt_model_type="viton_hd",
68
+ vt_garment_type="upper_body",
69
+ vt_repaint=False
70
+ ):
 
 
71
  assert control_type in [
72
  "virtual_tryon", "pose_transfer"], "Invalid control type: {}".format(control_type)
73
  src_image = Image.open(src_image_path)
 
80
  # Mask
81
  if control_type == "virtual_tryon":
82
  src_image = src_image.convert("RGB")
83
+ model_parse, _ = self.parsing(src_image.resize((384, 512)))
84
+ keypoints = self.openpose(src_image.resize((384, 512)))
85
+ if vt_model_type == "viton_hd":
86
+ mask = get_agnostic_mask_hd(
87
+ model_parse, keypoints, vt_garment_type)
88
+ elif vt_model_type == "dress_code":
89
+ mask = get_agnostic_mask_dc(
90
+ model_parse, keypoints, vt_garment_type)
91
+ mask = mask.resize((768, 1024))
92
+ # garment_type_hd = "upper" if vt_garment_type in [
93
+ # "upper_body", "dresses"] else "lower"
94
+ # mask = self.mask_predictor(src_image, garment_type_hd)["mask"]
95
  elif control_type == "pose_transfer":
96
  mask = Image.fromarray(np.ones_like(src_image_array) * 255)
97
 
98
  # DensePose
99
  if control_type == "virtual_tryon":
100
+ if vt_model_type == "viton_hd":
101
+ src_image_seg_array = self.densepose_predictor.predict_seg(
102
+ src_image_array)[:, :, ::-1]
103
+ src_image_seg = Image.fromarray(src_image_seg_array)
104
+ densepose = src_image_seg
105
+ elif vt_model_type == "dress_code":
106
+ src_image_iuv_array = self.densepose_predictor.predict_iuv(
107
+ src_image_array)
108
+ src_image_seg_array = src_image_iuv_array[:, :, 0:1]
109
+ src_image_seg_array = np.concatenate(
110
+ [src_image_seg_array] * 3, axis=-1)
111
+ src_image_seg = Image.fromarray(src_image_seg_array)
112
+ densepose = src_image_seg
113
  elif control_type == "pose_transfer":
114
  src_image_iuv_array = self.densepose_predictor.predict_iuv(
115
+ src_image_array)[:, :, ::-1]
116
  src_image_iuv = Image.fromarray(src_image_iuv_array)
117
  densepose = src_image_iuv
118
 
 
127
  }
128
  data = transform(data)
129
  if control_type == "virtual_tryon":
130
+ if vt_model_type == "viton_hd":
131
+ inference = self.vt_inference_hd
132
+ elif vt_model_type == "dress_code":
133
+ inference = self.vt_inference_dc
134
  elif control_type == "pose_transfer":
135
  inference = self.pt_inference
136
  output = inference(
137
  data,
138
+ ref_acceleration=ref_acceleration,
139
  num_inference_steps=step,
140
  guidance_scale=scale,
141
+ seed=seed,
142
+ repaint=vt_repaint,)
143
  gen_image = output["generated_image"][0]
144
  # gen_image.save("gen_image.png")
145
+ return np.array(gen_image), np.array(mask), np.array(densepose)
146
 
147
+ def leffa_predict_vt(self, src_image_path, ref_image_path, ref_acceleration, step, scale, seed, vt_model_type, vt_garment_type, vt_repaint):
148
+ return self.leffa_predict(src_image_path, ref_image_path, "virtual_tryon", ref_acceleration, step, scale, seed, vt_model_type, vt_garment_type, vt_repaint)
149
 
150
+ def leffa_predict_pt(self, src_image_path, ref_image_path, ref_acceleration, step, scale, seed):
151
+ return self.leffa_predict(src_image_path, ref_image_path, "pose_transfer", ref_acceleration, step, scale, seed)
152
 
153
 
154
  if __name__ == "__main__":
 
160
  garment_images = list_dir(f"{example_dir}/garment")
161
 
162
  title = "## Leffa: Learning Flow Fields in Attention for Controllable Person Image Generation"
163
+ link = "[📚 Paper](https://arxiv.org/abs/2412.08486) - [🤖 Code](https://github.com/franciszzj/Leffa) - [🔥 Demo](https://huggingface.co/spaces/franciszzj/Leffa) - [🤗 Model](https://huggingface.co/franciszzj/Leffa)"
 
 
164
  news = """## News
165
+ - 02/Jan/2025, Update the mask generator to improve results. Add ref unet acceleration, boosting prediction speed by 30%. Include more controls in Advanced Options to enhance user experience. Enable intermediate result output for easier development. Enjoy using it!
166
+ More news can be found in the [GitHub repository](https://github.com/franciszzj/Leffa).
 
 
167
  """
168
  description = "Leffa is a unified framework for controllable person image generation that enables precise manipulation of both appearance (i.e., virtual try-on) and pose (i.e., pose transfer)."
169
  note = "Note: The models used in the demo are trained solely on academic datasets. Virtual try-on uses VITON-HD/DressCode, and pose transfer uses DeepFashion."
 
220
  vt_gen_button = gr.Button("Generate")
221
 
222
  with gr.Accordion("Advanced Options", open=False):
223
+ vt_model_type = gr.Radio(
224
+ label="Model Type",
225
+ choices=[("VITON-HD (Recommended)", "viton_hd"),
226
+ ("DressCode (Experimental)", "dress_code")],
227
+ value="viton_hd",
228
+ )
229
+
230
+ vt_garment_type = gr.Radio(
231
+ label="Garment Type",
232
+ choices=[("Upper", "upper_body"),
233
+ ("Lower", "lower_body"),
234
+ ("Dress", "dresses")],
235
+ value="upper_body",
236
+ )
237
+
238
+ vt_ref_acceleration = gr.Radio(
239
+ label="Accelerate Reference UNet (may slightly reduce performance)",
240
+ choices=[("True", True), ("False", False)],
241
+ value=False,
242
+ )
243
+
244
+ vt_repaint = gr.Radio(
245
+ label="Repaint Mode",
246
+ choices=[("True", True), ("False", False)],
247
+ value=False,
248
+ )
249
+
250
  vt_step = gr.Number(
251
  label="Inference Steps", minimum=30, maximum=100, step=1, value=50)
252
 
 
256
  vt_seed = gr.Number(
257
  label="Random Seed", minimum=-1, maximum=2147483647, step=1, value=42)
258
 
259
+ with gr.Accordion("Debug", open=False):
260
+ vt_mask = gr.Image(
261
+ label="Generated Mask",
262
+ width=256,
263
+ height=256,
264
+ )
265
+
266
+ vt_densepose = gr.Image(
267
+ label="Generated DensePose",
268
+ width=256,
269
+ height=256,
270
+ )
271
+
272
  vt_gen_button.click(fn=leffa_predictor.leffa_predict_vt, inputs=[
273
+ vt_src_image, vt_ref_image, vt_ref_acceleration, vt_step, vt_scale, vt_seed, vt_model_type, vt_garment_type, vt_repaint], outputs=[vt_gen_image, vt_mask, vt_densepose])
274
 
275
  with gr.Tab("Control Pose (Pose Transfer)"):
276
  with gr.Row():
 
318
  pose_transfer_gen_button = gr.Button("Generate")
319
 
320
  with gr.Accordion("Advanced Options", open=False):
321
+ pt_ref_acceleration = gr.Radio(
322
+ label="Accelerate Reference UNet",
323
+ choices=[("True", True), ("False", False)],
324
+ value=False,
325
+ )
326
+
327
  pt_step = gr.Number(
328
  label="Inference Steps", minimum=30, maximum=100, step=1, value=50)
329
 
 
333
  pt_seed = gr.Number(
334
  label="Random Seed", minimum=-1, maximum=2147483647, step=1, value=42)
335
 
336
+ with gr.Accordion("Debug", open=False):
337
+ pt_mask = gr.Image(
338
+ label="Generated Mask",
339
+ width=256,
340
+ height=256,
341
+ )
342
+
343
+ pt_densepose = gr.Image(
344
+ label="Generated DensePose",
345
+ width=256,
346
+ height=256,
347
+ )
348
+
349
  pose_transfer_gen_button.click(fn=leffa_predictor.leffa_predict_pt, inputs=[
350
+ pt_src_image, pt_ref_image, pt_ref_acceleration, pt_step, pt_scale, pt_seed], outputs=[pt_gen_image, pt_mask, pt_densepose])
351
 
352
  gr.Markdown(note)
353
 
354
+ demo.launch(share=True, server_port=7860,
355
+ allowed_paths=["./ckpts/examples"])
examples/04181_00_garment.jpg DELETED
Binary file (45.6 kB)
 
examples/14092_00_person.jpg DELETED
Binary file (178 kB)
 
examples/14684_00_garment.jpg DELETED
Binary file (99.3 kB)
 
examples/14684_00_person.jpg DELETED
Binary file (115 kB)
 
leffa/inference.py CHANGED
@@ -16,15 +16,10 @@ class LeffaInference(object):
16
  def __init__(
17
  self,
18
  model: nn.Module,
19
- ckpt_path: Optional[str] = None,
20
  ) -> None:
21
- self.model: torch.nn.Module = model
22
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
23
 
24
- # load model
25
- if ckpt_path is not None:
26
- self.model.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
27
- self.model = self.model.to(self.device)
28
  self.model.eval()
29
 
30
  self.pipe = LeffaPipeline(model=self.model)
 
16
  def __init__(
17
  self,
18
  model: nn.Module,
 
19
  ) -> None:
 
20
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
21
 
22
+ self.model = model.to(self.device)
 
 
 
23
  self.model.eval()
24
 
25
  self.pipe = LeffaPipeline(model=self.model)
leffa/pipeline.py CHANGED
@@ -12,14 +12,12 @@ class LeffaPipeline(object):
12
  def __init__(
13
  self,
14
  model,
15
- repaint=False,
16
  device="cuda",
17
  ):
18
  self.vae = model.vae
19
  self.unet_encoder = model.unet_encoder
20
  self.unet = model.unet
21
  self.noise_scheduler = model.noise_scheduler
22
- self.repaint = repaint # used for virtual try-on
23
  self.device = device
24
 
25
  def prepare_extra_step_kwargs(self, generator, eta):
@@ -50,11 +48,13 @@ class LeffaPipeline(object):
50
  ref_image,
51
  mask,
52
  densepose,
53
- num_inference_steps: int = 50,
 
54
  do_classifier_free_guidance=True,
55
- guidance_scale: float = 2.5,
56
  generator=None,
57
  eta=1.0,
 
58
  **kwargs,
59
  ):
60
  src_image = src_image.to(device=self.vae.device, dtype=self.vae.dtype)
@@ -100,6 +100,13 @@ class LeffaPipeline(object):
100
  len(timesteps) - num_inference_steps * self.noise_scheduler.order
101
  )
102
 
 
 
 
 
 
 
 
103
  with tqdm.tqdm(total=num_inference_steps) as progress_bar:
104
  for i, t in enumerate(timesteps):
105
  # expand the latent if we are doing classifier free guidance
@@ -122,10 +129,11 @@ class LeffaPipeline(object):
122
  dim=1,
123
  )
124
 
125
- down, reference_features = self.unet_encoder(
126
- ref_image_latent, t, encoder_hidden_states=None, return_dict=False
127
- )
128
- reference_features = list(reference_features)
 
129
 
130
  # predict the noise residual
131
  noise_pred = self.unet(
@@ -166,7 +174,7 @@ class LeffaPipeline(object):
166
  # Decode the final latent
167
  gen_image = latent_to_image(latent, self.vae)
168
 
169
- if self.repaint:
170
  src_image = (src_image / 2 + 0.5).clamp(0, 1)
171
  src_image = src_image.cpu().permute(0, 2, 3, 1).float().numpy()
172
  src_image = numpy_to_pil(src_image)
 
12
  def __init__(
13
  self,
14
  model,
 
15
  device="cuda",
16
  ):
17
  self.vae = model.vae
18
  self.unet_encoder = model.unet_encoder
19
  self.unet = model.unet
20
  self.noise_scheduler = model.noise_scheduler
 
21
  self.device = device
22
 
23
  def prepare_extra_step_kwargs(self, generator, eta):
 
48
  ref_image,
49
  mask,
50
  densepose,
51
+ ref_acceleration=True,
52
+ num_inference_steps=50,
53
  do_classifier_free_guidance=True,
54
+ guidance_scale=2.5,
55
  generator=None,
56
  eta=1.0,
57
+ repaint=False, # used for virtual try-on
58
  **kwargs,
59
  ):
60
  src_image = src_image.to(device=self.vae.device, dtype=self.vae.dtype)
 
100
  len(timesteps) - num_inference_steps * self.noise_scheduler.order
101
  )
102
 
103
+ if ref_acceleration:
104
+ down, reference_features = self.unet_encoder(
105
+ ref_image_latent, timesteps[num_inference_steps//2], encoder_hidden_states=None, return_dict=False
106
+ )
107
+ reference_features = list(reference_features)
108
+
109
+
110
  with tqdm.tqdm(total=num_inference_steps) as progress_bar:
111
  for i, t in enumerate(timesteps):
112
  # expand the latent if we are doing classifier free guidance
 
129
  dim=1,
130
  )
131
 
132
+ if not ref_acceleration:
133
+ down, reference_features = self.unet_encoder(
134
+ ref_image_latent, t, encoder_hidden_states=None, return_dict=False
135
+ )
136
+ reference_features = list(reference_features)
137
 
138
  # predict the noise residual
139
  noise_pred = self.unet(
 
174
  # Decode the final latent
175
  gen_image = latent_to_image(latent, self.vae)
176
 
177
+ if repaint:
178
  src_image = (src_image / 2 + 0.5).clamp(0, 1)
179
  src_image = src_image.cpu().permute(0, 2, 3, 1).float().numpy()
180
  src_image = numpy_to_pil(src_image)
{utils → leffa_utils}/densepose_for_mask.py RENAMED
File without changes
{utils → leffa_utils}/densepose_predictor.py RENAMED
File without changes
{utils → leffa_utils}/garment_agnostic_mask_predictor.py RENAMED
@@ -8,7 +8,7 @@ from diffusers.image_processor import VaeImageProcessor
8
  from PIL import Image
9
  from SCHP import SCHP # type: ignore
10
 
11
- from utils.densepose_for_mask import DensePose # type: ignore
12
 
13
  DENSE_INDEX_MAP = {
14
  "background": [0],
 
8
  from PIL import Image
9
  from SCHP import SCHP # type: ignore
10
 
11
+ from leffa_utils.densepose_for_mask import DensePose # type: ignore
12
 
13
  DENSE_INDEX_MAP = {
14
  "background": [0],
leffa_utils/utils.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from numpy.linalg import lstsq
6
+ from PIL import Image, ImageDraw
7
+
8
+
9
+ def resize_and_center(image, target_width, target_height):
10
+ img = np.array(image)
11
+
12
+ if img.shape[-1] == 4:
13
+ img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
14
+ elif len(img.shape) == 2 or img.shape[-1] == 1:
15
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
16
+
17
+ original_height, original_width = img.shape[:2]
18
+
19
+ scale = min(target_height / original_height, target_width / original_width)
20
+ new_height = int(original_height * scale)
21
+ new_width = int(original_width * scale)
22
+
23
+ resized_img = cv2.resize(img, (new_width, new_height),
24
+ interpolation=cv2.INTER_CUBIC)
25
+
26
+ padded_img = np.ones((target_height, target_width, 3),
27
+ dtype=np.uint8) * 255
28
+
29
+ top = (target_height - new_height) // 2
30
+ left = (target_width - new_width) // 2
31
+
32
+ padded_img[top:top + new_height, left:left + new_width] = resized_img
33
+
34
+ return Image.fromarray(padded_img)
35
+
36
+
37
+ def list_dir(folder_path):
38
+ # Collect all file paths within the directory
39
+ file_paths = []
40
+ for root, _, files in os.walk(folder_path):
41
+ for file in files:
42
+ file_paths.append(os.path.join(root, file))
43
+
44
+ file_paths = sorted(file_paths)
45
+ return file_paths
46
+
47
+
48
+ label_map = {
49
+ "background": 0,
50
+ "hat": 1,
51
+ "hair": 2,
52
+ "sunglasses": 3,
53
+ "upper_clothes": 4,
54
+ "skirt": 5,
55
+ "pants": 6,
56
+ "dress": 7,
57
+ "belt": 8,
58
+ "left_shoe": 9,
59
+ "right_shoe": 10,
60
+ "head": 11,
61
+ "left_leg": 12,
62
+ "right_leg": 13,
63
+ "left_arm": 14,
64
+ "right_arm": 15,
65
+ "bag": 16,
66
+ "scarf": 17,
67
+ "neck": 18,
68
+ }
69
+
70
+
71
+ def extend_arm_mask(wrist, elbow, scale):
72
+ wrist = elbow + scale * (wrist - elbow)
73
+ return wrist
74
+
75
+
76
+ def hole_fill(img):
77
+ img = np.pad(img[1:-1, 1:-1], pad_width=1,
78
+ mode='constant', constant_values=0)
79
+ img_copy = img.copy()
80
+ mask = np.zeros((img.shape[0] + 2, img.shape[1] + 2), dtype=np.uint8)
81
+
82
+ cv2.floodFill(img, mask, (0, 0), 255)
83
+ img_inverse = cv2.bitwise_not(img)
84
+ dst = cv2.bitwise_or(img_copy, img_inverse)
85
+ return dst
86
+
87
+
88
+ def refine_mask(mask):
89
+ contours, hierarchy = cv2.findContours(mask.astype(np.uint8),
90
+ cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)
91
+ area = []
92
+ for j in range(len(contours)):
93
+ a_d = cv2.contourArea(contours[j], True)
94
+ area.append(abs(a_d))
95
+ refine_mask = np.zeros_like(mask).astype(np.uint8)
96
+ if len(area) != 0:
97
+ i = area.index(max(area))
98
+ cv2.drawContours(refine_mask, contours, i, color=255, thickness=-1)
99
+
100
+ return refine_mask
101
+
102
+
103
def get_agnostic_mask_hd(model_parse, keypoint, category, size=(384, 512)):
    """Build the garment-agnostic inpainting mask for the "hd" variant.

    Args:
        model_parse: Human-parsing label image. It is resized with
            ``Image.NEAREST`` so label ids are not interpolated
            (presumably a PIL image -- confirm against the caller).
        keypoint: Mapping whose ``"pose_keypoints_2d"`` entry is reshaped to
            ``(-1, 2)`` and indexed as 2/3/4 (right shoulder/elbow/wrist) and
            5/6/7 (left shoulder/elbow/wrist).
        category: One of ``'dresses'``, ``'upper_body'`` or ``'lower_body'``.
        size: ``(width, height)`` of the produced mask.

    Returns:
        A PIL image built from a uint8 array holding 0 or 255; 255 marks the
        pixels that are neither "fixed" nor kept, i.e. the area to repaint.

    Raises:
        NotImplementedError: If ``category`` is not one of the three above.
    """
    # The variant is fixed here, so only the 'hd' branch below is ever taken.
    model_type = "hd"
    ##############################
    width, height = size
    im_parse = model_parse.resize((width, height), Image.NEAREST)
    parse_array = np.array(im_parse)

    # Arm stroke width in pixels for a 512-px-high image (rescaled later).
    if model_type == 'hd':
        arm_width = 60
    elif model_type == 'dc':
        arm_width = 45
    else:
        raise ValueError("model_type must be \'hd\' or \'dc\'!")

    # Labels 1, 3 and 11 together form the "head" region that must be kept
    # (presumably hat / sunglasses / head -- verify against label_map).
    parse_head = (parse_array == 1).astype(np.float32) + \
        (parse_array == 3).astype(np.float32) + \
        (parse_array == 11).astype(np.float32)

    # Regions that are never repainted, whatever the garment category.
    parser_mask_fixed = (parse_array == label_map["left_shoe"]).astype(np.float32) + \
        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
        (parse_array == label_map["hat"]).astype(np.float32) + \
        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
        (parse_array == label_map["bag"]).astype(np.float32)

    # Starts as the background; non-fixed foreground is added per category.
    parser_mask_changeable = (
        parse_array == label_map["background"]).astype(np.float32)

    # Parsed arm regions (labels 14 and 15).
    arms_left = (parse_array == 14).astype(np.float32)
    arms_right = (parse_array == 15).astype(np.float32)

    # parse_mask collects the labels of the garment that is being replaced.
    if category == 'dresses':
        parse_mask = (parse_array == 7).astype(np.float32) + \
            (parse_array == 4).astype(np.float32) + \
            (parse_array == 5).astype(np.float32) + \
            (parse_array == 6).astype(np.float32)

        # Every non-background pixel that is not fixed becomes changeable.
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    elif category == 'upper_body':
        parse_mask = (parse_array == 4).astype(np.float32) + \
            (parse_array == 7).astype(np.float32)
        # Lower garments must survive an upper-body try-on.
        parser_mask_fixed_lower_cloth = (parse_array == label_map["skirt"]).astype(np.float32) + \
            (parse_array == label_map["pants"]).astype(
            np.float32)
        parser_mask_fixed += parser_mask_fixed_lower_cloth
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    elif category == 'lower_body':
        parse_mask = (parse_array == 6).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32) + \
            (parse_array == 5).astype(np.float32)
        # Upper clothes and both arms must survive a lower-body try-on.
        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
            (parse_array == 14).astype(np.float32) + \
            (parse_array == 15).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    else:
        raise NotImplementedError

    # Load pose points
    pose_data = keypoint["pose_keypoints_2d"]
    pose_data = np.array(pose_data)
    pose_data = pose_data.reshape((-1, 2))

    # Blank canvases on which the arms are drawn from the pose skeleton.
    im_arms_left = Image.new('L', (width, height))
    im_arms_right = Image.new('L', (width, height))
    arms_draw_left = ImageDraw.Draw(im_arms_left)
    arms_draw_right = ImageDraw.Draw(im_arms_right)
    if category == 'dresses' or category == 'upper_body':
        # Both coordinates are scaled by height / 512
        # (presumably keypoints come from a 512-px-high image -- TODO confirm).
        shoulder_right = np.multiply(tuple(pose_data[2][:2]), height / 512.0)
        shoulder_left = np.multiply(tuple(pose_data[5][:2]), height / 512.0)
        elbow_right = np.multiply(tuple(pose_data[3][:2]), height / 512.0)
        elbow_left = np.multiply(tuple(pose_data[6][:2]), height / 512.0)
        wrist_right = np.multiply(tuple(pose_data[4][:2]), height / 512.0)
        wrist_left = np.multiply(tuple(pose_data[7][:2]), height / 512.0)
        ARM_LINE_WIDTH = int(arm_width / 512 * height)
        # Bounding boxes of the circles drawn around each shoulder joint.
        size_left = [shoulder_left[0] - ARM_LINE_WIDTH // 2, shoulder_left[1] - ARM_LINE_WIDTH //
                     2, shoulder_left[0] + ARM_LINE_WIDTH // 2, shoulder_left[1] + ARM_LINE_WIDTH // 2]
        size_right = [shoulder_right[0] - ARM_LINE_WIDTH // 2, shoulder_right[1] - ARM_LINE_WIDTH // 2, shoulder_right[0] + ARM_LINE_WIDTH // 2,
                      shoulder_right[1] + ARM_LINE_WIDTH // 2]

        # A wrist at (<=1, <=1) is treated as unusable (presumably "not
        # detected"): fall back to the parsed arm mask instead of drawing.
        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
            im_arms_right = arms_right
        else:
            wrist_right = extend_arm_mask(wrist_right, elbow_right, 1.2)
            arms_draw_right.line(np.concatenate((shoulder_right, elbow_right, wrist_right)).astype(
                np.uint16).tolist(), 'white', ARM_LINE_WIDTH, 'curve')
            arms_draw_right.arc(size_right, 0, 360,
                                'white', ARM_LINE_WIDTH // 2)

        if wrist_left[0] <= 1. and wrist_left[1] <= 1.:
            im_arms_left = arms_left
        else:
            wrist_left = extend_arm_mask(wrist_left, elbow_left, 1.2)
            arms_draw_left.line(np.concatenate((wrist_left, elbow_left, shoulder_left)).astype(
                np.uint16).tolist(), 'white', ARM_LINE_WIDTH, 'curve')
            arms_draw_left.arc(size_left, 0, 360, 'white', ARM_LINE_WIDTH // 2)

        # Parsed arm pixels not covered by the drawn arm are kept as "hands".
        hands_left = np.logical_and(np.logical_not(im_arms_left), arms_left)
        hands_right = np.logical_and(np.logical_not(im_arms_right), arms_right)
        parser_mask_fixed += hands_left + hands_right

    parser_mask_fixed = np.logical_or(parser_mask_fixed, parse_head)
    # Grow the garment region so the mask also covers the garment border.
    parse_mask = cv2.dilate(parse_mask, np.ones(
        (5, 5), np.uint16), iterations=5)
    if category == 'dresses' or category == 'upper_body':
        # Label 18 is handled as the neck: it is repainted except where it
        # overlaps the head region.
        neck_mask = (parse_array == 18).astype(np.float32)
        neck_mask = cv2.dilate(neck_mask, np.ones(
            (5, 5), np.uint16), iterations=1)
        neck_mask = np.logical_and(neck_mask, np.logical_not(parse_head))
        parse_mask = np.logical_or(parse_mask, neck_mask)
        arm_mask = cv2.dilate(np.logical_or(im_arms_left, im_arms_right).astype(
            'float32'), np.ones((5, 5), np.uint16), iterations=4)
        parse_mask += np.logical_or(parse_mask, arm_mask)

    # From here on parse_mask means "changeable pixels that are NOT repainted".
    parse_mask = np.logical_and(
        parser_mask_changeable, np.logical_not(parse_mask))

    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
    inpaint_mask = 1 - parse_mask_total
    img = np.where(inpaint_mask, 255, 0)
    # hole_fill / refine_mask are defined elsewhere in this file; they
    # post-process the 0/255 mask (exact behaviour not visible here).
    dst = hole_fill(img.astype(np.uint8))
    dst = refine_mask(dst)
    inpaint_mask = dst / 255 * 1
    mask = Image.fromarray(inpaint_mask.astype(np.uint8) * 255)

    return mask
232
+
233
+
234
def get_agnostic_mask_dc(model_parse, keypoint, category, size=(384, 512)):
    """Build the garment-agnostic inpainting mask for the "dc" variant.

    Unlike the "hd" variant, ``model_parse`` is converted to an array as-is
    and is not resized to ``size`` here.

    Args:
        model_parse: Human-parsing label image (anything ``np.array`` accepts).
        keypoint: Mapping whose ``"pose_keypoints_2d"`` entry is reshaped to
            ``(-1, 2)`` and indexed as 2/3/4 (right shoulder/elbow/wrist) and
            5/6/7 (left shoulder/elbow/wrist).
        category: One of ``'dresses'``, ``'upper_body'`` or ``'lower_body'``.
        size: ``(width, height)`` used for the arm canvas and for choosing
            the dilation kernel sizes.

    Returns:
        A PIL image built from a uint8 array holding 0 or 255; 255 marks the
        area to repaint.

    Raises:
        NotImplementedError: If ``category`` is not one of the three above
            (previously this surfaced later as an unbound-variable error).
    """
    parse_array = np.array(model_parse)
    pose_data = keypoint["pose_keypoints_2d"]
    pose_data = np.array(pose_data)
    pose_data = pose_data.reshape((-1, 2))

    # Labels 1, 2, 3, 11 and 18 together form the head/neck region
    # (presumably hat / hair / sunglasses / head / neck -- verify against
    # label_map).
    parse_head = (parse_array == 1).astype(np.float32) + \
        (parse_array == 2).astype(np.float32) + \
        (parse_array == 3).astype(np.float32) + \
        (parse_array == 11).astype(np.float32) + \
        (parse_array == 18).astype(np.float32)

    # Regions that are never repainted, whatever the garment category.
    parser_mask_fixed = (parse_array == label_map["hair"]).astype(np.float32) + \
        (parse_array == label_map["left_shoe"]).astype(np.float32) + \
        (parse_array == label_map["right_shoe"]).astype(np.float32) + \
        (parse_array == label_map["hat"]).astype(np.float32) + \
        (parse_array == label_map["sunglasses"]).astype(np.float32) + \
        (parse_array == label_map["scarf"]).astype(np.float32) + \
        (parse_array == label_map["bag"]).astype(np.float32)

    # Starts as the background; non-fixed foreground is added per category.
    parser_mask_changeable = (
        parse_array == label_map["background"]).astype(np.float32)

    # Parsed arm regions (labels 14 and 15).
    arms = (parse_array == 14).astype(np.float32) + \
        (parse_array == 15).astype(np.float32)

    # parse_mask collects the labels of the garment that is being replaced.
    if category == 'dresses':
        parse_mask = (parse_array == 7).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))

    elif category == 'upper_body':
        parse_mask = (parse_array == 4).astype(np.float32)

        # Lower garments must survive an upper-body try-on.
        parser_mask_fixed += (parse_array == label_map["skirt"]).astype(np.float32) + \
            (parse_array == label_map["pants"]).astype(np.float32)

        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    elif category == 'lower_body':
        parse_mask = (parse_array == 6).astype(np.float32) + \
            (parse_array == 12).astype(np.float32) + \
            (parse_array == 13).astype(np.float32)

        # Upper clothes and both arms must survive a lower-body try-on.
        parser_mask_fixed += (parse_array == label_map["upper_clothes"]).astype(np.float32) + \
            (parse_array == 14).astype(np.float32) + \
            (parse_array == 15).astype(np.float32)
        parser_mask_changeable += np.logical_and(
            parse_array, np.logical_not(parser_mask_fixed))
    else:
        # Same contract as get_agnostic_mask_hd: fail fast and explicitly.
        raise NotImplementedError(
            "unsupported category: {!r}".format(category))

    parse_head = torch.from_numpy(parse_head)  # [0,1]
    parse_mask = torch.from_numpy(parse_mask)  # [0,1]
    parser_mask_fixed = torch.from_numpy(parser_mask_fixed)
    parser_mask_changeable = torch.from_numpy(parser_mask_changeable)

    # dilation
    parse_mask = parse_mask.cpu().numpy()

    width = size[0]
    height = size[1]

    # Blank canvas on which the arms are drawn from the pose skeleton.
    im_arms = Image.new('L', (width, height))
    arms_draw = ImageDraw.Draw(im_arms)
    if category == 'dresses' or category == 'upper_body':
        # Both coordinates are scaled by height / 512
        # (presumably keypoints come from a 512-px-high image -- TODO confirm).
        shoulder_right = tuple(np.multiply(pose_data[2, :2], height / 512.0))
        shoulder_left = tuple(np.multiply(pose_data[5, :2], height / 512.0))
        elbow_right = tuple(np.multiply(pose_data[3, :2], height / 512.0))
        elbow_left = tuple(np.multiply(pose_data[6, :2], height / 512.0))
        wrist_right = tuple(np.multiply(pose_data[4, :2], height / 512.0))
        wrist_left = tuple(np.multiply(pose_data[7, :2], height / 512.0))
        # A joint at (<=1, <=1) is treated as unusable and is left out of the
        # poly-line that runs wrist -> elbow -> shoulder -> shoulder -> ...
        if wrist_right[0] <= 1. and wrist_right[1] <= 1.:
            if elbow_right[0] <= 1. and elbow_right[1] <= 1.:
                arms_draw.line(
                    [wrist_left, elbow_left, shoulder_left, shoulder_right], 'white', 30, 'curve')
            else:
                arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right], 'white', 30,
                               'curve')
        elif wrist_left[0] <= 1. and wrist_left[1] <= 1.:
            if elbow_left[0] <= 1. and elbow_left[1] <= 1.:
                arms_draw.line([shoulder_left, shoulder_right,
                                elbow_right, wrist_right], 'white', 30, 'curve')
            else:
                arms_draw.line([elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white', 30,
                               'curve')
        else:
            arms_draw.line([wrist_left, elbow_left, shoulder_left, shoulder_right, elbow_right, wrist_right], 'white',
                           30, 'curve')

        # Thicken the drawn arms; the kernel grows with the output height.
        if height > 512:
            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
                (10, 10), np.uint16), iterations=5)
        elif height > 256:
            im_arms = cv2.dilate(np.float32(im_arms), np.ones(
                (5, 5), np.uint16), iterations=5)
        # Parsed arm pixels not covered by the drawn arm are kept as "hands".
        hands = np.logical_and(np.logical_not(im_arms), arms)
        parse_mask += im_arms
        parser_mask_fixed += hands

    # delete neck
    parse_head_2 = torch.clone(parse_head)
    if category == 'dresses' or category == 'upper_body':
        # Fit the line through both shoulders and clear the head mask below
        # it (offset upwards by 20 px at 512-px height), column by column.
        points = []
        points.append(np.multiply(pose_data[2, :2], height / 512.0))
        points.append(np.multiply(pose_data[5, :2], height / 512.0))
        x_coords, y_coords = zip(*points)
        A = np.vstack([x_coords, np.ones(len(x_coords))]).T
        m, c = lstsq(A, y_coords, rcond=None)[0]
        for i in range(parse_array.shape[1]):
            y = i * m + c
            parse_head_2[int(y - 20 * (height / 512.0)):, i] = 0

    # The part of the head above the shoulder line stays fixed; the part
    # below it (parse_head minus parse_head_2) is repainted.
    parser_mask_fixed = np.logical_or(
        parser_mask_fixed, np.array(parse_head_2, dtype=np.uint16))
    parse_mask += np.logical_or(parse_mask, np.logical_and(np.array(parse_head, dtype=np.uint16),
                                                           np.logical_not(np.array(parse_head_2, dtype=np.uint16))))

    # Grow the garment region; the kernel grows with the output height.
    if height > 512:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (20, 20), np.uint16), iterations=5)
    elif height > 256:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (10, 10), np.uint16), iterations=5)
    else:
        parse_mask = cv2.dilate(parse_mask, np.ones(
            (5, 5), np.uint16), iterations=5)
    # From here on parse_mask means "changeable pixels that are NOT repainted".
    parse_mask = np.logical_and(
        parser_mask_changeable, np.logical_not(parse_mask))
    parse_mask_total = np.logical_or(parse_mask, parser_mask_fixed)
    inpaint_mask = 1 - parse_mask_total
    img = np.where(inpaint_mask, 255, 0)
    # hole_fill is defined elsewhere in this file.
    img = hole_fill(img.astype(np.uint8))
    inpaint_mask = img / 255 * 1
    mask = Image.fromarray(inpaint_mask.astype(np.uint8) * 255)
    return mask
preprocess/humanparsing/datasets/__init__.py ADDED
File without changes
preprocess/humanparsing/datasets/datasets.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- encoding: utf-8 -*-
3
+
4
+ """
5
+ @Author : Peike Li
6
+ @Contact : [email protected]
7
+ @File : datasets.py
8
+ @Time : 8/4/19 3:35 PM
9
+ @Desc :
10
+ @License : This source code is licensed under the license found in the
11
+ LICENSE file in the root directory of this source tree.
12
+ """
13
+
14
+ import os
15
+ import numpy as np
16
+ import random
17
+ import torch
18
+ import cv2
19
+ from torch.utils import data
20
+ from utils.transforms import get_affine_transform
21
+
22
+
23
class LIPDataSet(data.Dataset):
    """Images and parsing maps listed in ``<root>/<dataset>_id.txt``.

    Images are read from ``<root>/<dataset>_images/<id>.jpg`` and label maps
    from ``<root>/<dataset>_segmentations/<id>.png``. For the ``'train'`` and
    ``'trainval'`` splits a random scale, rotation and horizontal flip are
    applied before the affine crop to ``crop_size``.
    """

    def __init__(self, root, dataset, crop_size=(473, 473), scale_factor=0.25,
                 rotation_factor=30, ignore_label=255, transform=None):
        """Read the sample id list and store the augmentation settings.

        Args:
            root: Dataset root directory.
            dataset: Split name; used as the prefix of the id list and of the
                image/segmentation folders.
            crop_size: Two-element sequence; ``crop_size[1] / crop_size[0]``
                is used as the target aspect ratio.
            scale_factor: Spread (and clip range) of the random scale factor.
            rotation_factor: Spread of the random rotation; clipped to twice
                this value.
            ignore_label: Stored on the instance; not used by this class.
            transform: Optional callable applied to the cropped image.
        """
        self.root = root
        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
        self.crop_size = np.asarray(crop_size)
        self.ignore_label = ignore_label
        self.scale_factor = scale_factor
        self.rotation_factor = rotation_factor
        self.flip_prob = 0.5
        self.transform = transform
        self.dataset = dataset

        list_path = os.path.join(self.root, self.dataset + '_id.txt')
        # Context manager so the list file is closed deterministically.
        with open(list_path) as list_file:
            train_list = [i_id.strip() for i_id in list_file]

        self.train_list = train_list
        self.number_samples = len(self.train_list)

    def __len__(self):
        """Return the number of listed samples."""
        return self.number_samples

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h, ...]`` box to ``(center, scale)``."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Return the box centre and its size grown to the crop aspect ratio.

        The shorter side is enlarged (never shrunk) so that ``w / h`` equals
        ``self.aspect_ratio``.
        """
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        """Load, augment and crop one sample.

        Returns:
            ``(image, meta)`` for the ``'val'`` and ``'test'`` splits,
            otherwise ``(image, label_parsing, meta)`` where
            ``label_parsing`` is a tensor of the warped label map (pixels
            outside the source image are filled with 255).
        """
        train_item = self.train_list[index]

        im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg')
        parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png')

        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # np.long is missing on NumPy 1.24-1.26; int64 is the portable dtype.
        parsing_anno = np.zeros((h, w), dtype=np.int64)

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0

        if self.dataset != 'test':
            # Get pose annotation
            parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE)
            if self.dataset == 'train' or self.dataset == 'trainval':
                sf = self.scale_factor
                rf = self.rotation_factor
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
                r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0

                if random.random() <= self.flip_prob:
                    im = im[:, ::-1, :]
                    parsing_anno = parsing_anno[:, ::-1]
                    person_center[0] = im.shape[1] - person_center[0] - 1
                    # After mirroring, swap the paired left/right label ids.
                    right_idx = [15, 17, 19]
                    left_idx = [14, 16, 18]
                    for i in range(0, 3):
                        right_pos = np.where(parsing_anno == right_idx[i])
                        left_pos = np.where(parsing_anno == left_idx[i])
                        parsing_anno[right_pos[0], right_pos[1]] = left_idx[i]
                        parsing_anno[left_pos[0], left_pos[1]] = right_idx[i]

        trans = get_affine_transform(person_center, s, r, self.crop_size)
        image = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        if self.transform:
            image = self.transform(image)

        meta = {
            'name': train_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        if self.dataset == 'val' or self.dataset == 'test':
            return image, meta

        # Nearest-neighbour keeps label ids intact while warping.
        label_parsing = cv2.warpAffine(
            parsing_anno,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255))

        label_parsing = torch.from_numpy(label_parsing)

        return image, label_parsing, meta
130
+
131
+
132
class LIPDataValSet(data.Dataset):
    """Evaluation images listed in ``<root>/<dataset>_id.txt``.

    Images are read from ``<root>/<dataset>_images/<id>.jpg`` and cropped to
    ``crop_size`` without augmentation. With ``flip=True`` each item is a
    stack of the image and its horizontally flipped copy.
    """

    def __init__(self, root, dataset='val', crop_size=(473, 473), transform=None, flip=False):
        """Read the sample id list.

        Args:
            root: Dataset root directory.
            dataset: Split name; used as the prefix of the id list and of the
                image folder.
            crop_size: Two-element sequence; ``crop_size[1] / crop_size[0]``
                is used as the target aspect ratio.
            transform: Optional callable applied to the cropped image. It
                must return a tensor when ``flip`` is True.
            flip: Whether to also return the horizontally flipped image.
        """
        self.root = root
        self.transform = transform
        self.flip = flip
        self.dataset = dataset
        self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0]
        self.crop_size = np.asarray(crop_size)

        list_path = os.path.join(self.root, self.dataset + '_id.txt')
        # Context manager so the list file is closed deterministically.
        with open(list_path) as list_file:
            val_list = [i_id.strip() for i_id in list_file]

        self.val_list = val_list
        self.number_samples = len(self.val_list)

    def __len__(self):
        """Return the number of listed samples."""
        return len(self.val_list)

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h, ...]`` box to ``(center, scale)``."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Return the box centre and its size grown to the crop aspect ratio."""
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0, h * 1.0], dtype=np.float32)

        return center, scale

    def __getitem__(self, index):
        """Load and crop one image; return ``(image_or_stack, meta)``."""
        val_item = self.val_list[index]
        # Load the image to evaluate
        im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg')
        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        h, w, _ = im.shape
        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.crop_size)
        image = cv2.warpAffine(
            im,
            trans,
            (int(self.crop_size[1]), int(self.crop_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))
        # transform defaults to None, so it must be optional here as well.
        if self.transform:
            image = self.transform(image)
        if self.flip:
            # Only build the mirrored copy when it is actually returned.
            batch_input_im = torch.stack([image, image.flip(dims=[-1])])
        else:
            batch_input_im = image

        meta = {
            'name': val_item,
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return batch_input_im, meta
preprocess/humanparsing/datasets/simple_extractor_dataset.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- encoding: utf-8 -*-
3
+
4
+ """
5
+ @Author : Peike Li
6
+ @Contact : [email protected]
7
+ @File : dataset.py
8
+ @Time : 8/30/19 9:12 PM
9
+ @Desc : Dataset Definition
10
+ @License : This source code is licensed under the license found in the
11
+ LICENSE file in the root directory of this source tree.
12
+ """
13
+
14
+ import os
15
+ import pdb
16
+
17
+ import cv2
18
+ import numpy as np
19
+ from PIL import Image
20
+ from torch.utils import data
21
+ from utils.transforms import get_affine_transform
22
+
23
+
24
class SimpleFolderDataset(data.Dataset):
    """Inference dataset over a PIL image, a single file or a folder.

    ``root`` may be a ``PIL.Image.Image`` (one in-memory sample), the path of
    one image file, or a directory whose entries are all treated as images.
    """

    def __init__(self, root, input_size=[512, 512], transform=None):
        """Resolve ``root`` into the list of samples.

        Args:
            root: PIL image, image file path, or directory path.
            input_size: Two-element sequence; ``input_size[1] /
                input_size[0]`` is used as the target aspect ratio.
            transform: Optional callable applied to the cropped image.
        """
        self.root = root
        self.transform = transform
        self.aspect_ratio = input_size[1] * 1.0 / input_size[0]
        self.input_size = np.asarray(input_size)
        self.is_pil_image = False
        if isinstance(root, Image.Image):
            self.file_list = [root]
            self.is_pil_image = True
        elif os.path.isfile(root):
            self.file_list = [os.path.basename(root)]
            self.root = os.path.dirname(root)
        else:
            self.file_list = os.listdir(self.root)

    def __len__(self):
        """Return the number of samples."""
        return len(self.file_list)

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h, ...]`` box to ``(center, scale)``."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Return the box centre and its size grown to the input aspect ratio."""
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w, h], dtype=np.float32)
        return center, scale

    def __getitem__(self, index):
        """Load and crop one image; return ``(image, meta)``."""
        if self.is_pil_image:
            # Force three channels first so grayscale/palette images do not
            # break the channel swap, then reorder RGB -> BGR to match what
            # cv2.imread produces in the file branch.
            rgb = self.file_list[index].convert('RGB')
            img = np.asarray(rgb)[:, :, [2, 1, 0]]
        else:
            img_name = self.file_list[index]
            img_path = os.path.join(self.root, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        h, w, _ = img.shape

        # Get person center and scale
        person_center, s = self._box2cs([0, 0, w - 1, h - 1])
        r = 0
        trans = get_affine_transform(person_center, s, r, self.input_size)
        image = cv2.warpAffine(
            img,
            trans,
            (int(self.input_size[1]), int(self.input_size[0])),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0))

        # transform defaults to None, so it must be optional here.
        if self.transform:
            image = self.transform(image)
        meta = {
            'center': person_center,
            'height': h,
            'width': w,
            'scale': s,
            'rotation': r
        }

        return image, meta
preprocess/humanparsing/datasets/target_generation.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.nn import functional as F
3
+
4
+
5
def generate_edge_tensor(label, edge_width=3):
    """Return a binary edge map marking boundaries between label regions.

    A pixel is an edge when it differs from its vertical, horizontal or
    diagonal neighbour and neither of the two pixels carries the ignore
    value 255. The raw edges are then thickened with an
    ``edge_width`` x ``edge_width`` box filter.

    The computation runs on ``label``'s own device, so CPU tensors work too
    (CUDA labels produce a CUDA result).

    Args:
        label: Label tensor of shape ``(h, w)`` or ``(n, h, w)``.
        edge_width: Side of the square dilation kernel. The padding is
            ``edge_width // 2`` so odd widths keep the label's spatial size.

    Returns:
        Float tensor of zeros and ones. Note that ``squeeze()`` is applied,
        so a batch of one comes back as ``(h, w)``.
    """
    label = label.float()
    if len(label.shape) == 2:
        label = label.unsqueeze(0)
    n, h, w = label.shape
    device = label.device
    edge = torch.zeros(label.shape, dtype=torch.float, device=device)

    # Each block writes through a view of ``edge``, so the masked
    # assignments below update ``edge`` in place.

    # vertical neighbours (row i vs. row i - 1)
    edge_right = edge[:, 1:h, :]
    edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255)
               & (label[:, :h - 1, :] != 255)] = 1

    # horizontal neighbours (column j vs. column j + 1)
    edge_up = edge[:, :, :w - 1]
    edge_up[(label[:, :, :w - 1] != label[:, :, 1:w])
            & (label[:, :, :w - 1] != 255)
            & (label[:, :, 1:w] != 255)] = 1

    # diagonal neighbours (i, j) vs. (i + 1, j + 1)
    edge_upright = edge[:, :h - 1, :w - 1]
    edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w])
                 & (label[:, :h - 1, :w - 1] != 255)
                 & (label[:, 1:h, 1:w] != 255)] = 1

    # anti-diagonal neighbours (i, j + 1) vs. (i + 1, j)
    edge_bottomright = edge[:, :h - 1, 1:w]
    edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1])
                     & (label[:, :h - 1, 1:w] != 255)
                     & (label[:, 1:h, :w - 1] != 255)] = 1

    kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float, device=device)
    with torch.no_grad():
        edge = edge.unsqueeze(1)
        # Box-filter then binarise == dilation of the raw edge map.
        edge = F.conv2d(edge, kernel, stride=1, padding=edge_width // 2)
    edge[edge != 0] = 1
    edge = edge.squeeze()
    return edge
preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import json
4
+ import os
5
+ from PIL import Image
6
+ import numpy as np
7
+
8
+ import pycococreatortools
9
+
10
+
11
def get_arguments():
    """Parse the command-line options of the mask-to-COCO converter.

    Returns:
        ``argparse.Namespace`` with ``dataset``, ``json_save_dir``,
        ``use_val`` and the train/val image and annotation directories.
    """

    def _str2bool(value):
        # argparse's ``type=bool`` turns every non-empty string (including
        # "False") into True, so the flag text is parsed explicitly instead.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation")
    parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)")
    parser.add_argument("--json_save_dir", type=str, default='../data/msrcnn_finetune_annotations',
                        help="path to save coco-style annotation json file")
    parser.add_argument("--use_val", type=_str2bool, default=False,
                        help="use train+val set for finetuning or not")
    parser.add_argument("--train_img_dir", type=str, default='../data/instance-level_human_parsing/Training/Images',
                        help="train image path")
    parser.add_argument("--train_anno_dir", type=str,
                        default='../data/instance-level_human_parsing/Training/Human_ids',
                        help="train human mask path")
    parser.add_argument("--val_img_dir", type=str, default='../data/instance-level_human_parsing/Validation/Images',
                        help="val image path")
    parser.add_argument("--val_anno_dir", type=str,
                        default='../data/instance-level_human_parsing/Validation/Human_ids',
                        help="val human mask path")
    return parser.parse_args()
29
+
30
+
31
def _append_split_annotations(coco_output, img_dir, anno_dir, image_id, segmentation_id):
    """Append image and person annotations of one folder to ``coco_output``.

    Returns the next free ``(image_id, segmentation_id)`` pair so that a
    second folder can continue the numbering.
    """
    for image_name in os.listdir(img_dir):
        # Only the size is needed; close the file handle right away.
        with Image.open(os.path.join(img_dir, image_name)) as image:
            image_size = image.size
        image_info = pycococreatortools.create_image_info(
            image_id, image_name, image_size
        )
        coco_output["images"].append(image_info)

        human_mask_name = os.path.splitext(image_name)[0] + '.png'
        with Image.open(os.path.join(anno_dir, human_mask_name)) as mask_image:
            human_mask = np.asarray(mask_image)
        human_gt_labels = np.unique(human_mask)

        # NOTE(review): this assumes the mask holds background plus the
        # contiguous ids 1..k -- confirm for datasets other than CIHP.
        for i in range(1, len(human_gt_labels)):
            category_info = {'id': 1, 'is_crowd': 0}
            binary_mask = np.uint8(human_mask == i)
            annotation_info = pycococreatortools.create_annotation_info(
                segmentation_id, image_id, category_info, binary_mask,
                image_size, tolerance=10
            )
            if annotation_info is not None:
                coco_output["annotations"].append(annotation_info)

            segmentation_id += 1
        image_id += 1
    return image_id, segmentation_id


def main(args):
    """Convert human instance masks into COCO-style annotation JSON files.

    Without ``args.use_val`` a single ``<name>_train.json`` is written.
    With it, ``<name>_trainval.json`` (train followed by val, ids continuing)
    and ``<name>_val.json`` (val only, ids restarting at 1) are written.

    Args:
        args: Namespace as produced by ``get_arguments``. ``<name>`` is
            ``args.split_name`` when present, otherwise ``args.dataset``
            (the parser only defines ``--dataset``).
    """
    split_name = getattr(args, "split_name", None)
    if split_name is None:
        split_name = args.dataset

    INFO = {
        "description": split_name + " Dataset",
        "url": "",
        "version": "",
        "year": 2019,
        "contributor": "xyq",
        "date_created": datetime.datetime.utcnow().isoformat(' ')
    }

    LICENSES = [
        {
            "id": 1,
            "name": "",
            "url": ""
        }
    ]

    CATEGORIES = [
        {
            'id': 1,
            'name': 'person',
            'supercategory': 'person',
        },
    ]

    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    image_id, segmentation_id = _append_split_annotations(
        coco_output, args.train_img_dir, args.train_anno_dir, 1, 1)

    os.makedirs(args.json_save_dir, exist_ok=True)
    if not args.use_val:
        with open('{}/{}_train.json'.format(args.json_save_dir, split_name), 'w') as output_json_file:
            json.dump(coco_output, output_json_file)
        return

    # train + val: keep numbering so ids stay unique across both folders.
    _append_split_annotations(
        coco_output, args.val_img_dir, args.val_anno_dir, image_id, segmentation_id)

    with open('{}/{}_trainval.json'.format(args.json_save_dir, split_name), 'w') as output_json_file:
        json.dump(coco_output, output_json_file)

    # Stand-alone validation file with its own id numbering.
    coco_output_val = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }
    _append_split_annotations(
        coco_output_val, args.val_img_dir, args.val_anno_dir, 1, 1)

    with open('{}/{}_val.json'.format(args.json_save_dir, split_name), 'w') as output_json_file_val:
        json.dump(coco_output_val, output_json_file_val)
162
+
163
+
164
# Script entry point: parse the command line and run the conversion.
if __name__ == "__main__":
    args = get_arguments()
    main(args)
preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import datetime
3
+ import numpy as np
4
+ from itertools import groupby
5
+ from skimage import measure
6
+ from PIL import Image
7
+ from pycocotools import mask
8
+
9
def convert(text):
    """Return ``text`` as an int when it is all digits, else lower-cased."""
    if text.isdigit():
        return int(text)
    return text.lower()


def natrual_key(key):
    """Split ``key`` into text and number chunks for natural sorting."""
    return [convert(chunk) for chunk in re.split('([0-9]+)', key)]
11
+
12
+
13
def resize_binary_mask(array, new_size):
    """Resize a binary mask to ``new_size`` and return it as a bool array.

    The mask is scaled to 0/255, resized through PIL with its default
    resampling filter, and every non-zero pixel of the result is True.
    """
    scaled = array.astype(np.uint8) * 255
    resized = Image.fromarray(scaled).resize(new_size)
    return np.asarray(resized).astype(np.bool_)
17
+
18
+
19
def close_contour(contour):
    """Return ``contour`` with its first point repeated at the end if open."""
    already_closed = np.array_equal(contour[0], contour[-1])
    if already_closed:
        return contour
    return np.vstack((contour, contour[0]))
23
+
24
+
25
def binary_mask_to_rle(binary_mask):
    """Encode a binary mask as uncompressed column-major run lengths.

    The counts always start with the length of a run of zeros, so a leading
    0 is emitted when the mask begins with a 1.
    """
    column_major = binary_mask.ravel(order='F')
    run_lengths = [len(list(run)) for _, run in groupby(column_major)]
    if column_major.size > 0 and column_major[0] == 1:
        run_lengths.insert(0, 0)
    return {'counts': run_lengths, 'size': list(binary_mask.shape)}
34
+
35
+
36
def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation
    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated
            polygonal chain. If tolerance is 0, the original coordinate array is returned.
    Returns:
        A list of polygons, each a flat ``[x0, y0, x1, y1, ...]`` list.
        Contours with fewer than three points after simplification are dropped.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_binary_mask, 0.5)
    for contour in contours:
        # Undo the padding offset per contour: the contours have different
        # lengths, and subtracting from the whole ragged list at once raises
        # ValueError on NumPy >= 1.24.
        contour = np.asarray(contour) - 1
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # find_contours yields (row, col); COCO expects (x, y).
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our segmentation
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)

    return polygons
60
+
61
+
62
def create_image_info(image_id, file_name, image_size,
                      date_captured=None,
                      license_id=1, coco_url="", flickr_url=""):
    """Build the COCO ``images`` entry for one image.

    Args:
        image_id: Unique image id.
        file_name: Image file name stored in the entry.
        image_size: ``(width, height)`` pair.
        date_captured: Timestamp string. When omitted, the current UTC time
            is taken at call time (a default evaluated in the signature
            would freeze the module import time for every image).
        license_id: Id of the licence entry.
        coco_url: Stored as ``coco_url``.
        flickr_url: Stored as ``flickr_url``.

    Returns:
        Dict with the keys ``id``, ``file_name``, ``width``, ``height``,
        ``date_captured``, ``license``, ``coco_url`` and ``flickr_url``.
    """
    if date_captured is None:
        # Naive UTC timestamp, same text format as utcnow().isoformat(' ').
        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        date_captured = now_utc.isoformat(' ')

    image_info = {
        "id": image_id,
        "file_name": file_name,
        "width": image_size[0],
        "height": image_size[1],
        "date_captured": date_captured,
        "license": license_id,
        "coco_url": coco_url,
        "flickr_url": flickr_url
    }

    return image_info
77
+
78
+
79
def create_annotation_info(annotation_id, image_id, category_info, binary_mask,
                           image_size=None, tolerance=2, bounding_box=None):
    """Build the COCO ``annotations`` entry for one instance mask.

    Returns ``None`` when the mask is empty (area below 1) or, for
    non-crowd instances, when no polygon could be extracted.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    encoded_mask = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(encoded_mask)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(encoded_mask)

    # Crowd regions are stored as RLE, single instances as polygons.
    is_crowd = 1 if category_info["is_crowd"] else 0
    if is_crowd:
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    mask_height, mask_width = binary_mask.shape[0], binary_mask.shape[1]
    return {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        "bbox": bounding_box.tolist(),
        "segmentation": segmentation,
        "width": mask_width,
        "height": mask_height,
    }
preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import datetime
3
+ import json
4
+ import os
5
+ from PIL import Image
6
+
7
+ import pycococreatortools
8
+
9
+
10
def get_arguments(argv=None):
    """Parse the command-line options of the test-set COCO converter.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            ``sys.argv[1:]`` is parsed, exactly as before; passing a list
            allows the parser to be driven programmatically.

    Returns:
        argparse.Namespace: With attributes ``dataset``, ``json_save_dir``
        and ``test_img_dir``.
    """
    parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation")
    parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)")
    parser.add_argument("--json_save_dir", type=str, default='../data/CIHP/annotations',
                        help="path to save coco-style annotation json file")
    parser.add_argument("--test_img_dir", type=str, default='../data/CIHP/Testing/Images',
                        help="test image path")
    return parser.parse_args(argv)
18
+
19
# Parsed at import time because INFO below embeds the dataset name.
args = get_arguments()

# COCO "info" header written verbatim into the output JSON.
INFO = {
    # Separate the dataset name from the word "Dataset" (previously the two
    # were fused, e.g. "CIHPDataset").
    "description": args.dataset + " Dataset",
    "url": "",
    "version": "",
    "year": 2020,
    "contributor": "yunqiuxu",
    "date_created": datetime.datetime.utcnow().isoformat(' ')
}

# Single placeholder license; image records refer to it by id.
LICENSES = [
    {
        "id": 1,
        "name": "",
        "url": ""
    }
]

# Only one category is emitted: person.
CATEGORIES = [
    {
        'id': 1,
        'name': 'person',
        'supercategory': 'person',
    },
]
45
+
46
+
47
def main(args):
    """Write a COCO-style JSON listing every image in the test directory.

    Only the ``images`` section is populated; ``annotations`` stays empty.
    Image ids start at 1 and increase by one per file.

    Args:
        args: Namespace providing ``test_img_dir``, ``json_save_dir`` and
            ``dataset``. The output file is ``<json_save_dir>/<dataset>.json``.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    # Sort the listing so id assignment is reproducible; os.listdir returns
    # entries in arbitrary order.
    image_names = sorted(os.listdir(args.test_img_dir))

    for image_id, image_name in enumerate(image_names, start=1):
        # Only the size is needed; close the file handle right away instead
        # of leaking one descriptor per image.
        with Image.open(os.path.join(args.test_img_dir, image_name)) as image:
            image_size = image.size
        image_info = pycococreatortools.create_image_info(
            image_id, image_name, image_size
        )
        coco_output["images"].append(image_info)

    # makedirs also creates missing parent directories and tolerates an
    # already existing target, unlike a bare os.mkdir.
    os.makedirs(args.json_save_dir, exist_ok=True)

    output_path = os.path.join(args.json_save_dir, '{}.json'.format(args.dataset))
    with open(output_path, 'w') as output_json_file:
        json.dump(coco_output, output_json_file)
71
+
72
+
73
+ if __name__ == "__main__":
74
+ main(args)
preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python CircleCI 2.0 configuration file
2
+ #
3
+ # Check https://circleci.com/docs/2.0/language-python/ for more details
4
+ #
5
+ version: 2
6
+
7
+ # -------------------------------------------------------------------------------------
8
+ # Environments to run the jobs in
9
+ # -------------------------------------------------------------------------------------
10
+ cpu: &cpu
11
+ docker:
12
+ - image: circleci/python:3.6.8-stretch
13
+ resource_class: medium
14
+
15
+ gpu: &gpu
16
+ machine:
17
+ image: ubuntu-1604:201903-01
18
+ docker_layer_caching: true
19
+ resource_class: gpu.small
20
+
21
+ # -------------------------------------------------------------------------------------
22
+ # Re-usable commands
23
+ # -------------------------------------------------------------------------------------
24
+ install_python: &install_python
25
+ - run:
26
+ name: Install Python
27
+ working_directory: ~/
28
+ command: |
29
+ pyenv install 3.6.1
30
+ pyenv global 3.6.1
31
+
32
+ setup_venv: &setup_venv
33
+ - run:
34
+ name: Setup Virtual Env
35
+ working_directory: ~/
36
+ command: |
37
+ python -m venv ~/venv
38
+ echo ". ~/venv/bin/activate" >> $BASH_ENV
39
+ . ~/venv/bin/activate
40
+ python --version
41
+ which python
42
+ which pip
43
+ pip install --upgrade pip
44
+
45
+ install_dep: &install_dep
46
+ - run:
47
+ name: Install Dependencies
48
+ command: |
49
+ pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore'
50
+ pip install --progress-bar off cython opencv-python
51
+ pip install --progress-bar off 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
52
+ pip install --progress-bar off torch torchvision
53
+
54
+ install_detectron2: &install_detectron2
55
+ - run:
56
+ name: Install Detectron2
57
+ command: |
58
+ gcc --version
59
+ pip install -U --progress-bar off -e .[dev]
60
+ python -m detectron2.utils.collect_env
61
+
62
+ install_nvidia_driver: &install_nvidia_driver
63
+ - run:
64
+ name: Install nvidia driver
65
+ working_directory: ~/
66
+ command: |
67
+ wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run'
68
+ sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm
69
+ nvidia-smi
70
+
71
+ run_unittests: &run_unittests
72
+ - run:
73
+ name: Run Unit Tests
74
+ command: |
75
+ python -m unittest discover -v -s tests
76
+
77
+ # -------------------------------------------------------------------------------------
78
+ # Jobs to run
79
+ # -------------------------------------------------------------------------------------
80
+ jobs:
81
+ cpu_tests:
82
+ <<: *cpu
83
+
84
+ working_directory: ~/detectron2
85
+
86
+ steps:
87
+ - checkout
88
+ - <<: *setup_venv
89
+
90
+ # Cache the venv directory that contains dependencies
91
+ - restore_cache:
92
+ keys:
93
+ - cache-key-{{ .Branch }}-ID-20200425
94
+
95
+ - <<: *install_dep
96
+
97
+ - save_cache:
98
+ paths:
99
+ - ~/venv
100
+ key: cache-key-{{ .Branch }}-ID-20200425
101
+
102
+ - <<: *install_detectron2
103
+
104
+ - run:
105
+ name: isort
106
+ command: |
107
+ isort -c -sp .
108
+ - run:
109
+ name: black
110
+ command: |
111
+ black --check -l 100 .
112
+ - run:
113
+ name: flake8
114
+ command: |
115
+ flake8 .
116
+
117
+ - <<: *run_unittests
118
+
119
+ gpu_tests:
120
+ <<: *gpu
121
+
122
+ working_directory: ~/detectron2
123
+
124
+ steps:
125
+ - checkout
126
+ - <<: *install_nvidia_driver
127
+
128
+ - run:
129
+ name: Install nvidia-docker
130
+ working_directory: ~/
131
+ command: |
132
+ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
133
+ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
134
+ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
135
+ sudo tee /etc/apt/sources.list.d/nvidia-docker.list
136
+ sudo apt-get update && sudo apt-get install -y nvidia-docker2
137
+ # reload the docker daemon configuration
138
+ sudo pkill -SIGHUP dockerd
139
+
140
+ - run:
141
+ name: Launch docker
142
+ working_directory: ~/detectron2/docker
143
+ command: |
144
+ nvidia-docker build -t detectron2:v0 -f Dockerfile-circleci .
145
+ nvidia-docker run -itd --name d2 detectron2:v0
146
+ docker exec -it d2 nvidia-smi
147
+
148
+ - run:
149
+ name: Build Detectron2
150
+ command: |
151
+ docker exec -it d2 pip install 'git+https://github.com/facebookresearch/fvcore'
152
+ docker cp ~/detectron2 d2:/detectron2
153
+ # This will build d2 for the target GPU arch only
154
+ docker exec -it d2 pip install -e /detectron2
155
+ docker exec -it d2 python3 -m detectron2.utils.collect_env
156
+ docker exec -it d2 python3 -c 'import torch; assert(torch.cuda.is_available())'
157
+
158
+ - run:
159
+ name: Run Unit Tests
160
+ command: |
161
+ docker exec -e CIRCLECI=true -it d2 python3 -m unittest discover -v -s /detectron2/tests
162
+
163
+ workflows:
164
+ version: 2
165
+ regular_test:
166
+ jobs:
167
+ - cpu_tests
168
+ - gpu_tests
169
+
170
+ #nightly_test:
171
+ #jobs:
172
+ #- gpu_tests
173
+ #triggers:
174
+ #- schedule:
175
+ #cron: "0 0 * * *"
176
+ #filters:
177
+ #branches:
178
+ #only:
179
+ #- master
preprocess/humanparsing/mhp_extension/detectron2/.clang-format ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AccessModifierOffset: -1
2
+ AlignAfterOpenBracket: AlwaysBreak
3
+ AlignConsecutiveAssignments: false
4
+ AlignConsecutiveDeclarations: false
5
+ AlignEscapedNewlinesLeft: true
6
+ AlignOperands: false
7
+ AlignTrailingComments: false
8
+ AllowAllParametersOfDeclarationOnNextLine: false
9
+ AllowShortBlocksOnASingleLine: false
10
+ AllowShortCaseLabelsOnASingleLine: false
11
+ AllowShortFunctionsOnASingleLine: Empty
12
+ AllowShortIfStatementsOnASingleLine: false
13
+ AllowShortLoopsOnASingleLine: false
14
+ AlwaysBreakAfterReturnType: None
15
+ AlwaysBreakBeforeMultilineStrings: true
16
+ AlwaysBreakTemplateDeclarations: true
17
+ BinPackArguments: false
18
+ BinPackParameters: false
19
+ BraceWrapping:
20
+ AfterClass: false
21
+ AfterControlStatement: false
22
+ AfterEnum: false
23
+ AfterFunction: false
24
+ AfterNamespace: false
25
+ AfterObjCDeclaration: false
26
+ AfterStruct: false
27
+ AfterUnion: false
28
+ BeforeCatch: false
29
+ BeforeElse: false
30
+ IndentBraces: false
31
+ BreakBeforeBinaryOperators: None
32
+ BreakBeforeBraces: Attach
33
+ BreakBeforeTernaryOperators: true
34
+ BreakConstructorInitializersBeforeComma: false
35
+ BreakAfterJavaFieldAnnotations: false
36
+ BreakStringLiterals: false
37
+ ColumnLimit: 80
38
+ CommentPragmas: '^ IWYU pragma:'
39
+ ConstructorInitializerAllOnOneLineOrOnePerLine: true
40
+ ConstructorInitializerIndentWidth: 4
41
+ ContinuationIndentWidth: 4
42
+ Cpp11BracedListStyle: true
43
+ DerivePointerAlignment: false
44
+ DisableFormat: false
45
+ ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ]
46
+ IncludeCategories:
47
+ - Regex: '^<.*\.h(pp)?>'
48
+ Priority: 1
49
+ - Regex: '^<.*'
50
+ Priority: 2
51
+ - Regex: '.*'
52
+ Priority: 3
53
+ IndentCaseLabels: true
54
+ IndentWidth: 2
55
+ IndentWrappedFunctionNames: false
56
+ KeepEmptyLinesAtTheStartOfBlocks: false
57
+ MacroBlockBegin: ''
58
+ MacroBlockEnd: ''
59
+ MaxEmptyLinesToKeep: 1
60
+ NamespaceIndentation: None
61
+ ObjCBlockIndentWidth: 2
62
+ ObjCSpaceAfterProperty: false
63
+ ObjCSpaceBeforeProtocolList: false
64
+ PenaltyBreakBeforeFirstCallParameter: 1
65
+ PenaltyBreakComment: 300
66
+ PenaltyBreakFirstLessLess: 120
67
+ PenaltyBreakString: 1000
68
+ PenaltyExcessCharacter: 1000000
69
+ PenaltyReturnTypeOnItsOwnLine: 200
70
+ PointerAlignment: Left
71
+ ReflowComments: true
72
+ SortIncludes: true
73
+ SpaceAfterCStyleCast: false
74
+ SpaceBeforeAssignmentOperators: true
75
+ SpaceBeforeParens: ControlStatements
76
+ SpaceInEmptyParentheses: false
77
+ SpacesBeforeTrailingComments: 1
78
+ SpacesInAngles: false
79
+ SpacesInContainerLiterals: true
80
+ SpacesInCStyleCastParentheses: false
81
+ SpacesInParentheses: false
82
+ SpacesInSquareBrackets: false
83
+ Standard: Cpp11
84
+ TabWidth: 8
85
+ UseTab: Never
preprocess/humanparsing/mhp_extension/detectron2/.flake8 ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # This is an example .flake8 config, used when developing *Black* itself.
2
+ # Keep in sync with setup.cfg which is used for source packages.
3
+
4
+ [flake8]
5
+ ignore = W503, E203, E221, C901, C408, E741
6
+ max-line-length = 100
7
+ max-complexity = 18
8
+ select = B,C,E,F,W,T4,B9
9
+ exclude = build,__init__.py
preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Code of Conduct
2
+
3
+ Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
4
+ Please read the [full text](https://code.fb.com/codeofconduct/)
5
+ so that you can understand what actions will and will not be tolerated.
preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to detectron2
2
+
3
+ ## Issues
4
+ We use GitHub issues to track public bugs and questions.
5
+ Please make sure to follow one of the
6
+ [issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose)
7
+ when reporting any issues.
8
+
9
+ Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
10
+ disclosure of security bugs. In those cases, please go through the process
11
+ outlined on that page and do not file a public issue.
12
+
13
+ ## Pull Requests
14
+ We actively welcome your pull requests.
15
+
16
+ However, if you're adding any significant features (e.g. > 50 lines), please
17
+ make sure to have a corresponding issue to discuss your motivation and proposals,
18
+ before sending a PR. We do not always accept new features, and we take the following
19
+ factors into consideration:
20
+
21
+ 1. Whether the same feature can be achieved without modifying detectron2.
22
+ Detectron2 is designed so that you can implement many extensions from the outside, e.g.
23
+ those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects).
24
+ If some part is not as extensible, you can also bring up the issue to make it more extensible.
25
+ 2. Whether the feature is potentially useful to a large audience, or only to a small portion of users.
26
+ 3. Whether the proposed solution has a good design / interface.
27
+ 4. Whether the proposed solution adds extra mental/practical overhead to users who don't
28
+ need such feature.
29
+ 5. Whether the proposed solution breaks existing APIs.
30
+
31
+ When sending a PR, please do:
32
+
33
+ 1. If a PR contains multiple orthogonal changes, split it into several PRs.
34
+ 2. If you've added code that should be tested, add tests.
35
+ 3. For PRs that need experiments (e.g. adding a new model or new methods),
36
+ you don't need to update model zoo, but do provide experiment results in the description of the PR.
37
+ 4. If APIs are changed, update the documentation.
38
+ 5. Make sure your code lints with `./dev/linter.sh`.
39
+
40
+
41
+ ## Contributor License Agreement ("CLA")
42
+ In order to accept your pull request, we need you to submit a CLA. You only need
43
+ to do this once to work on any of Facebook's open source projects.
44
+
45
+ Complete your CLA here: <https://code.facebook.com/cla>
46
+
47
+ ## License
48
+ By contributing to detectron2, you agree that your contributions will be licensed
49
+ under the LICENSE file in the root directory of this source tree.
preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg ADDED
preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+
2
+ Please select an issue template from
3
+ https://github.com/facebookresearch/detectron2/issues/new/choose .
4
+
5
+ Otherwise your issue will be closed.
preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: "🐛 Bugs"
3
+ about: Report bugs in detectron2
4
+ title: Please read & provide the following
5
+
6
+ ---
7
+
8
+ ## Instructions To Reproduce the 🐛 Bug:
9
+
10
+ 1. what changes you made (`git diff`) or what code you wrote
11
+ ```
12
+ <put diff or code here>
13
+ ```
14
+ 2. what exact command you run:
15
+ 3. what you observed (including __full logs__):
16
+ ```
17
+ <put logs here>
18
+ ```
19
+ 4. please simplify the steps as much as possible so they do not require additional resources to
20
+ run, such as a private dataset.
21
+
22
+ ## Expected behavior:
23
+
24
+ If there is no obvious error in "what you observed" provided above,
25
+ please tell us the expected behavior.
26
+
27
+ ## Environment:
28
+
29
+ Provide your environment information using the following command:
30
+ ```
31
+ wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py
32
+ ```
33
+
34
+ If your issue looks like an installation issue / environment issue,
35
+ please first try to solve it yourself with the instructions in
36
+ https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues
preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # require an issue template to be chosen
2
+ blank_issues_enabled: false
3
+
4
+ # Unexpected behaviors & bugs are split into two templates.
5
+ # When they are one template, users think "it's not a bug" and don't choose the template.
6
+ #
7
+ # But the file name is still "unexpected-problems-bugs.md" so that old references
8
+ # to this issue template still work.
9
+ # It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs)
preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: "\U0001F680Feature Request"
3
+ about: Submit a proposal/request for a new detectron2 feature
4
+
5
+ ---
6
+
7
+ ## 🚀 Feature
8
+ A clear and concise description of the feature proposal.
9
+
10
+
11
+ ## Motivation & Examples
12
+
13
+ Tell us why the feature is useful.
14
+
15
+ Describe what the feature would look like, if it is implemented.
16
+ Best demonstrated using **code examples** in addition to words.
17
+
18
+ ## Note
19
+
20
+ We only consider adding new features if they are relevant to many users.
21
+
22
+ If you request implementation of research papers --
23
+ we only consider papers that have enough significance and prevalence in the object detection field.
24
+
25
+ We do not take requests for most projects in the `projects/` directory,
26
+ because they are research code releases that are mainly for other researchers to reproduce results.
27
+
28
+ Instead of adding features inside detectron2,
29
+ you can implement many features by [extending detectron2](https://detectron2.readthedocs.io/tutorials/extend.html).
30
+ The [projects/](https://github.com/facebookresearch/detectron2/tree/master/projects/) directory contains many of such examples.
31
+
preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: "❓How to do something?"
3
+ about: How to do something using detectron2? What does an API do?
4
+
5
+ ---
6
+
7
+ ## ❓ How to do something using detectron2
8
+
9
+ Describe what you want to do, including:
10
+ 1. what inputs you will provide, if any:
11
+ 2. what outputs you are expecting:
12
+
13
+ ## ❓ What does an API do and how to use it?
14
+ Please link to which API or documentation you're asking about from
15
+ https://detectron2.readthedocs.io/
16
+
17
+
18
+ NOTE:
19
+
20
+ 1. Only general answers are provided.
21
+ If you want to ask about "why X did not work", please use the
22
+ [Unexpected behaviors](https://github.com/facebookresearch/detectron2/issues/new/choose) issue template.
23
+
24
+ 2. About how to implement new models / new dataloader / new training logic, etc., check documentation first.
25
+
26
+ 3. We do not answer general machine learning / computer vision questions that are not specific to detectron2, such as how a model works, how to improve your training/make it converge, or what algorithm/methods can be used to achieve X.
preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: "Unexpected behaviors"
3
+ about: Run into unexpected behaviors when using detectron2
4
+ title: Please read & provide the following
5
+
6
+ ---
7
+
8
+ If you do not know the root cause of the problem, and wish someone to help you, please
9
+ post according to this template:
10
+
11
+ ## Instructions To Reproduce the Issue:
12
+
13
+ 1. what changes you made (`git diff`) or what code you wrote
14
+ ```
15
+ <put diff or code here>
16
+ ```
17
+ 2. what exact command you run:
18
+ 3. what you observed (including __full logs__):
19
+ ```
20
+ <put logs here>
21
+ ```
22
+ 4. please simplify the steps as much as possible so they do not require additional resources to
23
+ run, such as a private dataset.
24
+
25
+ ## Expected behavior:
26
+
27
+ If there is no obvious error in "what you observed" provided above,
28
+ please tell us the expected behavior.
29
+
30
+ If you expect the model to converge / work better, note that we do not give suggestions
31
+ on how to train a new model.
32
+ Only in one of the two conditions we will help with it:
33
+ (1) You're unable to reproduce the results in detectron2 model zoo.
34
+ (2) It indicates a detectron2 bug.
35
+
36
+ ## Environment:
37
+
38
+ Provide your environment information using the following command:
39
+ ```
40
+ wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py
41
+ ```
42
+
43
+ If your issue looks like an installation issue / environment issue,
44
+ please first try to solve it yourself with the instructions in
45
+ https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues
preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Thanks for your contribution!
2
+
3
+ If you're sending a large PR (e.g., >50 lines),
4
+ please open an issue first about the feature / bug, and indicate how you want to contribute.
5
+
6
+ Before submitting a PR, please run `dev/linter.sh` to lint the code.
7
+
8
+ See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests
9
+ about how we handle PRs.
preprocess/humanparsing/mhp_extension/detectron2/.gitignore ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # output dir
2
+ output
3
+ instant_test_output
4
+ inference_test_output
5
+
6
+
7
+ *.jpg
8
+ *.png
9
+ *.txt
10
+ *.json
11
+ *.diff
12
+
13
+ # compilation and distribution
14
+ __pycache__
15
+ _ext
16
+ *.pyc
17
+ *.so
18
+ detectron2.egg-info/
19
+ build/
20
+ dist/
21
+ wheels/
22
+
23
+ # pytorch/python/numpy formats
24
+ *.pth
25
+ *.pkl
26
+ *.npy
27
+
28
+ # ipython/jupyter notebooks
29
+ *.ipynb
30
+ **/.ipynb_checkpoints/
31
+
32
+ # Editor temporaries
33
+ *.swn
34
+ *.swo
35
+ *.swp
36
+ *~
37
+
38
+ # editor settings
39
+ .idea
40
+ .vscode
41
+
42
+ # project dirs
43
+ /detectron2/model_zoo/configs
44
+ /datasets
45
+ /projects/*/datasets
46
+ /models
preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Getting Started with Detectron2
2
+
3
+ This document provides a brief intro of the usage of builtin command-line tools in detectron2.
4
+
5
+ For a tutorial that involves actual coding with the API,
6
+ see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
7
+ which covers how to run inference with an
8
+ existing model, and how to train a builtin model on a custom dataset.
9
+
10
+ For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html).
11
+
12
+
13
+ ### Inference Demo with Pre-trained Models
14
+
15
+ 1. Pick a model and its config file from
16
+ [model zoo](MODEL_ZOO.md),
17
+ for example, `mask_rcnn_R_50_FPN_3x.yaml`.
18
+ 2. We provide `demo.py` that is able to run builtin standard models. Run it with:
19
+ ```
20
+ cd demo/
21
+ python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
22
+ --input input1.jpg input2.jpg \
23
+ [--other-options]
24
+ --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
25
+ ```
26
+ The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation.
27
+ This command will run the inference and show visualizations in an OpenCV window.
28
+
29
+ For details of the command line arguments, see `demo.py -h` or look at its source code
30
+ to understand its behavior. Some common arguments are:
31
+ * To run __on your webcam__, replace `--input files` with `--webcam`.
32
+ * To run __on a video__, replace `--input files` with `--video-input video.mp4`.
33
+ * To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`.
34
+ * To save outputs to a directory (for images) or a file (for webcam or video), use `--output`.
35
+
36
+
37
+ ### Training & Evaluation in Command Line
38
+
39
+ We provide a script in "tools/{,plain_}train_net.py", that is made to train
40
+ all the configs provided in detectron2.
41
+ You may want to use it as a reference to write your own training script.
42
+
43
+ To train a model with "train_net.py", first
44
+ setup the corresponding datasets following
45
+ [datasets/README.md](./datasets/README.md),
46
+ then run:
47
+ ```
48
+ cd tools/
49
+ ./train_net.py --num-gpus 8 \
50
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
51
+ ```
52
+
53
+ The configs are made for 8-GPU training.
54
+ To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.:
55
+ ```
56
+ ./train_net.py \
57
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
58
+ --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
59
+ ```
60
+
61
+ For most models, CPU training is not supported.
62
+
63
+ To evaluate a model's performance, use
64
+ ```
65
+ ./train_net.py \
66
+ --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \
67
+ --eval-only MODEL.WEIGHTS /path/to/checkpoint_file
68
+ ```
69
+ For more options, see `./train_net.py -h`.
70
+
71
+ ### Use Detectron2 APIs in Your Code
72
+
73
+ See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
74
+ to learn how to use detectron2 APIs to:
75
+ 1. run inference with an existing model
76
+ 2. train a builtin model on a custom dataset
77
+
78
+ See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects)
79
+ for more ways to build your project on detectron2.
preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Installation
2
+
3
+ Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5)
4
+ has step-by-step instructions that install detectron2.
5
+ The [Dockerfile](docker)
6
+ also installs detectron2 with a few simple commands.
7
+
8
+ ### Requirements
9
+ - Linux or macOS with Python ≥ 3.6
10
+ - PyTorch ≥ 1.4
11
+ - [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation.
12
+ You can install them together at [pytorch.org](https://pytorch.org) to make sure of this.
13
+ - OpenCV, optional, needed by demo and visualization
14
+ - pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'`
15
+
16
+
17
+ ### Build Detectron2 from Source
18
+
19
+ gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for faster build.
20
+ After having them, run:
21
+ ```
22
+ python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
23
+ # (add --user if you don't have permission)
24
+
25
+ # Or, to install it from a local clone:
26
+ git clone https://github.com/facebookresearch/detectron2.git
27
+ python -m pip install -e detectron2
28
+
29
+ # Or if you are on macOS
30
+ # CC=clang CXX=clang++ python -m pip install -e .
31
+ ```
32
+
33
+ To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the
34
+ old build first. You often need to rebuild detectron2 after reinstalling PyTorch.
35
+
36
+ ### Install Pre-Built Detectron2 (Linux only)
37
+ ```
38
+ # for CUDA 10.1:
39
+ python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html
40
+ ```
41
+ You can replace cu101 with "cu{100,92}" or "cpu".
42
+
43
+ Note that:
44
+ 1. Such an installation has to be used with a certain version of the official PyTorch release.
45
+ See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements.
46
+ It will not work with a different version of PyTorch or a non-official build of PyTorch.
47
+ 2. Such installation is out-of-date w.r.t. master branch of detectron2. It may not be
48
+ compatible with the master branch of a research project that uses detectron2 (e.g. those in
49
+ [projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)).
50
+
51
+ ### Common Installation Issues
52
+
53
+ If you encounter issues using the pre-built detectron2, please uninstall it and try building it from source.
54
+
55
+ Click each issue for its solutions:
56
+
57
+ <details>
58
+ <summary>
59
+ Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library.
60
+ </summary>
61
+ <br/>
62
+
63
+ This usually happens when detectron2 or torchvision is not
64
+ compiled with the version of PyTorch you're running.
65
+
66
+ Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch.
67
+ If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them
68
+ following [pytorch.org](http://pytorch.org). So the versions will match.
69
+
70
+ If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases)
71
+ to see the corresponding pytorch version required for each pre-built detectron2.
72
+
73
+ If the error comes from detectron2 or torchvision that you built manually from source,
74
+ remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment.
75
+
76
+ If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env`
77
+ in your issue.
78
+ </details>
79
+
80
+ <details>
81
+ <summary>
82
+ Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found.
83
+ </summary>
84
+ <br/>
85
+ Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.
86
+
87
+ This often happens with old anaconda.
88
+ Try `conda update libgcc`. Then rebuild detectron2.
89
+
90
+ The fundamental solution is to run the code with proper C++ runtime.
91
+ One way is to use `LD_PRELOAD=/path/to/libstdc++.so`.
92
+
93
+ </details>
94
+
95
+ <details>
96
+ <summary>
97
+ "Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available".
98
+ </summary>
99
+ <br/>
100
+ CUDA is not found when building detectron2.
101
+ You should make sure
102
+
103
+ ```
104
+ python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)'
105
+ ```
106
+
107
+ print valid outputs at the time you build detectron2.
108
+
109
+ Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config.
110
+ </details>
111
+
112
+ <details>
113
+ <summary>
114
+ "invalid device function" or "no kernel image is available for execution".
115
+ </summary>
116
+ <br/>
117
+ Two possibilities:
118
+
119
+ * You build detectron2 with one version of CUDA but run it with a different version.
120
+
121
+ To check whether it is the case,
122
+ use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
123
+ In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
124
+ to contain cuda libraries of the same version.
125
+
126
+ When they are inconsistent,
127
+ you need to either install a different build of PyTorch (or build by yourself)
128
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
129
+
130
+ * Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute compatibility).
131
+
132
+ The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in
133
+ `python -m detectron2.utils.collect_env`.
134
+
135
+ The GPU architecture flags of detectron2/torchvision by default matches the GPU model detected
136
+ during compilation. This means the compiled code may not work on a different GPU model.
137
+ To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation.
138
+
139
+ For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s.
140
+ Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out
141
+ the correct compute compatibility number for your device.
142
+
143
+ </details>
144
+
145
+ <details>
146
+ <summary>
147
+ Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures.
148
+ </summary>
149
+ <br/>
150
+ The version of NVCC you use to build detectron2 or torchvision does
151
+ not match the version of CUDA you are running with.
152
+ This often happens when using anaconda's CUDA runtime.
153
+
154
+ Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions.
155
+ In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA"
156
+ to contain cuda libraries of the same version.
157
+
158
+ When they are inconsistent,
159
+ you need to either install a different build of PyTorch (or build by yourself)
160
+ to match your local CUDA installation, or install a different version of CUDA to match PyTorch.
161
+ </details>
162
+
163
+
164
+ <details>
165
+ <summary>
166
+ "ImportError: cannot import name '_C'".
167
+ </summary>
168
+ <br/>
169
+ Please build and install detectron2 following the instructions above.
170
+
171
+ If you are running code from detectron2's root directory, `cd` to a different one.
172
+ Otherwise you may not import the code that you installed.
173
+ </details>
174
+
175
+ <details>
176
+ <summary>
177
+ ONNX conversion segfault after some "TraceWarning".
178
+ </summary>
179
+ <br/>
180
+ The ONNX package is compiled with a compiler that is too old.
181
+
182
+ Please build and install ONNX from its source code using a compiler
183
+ whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`).
184
+ </details>
preprocess/humanparsing/mhp_extension/detectron2/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2019 - present, Facebook, Inc
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md ADDED
@@ -0,0 +1,903 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Detectron2 Model Zoo and Baselines
2
+
3
+ ## Introduction
4
+
5
+ This file documents a large collection of baselines trained
6
+ with detectron2 in Sep-Oct, 2019.
7
+ All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/)
8
+ servers with 8 NVIDIA V100 GPUs & NVLink. The software in use was PyTorch 1.3, CUDA 9.2, cuDNN 7.4.2 or 7.6.3.
9
+ You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs.
10
+
11
+ In addition to these official baseline models, you can find more models in [projects/](projects/).
12
+
13
+ #### How to Read the Tables
14
+ * The "Name" column contains a link to the config file. Running `tools/train_net.py` with this config file
15
+ and 8 GPUs will reproduce the model.
16
+ * Training speed is averaged across the entire training.
17
+ We keep updating the speed with the latest version of detectron2/pytorch/etc.,
18
+ so they might be different from the `metrics` file.
19
+ Training speed for multi-machine jobs is not provided.
20
+ * Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset),
21
+ with batch size 1 in detectron2 directly.
22
+ Measuring it with your own code will likely introduce other overhead.
23
+ Actual deployment in production should in general be faster than the given inference
24
+ speed due to more optimizations.
25
+ * The *model id* column is provided for ease of reference.
26
+ To check downloaded file integrity, any model on this page contains its md5 prefix in its file name.
27
+ * Training curves and other statistics can be found in `metrics` for each model.
28
+
29
+ #### Common Settings for COCO Models
30
+ * All COCO models were trained on `train2017` and evaluated on `val2017`.
31
+ * The default settings are __not directly comparable__ with Detectron's standard settings.
32
+ For example, our default training data augmentation uses scale jittering in addition to horizontal flipping.
33
+
34
+ To make fair comparisons with Detectron's settings, see
35
+ [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison,
36
+ and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html)
37
+ for speed comparison.
38
+ * For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__:
39
+ * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction,
40
+ respectively. It obtains the best
41
+ speed/accuracy tradeoff, but the other two are still useful for research.
42
+ * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper.
43
+ * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads
44
+ for mask and box prediction, respectively.
45
+ This is used by the Deformable ConvNet paper.
46
+ * Most models are trained with the 3x schedule (~37 COCO epochs).
47
+ Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs)
48
+ training schedule for comparison when doing quick research iteration.
49
+
50
+ #### ImageNet Pretrained Models
51
+
52
+ We provide backbone models pretrained on ImageNet-1k dataset.
53
+ These models have a __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer.
54
+ * [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model.
55
+ * [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model.
56
+ * [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB.
57
+
58
+ Pretrained models in Detectron's format can still be used. For example:
59
+ * [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl):
60
+ ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k).
61
+ * [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl):
62
+ ResNet-50 with Group Normalization.
63
+ * [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl):
64
+ ResNet-101 with Group Normalization.
65
+
66
+ Torchvision's ResNet models can be used after being converted by [this script](tools/convert-torchvision-to-d2.py).
67
+
68
+ #### License
69
+
70
+ All models available for download through this document are licensed under the
71
+ [Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/).
72
+
73
+ ### COCO Object Detection Baselines
74
+
75
+ #### Faster R-CNN:
76
+ <!--
77
+ (fb only) To update the table in vim:
78
+ 1. Remove the old table: d}
79
+ 2. Copy the below command to the place of the table
80
+ 3. :.!bash
81
+
82
+ ./gen_html_table.py --config 'COCO-Detection/faster*50*'{1x,3x}'*' 'COCO-Detection/faster*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP
83
+ -->
84
+
85
+
86
+ <table><tbody>
87
+ <!-- START TABLE -->
88
+ <!-- TABLE HEADER -->
89
+ <th valign="bottom">Name</th>
90
+ <th valign="bottom">lr<br/>sched</th>
91
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
92
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
93
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
94
+ <th valign="bottom">box<br/>AP</th>
95
+ <th valign="bottom">model id</th>
96
+ <th valign="bottom">download</th>
97
+ <!-- TABLE BODY -->
98
+ <!-- ROW: faster_rcnn_R_50_C4_1x -->
99
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
100
+ <td align="center">1x</td>
101
+ <td align="center">0.551</td>
102
+ <td align="center">0.102</td>
103
+ <td align="center">4.8</td>
104
+ <td align="center">35.7</td>
105
+ <td align="center">137257644</td>
106
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/metrics.json">metrics</a></td>
107
+ </tr>
108
+ <!-- ROW: faster_rcnn_R_50_DC5_1x -->
109
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
110
+ <td align="center">1x</td>
111
+ <td align="center">0.380</td>
112
+ <td align="center">0.068</td>
113
+ <td align="center">5.0</td>
114
+ <td align="center">37.3</td>
115
+ <td align="center">137847829</td>
116
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/model_final_51d356.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_1x/137847829/metrics.json">metrics</a></td>
117
+ </tr>
118
+ <!-- ROW: faster_rcnn_R_50_FPN_1x -->
119
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
120
+ <td align="center">1x</td>
121
+ <td align="center">0.210</td>
122
+ <td align="center">0.038</td>
123
+ <td align="center">3.0</td>
124
+ <td align="center">37.9</td>
125
+ <td align="center">137257794</td>
126
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/model_final_b275ba.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_1x/137257794/metrics.json">metrics</a></td>
127
+ </tr>
128
+ <!-- ROW: faster_rcnn_R_50_C4_3x -->
129
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
130
+ <td align="center">3x</td>
131
+ <td align="center">0.543</td>
132
+ <td align="center">0.104</td>
133
+ <td align="center">4.8</td>
134
+ <td align="center">38.4</td>
135
+ <td align="center">137849393</td>
136
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/model_final_f97cb7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_3x/137849393/metrics.json">metrics</a></td>
137
+ </tr>
138
+ <!-- ROW: faster_rcnn_R_50_DC5_3x -->
139
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
140
+ <td align="center">3x</td>
141
+ <td align="center">0.378</td>
142
+ <td align="center">0.070</td>
143
+ <td align="center">5.0</td>
144
+ <td align="center">39.0</td>
145
+ <td align="center">137849425</td>
146
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/model_final_68d202.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_DC5_3x/137849425/metrics.json">metrics</a></td>
147
+ </tr>
148
+ <!-- ROW: faster_rcnn_R_50_FPN_3x -->
149
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
150
+ <td align="center">3x</td>
151
+ <td align="center">0.209</td>
152
+ <td align="center">0.038</td>
153
+ <td align="center">3.0</td>
154
+ <td align="center">40.2</td>
155
+ <td align="center">137849458</td>
156
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/metrics.json">metrics</a></td>
157
+ </tr>
158
+ <!-- ROW: faster_rcnn_R_101_C4_3x -->
159
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
160
+ <td align="center">3x</td>
161
+ <td align="center">0.619</td>
162
+ <td align="center">0.139</td>
163
+ <td align="center">5.9</td>
164
+ <td align="center">41.1</td>
165
+ <td align="center">138204752</td>
166
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/model_final_298dad.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_C4_3x/138204752/metrics.json">metrics</a></td>
167
+ </tr>
168
+ <!-- ROW: faster_rcnn_R_101_DC5_3x -->
169
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
170
+ <td align="center">3x</td>
171
+ <td align="center">0.452</td>
172
+ <td align="center">0.086</td>
173
+ <td align="center">6.1</td>
174
+ <td align="center">40.6</td>
175
+ <td align="center">138204841</td>
176
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/model_final_3e0943.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_DC5_3x/138204841/metrics.json">metrics</a></td>
177
+ </tr>
178
+ <!-- ROW: faster_rcnn_R_101_FPN_3x -->
179
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
180
+ <td align="center">3x</td>
181
+ <td align="center">0.286</td>
182
+ <td align="center">0.051</td>
183
+ <td align="center">4.1</td>
184
+ <td align="center">42.0</td>
185
+ <td align="center">137851257</td>
186
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/metrics.json">metrics</a></td>
187
+ </tr>
188
+ <!-- ROW: faster_rcnn_X_101_32x8d_FPN_3x -->
189
+ <tr><td align="left"><a href="configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
190
+ <td align="center">3x</td>
191
+ <td align="center">0.638</td>
192
+ <td align="center">0.098</td>
193
+ <td align="center">6.7</td>
194
+ <td align="center">43.0</td>
195
+ <td align="center">139173657</td>
196
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/model_final_68b088.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x/139173657/metrics.json">metrics</a></td>
197
+ </tr>
198
+ </tbody></table>
199
+
200
+ #### RetinaNet:
201
+ <!--
202
+ ./gen_html_table.py --config 'COCO-Detection/retina*50*' 'COCO-Detection/retina*101*' --name R50 R50 R101 --fields lr_sched train_speed inference_speed mem box_AP
203
+ -->
204
+
205
+
206
+ <table><tbody>
207
+ <!-- START TABLE -->
208
+ <!-- TABLE HEADER -->
209
+ <th valign="bottom">Name</th>
210
+ <th valign="bottom">lr<br/>sched</th>
211
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
212
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
213
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
214
+ <th valign="bottom">box<br/>AP</th>
215
+ <th valign="bottom">model id</th>
216
+ <th valign="bottom">download</th>
217
+ <!-- TABLE BODY -->
218
+ <!-- ROW: retinanet_R_50_FPN_1x -->
219
+ <tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml">R50</a></td>
220
+ <td align="center">1x</td>
221
+ <td align="center">0.200</td>
222
+ <td align="center">0.055</td>
223
+ <td align="center">3.9</td>
224
+ <td align="center">36.5</td>
225
+ <td align="center">137593951</td>
226
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/137593951/model_final_b796dc.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_1x/137593951/metrics.json">metrics</a></td>
227
+ </tr>
228
+ <!-- ROW: retinanet_R_50_FPN_3x -->
229
+ <tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml">R50</a></td>
230
+ <td align="center">3x</td>
231
+ <td align="center">0.201</td>
232
+ <td align="center">0.055</td>
233
+ <td align="center">3.9</td>
234
+ <td align="center">37.9</td>
235
+ <td align="center">137849486</td>
236
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_50_FPN_3x/137849486/metrics.json">metrics</a></td>
237
+ </tr>
238
+ <!-- ROW: retinanet_R_101_FPN_3x -->
239
+ <tr><td align="left"><a href="configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml">R101</a></td>
240
+ <td align="center">3x</td>
241
+ <td align="center">0.280</td>
242
+ <td align="center">0.068</td>
243
+ <td align="center">5.1</td>
244
+ <td align="center">39.9</td>
245
+ <td align="center">138363263</td>
246
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/138363263/model_final_59f53c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/retinanet_R_101_FPN_3x/138363263/metrics.json">metrics</a></td>
247
+ </tr>
248
+ </tbody></table>
249
+
250
+ #### RPN & Fast R-CNN:
251
+ <!--
252
+ ./gen_html_table.py --config 'COCO-Detection/rpn*' 'COCO-Detection/fast_rcnn*' --name "RPN R50-C4" "RPN R50-FPN" "Fast R-CNN R50-FPN" --fields lr_sched train_speed inference_speed mem box_AP prop_AR
253
+ -->
254
+
255
+ <table><tbody>
256
+ <!-- START TABLE -->
257
+ <!-- TABLE HEADER -->
258
+ <th valign="bottom">Name</th>
259
+ <th valign="bottom">lr<br/>sched</th>
260
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
261
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
262
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
263
+ <th valign="bottom">box<br/>AP</th>
264
+ <th valign="bottom">prop.<br/>AR</th>
265
+ <th valign="bottom">model id</th>
266
+ <th valign="bottom">download</th>
267
+ <!-- TABLE BODY -->
268
+ <!-- ROW: rpn_R_50_C4_1x -->
269
+ <tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_C4_1x.yaml">RPN R50-C4</a></td>
270
+ <td align="center">1x</td>
271
+ <td align="center">0.130</td>
272
+ <td align="center">0.034</td>
273
+ <td align="center">1.5</td>
274
+ <td align="center"></td>
275
+ <td align="center">51.6</td>
276
+ <td align="center">137258005</td>
277
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/model_final_450694.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_C4_1x/137258005/metrics.json">metrics</a></td>
278
+ </tr>
279
+ <!-- ROW: rpn_R_50_FPN_1x -->
280
+ <tr><td align="left"><a href="configs/COCO-Detection/rpn_R_50_FPN_1x.yaml">RPN R50-FPN</a></td>
281
+ <td align="center">1x</td>
282
+ <td align="center">0.186</td>
283
+ <td align="center">0.032</td>
284
+ <td align="center">2.7</td>
285
+ <td align="center"></td>
286
+ <td align="center">58.0</td>
287
+ <td align="center">137258492</td>
288
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/rpn_R_50_FPN_1x/137258492/metrics.json">metrics</a></td>
289
+ </tr>
290
+ <!-- ROW: fast_rcnn_R_50_FPN_1x -->
291
+ <tr><td align="left"><a href="configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml">Fast R-CNN R50-FPN</a></td>
292
+ <td align="center">1x</td>
293
+ <td align="center">0.140</td>
294
+ <td align="center">0.029</td>
295
+ <td align="center">2.6</td>
296
+ <td align="center">37.8</td>
297
+ <td align="center"></td>
298
+ <td align="center">137635226</td>
299
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/metrics.json">metrics</a></td>
300
+ </tr>
301
+ </tbody></table>
302
+
303
+ ### COCO Instance Segmentation Baselines with Mask R-CNN
304
+ <!--
305
+ ./gen_html_table.py --config 'COCO-InstanceSegmentation/mask*50*'{1x,3x}'*' 'COCO-InstanceSegmentation/mask*101*' --name R50-C4 R50-DC5 R50-FPN R50-C4 R50-DC5 R50-FPN R101-C4 R101-DC5 R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
306
+ -->
307
+
308
+
309
+
310
+ <table><tbody>
311
+ <!-- START TABLE -->
312
+ <!-- TABLE HEADER -->
313
+ <th valign="bottom">Name</th>
314
+ <th valign="bottom">lr<br/>sched</th>
315
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
316
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
317
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
318
+ <th valign="bottom">box<br/>AP</th>
319
+ <th valign="bottom">mask<br/>AP</th>
320
+ <th valign="bottom">model id</th>
321
+ <th valign="bottom">download</th>
322
+ <!-- TABLE BODY -->
323
+ <!-- ROW: mask_rcnn_R_50_C4_1x -->
324
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml">R50-C4</a></td>
325
+ <td align="center">1x</td>
326
+ <td align="center">0.584</td>
327
+ <td align="center">0.110</td>
328
+ <td align="center">5.2</td>
329
+ <td align="center">36.8</td>
330
+ <td align="center">32.2</td>
331
+ <td align="center">137259246</td>
332
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/model_final_9243eb.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/metrics.json">metrics</a></td>
333
+ </tr>
334
+ <!-- ROW: mask_rcnn_R_50_DC5_1x -->
335
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml">R50-DC5</a></td>
336
+ <td align="center">1x</td>
337
+ <td align="center">0.471</td>
338
+ <td align="center">0.076</td>
339
+ <td align="center">6.5</td>
340
+ <td align="center">38.3</td>
341
+ <td align="center">34.2</td>
342
+ <td align="center">137260150</td>
343
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/model_final_4f86c3.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x/137260150/metrics.json">metrics</a></td>
344
+ </tr>
345
+ <!-- ROW: mask_rcnn_R_50_FPN_1x -->
346
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
347
+ <td align="center">1x</td>
348
+ <td align="center">0.261</td>
349
+ <td align="center">0.043</td>
350
+ <td align="center">3.4</td>
351
+ <td align="center">38.6</td>
352
+ <td align="center">35.2</td>
353
+ <td align="center">137260431</td>
354
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json">metrics</a></td>
355
+ </tr>
356
+ <!-- ROW: mask_rcnn_R_50_C4_3x -->
357
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml">R50-C4</a></td>
358
+ <td align="center">3x</td>
359
+ <td align="center">0.575</td>
360
+ <td align="center">0.111</td>
361
+ <td align="center">5.2</td>
362
+ <td align="center">39.8</td>
363
+ <td align="center">34.4</td>
364
+ <td align="center">137849525</td>
365
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/metrics.json">metrics</a></td>
366
+ </tr>
367
+ <!-- ROW: mask_rcnn_R_50_DC5_3x -->
368
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml">R50-DC5</a></td>
369
+ <td align="center">3x</td>
370
+ <td align="center">0.470</td>
371
+ <td align="center">0.076</td>
372
+ <td align="center">6.5</td>
373
+ <td align="center">40.0</td>
374
+ <td align="center">35.9</td>
375
+ <td align="center">137849551</td>
376
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/metrics.json">metrics</a></td>
377
+ </tr>
378
+ <!-- ROW: mask_rcnn_R_50_FPN_3x -->
379
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
380
+ <td align="center">3x</td>
381
+ <td align="center">0.261</td>
382
+ <td align="center">0.043</td>
383
+ <td align="center">3.4</td>
384
+ <td align="center">41.0</td>
385
+ <td align="center">37.2</td>
386
+ <td align="center">137849600</td>
387
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
388
+ </tr>
389
+ <!-- ROW: mask_rcnn_R_101_C4_3x -->
390
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml">R101-C4</a></td>
391
+ <td align="center">3x</td>
392
+ <td align="center">0.652</td>
393
+ <td align="center">0.145</td>
394
+ <td align="center">6.3</td>
395
+ <td align="center">42.6</td>
396
+ <td align="center">36.7</td>
397
+ <td align="center">138363239</td>
398
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/model_final_a2914c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x/138363239/metrics.json">metrics</a></td>
399
+ </tr>
400
+ <!-- ROW: mask_rcnn_R_101_DC5_3x -->
401
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml">R101-DC5</a></td>
402
+ <td align="center">3x</td>
403
+ <td align="center">0.545</td>
404
+ <td align="center">0.092</td>
405
+ <td align="center">7.6</td>
406
+ <td align="center">41.9</td>
407
+ <td align="center">37.3</td>
408
+ <td align="center">138363294</td>
409
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/model_final_0464b7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x/138363294/metrics.json">metrics</a></td>
410
+ </tr>
411
+ <!-- ROW: mask_rcnn_R_101_FPN_3x -->
412
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
413
+ <td align="center">3x</td>
414
+ <td align="center">0.340</td>
415
+ <td align="center">0.056</td>
416
+ <td align="center">4.6</td>
417
+ <td align="center">42.9</td>
418
+ <td align="center">38.6</td>
419
+ <td align="center">138205316</td>
420
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/metrics.json">metrics</a></td>
421
+ </tr>
422
+ <!-- ROW: mask_rcnn_X_101_32x8d_FPN_3x -->
423
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
424
+ <td align="center">3x</td>
425
+ <td align="center">0.690</td>
426
+ <td align="center">0.103</td>
427
+ <td align="center">7.2</td>
428
+ <td align="center">44.3</td>
429
+ <td align="center">39.5</td>
430
+ <td align="center">139653917</td>
431
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/metrics.json">metrics</a></td>
432
+ </tr>
433
+ </tbody></table>
434
+
435
+ ### COCO Person Keypoint Detection Baselines with Keypoint R-CNN
436
+ <!--
437
+ ./gen_html_table.py --config 'COCO-Keypoints/*50*' 'COCO-Keypoints/*101*' --name R50-FPN R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP keypoint_AP
438
+ -->
439
+
440
+
441
+ <table><tbody>
442
+ <!-- START TABLE -->
443
+ <!-- TABLE HEADER -->
444
+ <th valign="bottom">Name</th>
445
+ <th valign="bottom">lr<br/>sched</th>
446
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
447
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
448
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
449
+ <th valign="bottom">box<br/>AP</th>
450
+ <th valign="bottom">kp.<br/>AP</th>
451
+ <th valign="bottom">model id</th>
452
+ <th valign="bottom">download</th>
453
+ <!-- TABLE BODY -->
454
+ <!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
455
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
456
+ <td align="center">1x</td>
457
+ <td align="center">0.315</td>
458
+ <td align="center">0.072</td>
459
+ <td align="center">5.0</td>
460
+ <td align="center">53.6</td>
461
+ <td align="center">64.0</td>
462
+ <td align="center">137261548</td>
463
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/model_final_04e291.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x/137261548/metrics.json">metrics</a></td>
464
+ </tr>
465
+ <!-- ROW: keypoint_rcnn_R_50_FPN_3x -->
466
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml">R50-FPN</a></td>
467
+ <td align="center">3x</td>
468
+ <td align="center">0.316</td>
469
+ <td align="center">0.066</td>
470
+ <td align="center">5.0</td>
471
+ <td align="center">55.4</td>
472
+ <td align="center">65.5</td>
473
+ <td align="center">137849621</td>
474
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/metrics.json">metrics</a></td>
475
+ </tr>
476
+ <!-- ROW: keypoint_rcnn_R_101_FPN_3x -->
477
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml">R101-FPN</a></td>
478
+ <td align="center">3x</td>
479
+ <td align="center">0.390</td>
480
+ <td align="center">0.076</td>
481
+ <td align="center">6.1</td>
482
+ <td align="center">56.4</td>
483
+ <td align="center">66.1</td>
484
+ <td align="center">138363331</td>
485
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/metrics.json">metrics</a></td>
486
+ </tr>
487
+ <!-- ROW: keypoint_rcnn_X_101_32x8d_FPN_3x -->
488
+ <tr><td align="left"><a href="configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml">X101-FPN</a></td>
489
+ <td align="center">3x</td>
490
+ <td align="center">0.738</td>
491
+ <td align="center">0.121</td>
492
+ <td align="center">8.7</td>
493
+ <td align="center">57.3</td>
494
+ <td align="center">66.0</td>
495
+ <td align="center">139686956</td>
496
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/model_final_5ad38f.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x/139686956/metrics.json">metrics</a></td>
497
+ </tr>
498
+ </tbody></table>
499
+
500
+ ### COCO Panoptic Segmentation Baselines with Panoptic FPN
501
+ <!--
502
+ ./gen_html_table.py --config 'COCO-PanopticSegmentation/*50*' 'COCO-PanopticSegmentation/*101*' --name R50-FPN R50-FPN R101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP PQ
503
+ -->
504
+
505
+
506
+ <table><tbody>
507
+ <!-- START TABLE -->
508
+ <!-- TABLE HEADER -->
509
+ <th valign="bottom">Name</th>
510
+ <th valign="bottom">lr<br/>sched</th>
511
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
512
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
513
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
514
+ <th valign="bottom">box<br/>AP</th>
515
+ <th valign="bottom">mask<br/>AP</th>
516
+ <th valign="bottom">PQ</th>
517
+ <th valign="bottom">model id</th>
518
+ <th valign="bottom">download</th>
519
+ <!-- TABLE BODY -->
520
+ <!-- ROW: panoptic_fpn_R_50_1x -->
521
+ <tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml">R50-FPN</a></td>
522
+ <td align="center">1x</td>
523
+ <td align="center">0.304</td>
524
+ <td align="center">0.053</td>
525
+ <td align="center">4.8</td>
526
+ <td align="center">37.6</td>
527
+ <td align="center">34.7</td>
528
+ <td align="center">39.4</td>
529
+ <td align="center">139514544</td>
530
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/model_final_dbfeb4.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x/139514544/metrics.json">metrics</a></td>
531
+ </tr>
532
+ <!-- ROW: panoptic_fpn_R_50_3x -->
533
+ <tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml">R50-FPN</a></td>
534
+ <td align="center">3x</td>
535
+ <td align="center">0.302</td>
536
+ <td align="center">0.053</td>
537
+ <td align="center">4.8</td>
538
+ <td align="center">40.0</td>
539
+ <td align="center">36.5</td>
540
+ <td align="center">41.5</td>
541
+ <td align="center">139514569</td>
542
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/metrics.json">metrics</a></td>
543
+ </tr>
544
+ <!-- ROW: panoptic_fpn_R_101_3x -->
545
+ <tr><td align="left"><a href="configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml">R101-FPN</a></td>
546
+ <td align="center">3x</td>
547
+ <td align="center">0.392</td>
548
+ <td align="center">0.066</td>
549
+ <td align="center">6.0</td>
550
+ <td align="center">42.4</td>
551
+ <td align="center">38.5</td>
552
+ <td align="center">43.0</td>
553
+ <td align="center">139514519</td>
554
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/metrics.json">metrics</a></td>
555
+ </tr>
556
+ </tbody></table>
557
+
558
+
559
+ ### LVIS Instance Segmentation Baselines with Mask R-CNN
560
+
561
+ Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
562
+ These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).
563
+
564
+ NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines.
565
+ This corresponds to roughly 24 epochs of LVISv0.5 data.
566
+ The final results of these configs have large variance across different runs.
567
+
568
+ <!--
569
+ ./gen_html_table.py --config 'LVIS-InstanceSegmentation/mask*50*' 'LVIS-InstanceSegmentation/mask*101*' --name R50-FPN R101-FPN X101-FPN --fields lr_sched train_speed inference_speed mem box_AP mask_AP
570
+ -->
571
+
572
+
573
+ <table><tbody>
574
+ <!-- START TABLE -->
575
+ <!-- TABLE HEADER -->
576
+ <th valign="bottom">Name</th>
577
+ <th valign="bottom">lr<br/>sched</th>
578
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
579
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
580
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
581
+ <th valign="bottom">box<br/>AP</th>
582
+ <th valign="bottom">mask<br/>AP</th>
583
+ <th valign="bottom">model id</th>
584
+ <th valign="bottom">download</th>
585
+ <!-- TABLE BODY -->
586
+ <!-- ROW: mask_rcnn_R_50_FPN_1x -->
587
+ <tr><td align="left"><a href="configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">R50-FPN</a></td>
588
+ <td align="center">1x</td>
589
+ <td align="center">0.292</td>
590
+ <td align="center">0.107</td>
591
+ <td align="center">7.1</td>
592
+ <td align="center">23.6</td>
593
+ <td align="center">24.4</td>
594
+ <td align="center">144219072</td>
595
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/model_final_571f7c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/metrics.json">metrics</a></td>
596
+ </tr>
597
+ <!-- ROW: mask_rcnn_R_101_FPN_1x -->
598
+ <tr><td align="left"><a href="configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml">R101-FPN</a></td>
599
+ <td align="center">1x</td>
600
+ <td align="center">0.371</td>
601
+ <td align="center">0.114</td>
602
+ <td align="center">7.8</td>
603
+ <td align="center">25.6</td>
604
+ <td align="center">25.9</td>
605
+ <td align="center">144219035</td>
606
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/model_final_824ab5.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x/144219035/metrics.json">metrics</a></td>
607
+ </tr>
608
+ <!-- ROW: mask_rcnn_X_101_32x8d_FPN_1x -->
609
+ <tr><td align="left"><a href="configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml">X101-FPN</a></td>
610
+ <td align="center">1x</td>
611
+ <td align="center">0.712</td>
612
+ <td align="center">0.151</td>
613
+ <td align="center">10.2</td>
614
+ <td align="center">26.7</td>
615
+ <td align="center">27.1</td>
616
+ <td align="center">144219108</td>
617
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/model_final_5e3439.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x/144219108/metrics.json">metrics</a></td>
618
+ </tr>
619
+ </tbody></table>
620
+
621
+
622
+
623
+ ### Cityscapes & Pascal VOC Baselines
624
+
625
+ Simple baselines for
626
+ * Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
627
+ * Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)
628
+
629
+ <!--
630
+ ./gen_html_table.py --config 'Cityscapes/*' 'PascalVOC-Detection/*' --name "R50-FPN, Cityscapes" "R50-C4, VOC" --fields train_speed inference_speed mem box_AP box_AP50 mask_AP
631
+ -->
632
+
633
+
634
+ <table><tbody>
635
+ <!-- START TABLE -->
636
+ <!-- TABLE HEADER -->
637
+ <th valign="bottom">Name</th>
638
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
639
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
640
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
641
+ <th valign="bottom">box<br/>AP</th>
642
+ <th valign="bottom">box<br/>AP50</th>
643
+ <th valign="bottom">mask<br/>AP</th>
644
+ <th valign="bottom">model id</th>
645
+ <th valign="bottom">download</th>
646
+ <!-- TABLE BODY -->
647
+ <!-- ROW: mask_rcnn_R_50_FPN -->
648
+ <tr><td align="left"><a href="configs/Cityscapes/mask_rcnn_R_50_FPN.yaml">R50-FPN, Cityscapes</a></td>
649
+ <td align="center">0.240</td>
650
+ <td align="center">0.078</td>
651
+ <td align="center">4.4</td>
652
+ <td align="center"></td>
653
+ <td align="center"></td>
654
+ <td align="center">36.5</td>
655
+ <td align="center">142423278</td>
656
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/model_final_af9cf5.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Cityscapes/mask_rcnn_R_50_FPN/142423278/metrics.json">metrics</a></td>
657
+ </tr>
658
+ <!-- ROW: faster_rcnn_R_50_C4 -->
659
+ <tr><td align="left"><a href="configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml">R50-C4, VOC</a></td>
660
+ <td align="center">0.537</td>
661
+ <td align="center">0.081</td>
662
+ <td align="center">4.8</td>
663
+ <td align="center">51.9</td>
664
+ <td align="center">80.3</td>
665
+ <td align="center"></td>
666
+ <td align="center">142202221</td>
667
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/model_final_b1acc2.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/PascalVOC-Detection/faster_rcnn_R_50_C4/142202221/metrics.json">metrics</a></td>
668
+ </tr>
669
+ </tbody></table>
670
+
671
+
672
+
673
+ ### Other Settings
674
+
675
+ Ablations for Deformable Conv and Cascade R-CNN:
676
+
677
+ <!--
678
+ ./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml' 'Misc/*R_50_FPN_1x_dconv*' 'Misc/cascade*1x.yaml' 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/*R_50_FPN_3x_dconv*' 'Misc/cascade*3x.yaml' --name "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" "Baseline R50-FPN" "Deformable Conv" "Cascade R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
679
+ -->
680
+
681
+
682
+ <table><tbody>
683
+ <!-- START TABLE -->
684
+ <!-- TABLE HEADER -->
685
+ <th valign="bottom">Name</th>
686
+ <th valign="bottom">lr<br/>sched</th>
687
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
688
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
689
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
690
+ <th valign="bottom">box<br/>AP</th>
691
+ <th valign="bottom">mask<br/>AP</th>
692
+ <th valign="bottom">model id</th>
693
+ <th valign="bottom">download</th>
694
+ <!-- TABLE BODY -->
695
+ <!-- ROW: mask_rcnn_R_50_FPN_1x -->
696
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">Baseline R50-FPN</a></td>
697
+ <td align="center">1x</td>
698
+ <td align="center">0.261</td>
699
+ <td align="center">0.043</td>
700
+ <td align="center">3.4</td>
701
+ <td align="center">38.6</td>
702
+ <td align="center">35.2</td>
703
+ <td align="center">137260431</td>
704
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/model_final_a54504.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/137260431/metrics.json">metrics</a></td>
705
+ </tr>
706
+ <!-- ROW: mask_rcnn_R_50_FPN_1x_dconv_c3-c5 -->
707
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml">Deformable Conv</a></td>
708
+ <td align="center">1x</td>
709
+ <td align="center">0.342</td>
710
+ <td align="center">0.048</td>
711
+ <td align="center">3.5</td>
712
+ <td align="center">41.5</td>
713
+ <td align="center">37.5</td>
714
+ <td align="center">138602867</td>
715
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/model_final_65c703.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5/138602867/metrics.json">metrics</a></td>
716
+ </tr>
717
+ <!-- ROW: cascade_mask_rcnn_R_50_FPN_1x -->
718
+ <tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml">Cascade R-CNN</a></td>
719
+ <td align="center">1x</td>
720
+ <td align="center">0.317</td>
721
+ <td align="center">0.052</td>
722
+ <td align="center">4.0</td>
723
+ <td align="center">42.1</td>
724
+ <td align="center">36.4</td>
725
+ <td align="center">138602847</td>
726
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/model_final_e9d89b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_1x/138602847/metrics.json">metrics</a></td>
727
+ </tr>
728
+ <!-- ROW: mask_rcnn_R_50_FPN_3x -->
729
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">Baseline R50-FPN</a></td>
730
+ <td align="center">3x</td>
731
+ <td align="center">0.261</td>
732
+ <td align="center">0.043</td>
733
+ <td align="center">3.4</td>
734
+ <td align="center">41.0</td>
735
+ <td align="center">37.2</td>
736
+ <td align="center">137849600</td>
737
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
738
+ </tr>
739
+ <!-- ROW: mask_rcnn_R_50_FPN_3x_dconv_c3-c5 -->
740
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml">Deformable Conv</a></td>
741
+ <td align="center">3x</td>
742
+ <td align="center">0.349</td>
743
+ <td align="center">0.047</td>
744
+ <td align="center">3.5</td>
745
+ <td align="center">42.7</td>
746
+ <td align="center">38.5</td>
747
+ <td align="center">144998336</td>
748
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/model_final_821d0b.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5/144998336/metrics.json">metrics</a></td>
749
+ </tr>
750
+ <!-- ROW: cascade_mask_rcnn_R_50_FPN_3x -->
751
+ <tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml">Cascade R-CNN</a></td>
752
+ <td align="center">3x</td>
753
+ <td align="center">0.328</td>
754
+ <td align="center">0.053</td>
755
+ <td align="center">4.0</td>
756
+ <td align="center">44.3</td>
757
+ <td align="center">38.5</td>
758
+ <td align="center">144998488</td>
759
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/metrics.json">metrics</a></td>
760
+ </tr>
761
+ </tbody></table>
762
+
763
+
764
+ Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
765
+ (Note: The baseline uses a `2fc` head, while the others use a [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)
766
+ <!--
767
+ ./gen_html_table.py --config 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml' 'Misc/mask*50_FPN_3x_gn.yaml' 'Misc/mask*50_FPN_3x_syncbn.yaml' 'Misc/scratch*' --name "Baseline R50-FPN" "GN" "SyncBN" "GN (from scratch)" "GN (from scratch)" "SyncBN (from scratch)" --fields lr_sched train_speed inference_speed mem box_AP mask_AP
768
+ -->
769
+
770
+
771
+ <table><tbody>
772
+ <!-- START TABLE -->
773
+ <!-- TABLE HEADER -->
774
+ <th valign="bottom">Name</th>
775
+ <th valign="bottom">lr<br/>sched</th>
776
+ <th valign="bottom">train<br/>time<br/>(s/iter)</th>
777
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
778
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
779
+ <th valign="bottom">box<br/>AP</th>
780
+ <th valign="bottom">mask<br/>AP</th>
781
+ <th valign="bottom">model id</th>
782
+ <th valign="bottom">download</th>
783
+ <!-- TABLE BODY -->
784
+ <!-- ROW: mask_rcnn_R_50_FPN_3x -->
785
+ <tr><td align="left"><a href="configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml">Baseline R50-FPN</a></td>
786
+ <td align="center">3x</td>
787
+ <td align="center">0.261</td>
788
+ <td align="center">0.043</td>
789
+ <td align="center">3.4</td>
790
+ <td align="center">41.0</td>
791
+ <td align="center">37.2</td>
792
+ <td align="center">137849600</td>
793
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/metrics.json">metrics</a></td>
794
+ </tr>
795
+ <!-- ROW: mask_rcnn_R_50_FPN_3x_gn -->
796
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml">GN</a></td>
797
+ <td align="center">3x</td>
798
+ <td align="center">0.356</td>
799
+ <td align="center">0.069</td>
800
+ <td align="center">7.3</td>
801
+ <td align="center">42.6</td>
802
+ <td align="center">38.6</td>
803
+ <td align="center">138602888</td>
804
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/model_final_dc5d9e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_gn/138602888/metrics.json">metrics</a></td>
805
+ </tr>
806
+ <!-- ROW: mask_rcnn_R_50_FPN_3x_syncbn -->
807
+ <tr><td align="left"><a href="configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml">SyncBN</a></td>
808
+ <td align="center">3x</td>
809
+ <td align="center">0.371</td>
810
+ <td align="center">0.053</td>
811
+ <td align="center">5.5</td>
812
+ <td align="center">41.9</td>
813
+ <td align="center">37.8</td>
814
+ <td align="center">169527823</td>
815
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/model_final_3b3c51.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/mask_rcnn_R_50_FPN_3x_syncbn/169527823/metrics.json">metrics</a></td>
816
+ </tr>
817
+ <!-- ROW: scratch_mask_rcnn_R_50_FPN_3x_gn -->
818
+ <tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml">GN (from scratch)</a></td>
819
+ <td align="center">3x</td>
820
+ <td align="center">0.400</td>
821
+ <td align="center">0.069</td>
822
+ <td align="center">9.8</td>
823
+ <td align="center">39.9</td>
824
+ <td align="center">36.6</td>
825
+ <td align="center">138602908</td>
826
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/metrics.json">metrics</a></td>
827
+ </tr>
828
+ <!-- ROW: scratch_mask_rcnn_R_50_FPN_9x_gn -->
829
+ <tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml">GN (from scratch)</a></td>
830
+ <td align="center">9x</td>
831
+ <td align="center">N/A</td>
832
+ <td align="center">0.070</td>
833
+ <td align="center">9.8</td>
834
+ <td align="center">43.7</td>
835
+ <td align="center">39.6</td>
836
+ <td align="center">183808979</td>
837
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/model_final_da7b4c.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn/183808979/metrics.json">metrics</a></td>
838
+ </tr>
839
+ <!-- ROW: scratch_mask_rcnn_R_50_FPN_9x_syncbn -->
840
+ <tr><td align="left"><a href="configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml">SyncBN (from scratch)</a></td>
841
+ <td align="center">9x</td>
842
+ <td align="center">N/A</td>
843
+ <td align="center">0.055</td>
844
+ <td align="center">7.2</td>
845
+ <td align="center">43.6</td>
846
+ <td align="center">39.3</td>
847
+ <td align="center">184226666</td>
848
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/model_final_5ce33e.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn/184226666/metrics.json">metrics</a></td>
849
+ </tr>
850
+ </tbody></table>
851
+
852
+
853
+ A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:
854
+
855
+ <!--
856
+ ./gen_html_table.py --config 'Misc/panoptic_*dconv*' 'Misc/cascade_*152*' --name "Panoptic FPN R101" "Mask R-CNN X152" --fields inference_speed mem box_AP mask_AP PQ
857
+ # manually add TTA results
858
+ -->
859
+
860
+
861
+ <table><tbody>
862
+ <!-- START TABLE -->
863
+ <!-- TABLE HEADER -->
864
+ <th valign="bottom">Name</th>
865
+ <th valign="bottom">inference<br/>time<br/>(s/im)</th>
866
+ <th valign="bottom">train<br/>mem<br/>(GB)</th>
867
+ <th valign="bottom">box<br/>AP</th>
868
+ <th valign="bottom">mask<br/>AP</th>
869
+ <th valign="bottom">PQ</th>
870
+ <th valign="bottom">model id</th>
871
+ <th valign="bottom">download</th>
872
+ <!-- TABLE BODY -->
873
+ <!-- ROW: panoptic_fpn_R_101_dconv_cascade_gn_3x -->
874
+ <tr><td align="left"><a href="configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml">Panoptic FPN R101</a></td>
875
+ <td align="center">0.107</td>
876
+ <td align="center">11.4</td>
877
+ <td align="center">47.4</td>
878
+ <td align="center">41.3</td>
879
+ <td align="center">46.1</td>
880
+ <td align="center">139797668</td>
881
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/model_final_be35db.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x/139797668/metrics.json">metrics</a></td>
882
+ </tr>
883
+ <!-- ROW: cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv -->
884
+ <tr><td align="left"><a href="configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml">Mask R-CNN X152</a></td>
885
+ <td align="center">0.242</td>
886
+ <td align="center">15.1</td>
887
+ <td align="center">50.2</td>
888
+ <td align="center">44.0</td>
889
+ <td align="center"></td>
890
+ <td align="center">18131413</td>
891
+ <td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/model_0039999_e76410.pkl">model</a>&nbsp;|&nbsp;<a href="https://dl.fbaipublicfiles.com/detectron2/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv/18131413/metrics.json">metrics</a></td>
892
+ </tr>
893
+ <!-- ROW: TTA cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv -->
894
+ <tr><td align="left">above + test-time aug.</td>
895
+ <td align="center"></td>
896
+ <td align="center"></td>
897
+ <td align="center">51.9</td>
898
+ <td align="center">45.9</td>
899
+ <td align="center"></td>
900
+ <td align="center"></td>
901
+ <td align="center"></td>
902
+ </tr>
903
+ </tbody></table>
preprocess/humanparsing/mhp_extension/detectron2/README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <img src=".github/Detectron2-Logo-Horz.svg" width="300" >
2
+
3
+ Detectron2 is Facebook AI Research's next generation software system
4
+ that implements state-of-the-art object detection algorithms.
5
+ It is a ground-up rewrite of the previous version,
6
+ [Detectron](https://github.com/facebookresearch/Detectron/),
7
+ and it originates from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/).
8
+
9
+ <div align="center">
10
+ <img src="https://user-images.githubusercontent.com/1381301/66535560-d3422200-eace-11e9-9123-5535d469db19.png"/>
11
+ </div>
12
+
13
+ ### What's New
14
+ * It is powered by the [PyTorch](https://pytorch.org) deep learning framework.
15
+ * Includes more features such as panoptic segmentation, densepose, Cascade R-CNN, rotated bounding boxes, etc.
16
+ * Can be used as a library to support [different projects](projects/) on top of it.
17
+ We'll open source more research projects in this way.
18
+ * It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html).
19
+
20
+ Read our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/)
21
+ to see more demos and learn about detectron2.
22
+
23
+ ## Installation
24
+
25
+ See [INSTALL.md](INSTALL.md).
26
+
27
+ ## Quick Start
28
+
29
+ See [GETTING_STARTED.md](GETTING_STARTED.md),
30
+ or the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5).
31
+
32
+ Learn more in our [documentation](https://detectron2.readthedocs.io).
33
+ And see [projects/](projects/) for some projects that are built on top of detectron2.
34
+
35
+ ## Model Zoo and Baselines
36
+
37
+ We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md).
38
+
39
+
40
+ ## License
41
+
42
+ Detectron2 is released under the [Apache 2.0 license](LICENSE).
43
+
44
+ ## Citing Detectron2
45
+
46
+ If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry.
47
+
48
+ ```BibTeX
49
+ @misc{wu2019detectron2,
50
+ author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and
51
+ Wan-Yen Lo and Ross Girshick},
52
+ title = {Detectron2},
53
+ howpublished = {\url{https://github.com/facebookresearch/detectron2}},
54
+ year = {2019}
55
+ }
56
+ ```
preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL:
2
+ META_ARCHITECTURE: "GeneralizedRCNN"
3
+ RPN:
4
+ PRE_NMS_TOPK_TEST: 6000
5
+ POST_NMS_TOPK_TEST: 1000
6
+ ROI_HEADS:
7
+ NAME: "Res5ROIHeads"
8
+ DATASETS:
9
+ TRAIN: ("coco_2017_train",)
10
+ TEST: ("coco_2017_val",)
11
+ SOLVER:
12
+ IMS_PER_BATCH: 16
13
+ BASE_LR: 0.02
14
+ STEPS: (60000, 80000)
15
+ MAX_ITER: 90000
16
+ INPUT:
17
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
18
+ VERSION: 2
preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL:
2
+ META_ARCHITECTURE: "GeneralizedRCNN"
3
+ RESNETS:
4
+ OUT_FEATURES: ["res5"]
5
+ RES5_DILATION: 2
6
+ RPN:
7
+ IN_FEATURES: ["res5"]
8
+ PRE_NMS_TOPK_TEST: 6000
9
+ POST_NMS_TOPK_TEST: 1000
10
+ ROI_HEADS:
11
+ NAME: "StandardROIHeads"
12
+ IN_FEATURES: ["res5"]
13
+ ROI_BOX_HEAD:
14
+ NAME: "FastRCNNConvFCHead"
15
+ NUM_FC: 2
16
+ POOLER_RESOLUTION: 7
17
+ ROI_MASK_HEAD:
18
+ NAME: "MaskRCNNConvUpsampleHead"
19
+ NUM_CONV: 4
20
+ POOLER_RESOLUTION: 14
21
+ DATASETS:
22
+ TRAIN: ("coco_2017_train",)
23
+ TEST: ("coco_2017_val",)
24
+ SOLVER:
25
+ IMS_PER_BATCH: 16
26
+ BASE_LR: 0.02
27
+ STEPS: (60000, 80000)
28
+ MAX_ITER: 90000
29
+ INPUT:
30
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
31
+ VERSION: 2
preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL:
2
+ META_ARCHITECTURE: "GeneralizedRCNN"
3
+ BACKBONE:
4
+ NAME: "build_resnet_fpn_backbone"
5
+ RESNETS:
6
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
7
+ FPN:
8
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
9
+ ANCHOR_GENERATOR:
10
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each input feature map
11
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all input feature maps)
12
+ RPN:
13
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
15
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
16
+ # Detectron1 uses 2000 proposals per-batch,
17
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19
+ POST_NMS_TOPK_TRAIN: 1000
20
+ POST_NMS_TOPK_TEST: 1000
21
+ ROI_HEADS:
22
+ NAME: "StandardROIHeads"
23
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
24
+ ROI_BOX_HEAD:
25
+ NAME: "FastRCNNConvFCHead"
26
+ NUM_FC: 2
27
+ POOLER_RESOLUTION: 7
28
+ ROI_MASK_HEAD:
29
+ NAME: "MaskRCNNConvUpsampleHead"
30
+ NUM_CONV: 4
31
+ POOLER_RESOLUTION: 14
32
+ DATASETS:
33
+ TRAIN: ("coco_2017_train",)
34
+ TEST: ("coco_2017_val",)
35
+ SOLVER:
36
+ IMS_PER_BATCH: 16
37
+ BASE_LR: 0.02
38
+ STEPS: (60000, 80000)
39
+ MAX_ITER: 90000
40
+ INPUT:
41
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
42
+ VERSION: 2
preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL:
2
+ META_ARCHITECTURE: "RetinaNet"
3
+ BACKBONE:
4
+ NAME: "build_retinanet_resnet_fpn_backbone"
5
+ RESNETS:
6
+ OUT_FEATURES: ["res3", "res4", "res5"]
7
+ ANCHOR_GENERATOR:
8
+ SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
9
+ FPN:
10
+ IN_FEATURES: ["res3", "res4", "res5"]
11
+ RETINANET:
12
+ IOU_THRESHOLDS: [0.4, 0.5]
13
+ IOU_LABELS: [0, -1, 1]
14
+ DATASETS:
15
+ TRAIN: ("coco_2017_train",)
16
+ TEST: ("coco_2017_val",)
17
+ SOLVER:
18
+ IMS_PER_BATCH: 16
19
+ BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate
20
+ STEPS: (60000, 80000)
21
+ MAX_ITER: 90000
22
+ INPUT:
23
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
24
+ VERSION: 2
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-FPN.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ MASK_ON: False
5
+ LOAD_PROPOSALS: True
6
+ RESNETS:
7
+ DEPTH: 50
8
+ PROPOSAL_GENERATOR:
9
+ NAME: "PrecomputedProposals"
10
+ DATASETS:
11
+ TRAIN: ("coco_2017_train",)
12
+ PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
13
+ TEST: ("coco_2017_val",)
14
+ PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
15
+ DATALOADER:
16
+ # proposals are part of the dataset_dicts, and take a lot of RAM
17
+ NUM_WORKERS: 2
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-C4.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 101
7
+ SOLVER:
8
+ STEPS: (210000, 250000)
9
+ MAX_ITER: 270000
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-DilatedC5.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 101
7
+ SOLVER:
8
+ STEPS: (210000, 250000)
9
+ MAX_ITER: 270000
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-FPN.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 101
7
+ SOLVER:
8
+ STEPS: (210000, 250000)
9
+ MAX_ITER: 270000
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-C4.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 50
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-C4.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 50
7
+ SOLVER:
8
+ STEPS: (210000, 250000)
9
+ MAX_ITER: 270000
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-DilatedC5.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 50
preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "../Base-RCNN-DilatedC5.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ MASK_ON: False
5
+ RESNETS:
6
+ DEPTH: 50
7
+ SOLVER:
8
+ STEPS: (210000, 250000)
9
+ MAX_ITER: 270000