Commit 5ebe6e5

ThomasFroech, photolap, and sheorey authored
Add tum facade dataset (#613)
Added dataset classes for the TUM-Facade dataset (https://github.com/OloOcki/tum-facade) and updated the init file.

Co-authored-by: photolap <[email protected]>
Co-authored-by: Sameer Sheorey <[email protected]>
1 parent 6f20459 commit 5ebe6e5

File tree: 4 files changed (+245, -5 lines)

.github/workflows/style.yml

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ jobs:
         python -m pip install -U clang-format==10.0.1.1 yapf==0.30.0 nbformat pydocstyle==6.0.0
     - name: Run style check
       run: |
-        python ci/check_style.py
+        python ci/check_style.py --verbose
     - name: Run docstring style check
       run: |
         pydocstyle --convention=google --add-ignore=D1,D205,D415,D212 .

ml3d/datasets/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -21,11 +21,12 @@
 from .scannet import Scannet
 from .sunrgbd import SunRGBD
 from .matterport_objects import MatterportObjects
+from .tumfacade import TUMFacade

 __all__ = [
     'SemanticKITTI', 'S3DIS', 'Toronto3D', 'ParisLille3D', 'Semantic3D',
     'Custom3D', 'utils', 'augment', 'samplers', 'KITTI', 'Waymo', 'NuScenes',
     'Lyft', 'ShapeNet', 'SemSegRandomSampler', 'InferenceDummySplit',
     'SemSegSpatiallyRegularSampler', 'Argoverse', 'Scannet', 'SunRGBD',
-    'MatterportObjects'
+    'MatterportObjects', 'TUMFacade'
 ]
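
With the new import and the extra entry in __all__, the class is exported by ml3d.datasets alongside the existing datasets. A minimal sketch of what this enables, assuming the repository's ml3d package is on the import path; only names defined in this commit are used:

# Minimal sketch: TUMFacade is now importable from ml3d.datasets.
from ml3d.datasets import TUMFacade

# get_label_to_names() is a @staticmethod (see ml3d/datasets/tumfacade.py
# below), so the label mapping can be inspected without any data on disk.
print(TUMFacade.get_label_to_names())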

ml3d/datasets/pandaset.py

Lines changed: 4 additions & 3 deletions
@@ -84,6 +84,7 @@ def __init__(self,
             cache_dir: The directory where the cache is stored.
             use_cache: Indicates if the dataset should be cached.
             ignored_label_inds: A list of labels that should be ignored in the dataset.
+
         Returns:
             class: The corresponding class.
         """
@@ -223,9 +224,9 @@ def save_test_result(self, results, attr):

         Args:
             results: The output of a model for the datum associated with the
-            attribute passed.
-            attrs: The attributes that correspond to the outputs passed in
-            results.
+                attribute passed.
+            attr: The attributes that correspond to the outputs passed in
+                results.
         """
         cfg = self.cfg
         pred = results['predict_labels']

ml3d/datasets/tumfacade.py

Lines changed: 238 additions & 0 deletions
import glob
from pathlib import Path
import logging
import numpy as np
import open3d as o3d
from ..utils import DATASET
from .base_dataset import BaseDataset, BaseDatasetSplit

log = logging.getLogger(__name__)


class TUMFacade(BaseDataset):

    def __init__(self,
                 dataset_path,
                 info_path=None,
                 name='TUM_Facade',
                 cache_dir='./logs/cache',
                 use_cache=False,
                 use_global=False,
                 **kwargs):
        """Dataset classes for the TUM-Facade dataset: semantic segmentation
        annotations over the TUM-MLS-2016 point cloud data.

        Website: https://mediatum.ub.tum.de/node?id=1636761
        Code: https://github.com/OloOcki/tum-facade
        Download:
            - Original: https://dataserv.ub.tum.de/index.php/s/m1636761.003
            - Processed: https://tumde-my.sharepoint.com/:f:/g/personal/olaf_wysocki_tum_de/EjA8B_KGDyFEulRzmq-CG1QBBL4dZ7z5PoHeI8zMD0JxIQ?e=9MrMcl
        Data license: CC BY-NC-SA 4.0
        Citation:
            - Paper: Wysocki, O., Hoegner, L. and Stilla, U.: TUM-FAÇADE:
              Reviewing and enriching point cloud benchmarks for façade
              segmentation, ISPRS 2022.
            - Dataset: Wysocki, Olaf, Tan, Yue, Zhang, Jiarui and Stilla, Uwe:
              TUM-FACADE dataset, TU Munich, 2023.

        README file from the processed dataset website:

        The dataset split is provided in the following folder structure:

            -->tum-facade
                -->pointclouds
                    -->annotatedGlobalCRS
                        -->test_files
                        -->training_files
                        -->validation_files
                    -->annotatedLocalCRS
                        -->test_files
                        -->training_files
                        -->validation_files

        The individual point clouds are compressed as .7z files and are
        stored in the .pcd format.

        To make use of the dataset split in Open3D-ML, all the point cloud
        files have to be unpacked with 7-Zip. The folder structure itself
        must not be modified, otherwise the reading functionality in
        Open3D-ML will not work. The path to the 'tum-facade' folder must
        be set as the dataset path.

        The dataset is split in the following way (10.08.2023):

            Testing   : Building Nr. 23
            Training  : Buildings Nr. 57, Nr. 58, Nr. 60
            Validation: Buildings Nr. 22, Nr. 59, Nr. 62, Nr. 81

        Initialize the dataset by passing the dataset path and other details.

        Args:
            dataset_path: The path to the dataset to use.
            info_path: The path to the file that includes information about
                the dataset. This defaults to the dataset path if nothing is
                provided.
            name: The name of the dataset (TUM_Facade in this case).
            cache_dir: The directory where the cache is stored.
            use_cache: Indicates if the dataset should be cached.
            use_global: Indicates if the dataset should be used in the local
                or the global CRS.

        Returns:
            class: The corresponding class.
        """
        super().__init__(
            dataset_path=dataset_path,
            info_path=info_path,
            name=name,
            cache_dir=cache_dir,
            use_cache=use_cache,
            use_global=use_global,  # I added this parameter myself
            **kwargs)
        cfg = self.cfg
        self.name = cfg.name
        self.dataset_path = cfg.dataset_path
        self.label_to_names = self.get_label_to_names()
        self.use_global = cfg.use_global
        if info_path is None:
            self.info_path = dataset_path

        if self.use_global:
            # Find all the training files in the global CRS.
            self.trainFiles = glob.glob(
                str(
                    Path(cfg.dataset_path) / 'pointclouds' /
                    'annotatedGlobalCRS' / 'training_files' / '*.pcd'))
            # Find all the validation files in the global CRS.
            self.valFiles = glob.glob(
                str(
                    Path(cfg.dataset_path) / 'pointclouds' /
                    'annotatedGlobalCRS' / 'validation_files' / '*.pcd'))
            # Find all the test files in the global CRS.
            self.testFiles = glob.glob(
                str(
                    Path(cfg.dataset_path) / 'pointclouds' /
                    'annotatedGlobalCRS' / 'test_files' / '*.pcd'))

        elif not self.use_global:
            # Find all the training files in the local CRS.
            self.trainFiles = glob.glob(
                str(
                    Path(cfg.dataset_path) / 'pointclouds' /
                    'annotatedLocalCRS' / 'training_files' / '*.pcd'))
            # Find all the validation files in the local CRS.
            self.valFiles = glob.glob(
                str(
                    Path(cfg.dataset_path) / 'pointclouds' /
                    'annotatedLocalCRS' / 'validation_files' / '*.pcd'))
            # Find all the test files in the local CRS.
            self.testFiles = glob.glob(
                str(
                    Path(cfg.dataset_path) / 'pointclouds' /
                    'annotatedLocalCRS' / 'test_files' / '*.pcd'))

        else:
            raise ValueError(
                "Invalid specification! use_global must either be True or False!"
            )

    @staticmethod
    def get_label_to_names():
        """Returns a label to names dictionary object.

        Returns:
            A dict where keys are label numbers and values are the corresponding
            names.
        """
        label_to_names = {
            0: 'not_assigned',
            1: 'wall',
            2: 'window',
            3: 'door',
            4: 'balcony',
            5: 'molding',
            6: 'deco',
            7: 'column',
            8: 'arch',
            9: 'drainpipe',
            10: 'stairs',
            11: 'ground_surface',
            12: 'terrain',
            13: 'roof',
            14: 'blinds',
            15: 'outer_ceiling_surface',
            16: 'interior',
            17: 'other'
        }
        return label_to_names

    def get_split(self, split):
        return TUMFacadeSplit(self, split=split)

    def get_split_list(self, split):
        """Returns the list of data splits available.

        Args:
            split: A string identifying the dataset split that is usually one of
                'training', 'test', 'validation', or 'all'.

        Returns:
            A dataset split object providing the requested subset of the data.

        Raises:
            ValueError: Indicates that the split name passed is incorrect. The
                split name should be one of 'training', 'test', 'validation', or
                'all'.
        """
        if split in ['train', 'training']:
            return self.trainFiles
        elif split in ['test', 'testing']:
            return self.testFiles
        elif split in ['val', 'validation']:
            return self.valFiles
        elif split in ['all']:
            return self.trainFiles + self.valFiles + self.testFiles
        else:
            raise ValueError("Invalid split {}".format(split))

    def is_tested(self, attr):
        pass

    def save_test_result(self, results, attr):
        pass


class TUMFacadeSplit(BaseDatasetSplit):

    def __init__(self, dataset, split='train'):
        super().__init__(dataset, split=split)
        log.info("Found {} pointclouds for {}".format(len(self.path_list),
                                                      split))

    def __len__(self):
        return len(self.path_list)

    def get_data(self, idx):
        pc_path = self.path_list[idx]
        # Read the .pcd file and extract point positions and per-point labels.
        data = o3d.t.io.read_point_cloud(pc_path).point
        points = data["positions"].numpy()
        points = np.float32(points)
        labels = data['classification'].numpy().astype(np.int32).reshape((-1,))
        data = {'point': points, 'feat': None, 'label': labels}
        return data

    def get_attr(self, idx):
        pc_path = Path(self.path_list[idx])
        pc_path = str(pc_path)
        # Use the file name without the .pcd extension as the point cloud name.
        name = pc_path.replace('.pcd', '')
        parts = name.split("/")
        name = parts[-1]
        split = self.split
        attr = {'idx': idx, 'name': name, 'path': pc_path, 'split': split}
        return attr


DATASET._register_module(TUMFacade)
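
For context, a short usage sketch of the classes added above, restricted to the methods they define (get_split, __len__, get_data, get_attr). The dataset path is a placeholder and the sketch assumes the unpacked 'tum-facade' folder layout described in the docstring, with the repository's ml3d package on the import path:

from ml3d.datasets import TUMFacade

# '/path/to/tum-facade' is a placeholder for the unpacked dataset folder.
dataset = TUMFacade(dataset_path='/path/to/tum-facade',
                    use_global=False)  # False selects the local-CRS files

train_split = dataset.get_split('training')
print('Training point clouds:', len(train_split))

sample = train_split.get_data(0)  # {'point': Nx3 float32, 'feat': None, 'label': N int32}
attr = train_split.get_attr(0)    # {'idx', 'name', 'path', 'split'}
print(attr['name'], sample['point'].shape, sample['label'].shape)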
