Skip to content

Commit 9f21e52

Browse files
authored
Fix NWS image timestamp extraction (#137)
The pattern that encodes the timestamp for NWS images used to be irregular and required careful parsing. This pattern was changed some time before November 2020 and became regular. The irregular parser produced invalid timestamps when applied to filenames with the new pattern.

This commit updates the function that extracts a timestamp from the filename to expect the regular pattern.

This commit also removes the timestamp prefix from the filename if it could be extracted, allowing users to specify their own timestamp pattern. This means that a handler with the following definition:

```
filename = "{time:%Y%m%dT%H%M%SZ}_{filename}"
```

no longer produces filenames like:

```
20220327T110046Z_20220327110046-hiwind_pac_latest.gif
```

but rather:

```
20220327T110046Z_hiwind_pac_latest.gif
```

Fixes #100.
1 parent d61405e commit 9f21e52

File tree

2 files changed

+23
-65
lines changed

2 files changed

+23
-65
lines changed

src/goesproc/filename.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ struct FilenameBuilder {
77
std::string dir;
88
std::string filename;
99

10-
struct timespec time;
10+
struct timespec time{0, 0};
1111
AWIPS awips;
1212
Product product;
1313
Region region;

src/goesproc/handler_nws_image.cc

Lines changed: 22 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -6,68 +6,27 @@
66

77
namespace {
88

9-
void parseIrregularTime(const std::string& text, struct timespec& time) {
10-
// Unlike the NWS text files, the NWS image files on GOES-R
11-
// don't use a consistent pattern for time in their name.
12-
//
13-
// Example file names:
14-
// - 201801010001834-pacsfc48_latestBW.gif
15-
// - 201803640503770-pacsfc72_latestBW.gif
16-
// - 201803640503019-USA_latest.gif
17-
// - 2018041050104193-pac24_latestBW.gif
18-
//
19-
// As you can see, the month is not followed by the day of the
20-
// month, but the day of the year. The number of seconds and
21-
// sub-seconds may miss a leading 0. These ambiguities make that
22-
// we stick to extracting the year, month, day, hour, and minute.
23-
//
9+
std::string parseTime(const std::string& text, struct timespec& time) {
10+
// This field used an irregular pattern before November 2020.
11+
// See https://github.com/pietern/goestools/issues/100 for historical context.
2412
const char* buf = text.c_str();
25-
const char* format = "%Y%m";
13+
const char* format = "%Y%m%d%H%M%S";
2614
struct tm tm;
2715
memset(&tm, 0, sizeof(tm));
28-
auto ptr = strptime(buf, format, &tm);
16+
const auto ptr = strptime(buf, format, &tm);
2917

30-
// Only use time if strptime was successful
31-
if (ptr != (buf + 6)) {
32-
return;
33-
}
34-
35-
// Number of characters used for day of year
36-
const auto month = tm.tm_mon + 1;
37-
auto mlen = 2;
38-
buf = ptr;
39-
40-
// April contains both 2 and 3 digits day of year.
41-
// If it starts with a '1' it must be 3 digits.
42-
if ((month == 4 && buf[0] == '1') || month > 4) {
43-
mlen = 3;
44-
}
45-
46-
// Interpret variable length day of year.
47-
char tmp[4];
48-
int yday;
49-
memcpy(tmp, buf, mlen);
50-
tmp[mlen] = 0;
51-
auto rv = sscanf(tmp, "%d", &yday);
52-
if (rv != 1 || yday >= 367) {
53-
return;
54-
}
55-
56-
buf += mlen;
57-
format = "%H%M";
58-
ptr = strptime(buf, format, &tm);
59-
60-
// Only use time if strptime was successful
61-
if (ptr != (buf + 4)) {
62-
return;
18+
// Only use time if strptime was successful.
19+
// Format with zero padding is always 14 characters.
20+
// The character after the time must be '-'.
21+
if (ptr != (buf + 14) || ptr[0] != '-') {
22+
return text;
6323
}
6424

65-
// Set day to January 1 before mktime, so we can use simple
66-
// arithmetic to get to the real day of the year.
67-
tm.tm_mon = 0;
68-
tm.tm_mday = 1;
69-
time.tv_sec = mktime(&tm) + (60 * 60 * 24 * (yday - 1));
25+
time.tv_sec = mktime(&tm);
7026
time.tv_nsec = 0;
27+
28+
// Return everything after the separator.
29+
return std::string(&ptr[1]);
7130
}
7231

7332
} // namespace
@@ -91,21 +50,20 @@ void NWSImageHandler::handle(std::shared_ptr<const lrit::File> f) {
9150
return;
9251
}
9352

53+
FilenameBuilder fb;
54+
fb.dir = config_.dir;
55+
fb.filename = getBasename(*f);
56+
9457
// In the GOES-15 LRIT stream these text files have a time stamp
9558
// header; in the GOES-R HRIT stream they don't.
96-
struct timespec time = {0, 0};
9759
if (f->hasHeader<lrit::TimeStampHeader>()) {
98-
time = f->getHeader<lrit::TimeStampHeader>().getUnix();
60+
fb.time = f->getHeader<lrit::TimeStampHeader>().getUnix();
9961
} else {
100-
auto text = f->getHeader<lrit::AnnotationHeader>().text;
101-
parseIrregularTime(text, time);
62+
// If time can successfully be extracted from the filename
63+
// then remove it from the filename passed to the builder.
64+
fb.filename = parseTime(fb.filename, fb.time);
10265
}
10366

104-
FilenameBuilder fb;
105-
fb.dir = config_.dir;
106-
fb.filename = getBasename(*f);
107-
fb.time = time;
108-
10967
// If this is a GIF we can write it directly
11068
if (nlh.noaaSpecificCompression == 5) {
11169
auto path = fb.build(config_.filename, "gif");

0 commit comments

Comments
 (0)