in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-news-module/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java [362:456]
/**
 * Scans the header bytes of a message, pulling apart the envelope (service id and
 * category) and the header line (subject, date and time).
 *
 * <p>The collected values are currently only used to compute the return value; nothing
 * is written to {@code properties}.
 *
 * @param value      raw header bytes to scan
 * @param properties metadata map; not modified by this method at present
 * @return {@code true} if at least one character was collected for any of the fields
 */
private boolean parseHeader(byte[] value, HashMap<String, String> properties) {
    StringBuilder env_serviceid = new StringBuilder();
    StringBuilder env_category = new StringBuilder();
    StringBuilder hdr_subject = new StringBuilder();
    StringBuilder hdr_date = new StringBuilder();
    StringBuilder hdr_time = new StringBuilder();
    int read = 0;

    // pull apart the envelope, getting the service id (....\x1f)
    while (read < value.length) {
        byte val_next = value[read++];
        if (val_next == FS) {
            break;
        }
        env_serviceid.append((char) (val_next & 0xff)); // convert the byte to an unsigned int
    }

    // pull apart the envelope, getting the category (....\x13\x11)
    while (read < value.length) {
        byte val_next = value[read++];
        if (val_next == XS) { // the end of the envelope is marked (\x13)
            // consume the remaining byte (\x11) only if it is actually there; the
            // terminator may be the very last byte of the input
            if ((read < value.length) && (value[read] == XQ)) {
                read++;
            }
            break;
        }
        env_category.append((char) (val_next & 0xff)); // convert the byte to an unsigned int
    }

    // pull apart the envelope, getting the subject heading
    while (read < value.length) {
        boolean subject = true;
        byte val_next = value[read++];
        while ((subject) && (val_next != SP) && (val_next != 0x00)) {
            hdr_subject.append((char) (val_next & 0xff)); // convert the byte to an unsigned int
            val_next = (read < value.length) ? value[read++] : 0x00;
            while (val_next == SP) { // consume all the spaces
                subject = false;
                if (read < value.length) {
                    val_next = value[read++];
                    if (val_next != SP) {
                        --read; // otherwise we eat into the next section
                    }
                } else {
                    // end of input: nothing was consumed, so there is nothing to push back
                    val_next = 0x00;
                }
            }
        }
        if (!subject) {
            break;
        }
    }

    // pull apart the envelope, getting the date and time
    while (read < value.length) {
        byte val_next = value[read++];
        if (hdr_date.isEmpty()) {
            // consume all numerics and hyphens
            while (((val_next >= (byte) 0x30) && (val_next <= (byte) 0x39))
                    || (val_next == HY)) {
                hdr_date.append((char) (val_next & 0xff)); // convert the byte to an unsigned int
                val_next = (read < value.length) ? value[read++] : 0x00;
            }
        } else if (val_next == SP) {
            // skip the rest of the run of spaces by peeking, so the first non-space
            // byte is left for the next iteration instead of being dropped
            while ((read < value.length) && (value[read] == SP)) {
                read++;
            }
        } else {
            // consume all numerics and hyphens
            while (((val_next >= (byte) 0x30) && (val_next <= (byte) 0x39))
                    || (val_next == HY)) {
                hdr_time.append((char) (val_next & 0xff)); // convert the byte to an unsigned int
                val_next = (read < value.length) ? value[read++] : 0x00;
            }
        }
    }

    // if we were saving any of these values, we would set the properties map here
    return (env_serviceid.length() + env_category.length() + hdr_subject.length()
            + hdr_date.length() + hdr_time.length()) > 0;
}