in harry-core/src/harry/generators/DataGenerators.java [390:428]
/**
 * Decides whether the sign of the slice at position {@code idx} has to be inverted.
 *
 * The decision is driven by three inputs: the byte size reported by the column's generator,
 * the number of bytes actually assigned to this slice in {@code sizes}, and whether the
 * generator reports itself as unsigned. The first slice ({@code idx == 0}) is treated
 * specially, and its handling further depends on whether {@code totalSize} equals
 * {@code Long.BYTES}.
 *
 * @param idx position of the slice; used to index both {@code columns} and {@code sizes}
 * @return {@code true} if the sign of this slice should be inverted, {@code false} otherwise
 */
public boolean shouldInvertSign(int idx)
{
    Bijections.Bijection<?> bijection = columns.get(idx).generator();
    int fullWidth = bijection.byteSize();
    int usedWidth = sizes[idx];
    boolean usesAllBytes = fullWidth == usedWidth;

    // Every slice after the first: unsigned generators are never inverted; signed ones are
    // inverted only when the slice has enough entropy (all of its bytes) to have a sign bit set.
    if (idx != 0)
        return !bijection.unsigned() && usesAllBytes;

    // First slice, and the descriptor's own sign is not consumed, e.g. (float, tinyint) or
    // (int, tinyint): only the signs of the values are inverted, since their order doesn't match.
    if (totalSize != Long.BYTES)
    {
        assert usesAllBytes;
        return !bijection.unsigned();
    }

    // First slice, and the sign of the descriptor is consumed, e.g. (long, long) or (int, int).
    // A truncated slice (3 bytes of a 4-byte int, 4 bytes of an 8-byte int) is effectively
    // unsigned/byte-ordered, so its order won't match and it is always inverted.
    // Otherwise the sign of the descriptor should match the sign of the slice: when the
    // generator is unsigned, its most significant bit does not hold a sign, so it is inverted
    // to match the sign of the descriptor; a signed first component already matches.
    return fullWidth > usedWidth || bijection.unsigned();
}