CVE-2012-2098
Description
Algorithmic complexity vulnerability in the sorting algorithms in bzip2 compressing stream (BZip2CompressorOutputStream) in Apache Commons Compress before 1.4.1 allows remote attackers to cause a denial of service (CPU consumption) via a file with many repeating inputs.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
org.apache.commons:commons-compressMaven | < 1.4.1 | 1.4.1 |
Affected products
2Patches
128f702469cbf4[CVE-2012-2098] Actually this shift is supposed to be unsigned.
1 file changed · +1 −1
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+1 −1 modified@@ -370,7 +370,7 @@ private void fallbackQSort3(int[] fmap, if (r3 == 0) { med = eclass[fmap[lo]]; } else if (r3 == 1) { - med = eclass[fmap[(lo+hi)>>1]]; + med = eclass[fmap[(lo + hi) >>> 1]]; } else { med = eclass[fmap[hi]]; }
654222e62809typos and make PMD happy
1 file changed · +40 −16
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+40 −16 modified@@ -104,7 +104,7 @@ class BlockSort { * * I've added the fallbackSort function of 1.0.6 and tried to * integrate it with the existing code without touching too much. - * I've also removed the now unused reandomization code. + * I've also removed the now unused randomization code. */ /* @@ -268,7 +268,9 @@ private void fallbackSimpleSort(int[] fmap, int[] eclass, int lo, int hi) { - if (lo == hi) return; + if (lo == hi) { + return; + } int j; if (hi - lo > 3) { @@ -380,32 +382,44 @@ private void fallbackQSort3(int[] fmap, // in the cited Sedgewick paper while (true) { while (true) { - if (unLo > unHi) break; + if (unLo > unHi) { + break; + } n = eclass[fmap[unLo]] - (int) med; if (n == 0) { fswap(fmap, unLo, ltLo); ltLo++; unLo++; continue; - }; - if (n > 0) break; + } + if (n > 0) { + break; + } unLo++; } while (true) { - if (unLo > unHi) break; + if (unLo > unHi) { + break; + } n = eclass[fmap[unHi]] - (int) med; if (n == 0) { fswap(fmap, unHi, gtHi); gtHi--; unHi--; continue; - }; - if (n < 0) break; + } + if (n < 0) { + break; + } unHi--; } - if (unLo > unHi) break; + if (unLo > unHi) { + break; + } fswap(fmap, unLo, unHi); unLo++; unHi--; } - if (gtHi < ltLo) continue; + if (gtHi < ltLo) { + continue; + } n = fmin(ltLo - lo, unLo - ltLo); fvswap(fmap, lo, unLo - n, n); @@ -470,7 +484,9 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) { for (i = 0; i < nblock; i++) { ftab[block[i] & 0xff]++; } - for (i = 1; i < 257; i++) ftab[i] += ftab[i - 1]; + for (i = 1; i < 257; i++) { + ftab[i] += ftab[i - 1]; + } for (i = 0; i < nblock; i++) { j = block[i] & 0xff; @@ -481,7 +497,9 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) { nBhtab = 64 + nblock; BitSet bhtab = new BitSet(nBhtab); - for (i = 0; i < 256; i++) bhtab.set(ftab[i]); + for (i = 0; i < 256; i++) { + bhtab.set(ftab[i]); + } /*-- LBZ2: Inductively refine the buckets. Kind-of an @@ -519,10 +537,14 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) { k = r + 1; k = bhtab.nextClearBit(k); l = k - 1; - if (l >= nblock) break; + if (l >= nblock) { + break; + } k = bhtab.nextSetBit(k + 1); r = k - 1; - if (r >= nblock) break; + if (r >= nblock) { + break; + } /*-- LBZ2: now [l, r] bracket current bucket --*/ if (r > l) { @@ -536,13 +558,15 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) { if (cc != cc1) { bhtab.set(i); cc = cc1; - }; + } } } } H *= 2; - if (H > nblock || nNotDone == 0) break; + if (H > nblock || nNotDone == 0) { + break; + } } }
6ced422bf5ecremove randomization code
3 files changed · +13 −52
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+6 −40 modified@@ -98,12 +98,13 @@ class BlockSort { * This class seems to mix several revisions of libbzip2's code. * The mainSort function and those used by it look closer to the * 0.9.5 version but show some variations introduced later. At - * the same time the logic to randomize the block on bad input has - * been dropped after 0.9.0 and replaced by a fallback sorting - * algorithm. + * the same time the logic of Compress 1.4 to randomize the block + * on bad input has been dropped after libbzip2 0.9.0 and replaced + * by a fallback sorting algorithm. * * I've added the fallbackSort function of 1.0.6 and tried to * integrate it with the existing code without touching too much. + * I've also removed the now unused reandomization code. */ /* @@ -120,11 +121,9 @@ class BlockSort { QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE; - private boolean blockRandomised; - /* * Used when sorting. If too many long comparisons happen, we stop sorting, - * randomise the block slightly, and try again. + * and use fallbackSort instead. */ private int workDone; private int workLimit; @@ -151,10 +150,9 @@ class BlockSort { this.quadrant = data.sfmap; } - boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { + void blockSort(final BZip2CompressorOutputStream.Data data, final int last) { this.workLimit = WORK_FACTOR * last; this.workDone = 0; - this.blockRandomised = false; this.firstAttempt = true; if (last + 1 < 10000) { @@ -177,7 +175,6 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { } // assert (data.origPtr != -1) : data.origPtr; - return false; } /** @@ -1057,35 +1054,4 @@ final void mainSort(final BZip2CompressorOutputStream.Data dataShadow, } } -/*---------------------------------------------*/ - - private void randomiseBlock(final BZip2CompressorOutputStream.Data data, - final int lastShadow) { - final boolean[] inUse = data.inUse; - final byte[] block = data.block; - - for (int i = 256; --i >= 0;) { - inUse[i] = false; - } - - int rNToGo = 0; - int rTPos = 0; - for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { - if (rNToGo == 0) { - rNToGo = (char) Rand.rNums(rTPos); - if (++rTPos == 512) { - rTPos = 0; - } - } - - rNToGo--; - block[j] ^= ((rNToGo == 1) ? 1 : 0); - - // handle 16 bit signed numbers - inUse[block[j] & 0xff] = true; - } - - this.blockRandomised = true; - } - }
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+5 −9 modified@@ -562,7 +562,7 @@ private void endBlock() throws IOException { } /* sort the block and establish posn of original string */ - final boolean blockRandomised = blockSort(); + blockSort(); /* * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 @@ -585,12 +585,8 @@ private void endBlock() throws IOException { /* Now the block's CRC, so it is in a known place. */ bsPutInt(this.blockCRC); - /* Now a single bit indicating randomisation. */ - if (blockRandomised) { - bsW(1, 1); - } else { - bsW(1, 0); - } + /* Now a single bit indicating no randomisation. */ + bsW(1, 0); /* Finally, block's contents proper. */ moveToFrontCodeAndSend(); @@ -1165,8 +1161,8 @@ private void moveToFrontCodeAndSend() throws IOException { sendMTFValues(); } - private boolean blockSort() { - return blockSorter.blockSort(data, last); + private void blockSort() { + blockSorter.blockSort(data, last); } /*
src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+2 −3 modified@@ -22,7 +22,6 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; public class BlockSortTest { @@ -81,7 +80,7 @@ public class BlockSortTest { @Test public void testSortFixture() { DS ds = setUpFixture(); - assertFalse(ds.s.blockSort(ds.data, FIXTURE.length - 1)); + ds.s.blockSort(ds.data, FIXTURE.length - 1); assertFixtureSorted(ds.data); assertEquals(0, ds.data.origPtr); } @@ -103,7 +102,7 @@ public void testSortFixtureFallbackSort() { @Test public void testSortFixture2() { DS ds = setUpFixture2(); - assertFalse(ds.s.blockSort(ds.data, FIXTURE2.length - 1)); + ds.s.blockSort(ds.data, FIXTURE2.length - 1); assertFixture2Sorted(ds.data); assertEquals(1, ds.data.origPtr); }
2ab2fcb35675fix indentation
1 file changed · +3 −4
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+3 −4 modified@@ -160,12 +160,11 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { if (last + 1 < 10000) { fallbackSort(data, last); } else { + mainSort(data, last); - mainSort(data, last); - - if (this.firstAttempt && (this.workDone > this.workLimit)) { + if (this.firstAttempt && (this.workDone > this.workLimit)) { fallbackSort(data, last); - } + } } final int[] fmap = data.fmap;
0600296ab8f8[CVE-2012-2098] Integrate fallback sort into the rest, add some more tests and fix bug in bucket boundary calculation
2 files changed · +139 −31
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+52 −22 modified@@ -34,6 +34,7 @@ * Compress" you'd get:</p> * * <pre> + * CompressCommons * Commons Compress * CompressCommons * essCommons Compr @@ -51,9 +52,9 @@ * ssCommons Compre * </pre> * - * <p>Which results in a new text "s romooCCmmpnse", in adition the + * <p>Which results in a new text "ss romooCCmmpnse", in adition the * index of the first line that contained the original text is kept - - * in this case it is 0. The idea is that in a long English text all + * in this case it is 1. The idea is that in a long English text all * permutations that start with "he" are likely suffixes of a "the" and * thus they end in "t" leading to a larger block of "t"s that can * better be compressed by the subsequent Move-to-Front, run-length @@ -101,7 +102,8 @@ class BlockSort { * been dropped after 0.9.0 and replaced by a fallback sorting * algorithm. * - * I've added the fallbackSort function of 1.0.6. + * I've added the fallbackSort function of 1.0.6 and tried to + * integrate it with the existing code without touching too much. */ /* @@ -154,13 +156,16 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { this.workDone = 0; this.blockRandomised = false; this.firstAttempt = true; + + if (last + 1 < 10000) { + fallbackSort(data, last); + } else { + mainSort(data, last); if (this.firstAttempt && (this.workDone > this.workLimit)) { - randomiseBlock(data, last); - this.workLimit = this.workDone = 0; - this.firstAttempt = false; - mainSort(data, last); + fallbackSort(data, last); + } } final int[] fmap = data.fmap; @@ -173,7 +178,27 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { } // assert (data.origPtr != -1) : data.origPtr; - return blockRandomised; + return false; + } + + /** + * Adapt fallbackSort to the expected interface of the rest of the + * code, in particular deal with the fact that block starts at + * offset 1 (in libbzip2 1.0.6 it starts at 0). + */ + final void fallbackSort(final BZip2CompressorOutputStream.Data data, + final int last) { + data.block[0] = data.block[last + 1]; + fallbackSort(data.fmap, data.block, last + 1); + for (int i = 0; i < last + 1; i++) { + --data.fmap[i]; + } + for (int i = 0; i < last + 1; i++) { + if (data.fmap[i] == -1) { + data.fmap[i] = last; + break; + } + } } /*---------------------------------------------*/ @@ -415,12 +440,13 @@ private int[] getEclass() { } /* - * The C code uses an array of ints to represents the bucket-start - * flags (bhtab). It also contains optimizations to skip over 32 - * consecutively set or consecutively unset bits on word - * boundaries at once. For now I've chosen to use the simpler but - * potentially slower code using BitSet - also in the hope that - * using the BitSet#nextXXX methods may be fast enough. + * The C code uses an array of ints (each int holding 32 flags) to + * represents the bucket-start flags (bhtab). It also contains + * optimizations to skip over 32 consecutively set or + * consecutively unset bits on word boundaries at once. For now + * I've chosen to use the simpler but potentially slower code + * using BitSet - also in the hope that using the BitSet#nextXXX + * methods may be fast enough. */ /** @@ -432,16 +458,22 @@ private int[] getEclass() { * @param off offset of first byte to sort in block */ final void fallbackSort(int[] fmap, byte[] block, int nblock) { - int[] ftab = new int[257]; + final int[] ftab = new int[257]; int H, i, j, k, l, r, cc, cc1; int nNotDone; int nBhtab; + final int[] eclass = getEclass(); + for (i = 0; i < nblock; i++) { + eclass[i] = 0; + } /*-- LBZ2: Initial 1-char radix sort to generate initial fmap and initial BH bits. --*/ - for (i = 0; i < nblock; i++) ftab[block[i] & 0xff]++; + for (i = 0; i < nblock; i++) { + ftab[block[i] & 0xff]++; + } for (i = 1; i < 257; i++) ftab[i] += ftab[i - 1]; for (i = 0; i < nblock; i++) { @@ -467,8 +499,6 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) { bhtab.clear(nblock + 2 * i + 1); } - eclass = getEclass(); - /*-- LBZ2: the log(N) loop --*/ H = 1; while (true) { @@ -491,10 +521,10 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) { /*-- LBZ2: find the next non-singleton bucket --*/ k = r + 1; - k = bhtab.nextSetBit(k); + k = bhtab.nextClearBit(k); l = k - 1; if (l >= nblock) break; - k = bhtab.nextClearBit(k); + k = bhtab.nextSetBit(k + 1); r = k - 1; if (r >= nblock) break; @@ -862,8 +892,8 @@ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, private static final int SETMASK = (1 << 21); private static final int CLEARMASK = (~SETMASK); - private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, - final int lastShadow) { + final void mainSort(final BZip2CompressorOutputStream.Data dataShadow, + final int lastShadow) { final int[] runningOrder = this.mainSort_runningOrder; final int[] copy = this.mainSort_copy; final boolean[] bigDone = this.mainSort_bigDone;
src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+87 −9 modified@@ -70,17 +70,56 @@ public class BlockSortTest { 0, 1, 7, 6, 8, 2, 3, 5, 4 }; + private static final byte[] FIXTURE2 = { + 'C', 'o', 'm', 'm', 'o', 'n', 's', ' ', 'C', 'o', 'm', 'p', 'r', 'e', 's', 's', + }; + + private static final byte[] FIXTURE2_BWT = { + 's', 's', ' ', 'r', 'o', 'm', 'o', 'o', 'C', 'C', 'm', 'm', 'p', 'n', 's', 'e', + }; + @Test public void testSortFixture() { - BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); - System.arraycopy(FIXTURE, 0, data.block, 1, FIXTURE.length); - BlockSort s = new BlockSort(data); - assertFalse(s.blockSort(data, FIXTURE.length - 1)); - assertEquals(FIXTURE[FIXTURE.length - 1], data.block[0]); - for (int i = 0; i < FIXTURE.length; i++) { - assertEquals(FIXTURE_BWT[i], data.block[data.fmap[i]]); - } - assertEquals(0, data.origPtr); + DS ds = setUpFixture(); + assertFalse(ds.s.blockSort(ds.data, FIXTURE.length - 1)); + assertFixtureSorted(ds.data); + assertEquals(0, ds.data.origPtr); + } + + @Test + public void testSortFixtureMainSort() { + DS ds = setUpFixture(); + ds.s.mainSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + } + + @Test + public void testSortFixtureFallbackSort() { + DS ds = setUpFixture(); + ds.s.fallbackSort(ds.data, FIXTURE.length - 1); + assertFixtureSorted(ds.data); + } + + @Test + public void testSortFixture2() { + DS ds = setUpFixture2(); + assertFalse(ds.s.blockSort(ds.data, FIXTURE2.length - 1)); + assertFixture2Sorted(ds.data); + assertEquals(1, ds.data.origPtr); + } + + @Test + public void testSortFixture2MainSort() { + DS ds = setUpFixture2(); + ds.s.mainSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); + } + + @Test + public void testSortFixture2FallbackSort() { + DS ds = setUpFixture2(); + ds.s.fallbackSort(ds.data, FIXTURE2.length - 1); + assertFixture2Sorted(ds.data); } @Test @@ -91,4 +130,43 @@ public void testFallbackSort() { s.fallbackSort(fmap, FIXTURE, FIXTURE.length); assertArrayEquals(FIXTURE_SORTED, fmap); } + + private DS setUpFixture() { + return setUpFixture(FIXTURE); + } + + private void assertFixtureSorted(BZip2CompressorOutputStream.Data data) { + assertFixtureSorted(data, FIXTURE, FIXTURE_BWT); + } + + private DS setUpFixture2() { + return setUpFixture(FIXTURE2); + } + + private void assertFixture2Sorted(BZip2CompressorOutputStream.Data data) { + assertFixtureSorted(data, FIXTURE2, FIXTURE2_BWT); + } + + private DS setUpFixture(byte[] fixture) { + BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); + System.arraycopy(fixture, 0, data.block, 1, fixture.length); + return new DS(data, new BlockSort(data)); + } + + private void assertFixtureSorted(BZip2CompressorOutputStream.Data data, + byte[] fixture, byte[] fixtureBwt) { + assertEquals(fixture[fixture.length - 1], data.block[0]); + for (int i = 0; i < fixture.length; i++) { + assertEquals(fixtureBwt[i], data.block[data.fmap[i]]); + } + } + + private static class DS { + private final BZip2CompressorOutputStream.Data data; + private final BlockSort s; + DS(BZip2CompressorOutputStream.Data data, BlockSort s) { + this.data = data; + this.s = s; + } + } } \ No newline at end of file
fdd7459bc547[CVE-2012-2098] add fallback sorting algorithm of libbzip2 1.0.6 (actually added with 0.9.5)
2 files changed · +370 −3
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+357 −3 modified@@ -18,6 +18,8 @@ */ package org.apache.commons.compress.compressors.bzip2; +import java.util.BitSet; + /** * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link * BZip2CompressorOutputStream}. @@ -92,12 +94,14 @@ class BlockSort { /* * 2012-05-20 Stefan Bodewig: * - * The class seems to mix several revisions of libbzip2's code. + * This class seems to mix several revisions of libbzip2's code. * The mainSort function and those used by it look closer to the * 0.9.5 version but show some variations introduced later. At * the same time the logic to randomize the block on bad input has * been dropped after 0.9.0 and replaced by a fallback sorting * algorithm. + * + * I've added the fallbackSort function of 1.0.6. */ /* @@ -108,6 +112,12 @@ class BlockSort { */ private static final int QSORT_STACK_SIZE = 1000; + private static final int FALLBACK_QSORT_STACK_SIZE = 100; + + private static final int STACK_SIZE = + QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE + ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE; + private boolean blockRandomised; /* @@ -118,8 +128,8 @@ class BlockSort { private int workLimit; private boolean firstAttempt; - private final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte - private final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte + private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte + private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte private final int[] mainSort_runningOrder = new int[256]; // 1024 byte @@ -166,6 +176,350 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { return blockRandomised; } +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +/*--- LBZ2: Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + + /* + * This is the fallback sorting algorithm libbzip2 1.0.6 uses for + * repetitive or very short inputs. + * + * The idea is inspired by Manber-Myers string suffix sorting + * algorithm. First a bucket sort places each permutation of the + * block into a bucket based on its first byte. Permutations are + * represented by pointers to their first character kept in + * (partially) sorted order inside the array ftab. + * + * The next step visits all buckets in order and performs a + * quicksort on all permutations of the bucket based on the index + * of the bucket the second byte of the permutation belongs to, + * thereby forming new buckets. When arrived here the + * permutations are sorted up to the second character and we have + * buckets of permutations that are identical up to two + * characters. + * + * Repeat the step of quicksorting each bucket, now based on the + * bucket holding the sequence of the third and forth character + * leading to four byte buckets. Repeat this doubling of bucket + * sizes until all buckets only contain single permutations or the + * bucket size exceeds the block size. + * + * I.e. + * + * "abraba" form three buckets for the chars "a", "b", and "r" in + * the first step with + * + * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 } + * + * when looking at the bucket of "a"s the second characters are in + * the buckets that start with fmap-index 0 (rolled over), 3 and 3 + * respectively, forming two new buckets "aa" and "ab", so we get + * + * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 } + * + * since the last bucket only contained a single item it didn't + * have to be sorted at all. + * + * There now is just one bucket with more than one permutation + * that remains to be sorted. For the permutation that starts + * with index 3 the third and forth char are in bucket 'aa' at + * index 0 and for the one starting at block index 0 they are in + * bucket 'ra' with sort index 5. The fully sorted order then becomes. + * + * fmap = { 5, 3, 0, 4, 1, 2 } + * + */ + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param eclass points from the index of a character inside the + * block to the first index in fmap that contains the + * bucket of its suffix that is sorted in this step. + * @param lo lower boundary of the fmap-interval to be sorted + * @param hi upper boundary of the fmap-interval to be sorted + */ + private void fallbackSimpleSort(int[] fmap, + int[] eclass, + int lo, + int hi) { + if (lo == hi) return; + + int j; + if (hi - lo > 3) { + for (int i = hi - 4; i >= lo; i--) { + int tmp = fmap[i]; + int ec_tmp = eclass[tmp]; + for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; + j += 4) { + fmap[j - 4] = fmap[j]; + } + fmap[j - 4] = tmp; + } + } + + for (int i = hi - 1; i >= lo; i--) { + int tmp = fmap[i]; + int ec_tmp = eclass[tmp]; + for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) { + fmap[j - 1] = fmap[j]; + } + fmap[j-1] = tmp; + } + } + + private static final int FALLBACK_QSORT_SMALL_THRESH = 10; + + /** + * swaps two values in fmap + */ + private void fswap(int[] fmap, int zz1, int zz2) { + int zztmp = fmap[zz1]; + fmap[zz1] = fmap[zz2]; + fmap[zz2] = zztmp; + } + + /** + * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap. + */ + private void fvswap(int[] fmap, int yyp1, int yyp2, int yyn) { + while (yyn > 0) { + fswap(fmap, yyp1, yyp2); + yyp1++; yyp2++; yyn--; + } + } + + private int fmin(int a, int b) { + return a < b ? a : b; + } + + private void fpush(int sp, int lz, int hz) { + stack_ll[sp] = lz; + stack_hh[sp] = hz; + } + + private int[] fpop(int sp) { + return new int[] { stack_ll[sp], stack_hh[sp] }; + } + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param eclass points from the index of a character inside the + * block to the first index in fmap that contains the + * bucket of its suffix that is sorted in this step. + * @param loSt lower boundary of the fmap-interval to be sorted + * @param hiSt upper boundary of the fmap-interval to be sorted + */ + private void fallbackQSort3(int[] fmap, + int[] eclass, + int loSt, + int hiSt) { + int lo, unLo, ltLo, hi, unHi, gtHi, n; + + long r = 0; + int sp = 0; + fpush(sp++, loSt, hiSt); + + while (sp > 0) { + int[] s = fpop(--sp); + lo = s[0]; hi = s[1]; + + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* LBZ2: Random partitioning. Median of 3 sometimes fails to + avoid bad cases. Median of 9 seems to help but + looks rather expensive. This too seems to work but + is cheaper. Guidance for the magic constants + 7621 and 32768 is taken from Sedgewick's algorithms + book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + long r3 = r % 3, med; + if (r3 == 0) { + med = eclass[fmap[lo]]; + } else if (r3 == 1) { + med = eclass[fmap[(lo+hi)>>1]]; + } else { + med = eclass[fmap[hi]]; + } + + unLo = ltLo = lo; + unHi = gtHi = hi; + + // looks like the ternary partition attributed to Wegner + // in the cited Sedgewick paper + while (true) { + while (true) { + if (unLo > unHi) break; + n = eclass[fmap[unLo]] - (int) med; + if (n == 0) { + fswap(fmap, unLo, ltLo); + ltLo++; unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (true) { + if (unLo > unHi) break; + n = eclass[fmap[unHi]] - (int) med; + if (n == 0) { + fswap(fmap, unHi, gtHi); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + fswap(fmap, unLo, unHi); unLo++; unHi--; + } + + if (gtHi < ltLo) continue; + + n = fmin(ltLo - lo, unLo - ltLo); + fvswap(fmap, lo, unLo - n, n); + int m = fmin(hi - gtHi, gtHi - unHi); + fvswap(fmap, unHi + 1, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(sp++, lo, n); + fpush(sp++, m, hi); + } else { + fpush(sp++, m, hi); + fpush(sp++, lo, n); + } + } + } + + +/*---------------------------------------------*/ + + private int[] eclass; + + private int[] getEclass() { + return eclass == null + ? (eclass = new int[quadrant.length / 2]) : eclass; + } + + /* + * The C code uses an array of ints to represents the bucket-start + * flags (bhtab). It also contains optimizations to skip over 32 + * consecutively set or consecutively unset bits on word + * boundaries at once. For now I've chosen to use the simpler but + * potentially slower code using BitSet - also in the hope that + * using the BitSet#nextXXX methods may be fast enough. + */ + + /** + * @param fmap points to the index of the starting point of a + * permutation inside the block of data in the current + * partially sorted order + * @param block the original data + * @param nblock size of the block + * @param off offset of first byte to sort in block + */ + final void fallbackSort(int[] fmap, byte[] block, int nblock) { + int[] ftab = new int[257]; + int H, i, j, k, l, r, cc, cc1; + int nNotDone; + int nBhtab; + + /*-- + LBZ2: Initial 1-char radix sort to generate + initial fmap and initial BH bits. + --*/ + for (i = 0; i < nblock; i++) ftab[block[i] & 0xff]++; + for (i = 1; i < 257; i++) ftab[i] += ftab[i - 1]; + + for (i = 0; i < nblock; i++) { + j = block[i] & 0xff; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 64 + nblock; + BitSet bhtab = new BitSet(nBhtab); + for (i = 0; i < 256; i++) bhtab.set(ftab[i]); + + /*-- + LBZ2: Inductively refine the buckets. Kind-of an + "exponential radix sort" (!), inspired by the + Manber-Myers suffix array construction algorithm. + --*/ + + /*-- LBZ2: set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + bhtab.set(nblock + 2 * i); + bhtab.clear(nblock + 2 * i + 1); + } + + eclass = getEclass(); + + /*-- LBZ2: the log(N) loop --*/ + H = 1; + while (true) { + + j = 0; + for (i = 0; i < nblock; i++) { + if (bhtab.get(i)) { + j = i; + } + k = fmap[i] - H; + if (k < 0) { + k += nblock; + } + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (true) { + + /*-- LBZ2: find the next non-singleton bucket --*/ + k = r + 1; + k = bhtab.nextSetBit(k); + l = k - 1; + if (l >= nblock) break; + k = bhtab.nextClearBit(k); + r = k - 1; + if (r >= nblock) break; + + /*-- LBZ2: now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- LBZ2: scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + bhtab.set(i); + cc = cc1; + }; + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) break; + } + } + /*---------------------------------------------*/ /*
src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+13 −0 modified@@ -66,6 +66,10 @@ public class BlockSortTest { private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, (byte) 252, (byte) 255, (byte) 253 }; + private static final int[] FIXTURE_SORTED = { + 0, 1, 7, 6, 8, 2, 3, 5, 4 + }; + @Test public void testSortFixture() { BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); @@ -78,4 +82,13 @@ public void testSortFixture() { } assertEquals(0, data.origPtr); } + + @Test + public void testFallbackSort() { + BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); + BlockSort s = new BlockSort(data); + int[] fmap = new int[FIXTURE.length]; + s.fallbackSort(fmap, FIXTURE, FIXTURE.length); + assertArrayEquals(FIXTURE_SORTED, fmap); + } } \ No newline at end of file
cca0e6e5341ajust moving stuff around to closer match source code layout of libbzip2 1.0.6
1 file changed · +50 −48
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+50 −48 modified@@ -100,12 +100,6 @@ class BlockSort { * algorithm. */ - private static final int SETMASK = (1 << 21); - private static final int CLEARMASK = (~SETMASK); - private static final int SMALL_THRESH = 20; - private static final int DEPTH_THRESH = 10; - private static final int WORK_FACTOR = 30; - /* * LBZ2: If you are ever unlucky/improbable enough to get a stack * overflow whilst sorting, increase the following constant and @@ -114,15 +108,6 @@ class BlockSort { */ private static final int QSORT_STACK_SIZE = 1000; - /* - * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. - * Possibly because the number of elems to sort is usually small, typically - * <= 20. - */ - private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, 797161, - 2391484 }; - private boolean blockRandomised; /* @@ -154,14 +139,43 @@ class BlockSort { this.quadrant = data.sfmap; } + boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { + this.workLimit = WORK_FACTOR * last; + this.workDone = 0; + this.blockRandomised = false; + this.firstAttempt = true; + mainSort(data, last); + + if (this.firstAttempt && (this.workDone > this.workLimit)) { + randomiseBlock(data, last); + this.workLimit = this.workDone = 0; + this.firstAttempt = false; + mainSort(data, last); + } + + final int[] fmap = data.fmap; + data.origPtr = -1; + for (int i = 0; i <= last; i++) { + if (fmap[i] == 0) { + data.origPtr = i; + break; + } + } + + // assert (data.origPtr != -1) : data.origPtr; + return blockRandomised; + } + /*---------------------------------------------*/ -/*-- - LBZ2: The following is an implementation of - an elegant 3-way quicksort for strings, - described in a paper "Fast Algorithms for - Sorting and Searching Strings", by Robert - Sedgewick and Jon L. Bentley. ---*/ + + /* + * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, + 2391484 }; /** * This is the most hammered method of this class. @@ -357,6 +371,14 @@ private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow return firstAttemptShadow && (workDoneShadow > workLimitShadow); } +/*-- + LBZ2: The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. +--*/ + private static void vswap(int[] fmap, int p1, int p2, int n) { n += p1; while (p1 < n) { @@ -371,32 +393,9 @@ private static byte med3(byte a, byte b, byte c) { : a); } - boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { - this.workLimit = WORK_FACTOR * last; - this.workDone = 0; - this.blockRandomised = false; - this.firstAttempt = true; - mainSort(data, last); - - if (this.firstAttempt && (this.workDone > this.workLimit)) { - randomiseBlock(data, last); - this.workLimit = this.workDone = 0; - this.firstAttempt = false; - mainSort(data, last); - } - - final int[] fmap = data.fmap; - data.origPtr = -1; - for (int i = 0; i <= last; i++) { - if (fmap[i] == 0) { - data.origPtr = i; - break; - } - } - - // assert (data.origPtr != -1) : data.origPtr; - return blockRandomised; - } + private static final int SMALL_THRESH = 20; + private static final int DEPTH_THRESH = 10; + private static final int WORK_FACTOR = 30; /** * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 @@ -506,6 +505,9 @@ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, } } + private static final int SETMASK = (1 << 21); + private static final int CLEARMASK = (~SETMASK); + private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, final int lastShadow) { final int[] runningOrder = this.mainSort_runningOrder;
ea31005111f0Add some explanations, mark comments originally made by Julian Seward
1 file changed · +97 −12
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+97 −12 modified@@ -22,26 +22,100 @@ * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link * BZip2CompressorOutputStream}. * + * <p>This class is based on a Java port of Julian Seward's + * blocksort.c in his libbzip2</p> + * + * <p>The Burrows-Wheeler transform is a reversible transform of the + * original data that is supposed to group similiar bytes close to + * each other. The idea is to sort all permutations of the input and + * only keep the last byte of each permutation. E.g. for "Commons + * Compress" you'd get:</p> + * + * <pre> + * Commons Compress + * CompressCommons + * essCommons Compr + * mmons CompressCo + * mons CompressCom + * mpressCommons Co + * ns CompressCommo + * ommons CompressC + * ompressCommons C + * ons CompressComm + * pressCommons Com + * ressCommons Comp + * s CompressCommon + * sCommons Compres + * ssCommons Compre + * </pre> + * + * <p>Which results in a new text "s romooCCmmpnse", in adition the + * index of the first line that contained the original text is kept - + * in this case it is 0. The idea is that in a long English text all + * permutations that start with "he" are likely suffixes of a "the" and + * thus they end in "t" leading to a larger block of "t"s that can + * better be compressed by the subsequent Move-to-Front, run-length + * und Huffman encoding steps.</p> + * + * <p>For more information see for example:</p> + * <ul> + * <li><a + * href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows, + * M. and Wheeler, D.: A Block-sorting Lossless Data Compression + * Algorithm</a></li> + * <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and + * Myers, G.: Suffix arrays: A new method for on-line string + * searches</a></li> + * <li><a + * href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley, + * J.L. and Sedgewick, R.: Fast Algorithms for Sorting and Searching + * Strings</a></li> + * </ul> + * * @NotThreadSafe */ class BlockSort { + /* + * Some of the constructs used in the C code cannot be ported + * literally to Java - for example macros, unsigned types. Some + * code has been hand-tuned to improve performance. In order to + * avoid memory pressure some structures are reused for several + * blocks and some memory is even shared between sorting and the + * MTF stage even though either algorithm uses it for its own + * purpose. + * + * Comments preserved from the actual C code are prefixed with + * "LBZ2:". + */ + + /* + * 2012-05-20 Stefan Bodewig: + * + * The class seems to mix several revisions of libbzip2's code. + * The mainSort function and those used by it look closer to the + * 0.9.5 version but show some variations introduced later. At + * the same time the logic to randomize the block on bad input has + * been dropped after 0.9.0 and replaced by a fallback sorting + * algorithm. + */ + private static final int SETMASK = (1 << 21); private static final int CLEARMASK = (~SETMASK); private static final int SMALL_THRESH = 20; private static final int DEPTH_THRESH = 10; private static final int WORK_FACTOR = 30; /* - * <p> If you are ever unlucky/improbable enough to get a stack + * LBZ2: If you are ever unlucky/improbable enough to get a stack * overflow whilst sorting, increase the following constant and * try again. In practice I have never seen the stack go above 27 - * elems, so the following limit seems very generous. </p> + * elems, so the following limit seems very generous. */ private static final int QSORT_STACK_SIZE = 1000; - /** - * Knuth's increments seem to work better than Incerpi-Sedgewick here. + /* + * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here. * Possibly because the number of elems to sort is usually small, typically * <= 20. */ @@ -80,6 +154,15 @@ class BlockSort { this.quadrant = data.sfmap; } +/*---------------------------------------------*/ +/*-- + LBZ2: The following is an implementation of + an elegant 3-way quicksort for strings, + described in a paper "Fast Algorithms for + Sorting and Searching Strings", by Robert + Sedgewick and Jon L. Bentley. +--*/ + /** * This is the most hammered method of this class. * @@ -435,7 +518,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, final int workLimitShadow = this.workLimit; final boolean firstAttemptShadow = this.firstAttempt; - // Set up the 2-byte frequency table + // LBZ2: Set up the 2-byte frequency table for (int i = 65537; --i >= 0;) { ftab[i] = 0; } @@ -453,7 +536,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, } block[0] = block[lastShadow + 1]; - // Complete the initial radix sort: + // LBZ2: Complete the initial radix sort: int c1 = block[0] & 0xff; for (int i = 0; i <= lastShadow; i++) { @@ -476,7 +559,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; /* - * Now ftab contains the first loc of every small bucket. Calculate the + * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the * running order, from smallest to largest big bucket. */ for (int i = 256; --i >= 0;) { @@ -504,17 +587,17 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, } /* - * The main sorting loop. + * LBZ2: The main sorting loop. */ for (int i = 0; i <= 255; i++) { /* - * Process big buckets, starting with the least full. + * LBZ2: Process big buckets, starting with the least full. */ final int ss = runningOrder[i]; // Step 1: /* - * Complete the big bucket [ss] by quicksorting any unsorted small + * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small * buckets [ss, j]. Hopefully previous pointer-scanning phases have * already completed many of the small buckets [ss, j], so we don't * have to sort them at all. @@ -537,7 +620,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, } // Step 2: - // Now scan this big bucket so as to synthesise the + // LBZ2: Now scan this big bucket so as to synthesise the // sorted order for small buckets [t, ss] for all t != ss. for (int j = 0; j <= 255; j++) { @@ -559,7 +642,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, // Step 3: /* - * The ss big bucket is now done. Record this fact, and update the + * LBZ2: The ss big bucket is now done. Record this fact, and update the * quadrant descriptors. Remember to update quadrants in the * overshoot area too, if necessary. The "if (i < 255)" test merely * skips this updating for the last bucket processed, since updating @@ -589,6 +672,8 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, } } +/*---------------------------------------------*/ + private void randomiseBlock(final BZip2CompressorOutputStream.Data data, final int lastShadow) { final boolean[] inUse = data.inUse;
020c03d8ef57verify my understanding of the code
2 files changed · +85 −1
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+4 −1 modified@@ -1321,7 +1321,10 @@ static final class Data extends Object { // ============ /** - * Index in fmap[] of original string after sorting. + * Index of original line in Burrows-Wheeler table. + * + * <p>This is the index in fmap that points to the last byte + * of the original data.</p> */ int origPtr;
src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+81 −0 added@@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +public class BlockSortTest { + + private static final byte[] FIXTURE = { 0, 1, (byte) 252, (byte) 253, (byte) 255, + (byte) 254, 3, 2, (byte) 128 }; + + /* + Burrows-Wheeler transform of fixture the manual way: + + * build the matrix + + 0, 1, 252, 253, 255, 254, 3, 2, 128 + 1, 252, 253, 255, 254, 3, 2, 128, 0 + 252, 253, 255, 254, 3, 2, 128, 0, 1 + 253, 255, 254, 3, 2, 128, 0, 1, 252 + 255, 254, 3, 2, 128, 0, 1, 252, 253 + 254, 3, 2, 128, 0, 1, 252, 253, 255 + 3, 2, 128, 0, 1, 252, 253, 255, 254 + 2, 128, 0, 1, 252, 253, 255, 254, 3 + 128, 0, 1, 252, 253, 255, 254, 3, 2 + + * sort it + + 0, 1, 252, 253, 255, 254, 3, 2, 128 + 1, 252, 253, 255, 254, 3, 2, 128, 0 + 2, 128, 0, 1, 252, 253, 255, 254, 3 + 3, 2, 128, 0, 1, 252, 253, 255, 254 + 128, 0, 1, 252, 253, 255, 254, 3, 2 + 252, 253, 255, 254, 3, 2, 128, 0, 1 + 253, 255, 254, 3, 2, 128, 0, 1, 252 + 254, 3, 2, 128, 0, 1, 252, 253, 255 + 255, 254, 3, 2, 128, 0, 1, 252, 253 + + * grab last column + + 128, 0, 3, 254, 2, 1, 252, 255, 253 + + and the original line has been 0 + */ + + private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, + (byte) 252, (byte) 255, (byte) 253 }; + + @Test + public void testSortFixture() { + BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1); + System.arraycopy(FIXTURE, 0, data.block, 1, FIXTURE.length); + BlockSort s = new BlockSort(data); + assertFalse(s.blockSort(data, FIXTURE.length - 1)); + assertEquals(FIXTURE[FIXTURE.length - 1], data.block[0]); + for (int i = 0; i < FIXTURE.length; i++) { + assertEquals(FIXTURE_BWT[i], data.block[data.fmap[i]]); + } + assertEquals(0, data.origPtr); + } +} \ No newline at end of file
1ce57d976c4fAdd a few comments as earmarks for myself, I think this is what the code does. Two 'final's thrown in for good measure
2 files changed · +36 −6
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+3 −2 modified@@ -19,7 +19,8 @@ package org.apache.commons.compress.compressors.bzip2; /** - * Encapsulates the sorting algorithms needed by {@link BZip2CompressorOutputStream}. + * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link + * BZip2CompressorOutputStream}. * * @NotThreadSafe */ @@ -301,7 +302,7 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { mainSort(data, last); } - int[] fmap = data.fmap; + final int[] fmap = data.fmap; data.origPtr = -1; for (int i = 0; i <= last; i++) { if (fmap[i] == 0) {
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+33 −4 modified@@ -315,7 +315,7 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq, private int blockCRC; private int combinedCRC; - private int allowableBlockSize; + private final int allowableBlockSize; /** * All memory intensive stuff. @@ -391,6 +391,8 @@ public BZip2CompressorOutputStream(final OutputStream out, } this.blockSize100k = blockSize; + /* 20 is just a paranoia constant */ + this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; this.out = out; init(); } @@ -405,6 +407,19 @@ public void write(final int b) throws IOException { } } + /** + * Writes the current byte to the buffer, run-length encoding it + * if it has been repeated at least four times (the first step + * RLEs sequences of four identical bytes). + * + * <p>Flushes the current block before writing data if it is + * full.</p> + * + * <p>"write to the buffer" means adding to data.buffer starting + * two steps "after" this.last - initially starting at index 1 + * (not 0) - and updating this.last to point to the last index + * written minus 1.</p> + */ private void writeRun() throws IOException { final int lastShadow = this.last; @@ -534,9 +549,6 @@ private void initBlock() { for (int i = 256; --i >= 0;) { inUse[i] = false; } - - /* 20 is just a paranoia constant */ - this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; } private void endBlock() throws IOException { @@ -632,6 +644,10 @@ public void write(final byte[] buf, int offs, final int len) } } + /** + * Keeps track of the last bytes written and implicitly performs + * run-length encoding as the first step of the bzip2 algorithm. + */ private void write0(int b) throws IOException { if (this.currentChar != -1) { b &= 0xff; @@ -1153,6 +1169,13 @@ private boolean blockSort() { return blockSorter.blockSort(data, last); } + /* + * Performs Move-To-Front on the Burrows-Wheeler transformed + * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB + * run-length-encoded form. + * + * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> + */ private void generateMTFValues() { final int lastShadow = this.last; final Data dataShadow = this.data; @@ -1259,6 +1282,7 @@ private void generateMTFValues() { static final class Data extends Object { // with blockSize 900k + /* maps unsigned byte => "does it occur in block" */ final boolean[] inUse = new boolean[256]; // 256 byte final byte[] unseqToSeq = new byte[256]; // 256 byte final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte @@ -1284,7 +1308,12 @@ static final class Data extends Object { // ------------ // 333408 byte + /* holds the RLEd block of original data starting at index 1. + * After sorting the last byte added to the buffer is at index + * 0. */ final byte[] block; // 900021 byte + /* maps index in Burrows-Wheeler transformed block => index of + * byte in original block */ final int[] fmap; // 3600000 byte final char[] sfmap; // 3600000 byte // ------------
6e95697e7837Move data that is used for sorting only to BlockSort class
2 files changed · +35 −28
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+31 −10 modified@@ -37,7 +37,7 @@ class BlockSort { * try again. In practice I have never seen the stack go above 27 * elems, so the following limit seems very generous. </p> */ - static final int QSORT_STACK_SIZE = 1000; + private static final int QSORT_STACK_SIZE = 1000; /** * Knuth's increments seem to work better than Incerpi-Sedgewick here. @@ -58,6 +58,27 @@ class BlockSort { private int workLimit; private boolean firstAttempt; + private final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte + private final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte + private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + private final int[] mainSort_runningOrder = new int[256]; // 1024 byte + private final int[] mainSort_copy = new int[256]; // 1024 byte + private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + private final int[] ftab = new int[65537]; // 262148 byte + + /** + * Array instance identical to Data's sfmap, both are used only + * temporarily and indepently, so we do not need to allocate + * additional memory. + */ + private final char[] quadrant; + + BlockSort(final BZip2CompressorOutputStream.Data data) { + this.quadrant = data.sfmap; + } + /** * This is the most hammered method of this class. * @@ -82,7 +103,7 @@ private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow } final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; + final char[] quadrant = this.quadrant; final byte[] block = dataShadow.block; final int lastPlus1 = lastShadow + 1; final boolean firstAttemptShadow = this.firstAttempt; @@ -299,9 +320,9 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, final int loSt, final int hiSt, final int dSt, final int last) { - final int[] stack_ll = dataShadow.stack_ll; - final int[] stack_hh = dataShadow.stack_hh; - final int[] stack_dd = dataShadow.stack_dd; + final int[] stack_ll = this.stack_ll; + final int[] stack_hh = this.stack_hh; + final int[] stack_dd = this.stack_dd; final int[] fmap = dataShadow.fmap; final byte[] block = dataShadow.block; @@ -403,13 +424,13 @@ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, final int lastShadow) { - final int[] runningOrder = dataShadow.mainSort_runningOrder; - final int[] copy = dataShadow.mainSort_copy; - final boolean[] bigDone = dataShadow.mainSort_bigDone; - final int[] ftab = dataShadow.ftab; + final int[] runningOrder = this.mainSort_runningOrder; + final int[] copy = this.mainSort_copy; + final boolean[] bigDone = this.mainSort_bigDone; + final int[] ftab = this.ftab; final byte[] block = dataShadow.block; final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; + final char[] quadrant = this.quadrant; final int workLimitShadow = this.workLimit; final boolean firstAttemptShadow = this.firstAttempt;
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+4 −18 modified@@ -321,6 +321,7 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq, * All memory intensive stuff. */ private Data data; + private BlockSort blockSorter; private OutputStream out; @@ -480,6 +481,7 @@ public void finish() throws IOException { } finally { this.out = null; this.data = null; + this.blockSorter = null; } } } @@ -512,6 +514,7 @@ private void init() throws IOException { bsPutUByte('Z'); this.data = new Data(this.blockSize100k); + this.blockSorter = new BlockSort(this.data); // huffmanised magic bytes bsPutUByte('h'); @@ -1147,7 +1150,7 @@ private void moveToFrontCodeAndSend() throws IOException { } private boolean blockSort() { - return new BlockSort().blockSort(data, last); + return blockSorter.blockSort(data, last); } private void generateMTFValues() { @@ -1274,19 +1277,10 @@ static final class Data extends Object { final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte - final int[] stack_ll = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_hh = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_dd = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte - - final int[] mainSort_runningOrder = new int[256]; // 1024 byte - final int[] mainSort_copy = new int[256]; // 1024 byte - final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte - final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte - final int[] ftab = new int[65537]; // 262148 byte // ------------ // 333408 byte @@ -1297,13 +1291,6 @@ static final class Data extends Object { // 8433529 byte // ============ - /** - * Array instance identical to sfmap, both are used only - * temporarily and indepently, so we do not need to allocate - * additional memory. - */ - final char[] quadrant; - /** * Index in fmap[] of original string after sorting. */ @@ -1316,7 +1303,6 @@ static final class Data extends Object { this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; this.fmap = new int[n]; this.sfmap = new char[2 * n]; - this.quadrant = this.sfmap; } }
b06f7b41c936split sorting out of BZip2CompressorOutputStream
2 files changed · +613 −579
src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+599 −0 added@@ -0,0 +1,599 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.bzip2; + +/** + * Encapsulates the sorting algorithms needed by {@link BZip2CompressorOutputStream}. + * + * @NotThreadSafe + */ +class BlockSort { + + private static final int SETMASK = (1 << 21); + private static final int CLEARMASK = (~SETMASK); + private static final int SMALL_THRESH = 20; + private static final int DEPTH_THRESH = 10; + private static final int WORK_FACTOR = 30; + + /* + * <p> If you are ever unlucky/improbable enough to get a stack + * overflow whilst sorting, increase the following constant and + * try again. In practice I have never seen the stack go above 27 + * elems, so the following limit seems very generous. </p> + */ + static final int QSORT_STACK_SIZE = 1000; + + /** + * Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, + 2391484 }; + + private boolean blockRandomised; + + /* + * Used when sorting. If too many long comparisons happen, we stop sorting, + * randomise the block slightly, and try again. + */ + private int workDone; + private int workLimit; + private boolean firstAttempt; + + /** + * This is the most hammered method of this class. + * + * <p> + * This is the version using unrolled loops. Normally I never use such ones + * in Java code. The unrolling has shown a noticable performance improvement + * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the + * JIT compiler of the vm. + * </p> + */ + private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow, + final int lo, final int hi, final int d, + final int lastShadow) { + final int bigN = hi - lo + 1; + if (bigN < 2) { + return this.firstAttempt && (this.workDone > this.workLimit); + } + + int hp = 0; + while (INCS[hp] < bigN) { + hp++; + } + + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final byte[] block = dataShadow.block; + final int lastPlus1 = lastShadow + 1; + final boolean firstAttemptShadow = this.firstAttempt; + final int workLimitShadow = this.workLimit; + int workDoneShadow = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. + + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, lastShadow); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { + break HAMMER; + } + } else { + onceRunned = true; + } + + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] == block[(i2 += 6)]) { + int x = lastShadow; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] == block[i2 + 1]) { + if (quadrant[i1] == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { + i2 -= lastPlus1; + } + workDoneShadow++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + else { + if ((block[i1] & 0xff) > (block[i2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } + } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } // HAMMER + // end inline mainGTU + + fmap[j] = v; + } + + if (firstAttemptShadow && (i <= hi) + && (workDoneShadow > workLimitShadow)) { + break HP; + } + } + } + + this.workDone = workDoneShadow; + return firstAttemptShadow && (workDoneShadow > workLimitShadow); + } + + private static void vswap(int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; + } + } + + private static byte med3(byte a, byte b, byte c) { + return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c + : a); + } + + boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) { + this.workLimit = WORK_FACTOR * last; + this.workDone = 0; + this.blockRandomised = false; + this.firstAttempt = true; + mainSort(data, last); + + if (this.firstAttempt && (this.workDone > this.workLimit)) { + randomiseBlock(data, last); + this.workLimit = this.workDone = 0; + this.firstAttempt = false; + mainSort(data, last); + } + + int[] fmap = data.fmap; + data.origPtr = -1; + for (int i = 0; i <= last; i++) { + if (fmap[i] == 0) { + data.origPtr = i; + break; + } + } + + // assert (data.origPtr != -1) : data.origPtr; + return blockRandomised; + } + + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, + final int loSt, final int hiSt, final int dSt, + final int last) { + final int[] stack_ll = dataShadow.stack_ll; + final int[] stack_hh = dataShadow.stack_hh; + final int[] stack_dd = dataShadow.stack_dd; + final int[] fmap = dataShadow.fmap; + final byte[] block = dataShadow.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(dataShadow, lo, hi, d, last)) { + return; + } + } else { + final int d1 = d + 1; + final int med = med3(block[fmap[lo] + d1], + block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; + + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; + + while (true) { + while (unLo <= unHi) { + final int n = (block[fmap[unLo] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } + } + + while (unLo <= unHi) { + final int n = (block[fmap[unHi] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } + } + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { + break; + } + } + + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) + : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) + : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } + } + } + } + + private void mainSort(final BZip2CompressorOutputStream.Data dataShadow, + final int lastShadow) { + final int[] runningOrder = dataShadow.mainSort_runningOrder; + final int[] copy = dataShadow.mainSort_copy; + final boolean[] bigDone = dataShadow.mainSort_bigDone; + final int[] ftab = dataShadow.ftab; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final int workLimitShadow = this.workLimit; + final boolean firstAttemptShadow = this.firstAttempt; + + // Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } + + /* + * In the various block-sized structures, live data runs from 0 to + * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area + * for block. + */ + for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) { + block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; + } + for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) { + quadrant[i] = 0; + } + block[0] = block[lastShadow + 1]; + + // Complete the initial radix sort: + + int c1 = block[0] & 0xff; + for (int i = 0; i <= lastShadow; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (int i = 1; i <= 65536; i++) { + ftab[i] += ftab[i - 1]; + } + + c1 = block[1] & 0xff; + for (int i = 0; i < lastShadow; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } + + fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; + + /* + * Now ftab contains the first loc of every small bucket. Calculate the + * running order, from smallest to largest big bucket. + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } + + for (int h = 364; h != 1;) { + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j + - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; + } + } + runningOrder[j] = vv; + } + } + + /* + * The main sorting loop. + */ + for (int i = 0; i <= 255; i++) { + /* + * Process big buckets, starting with the least full. + */ + final int ss = runningOrder[i]; + + // Step 1: + /* + * Complete the big bucket [ss] by quicksorting any unsorted small + * buckets [ss, j]. Hopefully previous pointer-scanning phases have + * already completed many of the small buckets [ss, j], so we don't + * have to sort them at all. + */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(dataShadow, lo, hi, 2, lastShadow); + if (firstAttemptShadow + && (this.workDone > workLimitShadow)) { + return; + } + } + ftab[sb] = ftab_sb | SETMASK; + } + } + + // Step 2: + // Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. + + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); + copy[c1]++; + } + } + + for (int j = 256; --j >= 0;) { + ftab[(j << 8) + ss] |= SETMASK; + } + + // Step 3: + /* + * The ss big bucket is now done. Record this fact, and update the + * quadrant descriptors. Remember to update quadrants in the + * overshoot area too, if necessary. The "if (i < 255)" test merely + * skips this updating for the last bucket processed, since updating + * for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) { + quadrant[a2update + lastShadow + 1] = qVal; + } + } + } + + } + } + + private void randomiseBlock(final BZip2CompressorOutputStream.Data data, + final int lastShadow) { + final boolean[] inUse = data.inUse; + final byte[] block = data.block; + + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + + int rNToGo = 0; + int rTPos = 0; + for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { + if (rNToGo == 0) { + rNToGo = (char) Rand.rNums(rTPos); + if (++rTPos == 512) { + rTPos = 0; + } + } + + rNToGo--; + block[j] ^= ((rNToGo == 1) ? 1 : 0); + + // handle 16 bit signed numbers + inUse[block[j] & 0xff] = true; + } + + this.blockRandomised = true; + } + +}
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+14 −579 modified@@ -137,30 +137,8 @@ public class BZip2CompressorOutputStream extends CompressorOutputStream */ public static final int MAX_BLOCKSIZE = 9; - private static final int SETMASK = (1 << 21); - private static final int CLEARMASK = (~SETMASK); private static final int GREATER_ICOST = 15; private static final int LESSER_ICOST = 0; - private static final int SMALL_THRESH = 20; - private static final int DEPTH_THRESH = 10; - private static final int WORK_FACTOR = 30; - - /* - * <p> If you are ever unlucky/improbable enough to get a stack - * overflow whilst sorting, increase the following constant and - * try again. In practice I have never seen the stack go above 27 - * elems, so the following limit seems very generous. </p> - */ - private static final int QSORT_STACK_SIZE = 1000; - - /** - * Knuth's increments seem to work better than Incerpi-Sedgewick here. - * Possibly because the number of elems to sort is usually small, typically - * <= 20. - */ - private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, 797161, - 2391484 }; private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, @@ -318,19 +296,12 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq, */ private int last; - /** - * Index in fmap[] of original string after sorting. - */ - private int origPtr; - /** * Always: in the range 0 .. 9. The current block size is 100000 * this * number. */ private final int blockSize100k; - private boolean blockRandomised; - private int bsBuff; private int bsLive; private final CRC crc = new CRC(); @@ -339,14 +310,6 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq, private int nMTF; - /* - * Used when sorting. If too many long comparisons happen, we stop sorting, - * randomise the block slightly, and try again. - */ - private int workDone; - private int workLimit; - private boolean firstAttempt; - private int currentChar = -1; private int runLength = 0; @@ -584,7 +547,7 @@ private void endBlock() throws IOException { } /* sort the block and establish posn of original string */ - blockSort(); + final boolean blockRandomised = blockSort(); /* * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 @@ -608,7 +571,7 @@ private void endBlock() throws IOException { bsPutInt(this.blockCRC); /* Now a single bit indicating randomisation. */ - if (this.blockRandomised) { + if (blockRandomised) { bsW(1, 1); } else { bsW(1, 0); @@ -1178,546 +1141,13 @@ private void sendMTFValues7() throws IOException { } private void moveToFrontCodeAndSend() throws IOException { - bsW(24, this.origPtr); + bsW(24, this.data.origPtr); generateMTFValues(); sendMTFValues(); } - /** - * This is the most hammered method of this class. - * - * <p> - * This is the version using unrolled loops. Normally I never use such ones - * in Java code. The unrolling has shown a noticable performance improvement - * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the - * JIT compiler of the vm. - * </p> - */ - private boolean mainSimpleSort(final Data dataShadow, final int lo, - final int hi, final int d) { - final int bigN = hi - lo + 1; - if (bigN < 2) { - return this.firstAttempt && (this.workDone > this.workLimit); - } - - int hp = 0; - while (INCS[hp] < bigN) { - hp++; - } - - final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; - final byte[] block = dataShadow.block; - final int lastShadow = this.last; - final int lastPlus1 = lastShadow + 1; - final boolean firstAttemptShadow = this.firstAttempt; - final int workLimitShadow = this.workLimit; - int workDoneShadow = this.workDone; - - // Following block contains unrolled code which could be shortened by - // coding it in additional loops. - - HP: while (--hp >= 0) { - final int h = INCS[hp]; - final int mj = lo + h - 1; - - for (int i = lo + h; i <= hi;) { - // copy - for (int k = 3; (i <= hi) && (--k >= 0); i++) { - final int v = fmap[i]; - final int vd = v + d; - int j = i; - - // for (int a; - // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, - // block, quadrant, lastShadow); - // j -= h) { - // fmap[j] = a; - // } - // - // unrolled version: - - // start inline mainGTU - boolean onceRunned = false; - int a = 0; - - HAMMER: while (true) { - if (onceRunned) { - fmap[j] = a; - if ((j -= h) <= mj) { - break HAMMER; - } - } else { - onceRunned = true; - } - - a = fmap[j - h]; - int i1 = a + d; - int i2 = vd; - - // following could be done in a loop, but - // unrolled it for performance: - if (block[i1 + 1] == block[i2 + 1]) { - if (block[i1 + 2] == block[i2 + 2]) { - if (block[i1 + 3] == block[i2 + 3]) { - if (block[i1 + 4] == block[i2 + 4]) { - if (block[i1 + 5] == block[i2 + 5]) { - if (block[(i1 += 6)] == block[(i2 += 6)]) { - int x = lastShadow; - X: while (x > 0) { - x -= 4; - - if (block[i1 + 1] == block[i2 + 1]) { - if (quadrant[i1] == quadrant[i2]) { - if (block[i1 + 2] == block[i2 + 2]) { - if (quadrant[i1 + 1] == quadrant[i2 + 1]) { - if (block[i1 + 3] == block[i2 + 3]) { - if (quadrant[i1 + 2] == quadrant[i2 + 2]) { - if (block[i1 + 4] == block[i2 + 4]) { - if (quadrant[i1 + 3] == quadrant[i2 + 3]) { - if ((i1 += 4) >= lastPlus1) { - i1 -= lastPlus1; - } - if ((i2 += 4) >= lastPlus1) { - i2 -= lastPlus1; - } - workDoneShadow++; - continue X; - } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((quadrant[i1] > quadrant[i2])) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - - } - break HAMMER; - } // while x > 0 - else { - if ((block[i1] & 0xff) > (block[i2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } - } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { - continue HAMMER; - } else { - break HAMMER; - } - - } // HAMMER - // end inline mainGTU - - fmap[j] = v; - } - - if (firstAttemptShadow && (i <= hi) - && (workDoneShadow > workLimitShadow)) { - break HP; - } - } - } - - this.workDone = workDoneShadow; - return firstAttemptShadow && (workDoneShadow > workLimitShadow); - } - - private static void vswap(int[] fmap, int p1, int p2, int n) { - n += p1; - while (p1 < n) { - int t = fmap[p1]; - fmap[p1++] = fmap[p2]; - fmap[p2++] = t; - } - } - - private static byte med3(byte a, byte b, byte c) { - return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c - : a); - } - - private void blockSort() { - this.workLimit = WORK_FACTOR * this.last; - this.workDone = 0; - this.blockRandomised = false; - this.firstAttempt = true; - mainSort(); - - if (this.firstAttempt && (this.workDone > this.workLimit)) { - randomiseBlock(); - this.workLimit = this.workDone = 0; - this.firstAttempt = false; - mainSort(); - } - - int[] fmap = this.data.fmap; - this.origPtr = -1; - for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { - if (fmap[i] == 0) { - this.origPtr = i; - break; - } - } - - // assert (this.origPtr != -1) : this.origPtr; - } - - /** - * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 - */ - private void mainQSort3(final Data dataShadow, final int loSt, - final int hiSt, final int dSt) { - final int[] stack_ll = dataShadow.stack_ll; - final int[] stack_hh = dataShadow.stack_hh; - final int[] stack_dd = dataShadow.stack_dd; - final int[] fmap = dataShadow.fmap; - final byte[] block = dataShadow.block; - - stack_ll[0] = loSt; - stack_hh[0] = hiSt; - stack_dd[0] = dSt; - - for (int sp = 1; --sp >= 0;) { - final int lo = stack_ll[sp]; - final int hi = stack_hh[sp]; - final int d = stack_dd[sp]; - - if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { - if (mainSimpleSort(dataShadow, lo, hi, d)) { - return; - } - } else { - final int d1 = d + 1; - final int med = med3(block[fmap[lo] + d1], - block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; - - int unLo = lo; - int unHi = hi; - int ltLo = lo; - int gtHi = hi; - - while (true) { - while (unLo <= unHi) { - final int n = (block[fmap[unLo] + d1] & 0xff) - - med; - if (n == 0) { - final int temp = fmap[unLo]; - fmap[unLo++] = fmap[ltLo]; - fmap[ltLo++] = temp; - } else if (n < 0) { - unLo++; - } else { - break; - } - } - - while (unLo <= unHi) { - final int n = (block[fmap[unHi] + d1] & 0xff) - - med; - if (n == 0) { - final int temp = fmap[unHi]; - fmap[unHi--] = fmap[gtHi]; - fmap[gtHi--] = temp; - } else if (n > 0) { - unHi--; - } else { - break; - } - } - - if (unLo <= unHi) { - final int temp = fmap[unLo]; - fmap[unLo++] = fmap[unHi]; - fmap[unHi--] = temp; - } else { - break; - } - } - - if (gtHi < ltLo) { - stack_ll[sp] = lo; - stack_hh[sp] = hi; - stack_dd[sp] = d1; - sp++; - } else { - int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) - : (unLo - ltLo); - vswap(fmap, lo, unLo - n, n); - int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) - : (gtHi - unHi); - vswap(fmap, unLo, hi - m + 1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - stack_ll[sp] = lo; - stack_hh[sp] = n; - stack_dd[sp] = d; - sp++; - - stack_ll[sp] = n + 1; - stack_hh[sp] = m - 1; - stack_dd[sp] = d1; - sp++; - - stack_ll[sp] = m; - stack_hh[sp] = hi; - stack_dd[sp] = d; - sp++; - } - } - } - } - - private void mainSort() { - final Data dataShadow = this.data; - final int[] runningOrder = dataShadow.mainSort_runningOrder; - final int[] copy = dataShadow.mainSort_copy; - final boolean[] bigDone = dataShadow.mainSort_bigDone; - final int[] ftab = dataShadow.ftab; - final byte[] block = dataShadow.block; - final int[] fmap = dataShadow.fmap; - final char[] quadrant = dataShadow.quadrant; - final int lastShadow = this.last; - final int workLimitShadow = this.workLimit; - final boolean firstAttemptShadow = this.firstAttempt; - - // Set up the 2-byte frequency table - for (int i = 65537; --i >= 0;) { - ftab[i] = 0; - } - - /* - * In the various block-sized structures, live data runs from 0 to - * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area - * for block. - */ - for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { - block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; - } - for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) { - quadrant[i] = 0; - } - block[0] = block[lastShadow + 1]; - - // Complete the initial radix sort: - - int c1 = block[0] & 0xff; - for (int i = 0; i <= lastShadow; i++) { - final int c2 = block[i + 1] & 0xff; - ftab[(c1 << 8) + c2]++; - c1 = c2; - } - - for (int i = 1; i <= 65536; i++) { - ftab[i] += ftab[i - 1]; - } - - c1 = block[1] & 0xff; - for (int i = 0; i < lastShadow; i++) { - final int c2 = block[i + 2] & 0xff; - fmap[--ftab[(c1 << 8) + c2]] = i; - c1 = c2; - } - - fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; - - /* - * Now ftab contains the first loc of every small bucket. Calculate the - * running order, from smallest to largest big bucket. - */ - for (int i = 256; --i >= 0;) { - bigDone[i] = false; - runningOrder[i] = i; - } - - for (int h = 364; h != 1;) { - h /= 3; - for (int i = h; i <= 255; i++) { - final int vv = runningOrder[i]; - final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; - final int b = h - 1; - int j = i; - for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j - - h]) { - runningOrder[j] = ro; - j -= h; - if (j <= b) { - break; - } - } - runningOrder[j] = vv; - } - } - - /* - * The main sorting loop. - */ - for (int i = 0; i <= 255; i++) { - /* - * Process big buckets, starting with the least full. - */ - final int ss = runningOrder[i]; - - // Step 1: - /* - * Complete the big bucket [ss] by quicksorting any unsorted small - * buckets [ss, j]. Hopefully previous pointer-scanning phases have - * already completed many of the small buckets [ss, j], so we don't - * have to sort them at all. - */ - for (int j = 0; j <= 255; j++) { - final int sb = (ss << 8) + j; - final int ftab_sb = ftab[sb]; - if ((ftab_sb & SETMASK) != SETMASK) { - final int lo = ftab_sb & CLEARMASK; - final int hi = (ftab[sb + 1] & CLEARMASK) - 1; - if (hi > lo) { - mainQSort3(dataShadow, lo, hi, 2); - if (firstAttemptShadow - && (this.workDone > workLimitShadow)) { - return; - } - } - ftab[sb] = ftab_sb | SETMASK; - } - } - - // Step 2: - // Now scan this big bucket so as to synthesise the - // sorted order for small buckets [t, ss] for all t != ss. - - for (int j = 0; j <= 255; j++) { - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - } - - for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { - final int fmap_j = fmap[j]; - c1 = block[fmap_j] & 0xff; - if (!bigDone[c1]) { - fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); - copy[c1]++; - } - } - - for (int j = 256; --j >= 0;) { - ftab[(j << 8) + ss] |= SETMASK; - } - - // Step 3: - /* - * The ss big bucket is now done. Record this fact, and update the - * quadrant descriptors. Remember to update quadrants in the - * overshoot area too, if necessary. The "if (i < 255)" test merely - * skips this updating for the last bucket processed, since updating - * for the last bucket is pointless. - */ - bigDone[ss] = true; - - if (i < 255) { - final int bbStart = ftab[ss << 8] & CLEARMASK; - final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; - int shifts = 0; - - while ((bbSize >> shifts) > 65534) { - shifts++; - } - - for (int j = 0; j < bbSize; j++) { - final int a2update = fmap[bbStart + j]; - final char qVal = (char) (j >> shifts); - quadrant[a2update] = qVal; - if (a2update < NUM_OVERSHOOT_BYTES) { - quadrant[a2update + lastShadow + 1] = qVal; - } - } - } - - } - } - - private void randomiseBlock() { - final boolean[] inUse = this.data.inUse; - final byte[] block = this.data.block; - final int lastShadow = this.last; - - for (int i = 256; --i >= 0;) { - inUse[i] = false; - } - - int rNToGo = 0; - int rTPos = 0; - for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { - if (rNToGo == 0) { - rNToGo = (char) Rand.rNums(rTPos); - if (++rTPos == 512) { - rTPos = 0; - } - } - - rNToGo--; - block[j] ^= ((rNToGo == 1) ? 1 : 0); - - // handle 16 bit signed numbers - inUse[block[j] & 0xff] = true; - } - - this.blockRandomised = true; + private boolean blockSort() { + return new BlockSort().blockSort(data, last); } private void generateMTFValues() { @@ -1823,7 +1253,7 @@ private void generateMTFValues() { this.nMTF = wr + 1; } - private static final class Data extends Object { + static final class Data extends Object { // with blockSize 900k final boolean[] inUse = new boolean[256]; // 256 byte @@ -1844,9 +1274,9 @@ private static final class Data extends Object { final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte - final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte - final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_ll = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_hh = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_dd = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte final int[] mainSort_runningOrder = new int[256]; // 1024 byte final int[] mainSort_copy = new int[256]; // 1024 byte @@ -1874,6 +1304,11 @@ private static final class Data extends Object { */ final char[] quadrant; + /** + * Index in fmap[] of original string after sorting. + */ + int origPtr; + Data(int blockSize100k) { super();
Vulnerability mechanics
Generated on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
37- ant.apache.org/security.htmlnvdVendor AdvisoryWEB
- archives.neohapsis.com/archives/bugtraq/2012-05/0130.htmlnvdThird Party AdvisoryWEB
- commons.apache.org/compress/security.htmlnvdVendor AdvisoryWEB
- lists.fedoraproject.org/pipermail/package-announce/2012-June/081697.htmlnvdThird Party AdvisoryWEB
- lists.fedoraproject.org/pipermail/package-announce/2012-June/081746.htmlnvdThird Party AdvisoryWEB
- lists.fedoraproject.org/pipermail/package-announce/2013-May/105049.htmlnvdThird Party AdvisoryWEB
- lists.fedoraproject.org/pipermail/package-announce/2013-May/105060.htmlnvdThird Party AdvisoryWEB
- packetstormsecurity.org/files/113014/Apache-Commons-Compress-Apache-Ant-Denial-Of-Service.htmlnvdThird Party AdvisoryWEB
- secunia.com/advisories/49255nvdVendor Advisory
- secunia.com/advisories/49286nvdThird Party Advisory
- www-01.ibm.com/support/docview.wssnvdThird Party AdvisoryWEB
- www.securityfocus.com/bid/53676nvdThird Party AdvisoryVDB Entry
- www.securitytracker.com/idnvdThird Party AdvisoryVDB Entry
- exchange.xforce.ibmcloud.com/vulnerabilities/75857nvdThird Party AdvisoryVDB EntryWEB
- github.com/advisories/GHSA-6fxm-66hq-fc96ghsaADVISORY
- nvd.nist.gov/vuln/detail/CVE-2012-2098ghsaADVISORY
- www.oracle.com/security-alerts/cpujan2021.htmlnvdThird Party AdvisoryWEB
- osvdb.org/82161nvdBroken Link
- www.openwall.com/lists/oss-security/2023/09/13/3nvdWEB
- github.com/apache/commons-compress/commit/020c03d8ef579e80511023fb46ece30e9c3dd27dghsaWEB
- github.com/apache/commons-compress/commit/0600296ab8f8a0bbdfedd483f51b38005eb8e34eghsaWEB
- github.com/apache/commons-compress/commit/1ce57d976c4f25fe99edcadf079840c278f3cb84ghsaWEB
- github.com/apache/commons-compress/commit/2ab2fcb356753927afaa731b9d2dcc47d3083408ghsaWEB
- github.com/apache/commons-compress/commit/654222e628097763ee6ca561ae77be5c06666173ghsaWEB
- github.com/apache/commons-compress/commit/6ced422bf5eca3aac05396367bafb33ec21bf74eghsaWEB
- github.com/apache/commons-compress/commit/6e95697e783767f3549f00d7d2e1b002eac4a3d4ghsaWEB
- github.com/apache/commons-compress/commit/8f702469cbf4c451b6dea349290bc4af0f6f76c7ghsaWEB
- github.com/apache/commons-compress/commit/b06f7b41c936ef1a79589d16ea5c1d8b93f71f66ghsaWEB
- github.com/apache/commons-compress/commit/cca0e6e5341aacddefd4c4d36cef7cbdbc2a8777ghsaWEB
- github.com/apache/commons-compress/commit/ea31005111f0abede7e43e4ba0012e62e0808b22ghsaWEB
- github.com/apache/commons-compress/commit/fdd7459bc5470e90024dbe762249166481cce769ghsaWEB
- lists.apache.org/thread.html/r204ba2a9ea750f38d789d2bb429cc0925ad6133deea7cbc3001d96b5@%3Csolr-user.lucene.apache.org%3EghsaWEB
- web.archive.org/web/20130525085523/http://www.securityfocus.com/bid/53676ghsaWEB
- web.archive.org/web/20140724002926/http://secunia.com/advisories/49286ghsaWEB
- web.archive.org/web/20140724023114/http://secunia.com/advisories/49255ghsaWEB
- web.archive.org/web/20200517014414/http://www.securitytracker.com/idghsaWEB
- lists.apache.org/thread.html/r204ba2a9ea750f38d789d2bb429cc0925ad6133deea7cbc3001d96b5%40%3Csolr-user.lucene.apache.org%3Envd
News mentions
0No linked articles in our index yet.