VYPR
Moderate severityNVD Advisory· Published Jun 29, 2012· Updated Apr 29, 2026

CVE-2012-2098

CVE-2012-2098

Description

Algorithmic complexity vulnerability in the sorting algorithms in bzip2 compressing stream (BZip2CompressorOutputStream) in Apache Commons Compress before 1.4.1 allows remote attackers to cause a denial of service (CPU consumption) via a file with many repeating inputs.

Affected packages

Versions sourced from the GitHub Security Advisory.

PackageAffected versionsPatched versions
org.apache.commons:commons-compressMaven
< 1.4.11.4.1

Affected products

2

Patches

12
8f702469cbf4

[CVE-2012-2098] Actually this shift is supposed to be unsigned.

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
1 file changed · +1 1
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+1 1 modified
    @@ -370,7 +370,7 @@ private void fallbackQSort3(int[] fmap,
                 if (r3 == 0) {
                     med = eclass[fmap[lo]]; 
                 } else if (r3 == 1) {
    -                med = eclass[fmap[(lo+hi)>>1]];
    +                med = eclass[fmap[(lo + hi) >>> 1]];
                 } else {
                     med = eclass[fmap[hi]];
                 }
    
654222e62809

typos and make PMD happy

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
1 file changed · +40 16
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+40 16 modified
    @@ -104,7 +104,7 @@ class BlockSort {
          *
          * I've added the fallbackSort function of 1.0.6 and tried to
          * integrate it with the existing code without touching too much.
    -     * I've also removed the now unused reandomization code.
    +     * I've also removed the now unused randomization code.
          */
     
         /*
    @@ -268,7 +268,9 @@ private void fallbackSimpleSort(int[] fmap,
                                         int[] eclass, 
                                         int lo, 
                                         int hi) {
    -        if (lo == hi) return;
    +        if (lo == hi) {
    +            return;
    +        }
     
             int j;
             if (hi - lo > 3) {
    @@ -380,32 +382,44 @@ private void fallbackQSort3(int[] fmap,
                 // in the cited Sedgewick paper
                 while (true) {
                     while (true) {
    -                    if (unLo > unHi) break;
    +                    if (unLo > unHi) {
    +                        break;
    +                    }
                         n = eclass[fmap[unLo]] - (int) med;
                         if (n == 0) { 
                             fswap(fmap, unLo, ltLo); 
                             ltLo++; unLo++; 
                             continue; 
    -                    };
    -                    if (n > 0) break;
    +                    }
    +                    if (n > 0) {
    +                        break;
    +                    }
                         unLo++;
                     }
                     while (true) {
    -                    if (unLo > unHi) break;
    +                    if (unLo > unHi) {
    +                        break;
    +                    }
                         n = eclass[fmap[unHi]] - (int) med;
                         if (n == 0) {
                             fswap(fmap, unHi, gtHi); 
                             gtHi--; unHi--; 
                             continue; 
    -                    };
    -                    if (n < 0) break;
    +                    }
    +                    if (n < 0) {
    +                        break;
    +                    }
                         unHi--;
                     }
    -                if (unLo > unHi) break;
    +                if (unLo > unHi) {
    +                    break;
    +                }
                     fswap(fmap, unLo, unHi); unLo++; unHi--;
                 }
     
    -            if (gtHi < ltLo) continue;
    +            if (gtHi < ltLo) {
    +                continue;
    +            }
     
                 n = fmin(ltLo - lo, unLo - ltLo);
                 fvswap(fmap, lo, unLo - n, n);
    @@ -470,7 +484,9 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) {
             for (i = 0; i < nblock; i++) {
                 ftab[block[i] & 0xff]++;
             }
    -        for (i = 1; i < 257;    i++) ftab[i] += ftab[i - 1];
    +        for (i = 1; i < 257;    i++) {
    +            ftab[i] += ftab[i - 1];
    +        }
     
             for (i = 0; i < nblock; i++) {
                 j = block[i] & 0xff;
    @@ -481,7 +497,9 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) {
     
             nBhtab = 64 + nblock;
             BitSet bhtab = new BitSet(nBhtab);
    -        for (i = 0; i < 256; i++) bhtab.set(ftab[i]);
    +        for (i = 0; i < 256; i++) {
    +            bhtab.set(ftab[i]);
    +        }
     
             /*--
               LBZ2: Inductively refine the buckets.  Kind-of an
    @@ -519,10 +537,14 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) {
                     k = r + 1;
                     k = bhtab.nextClearBit(k);
                     l = k - 1;
    -                if (l >= nblock) break;
    +                if (l >= nblock) {
    +                    break;
    +                }
                     k = bhtab.nextSetBit(k + 1);
                     r = k - 1;
    -                if (r >= nblock) break;
    +                if (r >= nblock) {
    +                    break;
    +                }
     
                     /*-- LBZ2: now [l, r] bracket current bucket --*/
                     if (r > l) {
    @@ -536,13 +558,15 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) {
                             if (cc != cc1) {
                                 bhtab.set(i);
                                 cc = cc1;
    -                        };
    +                        }
                         }
                     }
                 }
     
                 H *= 2;
    -            if (H > nblock || nNotDone == 0) break;
    +            if (H > nblock || nNotDone == 0) {
    +                break;
    +            }
             }
         }
     
    
6ced422bf5ec

remove randomization code

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
3 files changed · +13 52
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+6 40 modified
    @@ -98,12 +98,13 @@ class BlockSort {
          * This class seems to mix several revisions of libbzip2's code.
          * The mainSort function and those used by it look closer to the
          * 0.9.5 version but show some variations introduced later.  At
    -     * the same time the logic to randomize the block on bad input has
    -     * been dropped after 0.9.0 and replaced by a fallback sorting
    -     * algorithm.
    +     * the same time the logic of Compress 1.4 to randomize the block
    +     * on bad input has been dropped after libbzip2 0.9.0 and replaced
    +     * by a fallback sorting algorithm.
          *
          * I've added the fallbackSort function of 1.0.6 and tried to
          * integrate it with the existing code without touching too much.
    +     * I've also removed the now unused reandomization code.
          */
     
         /*
    @@ -120,11 +121,9 @@ class BlockSort {
             QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE
             ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE;
     
    -    private boolean blockRandomised;
    -
         /*
          * Used when sorting. If too many long comparisons happen, we stop sorting,
    -     * randomise the block slightly, and try again.
    +     * and use fallbackSort instead.
          */
         private int workDone;
         private int workLimit;
    @@ -151,10 +150,9 @@ class BlockSort {
             this.quadrant = data.sfmap;
         }
     
    -    boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
    +    void blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
             this.workLimit = WORK_FACTOR * last;
             this.workDone = 0;
    -        this.blockRandomised = false;
             this.firstAttempt = true;
     
             if (last + 1 < 10000) {
    @@ -177,7 +175,6 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
             }
     
             // assert (data.origPtr != -1) : data.origPtr;
    -        return false;
         }
     
         /**
    @@ -1057,35 +1054,4 @@ final void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
             }
         }
     
    -/*---------------------------------------------*/
    -
    -    private void randomiseBlock(final BZip2CompressorOutputStream.Data data,
    -                                final int lastShadow) {
    -        final boolean[] inUse = data.inUse;
    -        final byte[] block = data.block;
    -
    -        for (int i = 256; --i >= 0;) {
    -            inUse[i] = false;
    -        }
    -
    -        int rNToGo = 0;
    -        int rTPos = 0;
    -        for (int i = 0, j = 1; i <= lastShadow; i = j, j++) {
    -            if (rNToGo == 0) {
    -                rNToGo = (char) Rand.rNums(rTPos);
    -                if (++rTPos == 512) {
    -                    rTPos = 0;
    -                }
    -            }
    -
    -            rNToGo--;
    -            block[j] ^= ((rNToGo == 1) ? 1 : 0);
    -
    -            // handle 16 bit signed numbers
    -            inUse[block[j] & 0xff] = true;
    -        }
    -
    -        this.blockRandomised = true;
    -    }
    -
     }
    
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+5 9 modified
    @@ -562,7 +562,7 @@ private void endBlock() throws IOException {
             }
     
             /* sort the block and establish posn of original string */
    -        final boolean blockRandomised = blockSort();
    +        blockSort();
     
             /*
              * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
    @@ -585,12 +585,8 @@ private void endBlock() throws IOException {
             /* Now the block's CRC, so it is in a known place. */
             bsPutInt(this.blockCRC);
     
    -        /* Now a single bit indicating randomisation. */
    -        if (blockRandomised) {
    -            bsW(1, 1);
    -        } else {
    -            bsW(1, 0);
    -        }
    +        /* Now a single bit indicating no randomisation. */
    +        bsW(1, 0);
     
             /* Finally, block's contents proper. */
             moveToFrontCodeAndSend();
    @@ -1165,8 +1161,8 @@ private void moveToFrontCodeAndSend() throws IOException {
             sendMTFValues();
         }
     
    -    private boolean blockSort() {
    -        return blockSorter.blockSort(data, last);
    +    private void blockSort() {
    +        blockSorter.blockSort(data, last);
         }
     
         /*
    
  • src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+2 3 modified
    @@ -22,7 +22,6 @@
     
     import static org.junit.Assert.assertArrayEquals;
     import static org.junit.Assert.assertEquals;
    -import static org.junit.Assert.assertFalse;
     
     public class BlockSortTest {
     
    @@ -81,7 +80,7 @@ public class BlockSortTest {
         @Test
         public void testSortFixture() {
             DS ds = setUpFixture();
    -        assertFalse(ds.s.blockSort(ds.data, FIXTURE.length - 1));
    +        ds.s.blockSort(ds.data, FIXTURE.length - 1);
             assertFixtureSorted(ds.data);
             assertEquals(0, ds.data.origPtr);
         }
    @@ -103,7 +102,7 @@ public void testSortFixtureFallbackSort() {
         @Test
         public void testSortFixture2() {
             DS ds = setUpFixture2();
    -        assertFalse(ds.s.blockSort(ds.data, FIXTURE2.length - 1));
    +        ds.s.blockSort(ds.data, FIXTURE2.length - 1);
             assertFixture2Sorted(ds.data);
             assertEquals(1, ds.data.origPtr);
         }
    
2ab2fcb35675

fix indentation

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
1 file changed · +3 4
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+3 4 modified
    @@ -160,12 +160,11 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
             if (last + 1 < 10000) {
                 fallbackSort(data, last);
             } else {
    +            mainSort(data, last);
     
    -        mainSort(data, last);
    -
    -        if (this.firstAttempt && (this.workDone > this.workLimit)) {
    +            if (this.firstAttempt && (this.workDone > this.workLimit)) {
                     fallbackSort(data, last);
    -        }
    +            }
             }
     
             final int[] fmap = data.fmap;
    
0600296ab8f8

[CVE-2012-2098] Integrate fallback sort into the rest, add some more tests and fix bug in bucket boundary calculation

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
2 files changed · +139 31
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+52 22 modified
    @@ -34,6 +34,7 @@
      * Compress" you'd get:</p>
      *
      * <pre>
    + *  CompressCommons
      * Commons Compress
      * CompressCommons 
      * essCommons Compr
    @@ -51,9 +52,9 @@
      * ssCommons Compre
      * </pre>
      *
    - * <p>Which results in a new text "s romooCCmmpnse", in adition the
    + * <p>Which results in a new text "ss romooCCmmpnse", in adition the
      * index of the first line that contained the original text is kept -
    - * in this case it is 0.  The idea is that in a long English text all
    + * in this case it is 1.  The idea is that in a long English text all
      * permutations that start with "he" are likely suffixes of a "the" and
      * thus they end in "t" leading to a larger block of "t"s that can
      * better be compressed by the subsequent Move-to-Front, run-length
    @@ -101,7 +102,8 @@ class BlockSort {
          * been dropped after 0.9.0 and replaced by a fallback sorting
          * algorithm.
          *
    -     * I've added the fallbackSort function of 1.0.6.
    +     * I've added the fallbackSort function of 1.0.6 and tried to
    +     * integrate it with the existing code without touching too much.
          */
     
         /*
    @@ -154,13 +156,16 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
             this.workDone = 0;
             this.blockRandomised = false;
             this.firstAttempt = true;
    +
    +        if (last + 1 < 10000) {
    +            fallbackSort(data, last);
    +        } else {
    +
             mainSort(data, last);
     
             if (this.firstAttempt && (this.workDone > this.workLimit)) {
    -            randomiseBlock(data, last);
    -            this.workLimit = this.workDone = 0;
    -            this.firstAttempt = false;
    -            mainSort(data, last);
    +                fallbackSort(data, last);
    +        }
             }
     
             final int[] fmap = data.fmap;
    @@ -173,7 +178,27 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
             }
     
             // assert (data.origPtr != -1) : data.origPtr;
    -        return blockRandomised;
    +        return false;
    +    }
    +
    +    /**
    +     * Adapt fallbackSort to the expected interface of the rest of the
    +     * code, in particular deal with the fact that block starts at
    +     * offset 1 (in libbzip2 1.0.6 it starts at 0).
    +     */
    +    final void fallbackSort(final BZip2CompressorOutputStream.Data data,
    +                            final int last) {
    +        data.block[0] = data.block[last + 1];
    +        fallbackSort(data.fmap, data.block, last + 1);
    +        for (int i = 0; i < last + 1; i++) {
    +            --data.fmap[i];
    +        }
    +        for (int i = 0; i < last + 1; i++) {
    +            if (data.fmap[i] == -1) {
    +                data.fmap[i] = last;
    +                break;
    +            }
    +        }
         }
     
     /*---------------------------------------------*/
    @@ -415,12 +440,13 @@ private int[] getEclass() {
         }
     
         /*
    -     * The C code uses an array of ints to represents the bucket-start
    -     * flags (bhtab).  It also contains optimizations to skip over 32
    -     * consecutively set or consecutively unset bits on word
    -     * boundaries at once.  For now I've chosen to use the simpler but
    -     * potentially slower code using BitSet - also in the hope that
    -     * using the BitSet#nextXXX methods may be fast enough.
    +     * The C code uses an array of ints (each int holding 32 flags) to
    +     * represents the bucket-start flags (bhtab).  It also contains
    +     * optimizations to skip over 32 consecutively set or
    +     * consecutively unset bits on word boundaries at once.  For now
    +     * I've chosen to use the simpler but potentially slower code
    +     * using BitSet - also in the hope that using the BitSet#nextXXX
    +     * methods may be fast enough.
          */
     
         /**
    @@ -432,16 +458,22 @@ private int[] getEclass() {
          * @param off offset of first byte to sort in block
          */
         final void fallbackSort(int[] fmap, byte[] block, int nblock) {
    -        int[] ftab = new int[257];
    +        final int[] ftab = new int[257];
             int H, i, j, k, l, r, cc, cc1;
             int nNotDone;
             int nBhtab;
    +        final int[] eclass = getEclass();
     
    +        for (i = 0; i < nblock; i++) {
    +            eclass[i] = 0;
    +        }
             /*--
               LBZ2: Initial 1-char radix sort to generate
               initial fmap and initial BH bits.
               --*/
    -        for (i = 0; i < nblock; i++) ftab[block[i] & 0xff]++;
    +        for (i = 0; i < nblock; i++) {
    +            ftab[block[i] & 0xff]++;
    +        }
             for (i = 1; i < 257;    i++) ftab[i] += ftab[i - 1];
     
             for (i = 0; i < nblock; i++) {
    @@ -467,8 +499,6 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) {
                 bhtab.clear(nblock + 2 * i + 1);
             }
     
    -        eclass = getEclass();
    -
             /*-- LBZ2: the log(N) loop --*/
             H = 1;
             while (true) {
    @@ -491,10 +521,10 @@ final void fallbackSort(int[] fmap, byte[] block, int nblock) {
     
                     /*-- LBZ2: find the next non-singleton bucket --*/
                     k = r + 1;
    -                k = bhtab.nextSetBit(k);
    +                k = bhtab.nextClearBit(k);
                     l = k - 1;
                     if (l >= nblock) break;
    -                k = bhtab.nextClearBit(k);
    +                k = bhtab.nextSetBit(k + 1);
                     r = k - 1;
                     if (r >= nblock) break;
     
    @@ -862,8 +892,8 @@ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
         private static final int SETMASK = (1 << 21);
         private static final int CLEARMASK = (~SETMASK);
     
    -    private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
    -                          final int lastShadow) {
    +    final void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
    +                        final int lastShadow) {
             final int[] runningOrder = this.mainSort_runningOrder;
             final int[] copy = this.mainSort_copy;
             final boolean[] bigDone = this.mainSort_bigDone;
    
  • src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+87 9 modified
    @@ -70,17 +70,56 @@ public class BlockSortTest {
             0, 1, 7, 6, 8, 2, 3, 5, 4
         };
     
    +    private static final byte[] FIXTURE2 = { 
    +        'C', 'o', 'm', 'm', 'o', 'n', 's', ' ', 'C', 'o', 'm', 'p', 'r', 'e', 's', 's', 
    +    };
    +
    +    private static final byte[] FIXTURE2_BWT = {
    +        's', 's', ' ', 'r', 'o', 'm', 'o', 'o', 'C', 'C', 'm', 'm', 'p', 'n', 's', 'e', 
    +    };
    +
         @Test
         public void testSortFixture() {
    -        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
    -        System.arraycopy(FIXTURE, 0, data.block, 1, FIXTURE.length);
    -        BlockSort s = new BlockSort(data);
    -        assertFalse(s.blockSort(data, FIXTURE.length - 1));
    -        assertEquals(FIXTURE[FIXTURE.length - 1], data.block[0]);
    -        for (int i = 0; i < FIXTURE.length; i++) {
    -            assertEquals(FIXTURE_BWT[i], data.block[data.fmap[i]]);
    -        }
    -        assertEquals(0, data.origPtr);
    +        DS ds = setUpFixture();
    +        assertFalse(ds.s.blockSort(ds.data, FIXTURE.length - 1));
    +        assertFixtureSorted(ds.data);
    +        assertEquals(0, ds.data.origPtr);
    +    }
    +
    +    @Test
    +    public void testSortFixtureMainSort() {
    +        DS ds = setUpFixture();
    +        ds.s.mainSort(ds.data, FIXTURE.length - 1);
    +        assertFixtureSorted(ds.data);
    +    }
    +
    +    @Test
    +    public void testSortFixtureFallbackSort() {
    +        DS ds = setUpFixture();
    +        ds.s.fallbackSort(ds.data, FIXTURE.length - 1);
    +        assertFixtureSorted(ds.data);
    +    }
    +
    +    @Test
    +    public void testSortFixture2() {
    +        DS ds = setUpFixture2();
    +        assertFalse(ds.s.blockSort(ds.data, FIXTURE2.length - 1));
    +        assertFixture2Sorted(ds.data);
    +        assertEquals(1, ds.data.origPtr);
    +    }
    +
    +    @Test
    +    public void testSortFixture2MainSort() {
    +        DS ds = setUpFixture2();
    +        ds.s.mainSort(ds.data, FIXTURE2.length - 1);
    +        assertFixture2Sorted(ds.data);
    +    }
    +
    +    @Test
    +    public void testSortFixture2FallbackSort() {
    +        DS ds = setUpFixture2();
    +        ds.s.fallbackSort(ds.data, FIXTURE2.length - 1);
    +        assertFixture2Sorted(ds.data);
         }
     
         @Test
    @@ -91,4 +130,43 @@ public void testFallbackSort() {
             s.fallbackSort(fmap, FIXTURE, FIXTURE.length);
             assertArrayEquals(FIXTURE_SORTED, fmap);
         }
    +
    +    private DS setUpFixture() {
    +        return setUpFixture(FIXTURE);
    +    }
    +
    +    private void assertFixtureSorted(BZip2CompressorOutputStream.Data data) {
    +        assertFixtureSorted(data, FIXTURE, FIXTURE_BWT);
    +    }
    +
    +    private DS setUpFixture2() {
    +        return setUpFixture(FIXTURE2);
    +    }
    +
    +    private void assertFixture2Sorted(BZip2CompressorOutputStream.Data data) {
    +        assertFixtureSorted(data, FIXTURE2, FIXTURE2_BWT);
    +    }
    +
    +    private DS setUpFixture(byte[] fixture) {
    +        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
    +        System.arraycopy(fixture, 0, data.block, 1, fixture.length);
    +        return new DS(data, new BlockSort(data));
    +    }
    +
    +    private void assertFixtureSorted(BZip2CompressorOutputStream.Data data,
    +                                     byte[] fixture, byte[] fixtureBwt) {
    +        assertEquals(fixture[fixture.length - 1], data.block[0]);
    +        for (int i = 0; i < fixture.length; i++) {
    +            assertEquals(fixtureBwt[i], data.block[data.fmap[i]]);
    +        }
    +    }
    +
    +    private static class DS {
    +        private final BZip2CompressorOutputStream.Data data;
    +        private final BlockSort s;
    +        DS(BZip2CompressorOutputStream.Data data, BlockSort s) {
    +            this.data = data;
    +            this.s = s;
    +        }
    +    }
     }
    \ No newline at end of file
    
fdd7459bc547

[CVE-2012-2098] add fallback sorting algorithm of libbzip2 1.0.6 (actually added with 0.9.5)

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
2 files changed · +370 3
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+357 3 modified
    @@ -18,6 +18,8 @@
      */
     package org.apache.commons.compress.compressors.bzip2;
     
    +import java.util.BitSet;
    +
     /**
      * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
      * BZip2CompressorOutputStream}.
    @@ -92,12 +94,14 @@ class BlockSort {
         /*
          * 2012-05-20 Stefan Bodewig:
          *
    -     * The class seems to mix several revisions of libbzip2's code.
    +     * This class seems to mix several revisions of libbzip2's code.
          * The mainSort function and those used by it look closer to the
          * 0.9.5 version but show some variations introduced later.  At
          * the same time the logic to randomize the block on bad input has
          * been dropped after 0.9.0 and replaced by a fallback sorting
          * algorithm.
    +     *
    +     * I've added the fallbackSort function of 1.0.6.
          */
     
         /*
    @@ -108,6 +112,12 @@ class BlockSort {
          */
         private static final int QSORT_STACK_SIZE = 1000;
     
    +    private static final int FALLBACK_QSORT_STACK_SIZE = 100;
    +
    +    private static final int STACK_SIZE =
    +        QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE
    +        ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE;
    +
         private boolean blockRandomised;
     
         /*
    @@ -118,8 +128,8 @@ class BlockSort {
         private int workLimit;
         private boolean firstAttempt;
     
    -    private final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte
    -    private final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte
    +    private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte
    +    private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte
         private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
     
         private final int[] mainSort_runningOrder = new int[256]; // 1024 byte
    @@ -166,6 +176,350 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
             return blockRandomised;
         }
     
    +/*---------------------------------------------*/
    +
    +/*---------------------------------------------*/
    +/*--- LBZ2: Fallback O(N log(N)^2) sorting        ---*/
    +/*--- algorithm, for repetitive blocks      ---*/
    +/*---------------------------------------------*/
    +
    +    /*
    +     * This is the fallback sorting algorithm libbzip2 1.0.6 uses for
    +     * repetitive or very short inputs.
    +     *
    +     * The idea is inspired by Manber-Myers string suffix sorting
    +     * algorithm.  First a bucket sort places each permutation of the
    +     * block into a bucket based on its first byte.  Permutations are
    +     * represented by pointers to their first character kept in
    +     * (partially) sorted order inside the array ftab.
    +     *
    +     * The next step visits all buckets in order and performs a
    +     * quicksort on all permutations of the bucket based on the index
    +     * of the bucket the second byte of the permutation belongs to,
    +     * thereby forming new buckets.  When arrived here the
    +     * permutations are sorted up to the second character and we have
    +     * buckets of permutations that are identical up to two
    +     * characters.
    +     *
    +     * Repeat the step of quicksorting each bucket, now based on the
    +     * bucket holding the sequence of the third and forth character
    +     * leading to four byte buckets.  Repeat this doubling of bucket
    +     * sizes until all buckets only contain single permutations or the
    +     * bucket size exceeds the block size.
    +     *
    +     * I.e.
    +     *
    +     * "abraba" form three buckets for the chars "a", "b", and "r" in
    +     * the first step with
    +     *
    +     * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 }
    +     *
    +     * when looking at the bucket of "a"s the second characters are in
    +     * the buckets that start with fmap-index 0 (rolled over), 3 and 3
    +     * respectively, forming two new buckets "aa" and "ab", so we get
    +     *
    +     * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 }
    +     *
    +     * since the last bucket only contained a single item it didn't
    +     * have to be sorted at all.
    +     *
    +     * There now is just one bucket with more than one permutation
    +     * that remains to be sorted.  For the permutation that starts
    +     * with index 3 the third and forth char are in bucket 'aa' at
    +     * index 0 and for the one starting at block index 0 they are in
    +     * bucket 'ra' with sort index 5.  The fully sorted order then becomes.
    +     *
    +     * fmap = { 5, 3, 0, 4, 1, 2 }
    +     * 
    +     */
    +
    +    /**
    +     * @param fmap points to the index of the starting point of a
    +     *        permutation inside the block of data in the current
    +     *        partially sorted order
    +     * @param eclass points from the index of a character inside the
    +     *        block to the first index in fmap that contains the
    +     *        bucket of its suffix that is sorted in this step.
    +     * @param lo lower boundary of the fmap-interval to be sorted 
    +     * @param hi upper boundary of the fmap-interval to be sorted 
    +     */
    +    private void fallbackSimpleSort(int[] fmap, 
    +                                    int[] eclass, 
    +                                    int lo, 
    +                                    int hi) {
    +        if (lo == hi) return;
    +
    +        int j;
    +        if (hi - lo > 3) {
    +            for (int i = hi - 4; i >= lo; i--) {
    +                int tmp = fmap[i];
    +                int ec_tmp = eclass[tmp];
    +                for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]];
    +                     j += 4) {
    +                    fmap[j - 4] = fmap[j];
    +                }
    +                fmap[j - 4] = tmp;
    +            }
    +        }
    +
    +        for (int i = hi - 1; i >= lo; i--) {
    +            int tmp = fmap[i];
    +            int ec_tmp = eclass[tmp];
    +            for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) {
    +                fmap[j - 1] = fmap[j];
    +            }
    +            fmap[j-1] = tmp;
    +        }
    +    }
    +
    +    private static final int FALLBACK_QSORT_SMALL_THRESH = 10;
    +
    +    /**
    +     * swaps two values in fmap
    +     */
    +    private void fswap(int[] fmap, int zz1, int zz2) {
    +        int zztmp = fmap[zz1];
    +        fmap[zz1] = fmap[zz2];
    +        fmap[zz2] = zztmp;
    +    }
    +
    +    /**
    +     * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap.
    +     */
    +    private void fvswap(int[] fmap, int yyp1, int yyp2, int yyn) {
    +        while (yyn > 0) {
    +            fswap(fmap, yyp1, yyp2);
    +            yyp1++; yyp2++; yyn--;
    +        }
    +    }
    +
    +    private int fmin(int a, int b) {
    +        return a < b ? a : b;
    +    }
    +
    +    private void fpush(int sp, int lz, int hz) {
    +        stack_ll[sp] = lz;
    +        stack_hh[sp] = hz;
    +    }
    +
    +    private int[] fpop(int sp) {
    +        return new int[] { stack_ll[sp], stack_hh[sp] };
    +    }
    +
    +    /**
    +     * @param fmap points to the index of the starting point of a
    +     *        permutation inside the block of data in the current
    +     *        partially sorted order
    +     * @param eclass points from the index of a character inside the
    +     *        block to the first index in fmap that contains the
    +     *        bucket of its suffix that is sorted in this step.
    +     * @param loSt lower boundary of the fmap-interval to be sorted 
    +     * @param hiSt upper boundary of the fmap-interval to be sorted 
    +     */
    +    private void fallbackQSort3(int[] fmap, 
    +                                int[] eclass, 
    +                                int loSt, 
    +                                int hiSt) {
    +        int lo, unLo, ltLo, hi, unHi, gtHi, n;
    +
    +        long r = 0;
    +        int sp = 0;
    +        fpush(sp++, loSt, hiSt);
    +
    +        while (sp > 0) {
    +            int[] s = fpop(--sp);
    +            lo = s[0]; hi = s[1];
    +
    +            if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
    +                fallbackSimpleSort(fmap, eclass, lo, hi);
    +                continue;
    +            }
    +
    +            /* LBZ2: Random partitioning.  Median of 3 sometimes fails to
    +               avoid bad cases.  Median of 9 seems to help but 
    +               looks rather expensive.  This too seems to work but
    +               is cheaper.  Guidance for the magic constants 
    +               7621 and 32768 is taken from Sedgewick's algorithms
    +               book, chapter 35.
    +            */
    +            r = ((r * 7621) + 1) % 32768;
    +            long r3 = r % 3, med;
    +            if (r3 == 0) {
    +                med = eclass[fmap[lo]]; 
    +            } else if (r3 == 1) {
    +                med = eclass[fmap[(lo+hi)>>1]];
    +            } else {
    +                med = eclass[fmap[hi]];
    +            }
    +
    +            unLo = ltLo = lo;
    +            unHi = gtHi = hi;
    +
    +            // looks like the ternary partition attributed to Wegner
    +            // in the cited Sedgewick paper
    +            while (true) {
    +                while (true) {
    +                    if (unLo > unHi) break;
    +                    n = eclass[fmap[unLo]] - (int) med;
    +                    if (n == 0) { 
    +                        fswap(fmap, unLo, ltLo); 
    +                        ltLo++; unLo++; 
    +                        continue; 
    +                    };
    +                    if (n > 0) break;
    +                    unLo++;
    +                }
    +                while (true) {
    +                    if (unLo > unHi) break;
    +                    n = eclass[fmap[unHi]] - (int) med;
    +                    if (n == 0) {
    +                        fswap(fmap, unHi, gtHi); 
    +                        gtHi--; unHi--; 
    +                        continue; 
    +                    };
    +                    if (n < 0) break;
    +                    unHi--;
    +                }
    +                if (unLo > unHi) break;
    +                fswap(fmap, unLo, unHi); unLo++; unHi--;
    +            }
    +
    +            if (gtHi < ltLo) continue;
    +
    +            n = fmin(ltLo - lo, unLo - ltLo);
    +            fvswap(fmap, lo, unLo - n, n);
    +            int m = fmin(hi - gtHi, gtHi - unHi);
    +            fvswap(fmap, unHi + 1, hi - m + 1, m);
    +
    +            n = lo + unLo - ltLo - 1;
    +            m = hi - (gtHi - unHi) + 1;
    +
    +            if (n - lo > hi - m) {
    +                fpush(sp++, lo, n);
    +                fpush(sp++, m, hi);
    +            } else {
    +                fpush(sp++, m, hi);
    +                fpush(sp++, lo, n);
    +            }
    +        }
    +    }
    +
    +
    +/*---------------------------------------------*/
    +
    +    private int[] eclass;
    +
    +    private int[] getEclass() {
    +        return eclass == null
    +            ? (eclass = new int[quadrant.length / 2]) : eclass;
    +    }
    +
    +    /*
    +     * The C code uses an array of ints to represents the bucket-start
    +     * flags (bhtab).  It also contains optimizations to skip over 32
    +     * consecutively set or consecutively unset bits on word
    +     * boundaries at once.  For now I've chosen to use the simpler but
    +     * potentially slower code using BitSet - also in the hope that
    +     * using the BitSet#nextXXX methods may be fast enough.
    +     */
    +
    +    /**
    +     * @param fmap points to the index of the starting point of a
    +     *        permutation inside the block of data in the current
    +     *        partially sorted order
    +     * @param block the original data
    +     * @param nblock size of the block
    +     * @param off offset of first byte to sort in block
    +     */
    +    final void fallbackSort(int[] fmap, byte[] block, int nblock) {
    +        int[] ftab = new int[257];
    +        int H, i, j, k, l, r, cc, cc1;
    +        int nNotDone;
    +        int nBhtab;
    +
    +        /*--
    +          LBZ2: Initial 1-char radix sort to generate
    +          initial fmap and initial BH bits.
    +          --*/
    +        for (i = 0; i < nblock; i++) ftab[block[i] & 0xff]++;
    +        for (i = 1; i < 257;    i++) ftab[i] += ftab[i - 1];
    +
    +        for (i = 0; i < nblock; i++) {
    +            j = block[i] & 0xff;
    +            k = ftab[j] - 1;
    +            ftab[j] = k;
    +            fmap[k] = i;
    +        }
    +
    +        nBhtab = 64 + nblock;
    +        BitSet bhtab = new BitSet(nBhtab);
    +        for (i = 0; i < 256; i++) bhtab.set(ftab[i]);
    +
    +        /*--
    +          LBZ2: Inductively refine the buckets.  Kind-of an
    +          "exponential radix sort" (!), inspired by the
    +          Manber-Myers suffix array construction algorithm.
    +          --*/
    +
    +        /*-- LBZ2: set sentinel bits for block-end detection --*/
    +        for (i = 0; i < 32; i++) { 
    +            bhtab.set(nblock + 2 * i);
    +            bhtab.clear(nblock + 2 * i + 1);
    +        }
    +
    +        eclass = getEclass();
    +
    +        /*-- LBZ2: the log(N) loop --*/
    +        H = 1;
    +        while (true) {
    +
    +            j = 0;
    +            for (i = 0; i < nblock; i++) {
    +                if (bhtab.get(i)) {
    +                    j = i;
    +                }
    +                k = fmap[i] - H;
    +                if (k < 0) {
    +                    k += nblock;
    +                }
    +                eclass[k] = j;
    +            }
    +
    +            nNotDone = 0;
    +            r = -1;
    +            while (true) {
    +
    +                /*-- LBZ2: find the next non-singleton bucket --*/
    +                k = r + 1;
    +                k = bhtab.nextSetBit(k);
    +                l = k - 1;
    +                if (l >= nblock) break;
    +                k = bhtab.nextClearBit(k);
    +                r = k - 1;
    +                if (r >= nblock) break;
    +
    +                /*-- LBZ2: now [l, r] bracket current bucket --*/
    +                if (r > l) {
    +                    nNotDone += (r - l + 1);
    +                    fallbackQSort3(fmap, eclass, l, r);
    +
    +                    /*-- LBZ2: scan bucket and generate header bits-- */
    +                    cc = -1;
    +                    for (i = l; i <= r; i++) {
    +                        cc1 = eclass[fmap[i]];
    +                        if (cc != cc1) {
    +                            bhtab.set(i);
    +                            cc = cc1;
    +                        };
    +                    }
    +                }
    +            }
    +
    +            H *= 2;
    +            if (H > nblock || nNotDone == 0) break;
    +        }
    +    }
    +
     /*---------------------------------------------*/
     
         /*
    
  • src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+13 0 modified
    @@ -66,6 +66,10 @@ public class BlockSortTest {
         private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, 
                                                     (byte) 252, (byte) 255, (byte) 253 };
     
    +    private static final int[] FIXTURE_SORTED = {
    +        0, 1, 7, 6, 8, 2, 3, 5, 4
    +    };
    +
         @Test
         public void testSortFixture() {
             BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
    @@ -78,4 +82,13 @@ public void testSortFixture() {
             }
             assertEquals(0, data.origPtr);
         }
    +
    +    @Test
    +    public void testFallbackSort() {
    +        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
    +        BlockSort s = new BlockSort(data);
    +        int[] fmap = new int[FIXTURE.length];
    +        s.fallbackSort(fmap, FIXTURE, FIXTURE.length);
    +        assertArrayEquals(FIXTURE_SORTED, fmap);
    +    }
     }
    \ No newline at end of file
    
cca0e6e5341a

just moving stuff around to closer match source code layout of libbzip2 1.0.6

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
1 file changed · +50 48
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+50 48 modified
    @@ -100,12 +100,6 @@ class BlockSort {
          * algorithm.
          */
     
    -    private static final int SETMASK = (1 << 21);
    -    private static final int CLEARMASK = (~SETMASK);
    -    private static final int SMALL_THRESH = 20;
    -    private static final int DEPTH_THRESH = 10;
    -    private static final int WORK_FACTOR = 30;
    -
         /*
          * LBZ2: If you are ever unlucky/improbable enough to get a stack
          * overflow whilst sorting, increase the following constant and
    @@ -114,15 +108,6 @@ class BlockSort {
          */
         private static final int QSORT_STACK_SIZE = 1000;
     
    -    /*
    -     * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here.
    -     * Possibly because the number of elems to sort is usually small, typically
    -     * &lt;= 20.
    -     */
    -    private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    -                                        9841, 29524, 88573, 265720, 797161,
    -                                        2391484 };
    -
         private boolean blockRandomised;
     
         /*
    @@ -154,14 +139,43 @@ class BlockSort {
             this.quadrant = data.sfmap;
         }
     
    +    boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
    +        this.workLimit = WORK_FACTOR * last;
    +        this.workDone = 0;
    +        this.blockRandomised = false;
    +        this.firstAttempt = true;
    +        mainSort(data, last);
    +
    +        if (this.firstAttempt && (this.workDone > this.workLimit)) {
    +            randomiseBlock(data, last);
    +            this.workLimit = this.workDone = 0;
    +            this.firstAttempt = false;
    +            mainSort(data, last);
    +        }
    +
    +        final int[] fmap = data.fmap;
    +        data.origPtr = -1;
    +        for (int i = 0; i <= last; i++) {
    +            if (fmap[i] == 0) {
    +                data.origPtr = i;
    +                break;
    +            }
    +        }
    +
    +        // assert (data.origPtr != -1) : data.origPtr;
    +        return blockRandomised;
    +    }
    +
     /*---------------------------------------------*/
    -/*--
    -   LBZ2: The following is an implementation of
    -   an elegant 3-way quicksort for strings,
    -   described in a paper "Fast Algorithms for
    -   Sorting and Searching Strings", by Robert
    -   Sedgewick and Jon L. Bentley.
    ---*/
    +
    +    /*
    +     * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here.
    +     * Possibly because the number of elems to sort is usually small, typically
    +     * &lt;= 20.
    +     */
    +    private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    +                                        9841, 29524, 88573, 265720, 797161,
    +                                        2391484 };
     
         /**
          * This is the most hammered method of this class.
    @@ -357,6 +371,14 @@ private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow
             return firstAttemptShadow && (workDoneShadow > workLimitShadow);
         }
     
    +/*--
    +   LBZ2: The following is an implementation of
    +   an elegant 3-way quicksort for strings,
    +   described in a paper "Fast Algorithms for
    +   Sorting and Searching Strings", by Robert
    +   Sedgewick and Jon L. Bentley.
    +--*/
    +
         private static void vswap(int[] fmap, int p1, int p2, int n) {
             n += p1;
             while (p1 < n) {
    @@ -371,32 +393,9 @@ private static byte med3(byte a, byte b, byte c) {
                                                             : a);
         }
     
    -    boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
    -        this.workLimit = WORK_FACTOR * last;
    -        this.workDone = 0;
    -        this.blockRandomised = false;
    -        this.firstAttempt = true;
    -        mainSort(data, last);
    -
    -        if (this.firstAttempt && (this.workDone > this.workLimit)) {
    -            randomiseBlock(data, last);
    -            this.workLimit = this.workDone = 0;
    -            this.firstAttempt = false;
    -            mainSort(data, last);
    -        }
    -
    -        final int[] fmap = data.fmap;
    -        data.origPtr = -1;
    -        for (int i = 0; i <= last; i++) {
    -            if (fmap[i] == 0) {
    -                data.origPtr = i;
    -                break;
    -            }
    -        }
    -
    -        // assert (data.origPtr != -1) : data.origPtr;
    -        return blockRandomised;
    -    }
    +    private static final int SMALL_THRESH = 20;
    +    private static final int DEPTH_THRESH = 10;
    +    private static final int WORK_FACTOR = 30;
     
         /**
          * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
    @@ -506,6 +505,9 @@ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
             }
         }
     
    +    private static final int SETMASK = (1 << 21);
    +    private static final int CLEARMASK = (~SETMASK);
    +
         private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
                               final int lastShadow) {
             final int[] runningOrder = this.mainSort_runningOrder;
    
ea31005111f0

Add some explanations, mark comments originally made by Julian Seward

https://github.com/apache/commons-compressStefan BodewigMay 20, 2012via ghsa
1 file changed · +97 12
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+97 12 modified
    @@ -22,26 +22,100 @@
      * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
      * BZip2CompressorOutputStream}.
      *
    + * <p>This class is based on a Java port of Julian Seward's
    + * blocksort.c in his libbzip2</p>
    + *
    + * <p>The Burrows-Wheeler transform is a reversible transform of the
    + * original data that is supposed to group similiar bytes close to
    + * each other.  The idea is to sort all permutations of the input and
    + * only keep the last byte of each permutation.  E.g. for "Commons
    + * Compress" you'd get:</p>
    + *
    + * <pre>
    + * Commons Compress
    + * CompressCommons 
    + * essCommons Compr
    + * mmons CompressCo
    + * mons CompressCom
    + * mpressCommons Co
    + * ns CompressCommo
    + * ommons CompressC
    + * ompressCommons C
    + * ons CompressComm
    + * pressCommons Com
    + * ressCommons Comp
    + * s CompressCommon
    + * sCommons Compres
    + * ssCommons Compre
    + * </pre>
    + *
    + * <p>Which results in a new text "s romooCCmmpnse", in adition the
    + * index of the first line that contained the original text is kept -
    + * in this case it is 0.  The idea is that in a long English text all
    + * permutations that start with "he" are likely suffixes of a "the" and
    + * thus they end in "t" leading to a larger block of "t"s that can
    + * better be compressed by the subsequent Move-to-Front, run-length
    + * und Huffman encoding steps.</p>
    + *
    + * <p>For more information see for example:</p>
    + * <ul>
    + *   <li><a
    + *   href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows,
    + *   M. and Wheeler, D.: A Block-sorting Lossless Data Compression
    + *   Algorithm</a></li>
    + *   <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and
    + *   Myers, G.: Suffix arrays: A new method for on-line string
    + *   searches</a></li>
    + *   <li><a
    + *   href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley,
    + *   J.L. and Sedgewick, R.: Fast Algorithms for Sorting and Searching
    + *   Strings</a></li>
    + * </ul>
    + *
      * @NotThreadSafe
      */
     class BlockSort {
     
    +    /*
    +     * Some of the constructs used in the C code cannot be ported
    +     * literally to Java - for example macros, unsigned types.  Some
    +     * code has been hand-tuned to improve performance.  In order to
    +     * avoid memory pressure some structures are reused for several
    +     * blocks and some memory is even shared between sorting and the
    +     * MTF stage even though either algorithm uses it for its own
    +     * purpose.
    +     *
    +     * Comments preserved from the actual C code are prefixed with
    +     * "LBZ2:".
    +     */
    +
    +    /*
    +     * 2012-05-20 Stefan Bodewig:
    +     *
    +     * The class seems to mix several revisions of libbzip2's code.
    +     * The mainSort function and those used by it look closer to the
    +     * 0.9.5 version but show some variations introduced later.  At
    +     * the same time the logic to randomize the block on bad input has
    +     * been dropped after 0.9.0 and replaced by a fallback sorting
    +     * algorithm.
    +     */
    +
         private static final int SETMASK = (1 << 21);
         private static final int CLEARMASK = (~SETMASK);
         private static final int SMALL_THRESH = 20;
         private static final int DEPTH_THRESH = 10;
         private static final int WORK_FACTOR = 30;
     
         /*
    -     * <p> If you are ever unlucky/improbable enough to get a stack
    +     * LBZ2: If you are ever unlucky/improbable enough to get a stack
          * overflow whilst sorting, increase the following constant and
          * try again. In practice I have never seen the stack go above 27
    -     * elems, so the following limit seems very generous.  </p>
    +     * elems, so the following limit seems very generous.
          */
         private static final int QSORT_STACK_SIZE = 1000;
     
    -    /**
    -     * Knuth's increments seem to work better than Incerpi-Sedgewick here.
    +    /*
    +     * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here.
          * Possibly because the number of elems to sort is usually small, typically
          * &lt;= 20.
          */
    @@ -80,6 +154,15 @@ class BlockSort {
             this.quadrant = data.sfmap;
         }
     
    +/*---------------------------------------------*/
    +/*--
    +   LBZ2: The following is an implementation of
    +   an elegant 3-way quicksort for strings,
    +   described in a paper "Fast Algorithms for
    +   Sorting and Searching Strings", by Robert
    +   Sedgewick and Jon L. Bentley.
    +--*/
    +
         /**
          * This is the most hammered method of this class.
          *
    @@ -435,7 +518,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
             final int workLimitShadow = this.workLimit;
             final boolean firstAttemptShadow = this.firstAttempt;
     
    -        // Set up the 2-byte frequency table
    +        // LBZ2: Set up the 2-byte frequency table
             for (int i = 65537; --i >= 0;) {
                 ftab[i] = 0;
             }
    @@ -453,7 +536,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
             }
             block[0] = block[lastShadow + 1];
     
    -        // Complete the initial radix sort:
    +        // LBZ2: Complete the initial radix sort:
     
             int c1 = block[0] & 0xff;
             for (int i = 0; i <= lastShadow; i++) {
    @@ -476,7 +559,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
             fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
     
             /*
    -         * Now ftab contains the first loc of every small bucket. Calculate the
    +         * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the
              * running order, from smallest to largest big bucket.
              */
             for (int i = 256; --i >= 0;) {
    @@ -504,17 +587,17 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
             }
     
             /*
    -         * The main sorting loop.
    +         * LBZ2: The main sorting loop.
              */
             for (int i = 0; i <= 255; i++) {
                 /*
    -             * Process big buckets, starting with the least full.
    +             * LBZ2: Process big buckets, starting with the least full.
                  */
                 final int ss = runningOrder[i];
     
                 // Step 1:
                 /*
    -             * Complete the big bucket [ss] by quicksorting any unsorted small
    +             * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small
                  * buckets [ss, j]. Hopefully previous pointer-scanning phases have
                  * already completed many of the small buckets [ss, j], so we don't
                  * have to sort them at all.
    @@ -537,7 +620,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
                 }
     
                 // Step 2:
    -            // Now scan this big bucket so as to synthesise the
    +            // LBZ2: Now scan this big bucket so as to synthesise the
                 // sorted order for small buckets [t, ss] for all t != ss.
     
                 for (int j = 0; j <= 255; j++) {
    @@ -559,7 +642,7 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
     
                 // Step 3:
                 /*
    -             * The ss big bucket is now done. Record this fact, and update the
    +             * LBZ2: The ss big bucket is now done. Record this fact, and update the
                  * quadrant descriptors. Remember to update quadrants in the
                  * overshoot area too, if necessary. The "if (i < 255)" test merely
                  * skips this updating for the last bucket processed, since updating
    @@ -589,6 +672,8 @@ private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
             }
         }
     
    +/*---------------------------------------------*/
    +
         private void randomiseBlock(final BZip2CompressorOutputStream.Data data,
                                     final int lastShadow) {
             final boolean[] inUse = data.inUse;
    
020c03d8ef57

verify my understanding of the code

https://github.com/apache/commons-compressStefan BodewigMay 12, 2012via ghsa
2 files changed · +85 1
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+4 1 modified
    @@ -1321,7 +1321,10 @@ static final class Data extends Object {
             // ============
     
             /**
    -         * Index in fmap[] of original string after sorting.
    +         * Index of original line in Burrows-Wheeler table.
    +         *
    +         * <p>This is the index in fmap that points to the last byte
    +         * of the original data.</p>
              */
             int origPtr;
     
    
  • src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java+81 0 added
    @@ -0,0 +1,81 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.commons.compress.compressors.bzip2;
    +
    +import org.junit.Test;
    +
    +import static org.junit.Assert.assertArrayEquals;
    +import static org.junit.Assert.assertEquals;
    +import static org.junit.Assert.assertFalse;
    +
    +public class BlockSortTest {
    +
    +    private static final byte[] FIXTURE = { 0, 1, (byte) 252, (byte) 253, (byte) 255,
    +                                            (byte) 254, 3, 2, (byte) 128 };
    +
    +    /*
    +      Burrows-Wheeler transform of fixture the manual way:
    +
    +      * build the matrix
    +
    +      0, 1, 252, 253, 255, 254, 3, 2, 128
    +      1, 252, 253, 255, 254, 3, 2, 128, 0
    +      252, 253, 255, 254, 3, 2, 128, 0, 1
    +      253, 255, 254, 3, 2, 128, 0, 1, 252
    +      255, 254, 3, 2, 128, 0, 1, 252, 253
    +      254, 3, 2, 128, 0, 1, 252, 253, 255
    +      3, 2, 128, 0, 1, 252, 253, 255, 254
    +      2, 128, 0, 1, 252, 253, 255, 254, 3
    +      128, 0, 1, 252, 253, 255, 254, 3, 2
    +
    +      * sort it
    +
    +      0, 1, 252, 253, 255, 254, 3, 2, 128
    +      1, 252, 253, 255, 254, 3, 2, 128, 0
    +      2, 128, 0, 1, 252, 253, 255, 254, 3
    +      3, 2, 128, 0, 1, 252, 253, 255, 254
    +      128, 0, 1, 252, 253, 255, 254, 3, 2
    +      252, 253, 255, 254, 3, 2, 128, 0, 1
    +      253, 255, 254, 3, 2, 128, 0, 1, 252
    +      254, 3, 2, 128, 0, 1, 252, 253, 255
    +      255, 254, 3, 2, 128, 0, 1, 252, 253
    +
    +      * grab last column
    +
    +      128, 0, 3, 254, 2, 1, 252, 255, 253
    +
    +        and the original line has been 0
    +    */
    +
    +    private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, 
    +                                                (byte) 252, (byte) 255, (byte) 253 };
    +
    +    @Test
    +    public void testSortFixture() {
    +        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
    +        System.arraycopy(FIXTURE, 0, data.block, 1, FIXTURE.length);
    +        BlockSort s = new BlockSort(data);
    +        assertFalse(s.blockSort(data, FIXTURE.length - 1));
    +        assertEquals(FIXTURE[FIXTURE.length - 1], data.block[0]);
    +        for (int i = 0; i < FIXTURE.length; i++) {
    +            assertEquals(FIXTURE_BWT[i], data.block[data.fmap[i]]);
    +        }
    +        assertEquals(0, data.origPtr);
    +    }
    +}
    \ No newline at end of file
    
1ce57d976c4f

Add a few comments as earmarks for myself, I think this is what the code does. Two 'final's thrown in for good measure

https://github.com/apache/commons-compressStefan BodewigMay 3, 2012via ghsa
2 files changed · +36 6
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+3 2 modified
    @@ -19,7 +19,8 @@
     package org.apache.commons.compress.compressors.bzip2;
     
     /**
    - * Encapsulates the sorting algorithms needed by {@link BZip2CompressorOutputStream}.
    + * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
    + * BZip2CompressorOutputStream}.
      *
      * @NotThreadSafe
      */
    @@ -301,7 +302,7 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
                 mainSort(data, last);
             }
     
    -        int[] fmap = data.fmap;
    +        final int[] fmap = data.fmap;
             data.origPtr = -1;
             for (int i = 0; i <= last; i++) {
                 if (fmap[i] == 0) {
    
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+33 4 modified
    @@ -315,7 +315,7 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
     
         private int blockCRC;
         private int combinedCRC;
    -    private int allowableBlockSize;
    +    private final int allowableBlockSize;
     
         /**
          * All memory intensive stuff.
    @@ -391,6 +391,8 @@ public BZip2CompressorOutputStream(final OutputStream out,
             }
     
             this.blockSize100k = blockSize;
    +        /* 20 is just a paranoia constant */
    +        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
             this.out = out;
             init();
         }
    @@ -405,6 +407,19 @@ public void write(final int b) throws IOException {
             }
         }
     
    +    /**
    +     * Writes the current byte to the buffer, run-length encoding it
    +     * if it has been repeated at least four times (the first step
    +     * RLEs sequences of four identical bytes).
    +     *
    +     * <p>Flushes the current block before writing data if it is
    +     * full.</p>
    +     *
    +     * <p>"write to the buffer" means adding to data.buffer starting
    +     * two steps "after" this.last - initially starting at index 1
    +     * (not 0) - and updating this.last to point to the last index
    +     * written minus 1.</p>
    +     */
         private void writeRun() throws IOException {
             final int lastShadow = this.last;
     
    @@ -534,9 +549,6 @@ private void initBlock() {
             for (int i = 256; --i >= 0;) {
                 inUse[i] = false;
             }
    -
    -        /* 20 is just a paranoia constant */
    -        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
         }
     
         private void endBlock() throws IOException {
    @@ -632,6 +644,10 @@ public void write(final byte[] buf, int offs, final int len)
             }
         }
     
    +    /**
    +     * Keeps track of the last bytes written and implicitly performs
    +     * run-length encoding as the first step of the bzip2 algorithm.
    +     */
         private void write0(int b) throws IOException {
             if (this.currentChar != -1) {
                 b &= 0xff;
    @@ -1153,6 +1169,13 @@ private boolean blockSort() {
             return blockSorter.blockSort(data, last);
         }
     
    +    /*
    +     * Performs Move-To-Front on the Burrows-Wheeler transformed
    +     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
    +     * run-length-encoded form.
    +     *
    +     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
    +     */
         private void generateMTFValues() {
             final int lastShadow = this.last;
             final Data dataShadow = this.data;
    @@ -1259,6 +1282,7 @@ private void generateMTFValues() {
         static final class Data extends Object {
     
             // with blockSize 900k
    +        /* maps unsigned byte => "does it occur in block" */
             final boolean[] inUse = new boolean[256]; // 256 byte
             final byte[] unseqToSeq = new byte[256]; // 256 byte
             final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
    @@ -1284,7 +1308,12 @@ static final class Data extends Object {
             // ------------
             // 333408 byte
     
    +        /* holds the RLEd block of original data starting at index 1.
    +         * After sorting the last byte added to the buffer is at index
    +         * 0. */
             final byte[] block; // 900021 byte
    +        /* maps index in Burrows-Wheeler transformed block => index of
    +         * byte in original block */
             final int[] fmap; // 3600000 byte
             final char[] sfmap; // 3600000 byte
             // ------------
    
6e95697e7837

Move data that is used for sorting only to BlockSort class

https://github.com/apache/commons-compressStefan BodewigMay 1, 2012via ghsa
2 files changed · +35 28
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+31 10 modified
    @@ -37,7 +37,7 @@ class BlockSort {
          * try again. In practice I have never seen the stack go above 27
          * elems, so the following limit seems very generous.  </p>
          */
    -    static final int QSORT_STACK_SIZE = 1000;
    +    private static final int QSORT_STACK_SIZE = 1000;
     
         /**
          * Knuth's increments seem to work better than Incerpi-Sedgewick here.
    @@ -58,6 +58,27 @@ class BlockSort {
         private int workLimit;
         private boolean firstAttempt;
     
    +    private final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte
    +    private final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte
    +    private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
    +
    +    private final int[] mainSort_runningOrder = new int[256]; // 1024 byte
    +    private final int[] mainSort_copy = new int[256]; // 1024 byte
    +    private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
    +
    +    private final int[] ftab = new int[65537]; // 262148 byte
    +
    +    /**
    +     * Array instance identical to Data's sfmap, both are used only
    +     * temporarily and indepently, so we do not need to allocate
    +     * additional memory.
    +     */
    +    private final char[] quadrant;
    +
    +    BlockSort(final BZip2CompressorOutputStream.Data data) {
    +        this.quadrant = data.sfmap;
    +    }
    +
         /**
          * This is the most hammered method of this class.
          *
    @@ -82,7 +103,7 @@ private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow
             }
     
             final int[] fmap = dataShadow.fmap;
    -        final char[] quadrant = dataShadow.quadrant;
    +        final char[] quadrant = this.quadrant;
             final byte[] block = dataShadow.block;
             final int lastPlus1 = lastShadow + 1;
             final boolean firstAttemptShadow = this.firstAttempt;
    @@ -299,9 +320,9 @@ boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
         private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
                                 final int loSt, final int hiSt, final int dSt,
                                 final int last) {
    -        final int[] stack_ll = dataShadow.stack_ll;
    -        final int[] stack_hh = dataShadow.stack_hh;
    -        final int[] stack_dd = dataShadow.stack_dd;
    +        final int[] stack_ll = this.stack_ll;
    +        final int[] stack_hh = this.stack_hh;
    +        final int[] stack_dd = this.stack_dd;
             final int[] fmap = dataShadow.fmap;
             final byte[] block = dataShadow.block;
     
    @@ -403,13 +424,13 @@ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
     
         private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
                               final int lastShadow) {
    -        final int[] runningOrder = dataShadow.mainSort_runningOrder;
    -        final int[] copy = dataShadow.mainSort_copy;
    -        final boolean[] bigDone = dataShadow.mainSort_bigDone;
    -        final int[] ftab = dataShadow.ftab;
    +        final int[] runningOrder = this.mainSort_runningOrder;
    +        final int[] copy = this.mainSort_copy;
    +        final boolean[] bigDone = this.mainSort_bigDone;
    +        final int[] ftab = this.ftab;
             final byte[] block = dataShadow.block;
             final int[] fmap = dataShadow.fmap;
    -        final char[] quadrant = dataShadow.quadrant;
    +        final char[] quadrant = this.quadrant;
             final int workLimitShadow = this.workLimit;
             final boolean firstAttemptShadow = this.firstAttempt;
     
    
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+4 18 modified
    @@ -321,6 +321,7 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
          * All memory intensive stuff.
          */
         private Data data;
    +    private BlockSort blockSorter;
     
         private OutputStream out;
     
    @@ -480,6 +481,7 @@ public void finish() throws IOException {
                 } finally {
                     this.out = null;
                     this.data = null;
    +                this.blockSorter = null;
                 }
             }
         }
    @@ -512,6 +514,7 @@ private void init() throws IOException {
             bsPutUByte('Z');
     
             this.data = new Data(this.blockSize100k);
    +        this.blockSorter = new BlockSort(this.data);
     
             // huffmanised magic bytes
             bsPutUByte('h');
    @@ -1147,7 +1150,7 @@ private void moveToFrontCodeAndSend() throws IOException {
         }
     
         private boolean blockSort() {
    -        return new BlockSort().blockSort(data, last);
    +        return blockSorter.blockSort(data, last);
         }
     
         private void generateMTFValues() {
    @@ -1274,19 +1277,10 @@ static final class Data extends Object {
             final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
             final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
     
    -        final int[] stack_ll = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte
    -        final int[] stack_hh = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte
    -        final int[] stack_dd = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte
    -
    -        final int[] mainSort_runningOrder = new int[256]; // 1024 byte
    -        final int[] mainSort_copy = new int[256]; // 1024 byte
    -        final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
    -
             final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
             final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
             final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
     
    -        final int[] ftab = new int[65537]; // 262148 byte
             // ------------
             // 333408 byte
     
    @@ -1297,13 +1291,6 @@ static final class Data extends Object {
             // 8433529 byte
             // ============
     
    -        /**
    -         * Array instance identical to sfmap, both are used only
    -         * temporarily and indepently, so we do not need to allocate
    -         * additional memory.
    -         */
    -        final char[] quadrant;
    -
             /**
              * Index in fmap[] of original string after sorting.
              */
    @@ -1316,7 +1303,6 @@ static final class Data extends Object {
                 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
                 this.fmap = new int[n];
                 this.sfmap = new char[2 * n];
    -            this.quadrant = this.sfmap;
             }
     
         }
    
b06f7b41c936

split sorting out of BZip2CompressorOutputStream

https://github.com/apache/commons-compressStefan BodewigMay 1, 2012via ghsa
2 files changed · +613 579
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java+599 0 added
    @@ -0,0 +1,599 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package org.apache.commons.compress.compressors.bzip2;
    +
    +/**
    + * Encapsulates the sorting algorithms needed by {@link BZip2CompressorOutputStream}.
    + *
    + * @NotThreadSafe
    + */
    +class BlockSort {
    +
    +    private static final int SETMASK = (1 << 21);
    +    private static final int CLEARMASK = (~SETMASK);
    +    private static final int SMALL_THRESH = 20;
    +    private static final int DEPTH_THRESH = 10;
    +    private static final int WORK_FACTOR = 30;
    +
    +    /*
    +     * <p> If you are ever unlucky/improbable enough to get a stack
    +     * overflow whilst sorting, increase the following constant and
    +     * try again. In practice I have never seen the stack go above 27
    +     * elems, so the following limit seems very generous.  </p>
    +     */
    +    static final int QSORT_STACK_SIZE = 1000;
    +
    +    /**
    +     * Knuth's increments seem to work better than Incerpi-Sedgewick here.
    +     * Possibly because the number of elems to sort is usually small, typically
    +     * &lt;= 20.
    +     */
    +    private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    +                                        9841, 29524, 88573, 265720, 797161,
    +                                        2391484 };
    +
    +    private boolean blockRandomised;
    +
    +    /*
    +     * Used when sorting. If too many long comparisons happen, we stop sorting,
    +     * randomise the block slightly, and try again.
    +     */
    +    private int workDone;
    +    private int workLimit;
    +    private boolean firstAttempt;
    +
    +    /**
    +     * This is the most hammered method of this class.
    +     *
    +     * <p>
    +     * This is the version using unrolled loops. Normally I never use such ones
    +     * in Java code. The unrolling has shown a noticable performance improvement
    +     * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
    +     * JIT compiler of the vm.
    +     * </p>
    +     */
    +    private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow,
    +                                   final int lo, final int hi, final int d,
    +                                   final int lastShadow) {
    +        final int bigN = hi - lo + 1;
    +        if (bigN < 2) {
    +            return this.firstAttempt && (this.workDone > this.workLimit);
    +        }
    +
    +        int hp = 0;
    +        while (INCS[hp] < bigN) {
    +            hp++;
    +        }
    +
    +        final int[] fmap = dataShadow.fmap;
    +        final char[] quadrant = dataShadow.quadrant;
    +        final byte[] block = dataShadow.block;
    +        final int lastPlus1 = lastShadow + 1;
    +        final boolean firstAttemptShadow = this.firstAttempt;
    +        final int workLimitShadow = this.workLimit;
    +        int workDoneShadow = this.workDone;
    +
    +        // Following block contains unrolled code which could be shortened by
    +        // coding it in additional loops.
    +
    +        HP: while (--hp >= 0) {
    +            final int h = INCS[hp];
    +            final int mj = lo + h - 1;
    +
    +            for (int i = lo + h; i <= hi;) {
    +                // copy
    +                for (int k = 3; (i <= hi) && (--k >= 0); i++) {
    +                    final int v = fmap[i];
    +                    final int vd = v + d;
    +                    int j = i;
    +
    +                    // for (int a;
    +                    // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
    +                    // block, quadrant, lastShadow);
    +                    // j -= h) {
    +                    // fmap[j] = a;
    +                    // }
    +                    //
    +                    // unrolled version:
    +
    +                    // start inline mainGTU
    +                    boolean onceRunned = false;
    +                    int a = 0;
    +
    +                    HAMMER: while (true) {
    +                        if (onceRunned) {
    +                            fmap[j] = a;
    +                            if ((j -= h) <= mj) {
    +                                break HAMMER;
    +                            }
    +                        } else {
    +                            onceRunned = true;
    +                        }
    +
    +                        a = fmap[j - h];
    +                        int i1 = a + d;
    +                        int i2 = vd;
    +
    +                        // following could be done in a loop, but
    +                        // unrolled it for performance:
    +                        if (block[i1 + 1] == block[i2 + 1]) {
    +                            if (block[i1 + 2] == block[i2 + 2]) {
    +                                if (block[i1 + 3] == block[i2 + 3]) {
    +                                    if (block[i1 + 4] == block[i2 + 4]) {
    +                                        if (block[i1 + 5] == block[i2 + 5]) {
    +                                            if (block[(i1 += 6)] == block[(i2 += 6)]) {
    +                                                int x = lastShadow;
    +                                                X: while (x > 0) {
    +                                                    x -= 4;
    +
    +                                                    if (block[i1 + 1] == block[i2 + 1]) {
    +                                                        if (quadrant[i1] == quadrant[i2]) {
    +                                                            if (block[i1 + 2] == block[i2 + 2]) {
    +                                                                if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
    +                                                                    if (block[i1 + 3] == block[i2 + 3]) {
    +                                                                        if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
    +                                                                            if (block[i1 + 4] == block[i2 + 4]) {
    +                                                                                if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
    +                                                                                    if ((i1 += 4) >= lastPlus1) {
    +                                                                                        i1 -= lastPlus1;
    +                                                                                    }
    +                                                                                    if ((i2 += 4) >= lastPlus1) {
    +                                                                                        i2 -= lastPlus1;
    +                                                                                    }
    +                                                                                    workDoneShadow++;
    +                                                                                    continue X;
    +                                                                                } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) {
    +                                                                                    continue HAMMER;
    +                                                                                } else {
    +                                                                                    break HAMMER;
    +                                                                                }
    +                                                                            } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
    +                                                                                continue HAMMER;
    +                                                                            } else {
    +                                                                                break HAMMER;
    +                                                                            }
    +                                                                        } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) {
    +                                                                            continue HAMMER;
    +                                                                        } else {
    +                                                                            break HAMMER;
    +                                                                        }
    +                                                                    } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
    +                                                                        continue HAMMER;
    +                                                                    } else {
    +                                                                        break HAMMER;
    +                                                                    }
    +                                                                } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) {
    +                                                                    continue HAMMER;
    +                                                                } else {
    +                                                                    break HAMMER;
    +                                                                }
    +                                                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
    +                                                                continue HAMMER;
    +                                                            } else {
    +                                                                break HAMMER;
    +                                                            }
    +                                                        } else if ((quadrant[i1] > quadrant[i2])) {
    +                                                            continue HAMMER;
    +                                                        } else {
    +                                                            break HAMMER;
    +                                                        }
    +                                                    } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
    +                                                        continue HAMMER;
    +                                                    } else {
    +                                                        break HAMMER;
    +                                                    }
    +
    +                                                }
    +                                                break HAMMER;
    +                                            } // while x > 0
    +                                            else {
    +                                                if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
    +                                                    continue HAMMER;
    +                                                } else {
    +                                                    break HAMMER;
    +                                                }
    +                                            }
    +                                        } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
    +                                            continue HAMMER;
    +                                        } else {
    +                                            break HAMMER;
    +                                        }
    +                                    } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
    +                                        continue HAMMER;
    +                                    } else {
    +                                        break HAMMER;
    +                                    }
    +                                } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
    +                                    continue HAMMER;
    +                                } else {
    +                                    break HAMMER;
    +                                }
    +                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
    +                                continue HAMMER;
    +                            } else {
    +                                break HAMMER;
    +                            }
    +                        } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
    +                            continue HAMMER;
    +                        } else {
    +                            break HAMMER;
    +                        }
    +
    +                    } // HAMMER
    +                    // end inline mainGTU
    +
    +                    fmap[j] = v;
    +                }
    +
    +                if (firstAttemptShadow && (i <= hi)
    +                    && (workDoneShadow > workLimitShadow)) {
    +                    break HP;
    +                }
    +            }
    +        }
    +
    +        this.workDone = workDoneShadow;
    +        return firstAttemptShadow && (workDoneShadow > workLimitShadow);
    +    }
    +
    +    private static void vswap(int[] fmap, int p1, int p2, int n) {
    +        n += p1;
    +        while (p1 < n) {
    +            int t = fmap[p1];
    +            fmap[p1++] = fmap[p2];
    +            fmap[p2++] = t;
    +        }
    +    }
    +
    +    private static byte med3(byte a, byte b, byte c) {
    +        return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c
    +                                                        : a);
    +    }
    +
    +    boolean blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
    +        this.workLimit = WORK_FACTOR * last;
    +        this.workDone = 0;
    +        this.blockRandomised = false;
    +        this.firstAttempt = true;
    +        mainSort(data, last);
    +
    +        if (this.firstAttempt && (this.workDone > this.workLimit)) {
    +            randomiseBlock(data, last);
    +            this.workLimit = this.workDone = 0;
    +            this.firstAttempt = false;
    +            mainSort(data, last);
    +        }
    +
    +        int[] fmap = data.fmap;
    +        data.origPtr = -1;
    +        for (int i = 0; i <= last; i++) {
    +            if (fmap[i] == 0) {
    +                data.origPtr = i;
    +                break;
    +            }
    +        }
    +
    +        // assert (data.origPtr != -1) : data.origPtr;
    +        return blockRandomised;
    +    }
    +
    +    /**
    +     * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
    +     */
    +    private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
    +                            final int loSt, final int hiSt, final int dSt,
    +                            final int last) {
    +        final int[] stack_ll = dataShadow.stack_ll;
    +        final int[] stack_hh = dataShadow.stack_hh;
    +        final int[] stack_dd = dataShadow.stack_dd;
    +        final int[] fmap = dataShadow.fmap;
    +        final byte[] block = dataShadow.block;
    +
    +        stack_ll[0] = loSt;
    +        stack_hh[0] = hiSt;
    +        stack_dd[0] = dSt;
    +
    +        for (int sp = 1; --sp >= 0;) {
    +            final int lo = stack_ll[sp];
    +            final int hi = stack_hh[sp];
    +            final int d = stack_dd[sp];
    +
    +            if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
    +                if (mainSimpleSort(dataShadow, lo, hi, d, last)) {
    +                    return;
    +                }
    +            } else {
    +                final int d1 = d + 1;
    +                final int med = med3(block[fmap[lo] + d1],
    +                                     block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff;
    +
    +                int unLo = lo;
    +                int unHi = hi;
    +                int ltLo = lo;
    +                int gtHi = hi;
    +
    +                while (true) {
    +                    while (unLo <= unHi) {
    +                        final int n = (block[fmap[unLo] + d1] & 0xff)
    +                            - med;
    +                        if (n == 0) {
    +                            final int temp = fmap[unLo];
    +                            fmap[unLo++] = fmap[ltLo];
    +                            fmap[ltLo++] = temp;
    +                        } else if (n < 0) {
    +                            unLo++;
    +                        } else {
    +                            break;
    +                        }
    +                    }
    +
    +                    while (unLo <= unHi) {
    +                        final int n = (block[fmap[unHi] + d1] & 0xff)
    +                            - med;
    +                        if (n == 0) {
    +                            final int temp = fmap[unHi];
    +                            fmap[unHi--] = fmap[gtHi];
    +                            fmap[gtHi--] = temp;
    +                        } else if (n > 0) {
    +                            unHi--;
    +                        } else {
    +                            break;
    +                        }
    +                    }
    +
    +                    if (unLo <= unHi) {
    +                        final int temp = fmap[unLo];
    +                        fmap[unLo++] = fmap[unHi];
    +                        fmap[unHi--] = temp;
    +                    } else {
    +                        break;
    +                    }
    +                }
    +
    +                if (gtHi < ltLo) {
    +                    stack_ll[sp] = lo;
    +                    stack_hh[sp] = hi;
    +                    stack_dd[sp] = d1;
    +                    sp++;
    +                } else {
    +                    int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
    +                        : (unLo - ltLo);
    +                    vswap(fmap, lo, unLo - n, n);
    +                    int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
    +                        : (gtHi - unHi);
    +                    vswap(fmap, unLo, hi - m + 1, m);
    +
    +                    n = lo + unLo - ltLo - 1;
    +                    m = hi - (gtHi - unHi) + 1;
    +
    +                    stack_ll[sp] = lo;
    +                    stack_hh[sp] = n;
    +                    stack_dd[sp] = d;
    +                    sp++;
    +
    +                    stack_ll[sp] = n + 1;
    +                    stack_hh[sp] = m - 1;
    +                    stack_dd[sp] = d1;
    +                    sp++;
    +
    +                    stack_ll[sp] = m;
    +                    stack_hh[sp] = hi;
    +                    stack_dd[sp] = d;
    +                    sp++;
    +                }
    +            }
    +        }
    +    }
    +
    +    private void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
    +                          final int lastShadow) {
    +        final int[] runningOrder = dataShadow.mainSort_runningOrder;
    +        final int[] copy = dataShadow.mainSort_copy;
    +        final boolean[] bigDone = dataShadow.mainSort_bigDone;
    +        final int[] ftab = dataShadow.ftab;
    +        final byte[] block = dataShadow.block;
    +        final int[] fmap = dataShadow.fmap;
    +        final char[] quadrant = dataShadow.quadrant;
    +        final int workLimitShadow = this.workLimit;
    +        final boolean firstAttemptShadow = this.firstAttempt;
    +
    +        // Set up the 2-byte frequency table
    +        for (int i = 65537; --i >= 0;) {
    +            ftab[i] = 0;
    +        }
    +
    +        /*
    +         * In the various block-sized structures, live data runs from 0 to
    +         * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area
    +         * for block.
    +         */
    +        for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
    +            block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
    +        }
    +        for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) {
    +            quadrant[i] = 0;
    +        }
    +        block[0] = block[lastShadow + 1];
    +
    +        // Complete the initial radix sort:
    +
    +        int c1 = block[0] & 0xff;
    +        for (int i = 0; i <= lastShadow; i++) {
    +            final int c2 = block[i + 1] & 0xff;
    +            ftab[(c1 << 8) + c2]++;
    +            c1 = c2;
    +        }
    +
    +        for (int i = 1; i <= 65536; i++) {
    +            ftab[i] += ftab[i - 1];
    +        }
    +
    +        c1 = block[1] & 0xff;
    +        for (int i = 0; i < lastShadow; i++) {
    +            final int c2 = block[i + 2] & 0xff;
    +            fmap[--ftab[(c1 << 8) + c2]] = i;
    +            c1 = c2;
    +        }
    +
    +        fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
    +
    +        /*
    +         * Now ftab contains the first loc of every small bucket. Calculate the
    +         * running order, from smallest to largest big bucket.
    +         */
    +        for (int i = 256; --i >= 0;) {
    +            bigDone[i] = false;
    +            runningOrder[i] = i;
    +        }
    +
    +        for (int h = 364; h != 1;) {
    +            h /= 3;
    +            for (int i = h; i <= 255; i++) {
    +                final int vv = runningOrder[i];
    +                final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
    +                final int b = h - 1;
    +                int j = i;
    +                for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j
    +                                                                                                                - h]) {
    +                    runningOrder[j] = ro;
    +                    j -= h;
    +                    if (j <= b) {
    +                        break;
    +                    }
    +                }
    +                runningOrder[j] = vv;
    +            }
    +        }
    +
    +        /*
    +         * The main sorting loop.
    +         */
    +        for (int i = 0; i <= 255; i++) {
    +            /*
    +             * Process big buckets, starting with the least full.
    +             */
    +            final int ss = runningOrder[i];
    +
    +            // Step 1:
    +            /*
    +             * Complete the big bucket [ss] by quicksorting any unsorted small
    +             * buckets [ss, j]. Hopefully previous pointer-scanning phases have
    +             * already completed many of the small buckets [ss, j], so we don't
    +             * have to sort them at all.
    +             */
    +            for (int j = 0; j <= 255; j++) {
    +                final int sb = (ss << 8) + j;
    +                final int ftab_sb = ftab[sb];
    +                if ((ftab_sb & SETMASK) != SETMASK) {
    +                    final int lo = ftab_sb & CLEARMASK;
    +                    final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
    +                    if (hi > lo) {
    +                        mainQSort3(dataShadow, lo, hi, 2, lastShadow);
    +                        if (firstAttemptShadow
    +                            && (this.workDone > workLimitShadow)) {
    +                            return;
    +                        }
    +                    }
    +                    ftab[sb] = ftab_sb | SETMASK;
    +                }
    +            }
    +
    +            // Step 2:
    +            // Now scan this big bucket so as to synthesise the
    +            // sorted order for small buckets [t, ss] for all t != ss.
    +
    +            for (int j = 0; j <= 255; j++) {
    +                copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
    +            }
    +
    +            for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) {
    +                final int fmap_j = fmap[j];
    +                c1 = block[fmap_j] & 0xff;
    +                if (!bigDone[c1]) {
    +                    fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1);
    +                    copy[c1]++;
    +                }
    +            }
    +
    +            for (int j = 256; --j >= 0;) {
    +                ftab[(j << 8) + ss] |= SETMASK;
    +            }
    +
    +            // Step 3:
    +            /*
    +             * The ss big bucket is now done. Record this fact, and update the
    +             * quadrant descriptors. Remember to update quadrants in the
    +             * overshoot area too, if necessary. The "if (i < 255)" test merely
    +             * skips this updating for the last bucket processed, since updating
    +             * for the last bucket is pointless.
    +             */
    +            bigDone[ss] = true;
    +
    +            if (i < 255) {
    +                final int bbStart = ftab[ss << 8] & CLEARMASK;
    +                final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
    +                int shifts = 0;
    +
    +                while ((bbSize >> shifts) > 65534) {
    +                    shifts++;
    +                }
    +
    +                for (int j = 0; j < bbSize; j++) {
    +                    final int a2update = fmap[bbStart + j];
    +                    final char qVal = (char) (j >> shifts);
    +                    quadrant[a2update] = qVal;
    +                    if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
    +                        quadrant[a2update + lastShadow + 1] = qVal;
    +                    }
    +                }
    +            }
    +
    +        }
    +    }
    +
    +    private void randomiseBlock(final BZip2CompressorOutputStream.Data data,
    +                                final int lastShadow) {
    +        final boolean[] inUse = data.inUse;
    +        final byte[] block = data.block;
    +
    +        for (int i = 256; --i >= 0;) {
    +            inUse[i] = false;
    +        }
    +
    +        int rNToGo = 0;
    +        int rTPos = 0;
    +        for (int i = 0, j = 1; i <= lastShadow; i = j, j++) {
    +            if (rNToGo == 0) {
    +                rNToGo = (char) Rand.rNums(rTPos);
    +                if (++rTPos == 512) {
    +                    rTPos = 0;
    +                }
    +            }
    +
    +            rNToGo--;
    +            block[j] ^= ((rNToGo == 1) ? 1 : 0);
    +
    +            // handle 16 bit signed numbers
    +            inUse[block[j] & 0xff] = true;
    +        }
    +
    +        this.blockRandomised = true;
    +    }
    +
    +}
    
  • src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java+14 579 modified
    @@ -137,30 +137,8 @@ public class BZip2CompressorOutputStream extends CompressorOutputStream
          */
         public static final int MAX_BLOCKSIZE = 9;
     
    -    private static final int SETMASK = (1 << 21);
    -    private static final int CLEARMASK = (~SETMASK);
         private static final int GREATER_ICOST = 15;
         private static final int LESSER_ICOST = 0;
    -    private static final int SMALL_THRESH = 20;
    -    private static final int DEPTH_THRESH = 10;
    -    private static final int WORK_FACTOR = 30;
    -
    -    /*
    -     * <p> If you are ever unlucky/improbable enough to get a stack
    -     * overflow whilst sorting, increase the following constant and
    -     * try again. In practice I have never seen the stack go above 27
    -     * elems, so the following limit seems very generous.  </p>
    -     */
    -    private static final int QSORT_STACK_SIZE = 1000;
    -
    -    /**
    -     * Knuth's increments seem to work better than Incerpi-Sedgewick here.
    -     * Possibly because the number of elems to sort is usually small, typically
    -     * &lt;= 20.
    -     */
    -    private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
    -                                        9841, 29524, 88573, 265720, 797161,
    -                                        2391484 };
     
         private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
                                               final Data dat, final int alphaSize,
    @@ -318,19 +296,12 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
          */
         private int last;
     
    -    /**
    -     * Index in fmap[] of original string after sorting.
    -     */
    -    private int origPtr;
    -
         /**
          * Always: in the range 0 .. 9. The current block size is 100000 * this
          * number.
          */
         private final int blockSize100k;
     
    -    private boolean blockRandomised;
    -
         private int bsBuff;
         private int bsLive;
         private final CRC crc = new CRC();
    @@ -339,14 +310,6 @@ private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
     
         private int nMTF;
     
    -    /*
    -     * Used when sorting. If too many long comparisons happen, we stop sorting,
    -     * randomise the block slightly, and try again.
    -     */
    -    private int workDone;
    -    private int workLimit;
    -    private boolean firstAttempt;
    -
         private int currentChar = -1;
         private int runLength = 0;
     
    @@ -584,7 +547,7 @@ private void endBlock() throws IOException {
             }
     
             /* sort the block and establish posn of original string */
    -        blockSort();
    +        final boolean blockRandomised = blockSort();
     
             /*
              * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
    @@ -608,7 +571,7 @@ private void endBlock() throws IOException {
             bsPutInt(this.blockCRC);
     
             /* Now a single bit indicating randomisation. */
    -        if (this.blockRandomised) {
    +        if (blockRandomised) {
                 bsW(1, 1);
             } else {
                 bsW(1, 0);
    @@ -1178,546 +1141,13 @@ private void sendMTFValues7() throws IOException {
         }
     
         private void moveToFrontCodeAndSend() throws IOException {
    -        bsW(24, this.origPtr);
    +        bsW(24, this.data.origPtr);
             generateMTFValues();
             sendMTFValues();
         }
     
    -    /**
    -     * This is the most hammered method of this class.
    -     *
    -     * <p>
    -     * This is the version using unrolled loops. Normally I never use such ones
    -     * in Java code. The unrolling has shown a noticable performance improvement
    -     * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
    -     * JIT compiler of the vm.
    -     * </p>
    -     */
    -    private boolean mainSimpleSort(final Data dataShadow, final int lo,
    -                                   final int hi, final int d) {
    -        final int bigN = hi - lo + 1;
    -        if (bigN < 2) {
    -            return this.firstAttempt && (this.workDone > this.workLimit);
    -        }
    -
    -        int hp = 0;
    -        while (INCS[hp] < bigN) {
    -            hp++;
    -        }
    -
    -        final int[] fmap = dataShadow.fmap;
    -        final char[] quadrant = dataShadow.quadrant;
    -        final byte[] block = dataShadow.block;
    -        final int lastShadow = this.last;
    -        final int lastPlus1 = lastShadow + 1;
    -        final boolean firstAttemptShadow = this.firstAttempt;
    -        final int workLimitShadow = this.workLimit;
    -        int workDoneShadow = this.workDone;
    -
    -        // Following block contains unrolled code which could be shortened by
    -        // coding it in additional loops.
    -
    -        HP: while (--hp >= 0) {
    -            final int h = INCS[hp];
    -            final int mj = lo + h - 1;
    -
    -            for (int i = lo + h; i <= hi;) {
    -                // copy
    -                for (int k = 3; (i <= hi) && (--k >= 0); i++) {
    -                    final int v = fmap[i];
    -                    final int vd = v + d;
    -                    int j = i;
    -
    -                    // for (int a;
    -                    // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
    -                    // block, quadrant, lastShadow);
    -                    // j -= h) {
    -                    // fmap[j] = a;
    -                    // }
    -                    //
    -                    // unrolled version:
    -
    -                    // start inline mainGTU
    -                    boolean onceRunned = false;
    -                    int a = 0;
    -
    -                    HAMMER: while (true) {
    -                        if (onceRunned) {
    -                            fmap[j] = a;
    -                            if ((j -= h) <= mj) {
    -                                break HAMMER;
    -                            }
    -                        } else {
    -                            onceRunned = true;
    -                        }
    -
    -                        a = fmap[j - h];
    -                        int i1 = a + d;
    -                        int i2 = vd;
    -
    -                        // following could be done in a loop, but
    -                        // unrolled it for performance:
    -                        if (block[i1 + 1] == block[i2 + 1]) {
    -                            if (block[i1 + 2] == block[i2 + 2]) {
    -                                if (block[i1 + 3] == block[i2 + 3]) {
    -                                    if (block[i1 + 4] == block[i2 + 4]) {
    -                                        if (block[i1 + 5] == block[i2 + 5]) {
    -                                            if (block[(i1 += 6)] == block[(i2 += 6)]) {
    -                                                int x = lastShadow;
    -                                                X: while (x > 0) {
    -                                                    x -= 4;
    -
    -                                                    if (block[i1 + 1] == block[i2 + 1]) {
    -                                                        if (quadrant[i1] == quadrant[i2]) {
    -                                                            if (block[i1 + 2] == block[i2 + 2]) {
    -                                                                if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
    -                                                                    if (block[i1 + 3] == block[i2 + 3]) {
    -                                                                        if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
    -                                                                            if (block[i1 + 4] == block[i2 + 4]) {
    -                                                                                if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
    -                                                                                    if ((i1 += 4) >= lastPlus1) {
    -                                                                                        i1 -= lastPlus1;
    -                                                                                    }
    -                                                                                    if ((i2 += 4) >= lastPlus1) {
    -                                                                                        i2 -= lastPlus1;
    -                                                                                    }
    -                                                                                    workDoneShadow++;
    -                                                                                    continue X;
    -                                                                                } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) {
    -                                                                                    continue HAMMER;
    -                                                                                } else {
    -                                                                                    break HAMMER;
    -                                                                                }
    -                                                                            } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
    -                                                                                continue HAMMER;
    -                                                                            } else {
    -                                                                                break HAMMER;
    -                                                                            }
    -                                                                        } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) {
    -                                                                            continue HAMMER;
    -                                                                        } else {
    -                                                                            break HAMMER;
    -                                                                        }
    -                                                                    } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
    -                                                                        continue HAMMER;
    -                                                                    } else {
    -                                                                        break HAMMER;
    -                                                                    }
    -                                                                } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) {
    -                                                                    continue HAMMER;
    -                                                                } else {
    -                                                                    break HAMMER;
    -                                                                }
    -                                                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
    -                                                                continue HAMMER;
    -                                                            } else {
    -                                                                break HAMMER;
    -                                                            }
    -                                                        } else if ((quadrant[i1] > quadrant[i2])) {
    -                                                            continue HAMMER;
    -                                                        } else {
    -                                                            break HAMMER;
    -                                                        }
    -                                                    } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
    -                                                        continue HAMMER;
    -                                                    } else {
    -                                                        break HAMMER;
    -                                                    }
    -
    -                                                }
    -                                                break HAMMER;
    -                                            } // while x > 0
    -                                            else {
    -                                                if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
    -                                                    continue HAMMER;
    -                                                } else {
    -                                                    break HAMMER;
    -                                                }
    -                                            }
    -                                        } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
    -                                            continue HAMMER;
    -                                        } else {
    -                                            break HAMMER;
    -                                        }
    -                                    } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
    -                                        continue HAMMER;
    -                                    } else {
    -                                        break HAMMER;
    -                                    }
    -                                } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
    -                                    continue HAMMER;
    -                                } else {
    -                                    break HAMMER;
    -                                }
    -                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
    -                                continue HAMMER;
    -                            } else {
    -                                break HAMMER;
    -                            }
    -                        } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
    -                            continue HAMMER;
    -                        } else {
    -                            break HAMMER;
    -                        }
    -
    -                    } // HAMMER
    -                    // end inline mainGTU
    -
    -                    fmap[j] = v;
    -                }
    -
    -                if (firstAttemptShadow && (i <= hi)
    -                    && (workDoneShadow > workLimitShadow)) {
    -                    break HP;
    -                }
    -            }
    -        }
    -
    -        this.workDone = workDoneShadow;
    -        return firstAttemptShadow && (workDoneShadow > workLimitShadow);
    -    }
    -
    -    private static void vswap(int[] fmap, int p1, int p2, int n) {
    -        n += p1;
    -        while (p1 < n) {
    -            int t = fmap[p1];
    -            fmap[p1++] = fmap[p2];
    -            fmap[p2++] = t;
    -        }
    -    }
    -
    -    private static byte med3(byte a, byte b, byte c) {
    -        return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c
    -                                                        : a);
    -    }
    -
    -    private void blockSort() {
    -        this.workLimit = WORK_FACTOR * this.last;
    -        this.workDone = 0;
    -        this.blockRandomised = false;
    -        this.firstAttempt = true;
    -        mainSort();
    -
    -        if (this.firstAttempt && (this.workDone > this.workLimit)) {
    -            randomiseBlock();
    -            this.workLimit = this.workDone = 0;
    -            this.firstAttempt = false;
    -            mainSort();
    -        }
    -
    -        int[] fmap = this.data.fmap;
    -        this.origPtr = -1;
    -        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
    -            if (fmap[i] == 0) {
    -                this.origPtr = i;
    -                break;
    -            }
    -        }
    -
    -        // assert (this.origPtr != -1) : this.origPtr;
    -    }
    -
    -    /**
    -     * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
    -     */
    -    private void mainQSort3(final Data dataShadow, final int loSt,
    -                            final int hiSt, final int dSt) {
    -        final int[] stack_ll = dataShadow.stack_ll;
    -        final int[] stack_hh = dataShadow.stack_hh;
    -        final int[] stack_dd = dataShadow.stack_dd;
    -        final int[] fmap = dataShadow.fmap;
    -        final byte[] block = dataShadow.block;
    -
    -        stack_ll[0] = loSt;
    -        stack_hh[0] = hiSt;
    -        stack_dd[0] = dSt;
    -
    -        for (int sp = 1; --sp >= 0;) {
    -            final int lo = stack_ll[sp];
    -            final int hi = stack_hh[sp];
    -            final int d = stack_dd[sp];
    -
    -            if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
    -                if (mainSimpleSort(dataShadow, lo, hi, d)) {
    -                    return;
    -                }
    -            } else {
    -                final int d1 = d + 1;
    -                final int med = med3(block[fmap[lo] + d1],
    -                                     block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff;
    -
    -                int unLo = lo;
    -                int unHi = hi;
    -                int ltLo = lo;
    -                int gtHi = hi;
    -
    -                while (true) {
    -                    while (unLo <= unHi) {
    -                        final int n = (block[fmap[unLo] + d1] & 0xff)
    -                            - med;
    -                        if (n == 0) {
    -                            final int temp = fmap[unLo];
    -                            fmap[unLo++] = fmap[ltLo];
    -                            fmap[ltLo++] = temp;
    -                        } else if (n < 0) {
    -                            unLo++;
    -                        } else {
    -                            break;
    -                        }
    -                    }
    -
    -                    while (unLo <= unHi) {
    -                        final int n = (block[fmap[unHi] + d1] & 0xff)
    -                            - med;
    -                        if (n == 0) {
    -                            final int temp = fmap[unHi];
    -                            fmap[unHi--] = fmap[gtHi];
    -                            fmap[gtHi--] = temp;
    -                        } else if (n > 0) {
    -                            unHi--;
    -                        } else {
    -                            break;
    -                        }
    -                    }
    -
    -                    if (unLo <= unHi) {
    -                        final int temp = fmap[unLo];
    -                        fmap[unLo++] = fmap[unHi];
    -                        fmap[unHi--] = temp;
    -                    } else {
    -                        break;
    -                    }
    -                }
    -
    -                if (gtHi < ltLo) {
    -                    stack_ll[sp] = lo;
    -                    stack_hh[sp] = hi;
    -                    stack_dd[sp] = d1;
    -                    sp++;
    -                } else {
    -                    int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
    -                        : (unLo - ltLo);
    -                    vswap(fmap, lo, unLo - n, n);
    -                    int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
    -                        : (gtHi - unHi);
    -                    vswap(fmap, unLo, hi - m + 1, m);
    -
    -                    n = lo + unLo - ltLo - 1;
    -                    m = hi - (gtHi - unHi) + 1;
    -
    -                    stack_ll[sp] = lo;
    -                    stack_hh[sp] = n;
    -                    stack_dd[sp] = d;
    -                    sp++;
    -
    -                    stack_ll[sp] = n + 1;
    -                    stack_hh[sp] = m - 1;
    -                    stack_dd[sp] = d1;
    -                    sp++;
    -
    -                    stack_ll[sp] = m;
    -                    stack_hh[sp] = hi;
    -                    stack_dd[sp] = d;
    -                    sp++;
    -                }
    -            }
    -        }
    -    }
    -
    -    private void mainSort() {
    -        final Data dataShadow = this.data;
    -        final int[] runningOrder = dataShadow.mainSort_runningOrder;
    -        final int[] copy = dataShadow.mainSort_copy;
    -        final boolean[] bigDone = dataShadow.mainSort_bigDone;
    -        final int[] ftab = dataShadow.ftab;
    -        final byte[] block = dataShadow.block;
    -        final int[] fmap = dataShadow.fmap;
    -        final char[] quadrant = dataShadow.quadrant;
    -        final int lastShadow = this.last;
    -        final int workLimitShadow = this.workLimit;
    -        final boolean firstAttemptShadow = this.firstAttempt;
    -
    -        // Set up the 2-byte frequency table
    -        for (int i = 65537; --i >= 0;) {
    -            ftab[i] = 0;
    -        }
    -
    -        /*
    -         * In the various block-sized structures, live data runs from 0 to
    -         * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area
    -         * for block.
    -         */
    -        for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
    -            block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
    -        }
    -        for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) {
    -            quadrant[i] = 0;
    -        }
    -        block[0] = block[lastShadow + 1];
    -
    -        // Complete the initial radix sort:
    -
    -        int c1 = block[0] & 0xff;
    -        for (int i = 0; i <= lastShadow; i++) {
    -            final int c2 = block[i + 1] & 0xff;
    -            ftab[(c1 << 8) + c2]++;
    -            c1 = c2;
    -        }
    -
    -        for (int i = 1; i <= 65536; i++) {
    -            ftab[i] += ftab[i - 1];
    -        }
    -
    -        c1 = block[1] & 0xff;
    -        for (int i = 0; i < lastShadow; i++) {
    -            final int c2 = block[i + 2] & 0xff;
    -            fmap[--ftab[(c1 << 8) + c2]] = i;
    -            c1 = c2;
    -        }
    -
    -        fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
    -
    -        /*
    -         * Now ftab contains the first loc of every small bucket. Calculate the
    -         * running order, from smallest to largest big bucket.
    -         */
    -        for (int i = 256; --i >= 0;) {
    -            bigDone[i] = false;
    -            runningOrder[i] = i;
    -        }
    -
    -        for (int h = 364; h != 1;) {
    -            h /= 3;
    -            for (int i = h; i <= 255; i++) {
    -                final int vv = runningOrder[i];
    -                final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
    -                final int b = h - 1;
    -                int j = i;
    -                for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j
    -                                                                                                                - h]) {
    -                    runningOrder[j] = ro;
    -                    j -= h;
    -                    if (j <= b) {
    -                        break;
    -                    }
    -                }
    -                runningOrder[j] = vv;
    -            }
    -        }
    -
    -        /*
    -         * The main sorting loop.
    -         */
    -        for (int i = 0; i <= 255; i++) {
    -            /*
    -             * Process big buckets, starting with the least full.
    -             */
    -            final int ss = runningOrder[i];
    -
    -            // Step 1:
    -            /*
    -             * Complete the big bucket [ss] by quicksorting any unsorted small
    -             * buckets [ss, j]. Hopefully previous pointer-scanning phases have
    -             * already completed many of the small buckets [ss, j], so we don't
    -             * have to sort them at all.
    -             */
    -            for (int j = 0; j <= 255; j++) {
    -                final int sb = (ss << 8) + j;
    -                final int ftab_sb = ftab[sb];
    -                if ((ftab_sb & SETMASK) != SETMASK) {
    -                    final int lo = ftab_sb & CLEARMASK;
    -                    final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
    -                    if (hi > lo) {
    -                        mainQSort3(dataShadow, lo, hi, 2);
    -                        if (firstAttemptShadow
    -                            && (this.workDone > workLimitShadow)) {
    -                            return;
    -                        }
    -                    }
    -                    ftab[sb] = ftab_sb | SETMASK;
    -                }
    -            }
    -
    -            // Step 2:
    -            // Now scan this big bucket so as to synthesise the
    -            // sorted order for small buckets [t, ss] for all t != ss.
    -
    -            for (int j = 0; j <= 255; j++) {
    -                copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
    -            }
    -
    -            for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) {
    -                final int fmap_j = fmap[j];
    -                c1 = block[fmap_j] & 0xff;
    -                if (!bigDone[c1]) {
    -                    fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1);
    -                    copy[c1]++;
    -                }
    -            }
    -
    -            for (int j = 256; --j >= 0;) {
    -                ftab[(j << 8) + ss] |= SETMASK;
    -            }
    -
    -            // Step 3:
    -            /*
    -             * The ss big bucket is now done. Record this fact, and update the
    -             * quadrant descriptors. Remember to update quadrants in the
    -             * overshoot area too, if necessary. The "if (i < 255)" test merely
    -             * skips this updating for the last bucket processed, since updating
    -             * for the last bucket is pointless.
    -             */
    -            bigDone[ss] = true;
    -
    -            if (i < 255) {
    -                final int bbStart = ftab[ss << 8] & CLEARMASK;
    -                final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
    -                int shifts = 0;
    -
    -                while ((bbSize >> shifts) > 65534) {
    -                    shifts++;
    -                }
    -
    -                for (int j = 0; j < bbSize; j++) {
    -                    final int a2update = fmap[bbStart + j];
    -                    final char qVal = (char) (j >> shifts);
    -                    quadrant[a2update] = qVal;
    -                    if (a2update < NUM_OVERSHOOT_BYTES) {
    -                        quadrant[a2update + lastShadow + 1] = qVal;
    -                    }
    -                }
    -            }
    -
    -        }
    -    }
    -
    -    private void randomiseBlock() {
    -        final boolean[] inUse = this.data.inUse;
    -        final byte[] block = this.data.block;
    -        final int lastShadow = this.last;
    -
    -        for (int i = 256; --i >= 0;) {
    -            inUse[i] = false;
    -        }
    -
    -        int rNToGo = 0;
    -        int rTPos = 0;
    -        for (int i = 0, j = 1; i <= lastShadow; i = j, j++) {
    -            if (rNToGo == 0) {
    -                rNToGo = (char) Rand.rNums(rTPos);
    -                if (++rTPos == 512) {
    -                    rTPos = 0;
    -                }
    -            }
    -
    -            rNToGo--;
    -            block[j] ^= ((rNToGo == 1) ? 1 : 0);
    -
    -            // handle 16 bit signed numbers
    -            inUse[block[j] & 0xff] = true;
    -        }
    -
    -        this.blockRandomised = true;
    +    private boolean blockSort() {
    +        return new BlockSort().blockSort(data, last);
         }
     
         private void generateMTFValues() {
    @@ -1823,7 +1253,7 @@ private void generateMTFValues() {
             this.nMTF = wr + 1;
         }
     
    -    private static final class Data extends Object {
    +    static final class Data extends Object {
     
             // with blockSize 900k
             final boolean[] inUse = new boolean[256]; // 256 byte
    @@ -1844,9 +1274,9 @@ private static final class Data extends Object {
             final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
             final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
     
    -        final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte
    -        final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte
    -        final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
    +        final int[] stack_ll = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte
    +        final int[] stack_hh = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte
    +        final int[] stack_dd = new int[BlockSort.QSORT_STACK_SIZE]; // 4000 byte
     
             final int[] mainSort_runningOrder = new int[256]; // 1024 byte
             final int[] mainSort_copy = new int[256]; // 1024 byte
    @@ -1874,6 +1304,11 @@ private static final class Data extends Object {
              */
             final char[] quadrant;
     
    +        /**
    +         * Index in fmap[] of original string after sorting.
    +         */
    +        int origPtr;
    +
             Data(int blockSize100k) {
                 super();
     
    

Vulnerability mechanics

Generated on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

37

News mentions

0

No linked articles in our index yet.