nextcloud/3rdparty/granite/git/object/packed.php

305 lines
10 KiB
PHP
Raw Normal View History

<?php
/**
* Packed - provides a 'packed object' object
*
* PHP version 5.3
*
* @category Git
* @package Granite
* @author Craig Roberts <craig0990@googlemail.com>
* @license http://www.opensource.org/licenses/mit-license.php MIT Expat License
* @link http://craig0990.github.com/Granite/
*/
namespace Granite\Git\Object;
use \UnexpectedValueException as UnexpectedValueException;
/**
* Packed represents a packed object in the Git repository
*
* @category Git
* @package Granite
* @author Craig Roberts <craig0990@googlemail.com>
* @license http://www.opensource.org/licenses/mit-license.php MIT Expat License
* @link http://craig0990.github.com/Granite/
*/
class Packed extends Raw
{
    /**
     * The name of the packfile being read
     */
    private $_packfile;

    /**
     * Added to the object size to make a 'best-guess' effort at how much compressed
     * data to read - should be reimplemented, ideally with streams.
     */
    const OBJ_PADDING = 512;

    /**
     * Reads the object data from the compressed data at $offset in $packfile
     *
     * @param string $packfile The path to the packfile
     * @param int    $offset   The offset of the object data
     *
     * @throws UnexpectedValueException If the packfile cannot be opened or the
     *                                  object cannot be decoded
     */
    public function __construct($packfile, $offset)
    {
        $this->_packfile = $packfile;

        list($this->type, $this->size, $this->content)
            = $this->_readPackedObject($offset);
    }

    /**
     * Opens the packfile, reads the object whose header starts at $offset and
     * closes the packfile again.
     *
     * @param int $offset Offset of the object header
     *
     * @return array Containing the type, size and object data
     * @throws UnexpectedValueException If the packfile cannot be opened or the
     *                                  object cannot be decoded
     */
    private function _readPackedObject($offset)
    {
        $file = fopen($this->_packfile, 'rb');
        if ($file === false) {
            throw new UnexpectedValueException(
                "Unable to open packfile {$this->_packfile}"
            );
        }

        // No `finally` here: the file targets PHP 5.3, so the handle is closed
        // explicitly on both the exception path and the normal path.
        try {
            $object = $this->_readObjectFromHandle($file, $offset);
        } catch (\Exception $e) {
            fclose($file);
            throw $e;
        }
        fclose($file);

        return $object;
    }

    /**
     * Reads the object whose header starts at $offset from an open packfile
     * handle, resolving deltified objects via `_unpackDeltified()`.
     *
     * @param handle $file   File handle to read
     * @param int    $offset Offset of the object header
     *
     * @return array Containing the type, size and object data
     */
    private function _readObjectFromHandle($file, $offset)
    {
        // Read the type and uncompressed size from the object header
        list($type, $size) = $this->_readHeader($file, $offset);
        $object_offset = ftell($file);

        if ($type == self::OBJ_OFS_DELTA || $type == self::OBJ_REF_DELTA) {
            return $this->_unpackDeltified(
                $file, $offset, $object_offset, $type, $size
            );
        }

        $content = $this->_inflate(
            fread($file, $size + self::OBJ_PADDING), $size
        );

        return array($type, $size, $content);
    }

    /**
     * Decompresses zlib data, converting a failed decompression into an
     * exception instead of passing `false` on as object content.
     *
     * @param string|bool $data The compressed data (false if the read failed)
     * @param int         $size The expected size of the uncompressed data
     *
     * @return string The uncompressed data
     * @throws UnexpectedValueException If the data cannot be decompressed
     */
    private function _inflate($data, $size)
    {
        $inflated = ($data === false) ? false : gzuncompress($data, $size);
        if ($inflated === false) {
            throw new UnexpectedValueException(
                "Unable to decompress object data in packfile {$this->_packfile}"
            );
        }

        return $inflated;
    }

    /**
     * Reads a single byte from $file and returns its ordinal value.
     *
     * @param handle $file File handle to read
     *
     * @return int The byte value (0-255)
     * @throws UnexpectedValueException If the end of the file has been reached
     */
    private function _readByte($file)
    {
        $char = fgetc($file);
        if ($char === false) {
            throw new UnexpectedValueException(
                "Unexpected end of packfile {$this->_packfile}"
            );
        }

        return ord($char);
    }

    /**
     * Reads a packed object header, returning the type and the size. For more
     * detailed information, refer to the @see tag.
     *
     * From the @see tag: "Each byte is really 7 bits of data, with the first bit
     * being used to say if that hunk is the last one or not before the data starts.
     * If the first bit is a 1, you will read another byte, otherwise the data starts
     * next. The first 3 bits in the first byte specifies the type of data..."
     *
     * @param handle $file   File handle to read
     * @param int    $offset Offset of the object header
     *
     * @return array Containing the type and the size
     * @throws UnexpectedValueException If $offset cannot be reached or the header
     *                                  is truncated
     * @see http://book.git-scm.com/7_the_packfile.html
     */
    private function _readHeader($file, $offset)
    {
        if (fseek($file, $offset) !== 0) {
            throw new UnexpectedValueException(
                "Unable to seek to offset $offset in packfile {$this->_packfile}"
            );
        }

        // Read the object header byte-by-byte
        $byte = $this->_readByte($file);

        /**
         * Bit-shift right by four, then ignore the first bit with a bitwise AND
         * This gives us the object type in binary:
         * 001 commit self::OBJ_COMMIT
         * 010 tree self::OBJ_TREE
         * 011 blob self::OBJ_BLOB
         * 100 tag self::OBJ_TAG
         * 110 offset delta self::OBJ_OFS_DELTA
         * 111 ref delta self::OBJ_REF_DELTA
         *
         * (000 is undefined, 101 is not currently in use)
         * See http://book.git-scm.com/7_the_packfile.html for details
         */
        $type = ($byte >> 4) & 0x07;

        // The low four bits of the first byte are the lowest bits of the size
        $size = $byte & 0x0F;

        /**
         * $shift starts at four because the first byte contributed four size
         * bits; every following byte contributes seven (its high bit is only a
         * "more bytes follow" flag), so $shift grows by seven per byte read.
         */
        for ($shift = 4; $byte & 0x80; $shift += 7) {
            $byte = $this->_readByte($file);
            $size |= (($byte & 0x7F) << $shift);
        }

        return array($type, $size);
    }

    /**
     * Unpacks a deltified object located at $offset in $file
     *
     * @param handle $file          File handle to read
     * @param int    $offset        Offset of the object data
     * @param int    $object_offset Offset of the object data, past the header
     * @param int    $type          The object type, either OBJ_REF_DELTA
     *                              or OBJ_OFS_DELTA
     * @param int    $size          The expected size of the uncompressed data
     *
     * @return array Containing the type, size and object data
     * @throws UnexpectedValueException If $type is not a delta type, or the delta
     *                                  or its base cannot be read
     */
    private function _unpackDeltified($file, $offset, $object_offset, $type, $size)
    {
        fseek($file, $object_offset);

        if ($type == self::OBJ_REF_DELTA) {
            // The base object is named by the 20 raw SHA-1 bytes that follow
            $base_sha = bin2hex(fread($file, 20));

            // The repository path is everything up to and including the
            // character after the first '.git' in the packfile path.
            $marker = strpos($this->_packfile, '.git');
            if ($marker === false) {
                throw new UnexpectedValueException(
                    "Unable to locate the repository path in {$this->_packfile}"
                );
            }
            $path = substr($this->_packfile, 0, $marker + 5);

            $base = Raw::factory($path, $base_sha);
            $type = $base->type();
            $base = $base->content();

            $delta = $this->_inflate(
                fread($file, $size + self::OBJ_PADDING), $size
            );
            $content = $this->_applyDelta($base, $delta);
        } elseif ($type == self::OBJ_OFS_DELTA) {
            // 20 = maximum varint size according to Glip
            $data = fread($file, $size + self::OBJ_PADDING + 20);
            if ($data === false || $data === '') {
                throw new UnexpectedValueException(
                    "Unable to read delta data from packfile {$this->_packfile}"
                );
            }

            list($base_offset, $length) = $this->_bigEndianNumber($data);
            $delta = $this->_inflate(substr($data, $length), $size);
            unset($data);

            // The encoded number counts backwards from this object's header
            $base_offset = $offset - $base_offset;
            list($type, $size, $base) = $this->_readPackedObject($base_offset);
            $content = $this->_applyDelta($base, $delta);
        } else {
            throw new UnexpectedValueException(
                "Unknown type $type for deltified object"
            );
        }

        return array($type, strlen($content), $content);
    }

    /**
     * Applies the $delta byte-sequence to $base and returns the
     * resultant binary string.
     *
     * This code is modified from Grit (see below), the Ruby
     * implementation used for GitHub under an MIT license.
     *
     * @param string $base  The base string for the delta to be applied to
     * @param string $delta The delta string to apply
     *
     * @return string The patched binary string
     * @throws UnexpectedValueException If the base size or the resulting size
     *                                  does not match the delta header
     * @see
     * https://github.com/mojombo/grit/blob/master/lib/grit/git-ruby/internal/pack.rb
     */
    private function _applyDelta($base, $delta)
    {
        // The delta starts with two varints: base size, then result size
        $pos = 0;
        $src_size = $this->_varint($delta, $pos);
        $dst_size = $this->_varint($delta, $pos);

        if ($src_size !== strlen($base)) {
            throw new UnexpectedValueException(
                'Expected base delta size ' . strlen($base) . ' does not match the expected '
                . "value $src_size"
            );
        }

        $dest = "";
        $delta_size = strlen($delta);
        while ($pos < $delta_size) {
            $byte = ord($delta[$pos++]);
            if ($byte & 0x80) {
                /* copy a part of $base: the low seven bits of $byte flag which
                   offset bytes (0x01-0x08) and length bytes (0x10-0x40) follow */
                $offset = 0;
                if ($byte & 0x01) {
                    $offset = ord($delta[$pos++]);
                }
                if ($byte & 0x02) {
                    $offset |= ord($delta[$pos++]) << 8;
                }
                if ($byte & 0x04) {
                    $offset |= ord($delta[$pos++]) << 16;
                }
                if ($byte & 0x08) {
                    $offset |= ord($delta[$pos++]) << 24;
                }

                $length = 0;
                if ($byte & 0x10) {
                    $length = ord($delta[$pos++]);
                }
                if ($byte & 0x20) {
                    $length |= ord($delta[$pos++]) << 8;
                }
                if ($byte & 0x40) {
                    $length |= ord($delta[$pos++]) << 16;
                }
                // An encoded length of zero stands for 0x10000 bytes
                if ($length == 0) {
                    $length = 0x10000;
                }

                $dest .= substr($base, $offset, $length);
            } else {
                /* take the next $byte bytes as they are */
                $dest .= substr($delta, $pos, $byte);
                $pos += $byte;
            }
        }

        if (strlen($dest) !== $dst_size) {
            throw new UnexpectedValueException(
                "Deltified string expected to be $dst_size bytes, but actually "
                . strlen($dest) . ' bytes'
            );
        }

        return $dest;
    }

    /**
     * Parse a Git varint (variable-length integer). Used in the `_applyDelta()`
     * method to read the delta header.
     *
     * Each byte carries seven value bits, least significant group first; a set
     * high bit means another byte follows.
     *
     * @param string $string The string to parse
     * @param int    &$pos   The position in the string to read from; advanced
     *                       past the bytes consumed
     *
     * @return int The integer value
     */
    private function _varint($string, &$pos = 0)
    {
        $varint = 0;
        $bitmask = 0x80;
        for ($i = 0; $bitmask & 0x80; $i += 7) {
            $bitmask = ord($string[$pos++]);
            $varint |= (($bitmask & 0x7F) << $i);
        }

        return $varint;
    }

    /**
     * Decodes a big endian modified base 128 number (refer to @see tag); this only
     * appears to be used in one place, the offset delta in packfiles. The offset
     * is the number of bytes to seek back from the start of the delta object to find
     * the base object.
     *
     * This code has been implemented using the C code given in the @see tag below.
     *
     * @param string &$data The data to read from and decode the number
     *
     * @return Array Containing the base offset (number of bytes to seek back) and
     *               the length to use when reading the delta
     * @see http://git.rsbx.net/Documents/Git_Data_Formats.txt
     */
    private function _bigEndianNumber(&$data)
    {
        $i = 0;
        $byte = ord($data[$i++]);
        $number = $byte & 0x7F;
        while ($byte & 0x80) {
            $byte = ord($data[$i++]);
            // The "+ 1" is part of the encoding: each continuation adds one
            // before shifting, as in the referenced C implementation.
            $number = (($number + 1) << 7) | ($byte & 0x7F);
        }

        return array($number, $i);
    }
}