#pragma once

#define INLINE inline

#define BITS_PER_INT 32

INLINE int GetBitForBitnum(int bitNum)
{
	static int bitsForBitnum[] = {
		(1 << 0),  (1 << 1),  (1 << 2),	 (1 << 3),	(1 << 4),  (1 << 5),  (1 << 6),	 (1 << 7),	(1 << 8),  (1 << 9),  (1 << 10),
		(1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16), (1 << 17), (1 << 18), (1 << 19), (1 << 20), (1 << 21),
		(1 << 22), (1 << 23), (1 << 24), (1 << 25), (1 << 26), (1 << 27), (1 << 28), (1 << 29), (1 << 30), (1 << 31),
	};

	return bitsForBitnum[(bitNum) & (BITS_PER_INT - 1)];
}

#undef BITS_PER_INT

using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using uptr = uintptr_t;

using i8 = int8_t;
using i16 = int16_t;
using i32 = int32_t;
using i64 = int64_t;
using iptr = intptr_t;

// Endianess, don't use on PPC64 nor ARM64BE
#define LittleDWord(val) (val)

static INLINE void StoreLittleDWord(u32* base, size_t dwordIndex, u32 dword)
{
	base[dwordIndex] = LittleDWord(dword);
}

static INLINE u32 LoadLittleDWord(u32* base, size_t dwordIndex)
{
	return LittleDWord(base[dwordIndex]);
}

#include <algorithm>

static inline const u32 s_nMaskTable[33] = {
	0,
	(1 << 1) - 1,
	(1 << 2) - 1,
	(1 << 3) - 1,
	(1 << 4) - 1,
	(1 << 5) - 1,
	(1 << 6) - 1,
	(1 << 7) - 1,
	(1 << 8) - 1,
	(1 << 9) - 1,
	(1 << 10) - 1,
	(1 << 11) - 1,
	(1 << 12) - 1,
	(1 << 13) - 1,
	(1 << 14) - 1,
	(1 << 15) - 1,
	(1 << 16) - 1,
	(1 << 17) - 1,
	(1 << 18) - 1,
	(1 << 19) - 1,
	(1 << 20) - 1,
	(1 << 21) - 1,
	(1 << 22) - 1,
	(1 << 23) - 1,
	(1 << 24) - 1,
	(1 << 25) - 1,
	(1 << 26) - 1,
	(1 << 27) - 1,
	(1 << 28) - 1,
	(1 << 29) - 1,
	(1 << 30) - 1,
	0x7fffffff,
	0xffffffff,
};

enum EBitCoordType
{
	kCW_None,
	kCW_LowPrecision,
	kCW_Integral
};

class BitBufferBase
{
  protected:
	INLINE void SetName(const char* name)
	{
		m_BufferName = name;
	}

  public:
	INLINE bool IsOverflowed()
	{
		return m_Overflow;
	}
	INLINE void SetOverflowed()
	{
		m_Overflow = true;
	}

	INLINE const char* GetName()
	{
		return m_BufferName;
	}

  private:
	const char* m_BufferName = "";

  protected:
	u8 m_Overflow = false;
};

class BFRead : public BitBufferBase
{
  public:
	BFRead() = default;

	INLINE BFRead(uptr data, size_t byteLength, size_t startPos = 0, const char* bufferName = 0)
	{
		StartReading(data, byteLength, startPos);

		if (bufferName)
			SetName(bufferName);
	}

  public:
	INLINE void StartReading(uptr data, size_t byteLength, size_t startPos = 0)
	{
		m_Data = reinterpret_cast<u32 const*>(data);
		m_DataIn = m_Data;

		m_DataBytes = byteLength;
		m_DataBits = byteLength << 3;

		m_DataEnd = reinterpret_cast<u32 const*>(reinterpret_cast<u8 const*>(m_Data) + m_DataBytes);

		Seek(startPos);
	}

	INLINE void GrabNextDWord(bool overflow = false)
	{
		if (m_Data == m_DataEnd)
		{
			m_CachedBitsLeft = 1;
			m_CachedBufWord = 0;

			m_DataIn++;

			if (overflow)
				SetOverflowed();
		}
		else
		{
			if (m_DataIn > m_DataEnd)
			{
				SetOverflowed();
				m_CachedBufWord = 0;
			}
			else
			{
				m_CachedBufWord = LittleDWord(*(m_DataIn++));
			}
		}
	}

	INLINE void FetchNext()
	{
		m_CachedBitsLeft = 32;
		GrabNextDWord(false);
	}

	INLINE i32 ReadOneBit()
	{
		i32 ret = m_CachedBufWord & 1;

		if (--m_CachedBitsLeft == 0)
			FetchNext();
		else
			m_CachedBufWord >>= 1;

		return ret;
	}

	INLINE u32 ReadUBitLong(i32 numBits)
	{
		if (m_CachedBitsLeft >= numBits)
		{
			u32 ret = m_CachedBufWord & s_nMaskTable[numBits];

			m_CachedBitsLeft -= numBits;

			if (m_CachedBitsLeft)
				m_CachedBufWord >>= numBits;
			else
				FetchNext();

			return ret;
		}
		else
		{
			// need to merge words
			u32 ret = m_CachedBufWord;
			numBits -= m_CachedBitsLeft;

			GrabNextDWord(true);

			if (IsOverflowed())
				return 0;

			ret |= ((m_CachedBufWord & s_nMaskTable[numBits]) << m_CachedBitsLeft);

			m_CachedBitsLeft = 32 - numBits;
			m_CachedBufWord >>= numBits;

			return ret;
		}
	}

	INLINE i32 ReadSBitLong(int numBits)
	{
		i32 ret = ReadUBitLong(numBits);
		return (ret << (32 - numBits)) >> (32 - numBits);
	}

	INLINE u32 ReadUBitVar()
	{
		u32 ret = ReadUBitLong(6);

		switch (ret & (16 | 32))
		{
		case 16:
			ret = (ret & 15) | (ReadUBitLong(4) << 4);
			// Assert(ret >= 16);
			break;
		case 32:
			ret = (ret & 15) | (ReadUBitLong(8) << 4);
			// Assert(ret >= 256);
			break;
		case 48:
			ret = (ret & 15) | (ReadUBitLong(32 - 4) << 4);
			// Assert(ret >= 4096);
			break;
		}

		return ret;
	}

	INLINE u32 PeekUBitLong(i32 numBits)
	{
		i32 nSaveBA = m_CachedBitsLeft;
		i32 nSaveW = m_CachedBufWord;
		u32 const* pSaveP = m_DataIn;
		u32 nRet = ReadUBitLong(numBits);

		m_CachedBitsLeft = nSaveBA;
		m_CachedBufWord = nSaveW;
		m_DataIn = pSaveP;

		return nRet;
	}

	INLINE float ReadBitFloat()
	{
		u32 value = ReadUBitLong(32);
		return *reinterpret_cast<float*>(&value);
	}

	/*INLINE float ReadBitCoord() {
		i32   intval = 0, fractval = 0, signbit = 0;
		float value = 0.0;

		// Read the required integer and fraction flags
		intval = ReadOneBit();
		fractval = ReadOneBit();

		// If we got either parse them, otherwise it's a zero.
		if (intval || fractval) {
			// Read the sign bit
			signbit = ReadOneBit();

			// If there's an integer, read it in
			if (intval) {
				// Adjust the integers from [0..MAX_COORD_VALUE-1] to [1..MAX_COORD_VALUE]
				intval = ReadUBitLong(COORD_INTEGER_BITS) + 1;
			}

			// If there's a fraction, read it in
			if (fractval) {
				fractval = ReadUBitLong(COORD_FRACTIONAL_BITS);
			}

			// Calculate the correct floating point value
			value = intval + ((float)fractval * COORD_RESOLUTION);

			// Fixup the sign if negative.
			if (signbit)
				value = -value;
		}

		return value;
	}

	INLINE float ReadBitCoordMP() {
		i32   intval = 0, fractval = 0, signbit = 0;
		float value = 0.0;

		bool inBounds = ReadOneBit() ? true : false;

		// Read the required integer and fraction flags
		intval = ReadOneBit();

		// If we got either parse them, otherwise it's a zero.
		if (intval) {
			// Read the sign bit
			signbit = ReadOneBit();

			// If there's an integer, read it in
			// Adjust the integers from [0..MAX_COORD_VALUE-1] to [1..MAX_COORD_VALUE]
			if (inBounds)
				value = ReadUBitLong(COORD_INTEGER_BITS_MP) + 1;
			else
				value = ReadUBitLong(COORD_INTEGER_BITS) + 1;
		}

		// Fixup the sign if negative.
		if (signbit)
			value = -value;

		return value;
	}

	INLINE float ReadBitCellCoord(int bits, EBitCoordType coordType) {
		bool bIntegral = (coordType == kCW_Integral);
		bool bLowPrecision = (coordType == kCW_LowPrecision);

		int   intval = 0, fractval = 0;
		float value = 0.0;

		if (bIntegral)
			value = ReadUBitLong(bits);
		else {
			intval = ReadUBitLong(bits);

			// If there's a fraction, read it in
			fractval = ReadUBitLong(bLowPrecision ? COORD_FRACTIONAL_BITS_MP_LOWPRECISION : COORD_FRACTIONAL_BITS);

			// Calculate the correct floating point value
			value = intval + ((float)fractval * (bLowPrecision ? COORD_RESOLUTION_LOWPRECISION : COORD_RESOLUTION));
		}

		return value;
	}

	INLINE float ReadBitNormal() {
		// Read the sign bit
		i32 signbit = ReadOneBit();

		// Read the fractional part
		u32 fractval = ReadUBitLong(NORMAL_FRACTIONAL_BITS);

		// Calculate the correct floating point value
		float value = (float)fractval * NORMAL_RESOLUTION;

		// Fixup the sign if negative.
		if (signbit)
			value = -value;

		return value;
	}

	INLINE void ReadBitVec3Coord(Vector& fa) {
		i32 xflag, yflag, zflag;

		// This vector must be initialized! Otherwise, If any of the flags aren't set,
		// the corresponding component will not be read and will be stack garbage.
		fa.Init(0, 0, 0);

		xflag = ReadOneBit();
		yflag = ReadOneBit();
		zflag = ReadOneBit();

		if (xflag)
			fa[0] = ReadBitCoord();
		if (yflag)
			fa[1] = ReadBitCoord();
		if (zflag)
			fa[2] = ReadBitCoord();
	}

	INLINE void ReadBitVec3Normal(Vector& fa) {
		i32 xflag = ReadOneBit();
		i32 yflag = ReadOneBit();

		if (xflag)
			fa[0] = ReadBitNormal();
		else
			fa[0] = 0.0f;

		if (yflag)
			fa[1] = ReadBitNormal();
		else
			fa[1] = 0.0f;

		// The first two imply the third (but not its sign)
		i32 znegative = ReadOneBit();

		float fafafbfb = fa[0] * fa[0] + fa[1] * fa[1];
		if (fafafbfb < 1.0f)
			fa[2] = sqrt(1.0f - fafafbfb);
		else
			fa[2] = 0.0f;

		if (znegative)
			fa[2] = -fa[2];
	}

	INLINE void ReadBitAngles(QAngle& fa) {
		Vector tmp;
		ReadBitVec3Coord(tmp);
		fa.Init(tmp.x, tmp.y, tmp.z);
	}*/

	INLINE float ReadBitAngle(int numBits)
	{
		float shift = (float)(GetBitForBitnum(numBits));

		i32 i = ReadUBitLong(numBits);
		float fReturn = (float)i * (360.0 / shift);

		return fReturn;
	}

	INLINE i32 ReadChar()
	{
		return ReadSBitLong(sizeof(char) << 3);
	}
	INLINE u32 ReadByte()
	{
		return ReadUBitLong(sizeof(unsigned char) << 3);
	}

	INLINE i32 ReadShort()
	{
		return ReadSBitLong(sizeof(short) << 3);
	}
	INLINE u32 ReadWord()
	{
		return ReadUBitLong(sizeof(unsigned short) << 3);
	}

	INLINE i32 ReadLong()
	{
		return (i32)(ReadUBitLong(sizeof(i32) << 3));
	}
	INLINE float ReadFloat()
	{
		u32 temp = ReadUBitLong(sizeof(float) << 3);
		return *reinterpret_cast<float*>(&temp);
	}

	INLINE u32 ReadVarInt32()
	{
		constexpr int kMaxVarint32Bytes = 5;

		u32 result = 0;
		int count = 0;
		u32 b;

		do
		{
			if (count == kMaxVarint32Bytes)
				return result;

			b = ReadUBitLong(8);
			result |= (b & 0x7F) << (7 * count);
			++count;
		} while (b & 0x80);

		return result;
	}

	INLINE u64 ReadVarInt64()
	{
		constexpr int kMaxVarintBytes = 10;

		u64 result = 0;
		int count = 0;
		u64 b;

		do
		{
			if (count == kMaxVarintBytes)
				return result;

			b = ReadUBitLong(8);
			result |= static_cast<u64>(b & 0x7F) << (7 * count);
			++count;
		} while (b & 0x80);

		return result;
	}

	INLINE void ReadBits(uptr outData, u32 bitLength)
	{
		u8* out = reinterpret_cast<u8*>(outData);
		int bitsLeft = bitLength;

		// align output to dword boundary
		while (((uptr)out & 3) != 0 && bitsLeft >= 8)
		{
			*out = (unsigned char)ReadUBitLong(8);
			++out;
			bitsLeft -= 8;
		}

		// read dwords
		while (bitsLeft >= 32)
		{
			*((u32*)out) = ReadUBitLong(32);
			out += sizeof(u32);
			bitsLeft -= 32;
		}

		// read remaining bytes
		while (bitsLeft >= 8)
		{
			*out = ReadUBitLong(8);
			++out;
			bitsLeft -= 8;
		}

		// read remaining bits
		if (bitsLeft)
			*out = ReadUBitLong(bitsLeft);
	}

	INLINE bool ReadBytes(uptr outData, u32 byteLength)
	{
		ReadBits(outData, byteLength << 3);
		return !IsOverflowed();
	}

	INLINE bool ReadString(char* str, i32 maxLength, bool stopAtLineTermination = false, i32* outNumChars = 0)
	{
		bool tooSmall = false;
		int iChar = 0;

		while (1)
		{
			char val = ReadChar();

			if (val == 0)
				break;
			else if (stopAtLineTermination && val == '\n')
				break;

			if (iChar < (maxLength - 1))
			{
				str[iChar] = val;
				++iChar;
			}
			else
			{
				tooSmall = true;
			}
		}

		// Make sure it's null-terminated.
		// Assert(iChar < maxLength);
		str[iChar] = 0;

		if (outNumChars)
			*outNumChars = iChar;

		return !IsOverflowed() && !tooSmall;
	}

	INLINE char* ReadAndAllocateString(bool* hasOverflowed = 0)
	{
		char str[2048];

		int chars = 0;
		bool overflowed = !ReadString(str, sizeof(str), false, &chars);

		if (hasOverflowed)
			*hasOverflowed = overflowed;

		// Now copy into the output and return it;
		char* ret = new char[chars + 1];
		for (u32 i = 0; i <= chars; i++)
			ret[i] = str[i];

		return ret;
	}

	INLINE i64 ReadLongLong()
	{
		i64 retval;
		u32* longs = (u32*)&retval;

		// Read the two DWORDs according to network endian
		const short endianIndex = 0x0100;
		u8* idx = (u8*)&endianIndex;

		longs[*idx++] = ReadUBitLong(sizeof(i32) << 3);
		longs[*idx] = ReadUBitLong(sizeof(i32) << 3);

		return retval;
	}

	INLINE bool Seek(size_t startPos)
	{
		bool bSucc = true;

		if (startPos < 0 || startPos > m_DataBits)
		{
			SetOverflowed();
			bSucc = false;
			startPos = m_DataBits;
		}

		// non-multiple-of-4 bytes at head of buffer. We put the "round off"
		// at the head to make reading and detecting the end efficient.
		int nHead = m_DataBytes & 3;

		int posBytes = startPos / 8;
		if ((m_DataBytes < 4) || (nHead && (posBytes < nHead)))
		{
			// partial first dword
			u8 const* partial = (u8 const*)m_Data;

			if (m_Data)
			{
				m_CachedBufWord = *(partial++);
				if (nHead > 1)
					m_CachedBufWord |= (*partial++) << 8;
				if (nHead > 2)
					m_CachedBufWord |= (*partial++) << 16;
			}

			m_DataIn = (u32 const*)partial;

			m_CachedBufWord >>= (startPos & 31);
			m_CachedBitsLeft = (nHead << 3) - (startPos & 31);
		}
		else
		{
			int adjustedPos = startPos - (nHead << 3);

			m_DataIn = reinterpret_cast<u32 const*>(reinterpret_cast<u8 const*>(m_Data) + ((adjustedPos / 32) << 2) + nHead);

			if (m_Data)
			{
				m_CachedBitsLeft = 32;
				GrabNextDWord();
			}
			else
			{
				m_CachedBufWord = 0;
				m_CachedBitsLeft = 1;
			}

			m_CachedBufWord >>= (adjustedPos & 31);
			m_CachedBitsLeft = std::min(m_CachedBitsLeft, u32(32 - (adjustedPos & 31))); // in case grabnextdword overflowed
		}

		return bSucc;
	}

	INLINE size_t GetNumBitsRead()
	{
		if (!m_Data)
			return 0;

		size_t nCurOfs = size_t(((iptr(m_DataIn) - iptr(m_Data)) / 4) - 1);
		nCurOfs *= 32;
		nCurOfs += (32 - m_CachedBitsLeft);

		size_t nAdjust = 8 * (m_DataBytes & 3);
		return std::min(nCurOfs + nAdjust, m_DataBits);
	}

	INLINE bool SeekRelative(size_t offset)
	{
		return Seek(GetNumBitsRead() + offset);
	}

	INLINE size_t TotalBytesAvailable()
	{
		return m_DataBytes;
	}

	INLINE size_t GetNumBitsLeft()
	{
		return m_DataBits - GetNumBitsRead();
	}
	INLINE size_t GetNumBytesLeft()
	{
		return GetNumBitsLeft() >> 3;
	}

  private:
	size_t m_DataBits; // 0x0010
	size_t m_DataBytes; // 0x0018

	u32 m_CachedBufWord; // 0x0020
	u32 m_CachedBitsLeft; // 0x0024

	const u32* m_DataIn; // 0x0028
	const u32* m_DataEnd; // 0x0030
	const u32* m_Data; // 0x0038
};

class BFWrite : public BitBufferBase
{
  public:
	BFWrite() = default;

	INLINE BFWrite(uptr data, size_t byteLength, const char* bufferName = 0)
	{
		StartWriting(data, byteLength);

		if (bufferName)
			SetName(bufferName);
	}

  public:
	INLINE void StartWriting(uptr data, size_t byteLength)
	{
		m_Data = reinterpret_cast<u32*>(data);
		m_DataOut = m_Data;

		m_DataBytes = byteLength;
		m_DataBits = byteLength << 3;

		m_DataEnd = reinterpret_cast<u32*>(reinterpret_cast<u8*>(m_Data) + m_DataBytes);
	}

	INLINE int GetNumBitsLeft()
	{
		return m_OutBitsLeft + (32 * (m_DataEnd - m_DataOut - 1));
	}

	INLINE void Reset()
	{
		m_Overflow = false;
		m_OutBufWord = 0;
		m_OutBitsLeft = 32;
		m_DataOut = m_Data;
	}

	INLINE void TempFlush()
	{
		if (m_OutBitsLeft != 32)
		{
			if (m_DataOut == m_DataEnd)
				SetOverflowed();
			else
				StoreLittleDWord(m_DataOut, 0, LoadLittleDWord(m_DataOut, 0) & ~s_nMaskTable[32 - m_OutBitsLeft] | m_OutBufWord);
		}

		m_Flushed = true;
	}

	INLINE u8* GetBasePointer()
	{
		TempFlush();
		return reinterpret_cast<u8*>(m_Data);
	}

	INLINE u8* GetData()
	{
		return GetBasePointer();
	}

	INLINE void Finish()
	{
		if (m_OutBitsLeft != 32)
		{
			if (m_DataOut == m_DataEnd)
				SetOverflowed();

			StoreLittleDWord(m_DataOut, 0, m_OutBufWord);
		}
	}

	INLINE void FlushNoCheck()
	{
		StoreLittleDWord(m_DataOut++, 0, m_OutBufWord);

		m_OutBitsLeft = 32;
		m_OutBufWord = 0;
	}

	INLINE void Flush()
	{
		if (m_DataOut == m_DataEnd)
			SetOverflowed();
		else
			StoreLittleDWord(m_DataOut++, 0, m_OutBufWord);

		m_OutBitsLeft = 32;
		m_OutBufWord = 0;
	}

	INLINE void WriteOneBitNoCheck(i32 value)
	{
		m_OutBufWord |= (value & 1) << (32 - m_OutBitsLeft);

		if (--m_OutBitsLeft == 0)
			FlushNoCheck();
	}

	INLINE void WriteOneBit(i32 value)
	{
		m_OutBufWord |= (value & 1) << (32 - m_OutBitsLeft);

		if (--m_OutBitsLeft == 0)
			Flush();
	}

	INLINE void WriteUBitLong(u32 data, i32 numBits, bool checkRange = true)
	{
		if (numBits <= m_OutBitsLeft)
		{
			if (checkRange)
				m_OutBufWord |= (data) << (32 - m_OutBitsLeft);
			else
				m_OutBufWord |= (data & s_nMaskTable[numBits]) << (32 - m_OutBitsLeft);

			m_OutBitsLeft -= numBits;

			if (m_OutBitsLeft == 0)
				Flush();
		}
		else
		{
			// split dwords case
			i32 overflowBits = (numBits - m_OutBitsLeft);
			m_OutBufWord |= (data & s_nMaskTable[m_OutBitsLeft]) << (32 - m_OutBitsLeft);
			Flush();
			m_OutBufWord = (data >> (numBits - overflowBits));
			m_OutBitsLeft = 32 - overflowBits;
		}
	}

	INLINE void WriteSBitLong(i32 data, i32 numBits)
	{
		WriteUBitLong((u32)data, numBits, false);
	}

	INLINE void WriteUBitVar(u32 n)
	{
		if (n < 16)
			WriteUBitLong(n, 6);
		else if (n < 256)
			WriteUBitLong((n & 15) | 16 | ((n & (128 | 64 | 32 | 16)) << 2), 10);
		else if (n < 4096)
			WriteUBitLong((n & 15) | 32 | ((n & (2048 | 1024 | 512 | 256 | 128 | 64 | 32 | 16)) << 2), 14);
		else
		{
			WriteUBitLong((n & 15) | 48, 6);
			WriteUBitLong((n >> 4), 32 - 4);
		}
	}

	INLINE void WriteBitFloat(float value)
	{
		auto temp = &value;
		WriteUBitLong(*reinterpret_cast<u32*>(temp), 32);
	}

	INLINE void WriteFloat(float value)
	{
		auto temp = &value;
		WriteUBitLong(*reinterpret_cast<u32*>(temp), 32);
	}

	INLINE bool WriteBits(const uptr data, i32 numBits)
	{
		u8* out = (u8*)data;
		i32 numBitsLeft = numBits;

		// Bounds checking..
		if ((GetNumBitsWritten() + numBits) > m_DataBits)
		{
			SetOverflowed();
			return false;
		}

		// !! speed!! need fast paths
		// write remaining bytes
		while (numBitsLeft >= 8)
		{
			WriteUBitLong(*out, 8, false);
			++out;
			numBitsLeft -= 8;
		}

		// write remaining bits
		if (numBitsLeft)
			WriteUBitLong(*out, numBitsLeft, false);

		return !IsOverflowed();
	}

	INLINE bool WriteBytes(const uptr data, i32 numBytes)
	{
		return WriteBits(data, numBytes << 3);
	}

	INLINE i32 GetNumBitsWritten()
	{
		return (32 - m_OutBitsLeft) + (32 * (m_DataOut - m_Data));
	}

	INLINE i32 GetNumBytesWritten()
	{
		return (GetNumBitsWritten() + 7) >> 3;
	}

	INLINE void WriteChar(i32 val)
	{
		WriteSBitLong(val, sizeof(char) << 3);
	}

	INLINE void WriteByte(i32 val)
	{
		WriteUBitLong(val, sizeof(unsigned char) << 3, false);
	}

	INLINE void WriteShort(i32 val)
	{
		WriteSBitLong(val, sizeof(short) << 3);
	}

	INLINE void WriteWord(i32 val)
	{
		WriteUBitLong(val, sizeof(unsigned short) << 3);
	}

	INLINE bool WriteString(const char* str)
	{
		if (str)
			while (*str)
				WriteChar(*(str++));

		WriteChar(0);

		return !IsOverflowed();
	}

	INLINE void WriteLongLong(i64 val)
	{
		u32* pLongs = (u32*)&val;

		// Insert the two DWORDS according to network endian
		const short endianIndex = 0x0100;
		u8* idx = (u8*)&endianIndex;

		WriteUBitLong(pLongs[*idx++], sizeof(i32) << 3);
		WriteUBitLong(pLongs[*idx], sizeof(i32) << 3);
	}

	/*INLINE void WriteBitCoord(const float f) {
		i32 signbit = (f <= -COORD_RESOLUTION);
		i32 intval = (i32)abs(f);
		i32 fractval = abs((i32)(f * COORD_DENOMINATOR)) & (COORD_DENOMINATOR - 1);

		// Send the bit flags that indicate whether we have an integer part and/or a fraction part.
		WriteOneBit(intval);
		WriteOneBit(fractval);

		if (intval || fractval) {
			// Send the sign bit
			WriteOneBit(signbit);

			// Send the integer if we have one.
			if (intval) {
				// Adjust the integers from [1..MAX_COORD_VALUE] to [0..MAX_COORD_VALUE-1]
				intval--;
				WriteUBitLong((u32)intval, COORD_INTEGER_BITS);
			}

			// Send the fraction if we have one
			if (fractval) {
				WriteUBitLong((u32)fractval, COORD_FRACTIONAL_BITS);
			}
		}
	}

	INLINE void WriteBitCoordMP(const float f) {
		i32 signbit = (f <= -COORD_RESOLUTION);
		i32 intval = (i32)abs(f);
		i32 fractval = (abs((i32)(f * COORD_DENOMINATOR)) & (COORD_DENOMINATOR - 1));

		bool bInBounds = intval < (1 << COORD_INTEGER_BITS_MP);

		WriteOneBit(bInBounds);

		// Send the sign bit
		WriteOneBit(intval);

		if (intval) {
			WriteOneBit(signbit);

			// Send the integer if we have one.
			// Adjust the integers from [1..MAX_COORD_VALUE] to [0..MAX_COORD_VALUE-1]
			intval--;

			if (bInBounds)
				WriteUBitLong((u32)intval, COORD_INTEGER_BITS_MP);
			else
				WriteUBitLong((u32)intval, COORD_INTEGER_BITS);
		}
	}

	INLINE void WriteBitCellCoord(const float f, int bits, EBitCoordType coordType) {
		bool bIntegral = (coordType == kCW_Integral);
		bool bLowPrecision = (coordType == kCW_LowPrecision);

		i32 intval = (i32)abs(f);
		i32 fractval = bLowPrecision ? (abs((i32)(f * COORD_DENOMINATOR_LOWPRECISION)) & (COORD_DENOMINATOR_LOWPRECISION - 1)) :
	(abs((i32)(f * COORD_DENOMINATOR)) & (COORD_DENOMINATOR - 1));

		if (bIntegral)
			WriteUBitLong((u32)intval, bits);
		else {
			WriteUBitLong((u32)intval, bits);
			WriteUBitLong((u32)fractval, bLowPrecision ? COORD_FRACTIONAL_BITS_MP_LOWPRECISION : COORD_FRACTIONAL_BITS);
		}
	}*/

	INLINE void SeekToBit(int bit)
	{
		TempFlush();

		m_DataOut = m_Data + (bit / 32);
		m_OutBufWord = LoadLittleDWord(m_DataOut, 0);
		m_OutBitsLeft = 32 - (bit & 31);
	}

	/*INLINE void WriteBitVec3Coord(const Vector& fa) {
		i32 xflag, yflag, zflag;

		xflag = (fa[0] >= COORD_RESOLUTION) || (fa[0] <= -COORD_RESOLUTION);
		yflag = (fa[1] >= COORD_RESOLUTION) || (fa[1] <= -COORD_RESOLUTION);
		zflag = (fa[2] >= COORD_RESOLUTION) || (fa[2] <= -COORD_RESOLUTION);

		WriteOneBit(xflag);
		WriteOneBit(yflag);
		WriteOneBit(zflag);

		if (xflag)
			WriteBitCoord(fa[0]);
		if (yflag)
			WriteBitCoord(fa[1]);
		if (zflag)
			WriteBitCoord(fa[2]);
	}

	INLINE void WriteBitNormal(float f) {
		i32 signbit = (f <= -NORMAL_RESOLUTION);

		// NOTE: Since +/-1 are valid values for a normal, I'm going to encode that as all ones
		u32 fractval = abs((i32)(f * NORMAL_DENOMINATOR));

		// clamp..
		if (fractval > NORMAL_DENOMINATOR)
			fractval = NORMAL_DENOMINATOR;

		// Send the sign bit
		WriteOneBit(signbit);

		// Send the fractional component
		WriteUBitLong(fractval, NORMAL_FRACTIONAL_BITS);
	}

	INLINE void WriteBitVec3Normal(const Vector& fa) {
		i32 xflag, yflag;

		xflag = (fa[0] >= NORMAL_RESOLUTION) || (fa[0] <= -NORMAL_RESOLUTION);
		yflag = (fa[1] >= NORMAL_RESOLUTION) || (fa[1] <= -NORMAL_RESOLUTION);

		WriteOneBit(xflag);
		WriteOneBit(yflag);

		if (xflag)
			WriteBitNormal(fa[0]);
		if (yflag)
			WriteBitNormal(fa[1]);

		// Write z sign bit
		i32 signbit = (fa[2] <= -NORMAL_RESOLUTION);
		WriteOneBit(signbit);
	}*/

	INLINE void WriteBitAngle(float angle, int numBits)
	{
		u32 shift = GetBitForBitnum(numBits);
		u32 mask = shift - 1;

		i32 d = (i32)((angle / 360.0) * shift);
		d &= mask;

		WriteUBitLong((u32)d, numBits);
	}

	INLINE bool WriteBitsFromBuffer(BFRead* in, int numBits)
	{
		while (numBits > 32)
		{
			WriteUBitLong(in->ReadUBitLong(32), 32);
			numBits -= 32;
		}

		WriteUBitLong(in->ReadUBitLong(numBits), numBits);
		return !IsOverflowed() && !in->IsOverflowed();
	}

	/*INLINE void WriteBitAngles(const QAngle& fa) {
		// FIXME:
		Vector tmp(fa.x, fa.y, fa.z);
		WriteBitVec3Coord(tmp);
	}*/

  private:
	size_t m_DataBits = 0;
	size_t m_DataBytes = 0;

	u32 m_OutBufWord = 0;
	u32 m_OutBitsLeft = 32;

	u32* m_DataOut = nullptr;
	u32* m_DataEnd = nullptr;
	u32* m_Data = nullptr;

	bool m_Flushed = false; // :flushed:
};

#undef INLINE