• Welcome to Valhalla Legends Archive.
 

BitTorrent Bencoding

Started by shout, July 05, 2006, 10:34 AM

Previous topic - Next topic

shout

I am trying to implement some basic things for Bittorrent in a .Net dll... and I'm not quite sure how to approach Bencoding. (http://wiki.theory.org/BitTorrentSpecification, the part about bencoding).

The Bencoding part is easy enough,

namespace Stuff.Messages
{
    public class MessageBencoder
    {
        string m_message;

        public MessageBencoder()
        {
            m_message = string.Empty;
        }

        public void InsertString(string message)
        {
            m_message += message.Length.ToString() + ":" + message;
        }

        public void InsertInt(int message)
        {
           m_message += "i" + message.ToString() + "e";
        }

        public void StartList()
        {
            m_message += "l";
        }

        public void StartDictionary()
        {
            m_message += "d";
        }

        public void End()
        {
            m_message += "e";
        }

        public string Get()
        {
            return m_message;
        }

        public byte[] GetEncoded()
        {
            return System.Text.ASCIIEncoding.ASCII.GetBytes(m_message);
        }


But I don't know how to approach the unencoding... This is what I came up with but it's messy.


public class MessageDebencoder
    {
        private byte[] m_messageBuffer;
        private MemoryStream m_message;
        private byte tmpConsumed;
        private bool updated;

        public MessageDebencoder(byte[] message)
        {
            m_messageBuffer = message;
            m_message = new MemoryStream(m_messageBuffer);
        }

        public bool More
        {
            get { return (m_message.Length > 0); }
        }

        public MessageTypes Next()
        {
            tmpConsumed = (byte)m_message.ReadByte();
            updated = true;
            switch (tmpConsumed)
            {
                case (byte)'d':
                    return MessageTypes.MT_StartDictionary;
                case (byte)'l':
                    return MessageTypes.MT_StartList;
                case (byte)'i':
                    return MessageTypes.MT_Int;
                case (byte)'e':
                    return MessageTypes.MT_End;
                default:
                    return MessageTypes.MT_String;
            }
        }

        public MessageTypes Next(byte n)
        {
            switch (n)
            {
                case (byte)'d':
                    return MessageTypes.MT_StartDictionary;
                case (byte)'l':
                    return MessageTypes.MT_StartList;
                case (byte)'i':
                    return MessageTypes.MT_Int;
                case (byte)'e':
                    return MessageTypes.MT_End;
                default:
                    return MessageTypes.MT_String;
            }
        }

        public object GetNext()
        {
            MessageTypes omg;
            if (updated)
                omg = Next(tmpConsumed);
            else
                omg = Next();
            updated = false;
            switch (omg)
            {
                case MessageTypes.MT_Int:
                    {
                        byte[] n = new byte[256];
                        int i;
                        for (i = 0; true; i++)
                        {
                            n[i] = (byte)m_message.ReadByte();
                            if (n[i] == (byte)'e')
                                break;
                        }
                        string s = string.Empty;
                        for (int j = 0; j < i; j++)
                        {
                            if (char.IsDigit((char)n[j]))
                                s += ((char)n[j]).ToString();
                        }
                        return (object)int.Parse(s);
                    }

                case MessageTypes.MT_String:
                    {
                        string s = string.Empty;
                        s += (char)tmpConsumed;
                        for (int i = 1; true; i++)
                        {
                            s += (char)m_message.ReadByte();
                            try //The catch block was removed but this remained
                            {
                                if (s[i] == ':')
                                {
                                    s = s.Remove(i);
                                    break;
                                }
                            }
                            if (i == m_message.Length - m_message.Position)
                            {
                                //exception
                            }
                        }
                        int len = int.Parse(s);
                        s = string.Empty;
                        byte[] tmp = new byte[len];
                        m_message.Read(tmp, 0, len);
                        return (object)ASCIIEncoding.ASCII.GetString(tmp, 0, len);
                    }

                case MessageTypes.MT_StartList:
                    return (object)(MessageTypes.MT_StartList);
                case MessageTypes.MT_StartDictionary:
                    return (object)(MessageTypes.MT_StartDictionary);
                case MessageTypes.MT_End:
                    return (object)(MessageTypes.MT_End);
            }
            updated = false;
            return null;
        }
    }

    public enum MessageTypes
    {
        MT_String = 0,
        MT_Int = 1,
        MT_StartList = 2,
        MT_StartDictionary = 3,
        MT_End = 4,
        MT_ByteString = 5
    };


Any suggustions on a better way to implement?

MyndFyre

Well you're going to run into problems whenever a value that *should* be greater than 0x7f is put into your string, since Encoding.ASCII won't translate larger than that.  It's a good argument for not using strings as buffers.  The other argument for it is that since strings are immutable, you're forcing additional garbage collection because every time you append the string, you're generating a new string and making the runtime clean up the old one.

If you like, I'm sure that my databuffer's implementation can be adjusted, as can my datareader (which is designed to read from databuffer-made packets).  The nice thing is, if you need to adjust your packet encoding or something like that, you can do so without breaking the interface.  Most of the functions are already there for you (for example, your Next() function could be implemented by doing a ReadByte(), converting the result to a char, and then calling the appropriate function such as ReadInt32()). 

In any case, using a string as a buffer for binary data is both problematic and bad OO practice.  There have been other problems noted because of this, and it should be avoided when possible.
QuoteEvery generation of humans believed it had all the answers it needed, except for a few mysteries they assumed would be solved at any moment. And they all believed their ancestors were simplistic and deluded. What are the odds that you are the first generation of humans who will understand reality?

After 3 years, it's on the horizon.  The new JinxBot, and BN#, the managed Battle.net Client library.

Quote from: chyea on January 16, 2009, 05:05 PM
You've just located global warming.

shout

#2
Quote from: MyndFyre[vL] on July 05, 2006, 11:28 AM
Well you're going to run into problems whenever a value that *should* be greater than 0x7f is put into your string, since Encoding.ASCII won't translate larger than that.  It's a good argument for not using strings as buffers.  The other argument for it is that since strings are immutable, you're forcing additional garbage collection because every time you append the string, you're generating a new string and making the runtime clean up the old one.

If you like, I'm sure that my databuffer's implementation can be adjusted, as can my datareader (which is designed to read from databuffer-made packets).  The nice thing is, if you need to adjust your packet encoding or something like that, you can do so without breaking the interface.  Most of the functions are already there for you (for example, your Next() function could be implemented by doing a ReadByte(), converting the result to a char, and then calling the appropriate function such as ReadInt32()). 

In any case, using a string as a buffer for binary data is both problematic and bad OO practice.  There have been other problems noted because of this, and it should be avoided when possible.

I know the string buffer is impractical, that will be changed. This is not binary data. It is textual ASCII encoded data. BitTorrent uses textual data for everything except peer <-> peer transfer.


d10:im so leeti1337eel4:leeti1337e4:l33te

Unencoded would be:

Dictionary { "im so leet"  : 1337 } List { "leet", 1337, "l337" }


The annoying thing about it is there is no set formats for what comes first, second, ect.

shout

#3
MyndFyre was right again!

Meh... this works... just not too well.

If anyone wants to comment on the rotten fruits of my labor go ahead. :) Theres some trash that needs to be taken care of.


using System;
using System.Collections.Generic;
using System.Text;
using System.IO;

namespace BitTorrent.Messages
{
    public class MessageDebencoder
    {
        private byte[] m_messageBuffer;
        private MemoryStream m_message;
        private byte tmpConsumed;
        private bool updated;
#if DEBUG
        public string dbgDisplay
        {
            get { return ASCIIEncoding.ASCII.GetString(m_message.GetBuffer()); }
        }
#endif

        public MessageDebencoder(byte[] message)
        {
            m_messageBuffer = message;
            m_message = new MemoryStream(m_messageBuffer, 0, (int)message.Length, false, true);
        }

        public MessageDebencoder(MemoryStream message)
        {
            m_messageBuffer = message.GetBuffer();
            m_message = message;
        }

        public bool More
        {
            get { return (m_message.Length - m_message.Position > 0); }
        }

        public MessageTypes Next()
        {
            if (updated == true)
                return Next(tmpConsumed);
            tmpConsumed = (byte)m_message.ReadByte();
            updated = true;
            switch (tmpConsumed)
            {
                case (byte)'d':
                    return MessageTypes.MT_StartDictionary;
                case (byte)'l':
                    return MessageTypes.MT_StartList;
                case (byte)'i':
                    return MessageTypes.MT_Int;
                case (byte)'e':
                    return MessageTypes.MT_End;
                default:
                    return MessageTypes.MT_String;
            }
        }

        private MessageTypes Next(byte n)
        {
            switch (n)
            {
                case (byte)'d':
                    return MessageTypes.MT_StartDictionary;
                case (byte)'l':
                    return MessageTypes.MT_StartList;
                case (byte)'i':
                    return MessageTypes.MT_Int;
                case (byte)'e':
                    return MessageTypes.MT_End;
                default:
                    return MessageTypes.MT_String;
            }
        }

        public void GetNextDictionaryPair(out string key, out byte[] encoded_value)
        {
            int level = 0;
            byte[] bkey = (byte[])GetNext();
            key = ASCIIEncoding.ASCII.GetString(bkey);
            int start_position = (int)m_message.Position;
            do
            {
                if (Next() == MessageTypes.MT_StartDictionary)
                    level++;
                else if (Next() == MessageTypes.MT_StartList)
                    level++;
                else if (Next() == MessageTypes.MT_End)
                    level--;
                GetNext();
            } while (level > 0 && More);
            int len = (int)m_message.Position - start_position;
            byte[] temp = new byte[len];
            Array.Copy(m_messageBuffer, start_position, temp, 0, len);
            encoded_value = temp;
        }

        public byte[] GetNextList()
        {
            int start_position = (int)m_message.Position;
            int level = 0;
            do
            {
                if (Next() == MessageTypes.MT_StartDictionary)
                    level++;
                else if (Next() == MessageTypes.MT_StartList)
                    level++;
                else if (Next() == MessageTypes.MT_End)
                    level--;
                GetNext();
            }
            while (level > 0);
            int len = (int)m_message.Position - start_position;
            byte[] temp = new byte[len];
            Array.Copy(m_messageBuffer, start_position, temp, 0, len);
            return temp;
        }

        public string GetNextString()
        {
            return ASCIIEncoding.ASCII.GetString((byte[])GetNext());
        }

        public int GetNextInt()
        {
            return (int)GetNext();
        }

        public object GetNext()
        {
            MessageTypes message;
            if (updated)
                message = Next(tmpConsumed);
            else
                message = Next();
            updated = false;
            switch (message)
            {
                case MessageTypes.MT_Int:
                    {
                        byte[] buffer = new byte[256];
                        int index;
                        for (index = 0; true; index++)
                        {
                            buffer[index] = (byte)m_message.ReadByte();
                            if (buffer[index] == (byte)'e')
                                break;
                        }
                        string s = string.Empty;
                        for (int jindex = 0; jindex < index; jindex++)
                        {
                            if (char.IsDigit((char)buffer[jindex]))
                                s += ((char)buffer[jindex]).ToString();
                        }
                        return (object)int.Parse(s);
                    }

                case MessageTypes.MT_String:
                    {
                        string s = string.Empty;
                        s += (char)tmpConsumed;
                        bool lengthcolon = false;
                        for (int i = 1; true; i++)
                        {
                            s += (char)m_message.ReadByte();
                            if (m_message.Position == m_message.Length)
                                return (object)new byte[0];
                            if (s[i] == ':' && !lengthcolon)
                            {
                                lengthcolon = true;
                                s = s.Remove(i);
                                break;
                            }
                            if (i == m_message.Length - m_message.Position)
                            {
                                //exception
                            }
                        }
                        int len = int.Parse(s);
                        s = string.Empty;
                        byte[] tmp = new byte[len];
                        m_message.Read(tmp, 0, len);
                        return (object)tmp;
                    }

                case MessageTypes.MT_StartList:
                    return (object)(MessageTypes.MT_StartList);
                case MessageTypes.MT_StartDictionary:
                    return (object)(MessageTypes.MT_StartDictionary);
                case MessageTypes.MT_End:
                    return (object)(MessageTypes.MT_End);
            }
            updated = false;
            return null;
        }
    }

    public enum MessageTypes
    {
        MT_String,
        MT_Int,
        MT_StartList,
        MT_StartDictionary,
        MT_End,
        MT_ByteString
    };
}