How to encode and decode a torrent file using JavaScript

·

9 min read

What is a torrent file?

A torrent file is used to download a file or folder over a P2P network. It has the extension .torrent and contains the list of trackers and the metadata of the files being downloaded. Torrent files are encoded with a technique called Bencode. Bencoded files are similar to JSON files: they store loosely structured data in a platform-independent way.

Here is how the encoding algorithm works

  1. An integer is encoded as i<integer>e, where the integer is a base-10 number without leading zeroes (zero itself is encoded as i0e). Negative numbers are prefixed with a hyphen.
    E.g. 10 will be represented as i10e and -21 will be represented as
    i-21e

  2. A byte string is represented as <length>:<content> where length is a non-negative length of the string and content is the content of the byte string.
    E.g. "John" is encoded as 4:John.

  3. The list is encoded as l<elements>e where elements are the contents of the list.
    E.g. [5, "John"] will be encoded as li5e4:Johne

  4. A dictionary is encoded as d<contents>e, where contents are the key-value pairs of the dictionary. Each value immediately follows its key. A key must be a byte string, whereas a value can be an integer, a string, a list or a dictionary. All keys must be sorted in lexicographical order.
    E.g. { "name": "John" } is encoded as d4:name4:Johne

Exercise

Encode the following dictionary

{
  "name": "John",
  "age": 18,
  "hobbies": ["reading", "cycling"],
  "address": {
    "state": "CA",
    "country": "US",
  }
}

Bencode encoding algorithm using Javascript

Here is the code that performs Bencode encoding

/**
 * Bencode encoder.
 *
 * Supports the four bencode types: integers, strings, lists (arrays) and
 * dictionaries (plain objects). Every method is static, so the class is used
 * without instantiation: `Bencode.encode(value)`.
 */
class Bencode {
  /**
   * Encodes an object as a bencoded dictionary: d<key><value>...e.
   * Keys are emitted in sorted order, as bencode requires.
   *
   * @param {Object} d - Dictionary to encode.
   * @returns {string} The bencoded dictionary.
   * @throws {Error} If a key is not a string or a value cannot be encoded.
   */
  static #encodeDictionary(d) {
    const keys = Object.keys(d);
    // Object.keys() only yields strings; the check is kept as an explicit
    // statement of the bencode rule that keys must be strings.
    if (!keys.every((key) => typeof key === "string")) {
      throw new Error("Dictionary keys must be strings");
    }
    keys.sort();
    return `d${keys
      .map((key) => `${Bencode.encode(key)}${Bencode.encode(d[key])}`)
      .join("")}e`;
  }

  /**
   * Encodes an array as a bencoded list: l<item>...e.
   *
   * @param {Array} l - List to encode.
   * @returns {string} The bencoded list.
   */
  static #encodeList(l) {
    return `l${l.map((item) => Bencode.encode(item)).join("")}e`;
  }

  /**
   * Encodes an integer as i<integer>e.
   *
   * @param {number} n - Integer to encode.
   * @returns {string} The bencoded integer.
   * @throws {Error} If `n` is not an integer (fractions, NaN, Infinity);
   *   bencode has no representation for such values.
   */
  static #encodeNumber(n) {
    if (!Number.isInteger(n)) {
      throw new Error("Only integers can be bencoded");
    }
    // Going through BigInt prints every digit: large values such as 1e21
    // would otherwise be rendered as "1e+21", and -0 as "0" stays "0".
    return `i${BigInt(n)}e`;
  }

  /**
   * Encodes a string as <length>:<content>.
   * The length is `s.length`, which counts UTF-16 code units; it matches the
   * byte count only when every character stands for a single byte.
   *
   * @param {string} s - String to encode.
   * @returns {string} The bencoded string.
   */
  static #encodeString(s) {
    return `${s.length}:${s}`;
  }

  /**
   * Encodes a value in bencode format.
   *
   * @param {Object|Array|number|string} obj - Value to encode.
   * @returns {string} The bencoded representation of `obj`.
   * @throws {Error} "Unknown type" for unsupported values (null, undefined,
   *   booleans, functions, ...), or an error from the type-specific encoder.
   */
  static encode(obj) {
    switch (typeof obj) {
      case "object":
        // typeof null is "object", but null has no bencode representation.
        if (obj === null) throw new Error("Unknown type");
        if (Array.isArray(obj)) return Bencode.#encodeList(obj);
        return Bencode.#encodeDictionary(obj);

      case "number":
        return Bencode.#encodeNumber(obj);

      case "string":
        return Bencode.#encodeString(obj);

      default:
        throw new Error("Unknown type");
    }
  }
}


// Sample dictionary: the same one used in the exercise above.
const testObj = {
  name: "John",
  age: 18,
  hobbies: ["reading", "cycling"],
  address: {
    state: "CA",
    country: "US",
  },
};

// The encoder returns a plain string; JSON.stringify only wraps it in double
// quotes for display.
const encodedTestObj = Bencode.encode(testObj);
console.log(JSON.stringify(encodedTestObj, null, 2));

// output: "d7:addressd7:country2:US5:state2:CAe3:agei18e7:hobbiesl7:reading7:cyclinge4:name4:Johne"

Explanation

  1. Methods starting with # are private methods. They can only be called within the class.

  2. All methods are static, so we don’t have to instantiate the class to encode any object.

  3. encode method is the only accessible method outside the class. This method calls other private methods to encode specific data types like dictionary, list, number and a string.

  4. #encodeDictionary is used to encode a dictionary. First it checks that all the keys of the dictionary are strings, and then it sorts the keys in lexicographical order. This is a requirement of the bencode algorithm.
    Then #encodeDictionary calls encode on key and value and puts all the key value pairs between d and e.

  5. #encodeList encodes a list. It calls encode on every item inside the list and puts it inside l and e.

  6. #encodeNumber encodes a number while putting it under i and e.

  7. #encodeString encodes a string and returns it in the string encoding format <length>:<string>

Decoding the Bencoded string using Javascript

/**
 * Bencode decoder.
 *
 * Every private decoder takes the full input string and a start index and
 * returns `{ nextPos, val }`: the decoded value and the index of the first
 * character after it, so that the caller can continue from there.
 */
class Bencode {
  /**
   * Decodes a dictionary (d<key><value>...e) starting at `start`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} start - Index of the leading "d".
   * @returns {{nextPos: number, val: Object}} Decoded dictionary and the
   *   index just past its closing "e".
   * @throws {Error} If the dictionary is malformed.
   */
  static #decodeDictionary(s, start) {
    // dictionary must start with "d"
    if (s.length <= start || s[start] !== "d") {
      throw new Error("Not a dictionary");
    }
    const answer = {};
    const n = s.length;
    // `key` holds a decoded key that is still waiting for its value.
    // It is compared against undefined (not tested for truthiness) because
    // the empty string "0:" is a valid key.
    let key;
    let i = start + 1;
    while (i < n) {
      // dictionary must end with "e"
      if (s[i] === "e") {
        // a key without a value means the input is truncated or corrupt
        if (key !== undefined) throw new Error("Wrong dictionary format");
        return { nextPos: i + 1, val: answer };
      }

      // a key can only be a string
      if (key === undefined && ["i", "d", "l"].includes(s[i])) {
        throw new Error("Key of dictionary must be a string");
      }

      // decode the key or value
      const ans = Bencode.#decode(s, i);

      if (key === undefined) {
        // no pending key, so what was decoded is a key
        key = ans.val;
      } else {
        // defineProperty always creates an own property, so a "__proto__"
        // key coming from the input cannot replace the object's prototype.
        Object.defineProperty(answer, key, {
          value: ans.val,
          writable: true,
          enumerable: true,
          configurable: true,
        });

        // reset to find the next key-value pair if present
        key = undefined;
      }
      i = ans.nextPos;
    }
    throw new Error("Wrong dictionary format");
  }

  /**
   * Decodes a list (l<item>...e) starting at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index of the leading "l".
   * @returns {{nextPos: number, val: Array}} Decoded list and the index just
   *   past its closing "e".
   * @throws {Error} If the list is malformed.
   */
  static #decodeList(s, startPos) {
    const n = s.length;
    // list must start with "l"
    if (n <= startPos || s[startPos] !== "l") throw new Error("Not a list");
    const list = [];
    let i = startPos + 1;
    while (i < n) {
      // list must end with "e"
      if (s[i] === "e") return { nextPos: i + 1, val: list };
      const ans = Bencode.#decode(s, i);
      list.push(ans.val);
      i = ans.nextPos;
    }
    throw new Error("Wrong list format");
  }

  /**
   * Decodes an integer (i<digits>e, optionally with a leading "-") starting
   * at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index of the leading "i".
   * @returns {{nextPos: number, val: number}} Decoded number and the index
   *   just past its closing "e".
   * @throws {Error} If the integer is malformed, including when it has no
   *   digits at all ("ie", "i-e").
   */
  static #decodeNumber(s, startPos) {
    const n = s.length;
    // number must start with "i"
    if (n <= startPos || s[startPos] !== "i") {
      throw new Error("Not an integer");
    }
    let i = startPos + 1;
    let sign = 1;
    if (i >= n) throw new Error("Wrong integer format");

    // check if the number is negative
    if (s[i] === "-") {
      sign = -1;
      i += 1;
    }

    const firstDigitPos = i;
    let val = 0;
    while (i < n) {
      // number must end with "e"
      if (s[i] === "e") {
        // at least one digit is required between the prefix and "e"
        if (i === firstDigitPos) throw new Error("Wrong integer format");
        return { nextPos: i + 1, val: val * sign };
      }

      // accumulate the next decimal digit
      if (s[i] < "0" || s[i] > "9") throw new Error("Wrong integer format");
      val = val * 10 + Number.parseInt(s[i], 10);

      i += 1;
    }
    throw new Error("Wrong integer format");
  }

  /**
   * Decodes a string (<length>:<content>) starting at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index of the first digit of the length.
   * @returns {{nextPos: number, val: string}} Decoded string and the index
   *   just past its last character.
   * @throws {Error} If the colon is missing or the input is shorter than
   *   the declared length.
   */
  static #decodeString(s, startPos) {
    const n = s.length;
    // string must start with its length
    if (n <= startPos || s[startPos] < "0" || s[startPos] > "9") {
      throw new Error("Not a string");
    }
    let i = startPos;
    let len = 0;

    // find the length of the string
    while (i < n && s[i] >= "0" && s[i] <= "9") {
      len = len * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }

    // string must have a colon after its length and before its value.
    // The content occupies [i + 1, i + 1 + len), so it may end exactly at
    // the end of the input (e.g. a top-level "4:John"); only a shorter
    // input is an error.
    if (s[i] !== ":" || n < i + len + 1) {
      throw new Error("Wrong string format");
    }
    return { nextPos: i + len + 1, val: s.substring(i + 1, i + len + 1) };
  }

  /**
   * Dispatches to the decoder matching the type marker at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index at which a value starts.
   * @returns {{nextPos: number, val: *}} Decoded value and the index just
   *   past it.
   * @throws {Error} "Unknown type" if no bencode value starts at `startPos`.
   */
  static #decode(s, startPos) {
    switch (s[startPos]) {
      case "d":
        return Bencode.#decodeDictionary(s, startPos);
      case "i":
        return Bencode.#decodeNumber(s, startPos);
      case "l":
        return Bencode.#decodeList(s, startPos);
      default:
        if (s[startPos] >= "0" && s[startPos] <= "9") {
          return Bencode.#decodeString(s, startPos);
        }
        throw new Error("Unknown type");
    }
  }

  /**
   * Decodes the bencoded value at the beginning of `s`.
   * Anything that follows that first value is not inspected.
   *
   * @param {string} s - Bencoded input.
   * @returns {Object|Array|number|string} The decoded value.
   * @throws {Error} If the input is not valid bencode.
   */
  static decode(s) {
    return Bencode.#decode(s, 0).val;
  }
}

// Bencoded form of the sample dictionary produced by the encoder above.
const testString =
  "d7:addressd7:country2:US5:state2:CAe3:agei18e7:hobbiesl7:reading7:cyclinge4:name4:Johne";

// Decode first, then pretty-print the resulting object with 2-space indent.
const decodedTestString = Bencode.decode(testString);
console.log(JSON.stringify(decodedTestString, null, 2));

/*
output
{
  "address": {
    "country": "US",
    "state": "CA"
  },
  "age": 18,
  "hobbies": [
    "reading",
    "cycling"
  ],
  "name": "John"
}

*/

Explanation

  1. The #decode is used as a helper function. It calls other decoding methods for specific data types.

  2. #decodeDictionary is a recursive method to decode a dictionary.
    E.g. If the input is d7:addressd5:state2:CAe3:agei18ee that corresponds to
    input = { "address": { "state": "CA" }, "age": 18 }. Then, after decoding the address object it will return the start position of the next data type and value. Here, in the case of the address object, it will be
    { val: { "state": "CA" }, nextPos: 23}. Where 23 is the starting index of
    "age": 18 (notice the starting index of 3:agei inside the input string).

  3. #decodeList, #decodeNumber and #decodeString are similar methods that decode list, number and string respectively.

Here is the full code, along with the Torrent class, for reference

const fs = require("fs");

/**
 * Bencode encoder and decoder.
 *
 * Supports the four bencode types: integers, strings, lists (arrays) and
 * dictionaries (plain objects). Every method is static, so the class is used
 * without instantiation: `Bencode.encode(value)` / `Bencode.decode(string)`.
 *
 * Every private decoder takes the full input string and a start index and
 * returns `{ nextPos, val }`: the decoded value and the index of the first
 * character after it.
 */
class Bencode {
  // private methods

  /**
   * Decodes a dictionary (d<key><value>...e) starting at `start`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} start - Index of the leading "d".
   * @returns {{nextPos: number, val: Object}} Decoded dictionary and the
   *   index just past its closing "e".
   * @throws {Error} If the dictionary is malformed.
   */
  static #decodeDictionary(s, start) {
    if (s.length <= start || s[start] !== "d") {
      throw new Error("Not a dictionary");
    }
    const answer = {};
    const n = s.length;
    // `key` holds a decoded key that is still waiting for its value.
    // It is compared against undefined (not tested for truthiness) because
    // the empty string "0:" is a valid key.
    let key;
    let i = start + 1;
    while (i < n) {
      if (s[i] === "e") {
        // a key without a value means the input is truncated or corrupt
        if (key !== undefined) throw new Error("Wrong dictionary format");
        return { nextPos: i + 1, val: answer };
      }
      if (key === undefined && ["i", "d", "l"].includes(s[i])) {
        throw new Error("Key of dictionary must be a string");
      }
      const ans = Bencode.#decode(s, i);
      if (key === undefined) {
        key = ans.val;
      } else {
        // defineProperty always creates an own property, so a "__proto__"
        // key coming from the input cannot replace the object's prototype.
        Object.defineProperty(answer, key, {
          value: ans.val,
          writable: true,
          enumerable: true,
          configurable: true,
        });
        key = undefined;
      }
      i = ans.nextPos;
    }
    throw new Error("Wrong dictionary format");
  }

  /**
   * Decodes a list (l<item>...e) starting at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index of the leading "l".
   * @returns {{nextPos: number, val: Array}} Decoded list and the index just
   *   past its closing "e".
   * @throws {Error} If the list is malformed.
   */
  static #decodeList(s, startPos) {
    const n = s.length;
    if (n <= startPos || s[startPos] !== "l") throw new Error("Not a list");
    const list = [];
    let i = startPos + 1;
    while (i < n) {
      if (s[i] === "e") return { nextPos: i + 1, val: list };
      const ans = Bencode.#decode(s, i);
      list.push(ans.val);
      i = ans.nextPos;
    }
    throw new Error("Wrong list format");
  }

  /**
   * Decodes an integer (i<digits>e, optionally with a leading "-") starting
   * at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index of the leading "i".
   * @returns {{nextPos: number, val: number}} Decoded number and the index
   *   just past its closing "e".
   * @throws {Error} If the integer is malformed, including when it has no
   *   digits at all ("ie", "i-e").
   */
  static #decodeNumber(s, startPos) {
    const n = s.length;
    if (n <= startPos || s[startPos] !== "i") {
      throw new Error("Not an integer");
    }
    let i = startPos + 1;
    let sign = 1;
    if (i >= n) throw new Error("Wrong integer format");
    if (s[i] === "-") {
      sign = -1;
      i += 1;
    }
    const firstDigitPos = i;
    let val = 0;
    while (i < n) {
      if (s[i] === "e") {
        // at least one digit is required between the prefix and "e"
        if (i === firstDigitPos) throw new Error("Wrong integer format");
        return { nextPos: i + 1, val: val * sign };
      }
      if (s[i] < "0" || s[i] > "9") throw new Error("Wrong integer format");
      val = val * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }
    throw new Error("Wrong integer format");
  }

  /**
   * Decodes a string (<length>:<content>) starting at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index of the first digit of the length.
   * @returns {{nextPos: number, val: string}} Decoded string and the index
   *   just past its last character.
   * @throws {Error} If the colon is missing or the input is shorter than
   *   the declared length.
   */
  static #decodeString(s, startPos) {
    const n = s.length;
    if (n <= startPos || s[startPos] < "0" || s[startPos] > "9") {
      throw new Error("Not a string");
    }
    let i = startPos;
    let len = 0;
    while (i < n && s[i] >= "0" && s[i] <= "9") {
      len = len * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }
    // The content occupies [i + 1, i + 1 + len), so it may end exactly at
    // the end of the input (e.g. a top-level "4:John"); only a shorter
    // input is an error.
    if (s[i] !== ":" || n < i + len + 1) {
      throw new Error("Wrong string format");
    }
    return { nextPos: i + len + 1, val: s.substring(i + 1, i + len + 1) };
  }

  /**
   * Dispatches to the decoder matching the type marker at `startPos`.
   *
   * @param {string} s - Bencoded input.
   * @param {number} startPos - Index at which a value starts.
   * @returns {{nextPos: number, val: *}} Decoded value and the index just
   *   past it.
   * @throws {Error} "Unknown type" if no bencode value starts at `startPos`.
   */
  static #decode(s, startPos) {
    switch (s[startPos]) {
      case "d":
        return Bencode.#decodeDictionary(s, startPos);
      case "i":
        return Bencode.#decodeNumber(s, startPos);
      case "l":
        return Bencode.#decodeList(s, startPos);
      default:
        if (s[startPos] >= "0" && s[startPos] <= "9") {
          return Bencode.#decodeString(s, startPos);
        }
        throw new Error("Unknown type");
    }
  }

  /**
   * Encodes an object as a bencoded dictionary: d<key><value>...e.
   * Keys are emitted in sorted order, as bencode requires.
   *
   * @param {Object} d - Dictionary to encode.
   * @returns {string} The bencoded dictionary.
   * @throws {Error} If a key is not a string or a value cannot be encoded.
   */
  static #encodeDictionary(d) {
    const keys = Object.keys(d);
    // Object.keys() only yields strings; the check is kept as an explicit
    // statement of the bencode rule that keys must be strings.
    if (!keys.every((key) => typeof key === "string")) {
      throw new Error("Dictionary keys must be strings");
    }
    keys.sort();
    return `d${keys
      .map((key) => `${Bencode.encode(key)}${Bencode.encode(d[key])}`)
      .join("")}e`;
  }

  /**
   * Encodes an array as a bencoded list: l<item>...e.
   *
   * @param {Array} l - List to encode.
   * @returns {string} The bencoded list.
   */
  static #encodeList(l) {
    return `l${l.map((item) => Bencode.encode(item)).join("")}e`;
  }

  /**
   * Encodes an integer as i<integer>e.
   *
   * @param {number} n - Integer to encode.
   * @returns {string} The bencoded integer.
   * @throws {Error} If `n` is not an integer (fractions, NaN, Infinity);
   *   bencode has no representation for such values.
   */
  static #encodeNumber(n) {
    if (!Number.isInteger(n)) {
      throw new Error("Only integers can be bencoded");
    }
    // Going through BigInt prints every digit: large values such as 1e21
    // would otherwise be rendered as "1e+21".
    return `i${BigInt(n)}e`;
  }

  /**
   * Encodes a string as <length>:<content>.
   * The length is `s.length`, which counts UTF-16 code units; it matches the
   * byte count only when every character stands for a single byte.
   *
   * @param {string} s - String to encode.
   * @returns {string} The bencoded string.
   */
  static #encodeString(s) {
    return `${s.length}:${s}`;
  }

  // public methods

  /**
   * Decodes the bencoded value at the beginning of `s`.
   * Anything that follows that first value is not inspected.
   *
   * @param {string} s - Bencoded input.
   * @returns {Object|Array|number|string} The decoded value.
   * @throws {Error} If the input is not valid bencode.
   */
  static decode(s) {
    return Bencode.#decode(s, 0).val;
  }

  /**
   * Encodes a value in bencode format.
   *
   * @param {Object|Array|number|string} obj - Value to encode.
   * @returns {string} The bencoded representation of `obj`.
   * @throws {Error} "Unknown type" for unsupported values (null, undefined,
   *   booleans, functions, ...), or an error from the type-specific encoder.
   */
  static encode(obj) {
    switch (typeof obj) {
      case "object":
        // typeof null is "object", but null has no bencode representation.
        if (obj === null) throw new Error("Unknown type");
        if (Array.isArray(obj)) return Bencode.#encodeList(obj);
        return Bencode.#encodeDictionary(obj);
      case "number":
        return Bencode.#encodeNumber(obj);
      case "string":
        return Bencode.#encodeString(obj);
      default:
        throw new Error("Unknown type");
    }
  }
}

/**
 * Torrent-level wrapper around Bencode.
 * Encoding is delegated unchanged; decoding first verifies that the payload
 * looks like a dictionary, i.e. starts with "d" and ends with "e".
 */
class Torrent {
  /**
   * Bencodes `obj` by delegating to Bencode.encode.
   *
   * @param {*} obj - Value to encode.
   * @returns {string} The bencoded representation.
   */
  static encode(obj) {
    return Bencode.encode(obj);
  }

  /**
   * Decodes a bencoded torrent payload.
   *
   * @param {string} s - Bencoded input.
   * @returns {*} The value returned by Bencode.decode.
   * @throws {Error} If `s` does not start with "d" and end with "e".
   */
  static decode(s) {
    const looksLikeDictionary = s[0] === "d" && s[s.length - 1] === "e";
    if (!looksLikeDictionary) {
      throw new Error(
        "Wrong torrent format. A torrent file must be a dictionary."
      );
    }
    return Bencode.decode(s);
  }
}

/**
 * Demo run: round-trips a sample object through Torrent.encode/decode and
 * prints both forms, then tries to decode ./sample.torrent and prints either
 * the decoded content or the error message.
 */
const test = () => {
  // Pretty-printer shared by both outputs below.
  const pretty = (value) => JSON.stringify(value, null, 2);

  const sample = {
    name: "John",
    age: 18,
    hobbies: ["reading", "cycling"],
    address: {
      state: "CA",
      country: "US",
    },
  };

  // Encode and decode before printing anything, so a failure in either step
  // surfaces before any output is produced.
  const bencoded = Torrent.encode(sample);
  const roundTripped = Torrent.decode(bencoded);

  console.log("Encoded string:", bencoded);
  console.log("Decoded object: ", pretty(roundTripped));

  // test on sample torrent file
  try {
    const readOptions = { encoding: "binary", flag: "r" };
    const raw = fs.readFileSync("./sample.torrent", readOptions);
    const parsed = Bencode.decode(raw);
    console.log(pretty(parsed));
  } catch (err) {
    // A missing file or malformed content is reported, not rethrown.
    console.log(err.message);
  }
};

test();

You can also test these functions on actual torrent files. You can download sample torrent files from here

  1. http://sample-file.bazadanni.com/download/applications/torrent/sample.torrent

  2. https://webtorrent.io/free-torrents

If you have any questions or concerns about this code, please let me know.
Thank you for reading :)

References

  1. https://en.wikipedia.org/wiki/Bencode

  2. https://en.wikipedia.org/wiki/Torrent_file

  3. https://www.youtube.com/playlist?list=PLsdq-3Z1EPT1rNeq2GXpnivaWINnOaCd0