How to encode and decode a torrent file using JavaScript
What is a torrent file?
The torrent file is used to download a file or folder over a P2P network. A torrent file has an extension .torrent and contains the information about the list of trackers and metadata of the file being downloaded. Torrent files are encoded with the encoding technique called Bencode Encoding. Bencoded files are just like JSON files, they are used to store loosely structured data in a platform-independent way.
Here is how the encoding algorithm works
An integer is encoded as
i<integer>e
where the integer is a base-10 number without any leading zeroes (the value zero itself is written as a single 0, i.e. i0e). Negative numbers are prefixed with a hyphen.
E.g. 10 will be represented as i10e and -21 will be represented as i-21e.
A byte string is represented as
<length>:<content>
where length is a non-negative length of the string and content is the content of the byte string.
E.g. "John" is encoded as 4:John.
A list is encoded as
l<elements>e
where elements are the contents of the list.
E.g. [5, "John"] will be encoded as li5e4:Johne.
Dictionary is represented as
d<contents>e
where contents are the key-value pairs in the dictionary. The value immediately follows the key. A key must be a byte string whereas a value can be anything in integers, strings, lists and dictionaries. All keys must be sorted in lexicographical order.
E.g. { "name": "John" } is encoded as d4:name4:Johne.
Exercise
Encode the following dictionary
{
"name": "John",
"age": 18,
"hobbies": ["reading", "cycling"],
"address": {
"state": "CA",
"country": "US",
}
}
Bencode encoding algorithm using Javascript
Here’s the code to perform Bencode encoding:
/**
 * Minimal Bencode encoder.
 *
 * Supported inputs: safe integers, strings, arrays (lists) and non-null
 * objects (dictionaries). Anything else is rejected with an Error.
 *
 * NOTE: string lengths are taken from `String#length` (UTF-16 code units).
 * That equals the byte length only when every character fits in one byte,
 * e.g. for text read with the "binary"/latin1 encoding.
 */
class Bencode {
  /**
   * Encodes an object as a dictionary: d<key><value>...e with sorted keys.
   * @param {object} d - non-null, non-array object
   * @returns {string} bencoded dictionary
   */
  static #encodeDictionary(d) {
    const keys = Object.keys(d);
    // Object.keys() only ever yields strings, so this guard is purely
    // defensive; it is kept to make the bencode key rule explicit.
    if (!keys.every((key) => typeof key === "string"))
      throw new Error("Dictionary keys must be strings");
    // Bencode requires dictionary keys to appear in sorted order.
    keys.sort();
    const body = keys
      .map((key) => `${Bencode.encode(key)}${Bencode.encode(d[key])}`)
      .join("");
    return `d${body}e`;
  }

  /**
   * Encodes an array as a list: l<item>...e
   * @param {Array} l
   * @returns {string} bencoded list
   */
  static #encodeList(l) {
    return `l${l.map((item) => Bencode.encode(item)).join("")}e`;
  }

  /**
   * Encodes an integer as i<integer>e.
   * @param {number} n
   * @returns {string} bencoded integer
   * @throws {Error} if n is not a safe integer
   */
  static #encodeNumber(n) {
    // Fractions, NaN, Infinity and values that stringify in exponent form
    // (e.g. 1e21) cannot be represented as a bencoded integer.
    if (!Number.isSafeInteger(n))
      throw new Error("Only safe integers can be bencoded");
    return `i${n}e`;
  }

  /**
   * Encodes a string as <length>:<content>.
   * @param {string} s
   * @returns {string} bencoded string
   */
  static #encodeString(s) {
    return `${s.length}:${s}`;
  }

  /**
   * Encodes a value in Bencode format.
   * @param {number|string|Array|object} obj - value to encode
   * @returns {string} the bencoded representation
   * @throws {Error} "Unknown type" for null and any unsupported type
   */
  static encode(obj) {
    switch (typeof obj) {
      case "object":
        // typeof null is "object"; reject it explicitly instead of letting
        // Object.keys(null) fail with an unrelated TypeError.
        if (obj === null) throw new Error("Unknown type");
        if (Array.isArray(obj)) return Bencode.#encodeList(obj);
        return Bencode.#encodeDictionary(obj);
      case "number":
        return Bencode.#encodeNumber(obj);
      case "string":
        return Bencode.#encodeString(obj);
      default:
        throw new Error("Unknown type");
    }
  }
}
// Sample dictionary mixing every supported type: strings, an integer,
// a list and a nested dictionary.
const testObj = {
  name: "John",
  age: 18,
  hobbies: ["reading", "cycling"],
  address: { state: "CA", country: "US" },
};
const encodedSample = Bencode.encode(testObj);
// JSON.stringify is applied to the encoded string, so the printed value is
// wrapped in double quotes.
console.log(JSON.stringify(encodedSample, null, 2));
// output: "d7:addressd7:country2:US5:state2:CAe3:agei18e7:hobbiesl7:reading7:cyclinge4:name4:Johne"
Explanation
Methods starting with # are private methods. They can only be called from within the class.
All methods are static, so we don’t have to instantiate the class to encode any object.
encode method is the only accessible method outside the class. This method calls other private methods to encode specific data types like dictionary, list, number and a string.
#encodeDictionary is used to encode a dictionary. First it checks that all the keys of dictionary are string and then sorts the keys in lexicographical order. This is a requirement of bencode algorithm.
Then #encodeDictionary calls encode on each key and value and puts all the key-value pairs between d and e.
#encodeList encodes a list. It calls encode on every item inside the list and puts the results between l and e.
#encodeNumber encodes a number while putting it under i and e.
#encodeString encodes a string and returns it in the string encoding format <length>:<string>
Decoding the Bencoded string using Javascript
/**
 * Minimal Bencode decoder operating on a JavaScript string.
 *
 * Every private decoder takes the input string and a start index and returns
 * `{ nextPos, val }`, where `val` is the decoded value and `nextPos` is the
 * index of the first character after it.
 */
class Bencode {
  /**
   * Decodes a dictionary (d<key><value>...e) starting at `start`.
   * @param {string} s - bencoded input
   * @param {number} start - index of the leading "d"
   * @returns {{nextPos: number, val: object}}
   * @throws {Error} on a non-string key, a key without a value, or a
   *   missing terminator
   */
  static #decodeDictionary(s, start) {
    // dictionary must start with "d"
    if (s.length <= start || s[start] !== "d")
      throw new Error("Not a dictionary");
    const answer = {};
    const n = s.length;
    // `undefined` means "the next token is a key". The comparison must be
    // against undefined (not truthiness) because "" is a valid key.
    let key;
    let i = start + 1;
    while (i < n) {
      // dictionary must end with "e"
      if (s[i] === "e") {
        // a key that was read but never given a value is malformed input
        if (key !== undefined) throw new Error("Wrong dictionary format");
        return { nextPos: i + 1, val: answer };
      }
      // a key can only be a string
      if (key === undefined && ["i", "d", "l"].includes(s[i]))
        throw new Error("Key of dictionary must be a string");
      // decode the key or value
      const ans = Bencode.#decode(s, i);
      if (key !== undefined) {
        // defineProperty creates an own property even for "__proto__",
        // so input data cannot replace the result object's prototype.
        Object.defineProperty(answer, key, {
          value: ans.val,
          writable: true,
          enumerable: true,
          configurable: true,
        });
        // reset so the next token is treated as a key again
        key = undefined;
      } else key = ans.val;
      i = ans.nextPos;
    }
    throw new Error("Wrong dictionary format");
  }

  /**
   * Decodes a list (l<item>...e) starting at `startPos`.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the leading "l"
   * @returns {{nextPos: number, val: Array}}
   * @throws {Error} if the list is not terminated by "e"
   */
  static #decodeList(s, startPos) {
    const n = s.length;
    // list must start with "l"
    if (n <= startPos || s[startPos] !== "l") throw new Error("Not a list");
    const list = [];
    let i = startPos + 1;
    while (i < n) {
      // list must end with "e"
      if (s[i] === "e") return { nextPos: i + 1, val: list };
      const ans = Bencode.#decode(s, i);
      list.push(ans.val);
      i = ans.nextPos;
    }
    throw new Error("Wrong list format");
  }

  /**
   * Decodes an integer (i<digits>e, optional leading "-") at `startPos`.
   * Leading zeroes are tolerated; at least one digit is required.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the leading "i"
   * @returns {{nextPos: number, val: number}}
   * @throws {Error} on missing digits, a non-digit character, or a missing
   *   terminator
   */
  static #decodeNumber(s, startPos) {
    const n = s.length;
    // number must start with "i"
    if (n <= startPos || s[startPos] !== "i")
      throw new Error("Not an integer");
    let i = startPos + 1;
    if (i >= n) throw new Error("Wrong integer format");
    // check if the number is negative
    let sign = 1;
    if (s[i] === "-") {
      sign = -1;
      i += 1;
    }
    const firstDigitPos = i;
    let val = 0;
    while (i < n) {
      // number must end with "e"
      if (s[i] === "e") {
        // "ie" and "i-e" carry no digits and are not valid integers
        if (i === firstDigitPos) throw new Error("Wrong integer format");
        return { nextPos: i + 1, val: val * sign };
      }
      if (s[i] < "0" || s[i] > "9") throw new Error("Wrong integer format");
      val = val * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }
    throw new Error("Wrong integer format");
  }

  /**
   * Decodes a string (<length>:<content>) starting at `startPos`.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the first length digit
   * @returns {{nextPos: number, val: string}}
   * @throws {Error} if the colon is missing or the content is truncated
   */
  static #decodeString(s, startPos) {
    const n = s.length;
    // string must start with its length
    if (n <= startPos || s[startPos] < "0" || s[startPos] > "9")
      throw new Error("Not a string");
    let i = startPos;
    let len = 0;
    // find the length of the string
    while (i < n && s[i] >= "0" && s[i] <= "9") {
      len = len * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }
    // `end` is the index just past the content. It may equal n when the
    // string is the last thing in the input, so only end > n is an error.
    const end = i + len + 1;
    if (s[i] !== ":" || end > n) throw new Error("Wrong string format");
    return { nextPos: end, val: s.substring(i + 1, end) };
  }

  /**
   * Dispatches to the decoder matching the character at `startPos`.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the value's first character
   * @returns {{nextPos: number, val: *}}
   * @throws {Error} "Unknown type" if no decoder matches
   */
  static #decode(s, startPos) {
    switch (s[startPos]) {
      case "d":
        return Bencode.#decodeDictionary(s, startPos);
      case "i":
        return Bencode.#decodeNumber(s, startPos);
      case "l":
        return Bencode.#decodeList(s, startPos);
      default:
        if (s[startPos] >= "0" && s[startPos] <= "9")
          return Bencode.#decodeString(s, startPos);
        throw new Error("Unknown type");
    }
  }

  /**
   * Decodes the first bencoded value found at the start of `s`.
   * Characters after that value are not inspected.
   * @param {string} s - bencoded input
   * @returns {*} decoded number, string, array or object
   * @throws {Error} if the input is malformed
   */
  static decode(s) {
    return Bencode.#decode(s, 0).val;
  }
}
// Bencoded form of the sample dictionary (name, age, hobbies, address).
const testString =
  "d7:addressd7:country2:US5:state2:CAe3:agei18e7:hobbiesl7:reading7:cyclinge4:name4:Johne";
const decodedSample = Bencode.decode(testString);
// Pretty-print the decoded structure with a 2-space indent.
console.log(JSON.stringify(decodedSample, null, 2));
/*
output
{
  "address": {
    "country": "US",
    "state": "CA"
  },
  "age": 18,
  "hobbies": [
    "reading",
    "cycling"
  ],
  "name": "John"
}
*/
Explanation
The #decode is used as a helper function. It calls other decoding methods for specific data types.
#decodeDictionary is a recursive method to decode a dictionary.
E.g. if the input is d7:addressd5:state2:CAe3:agei18ee, which corresponds to input = { "address": { "state": "CA" }, "age": 18 }, then after decoding the address object it will return the decoded value together with the start position of the next data type. Here, in the case of the address object, it will be { val: { "state": "CA" }, nextPos: 23 }, where 23 is the starting index of "age": 18 (notice the starting index of 3:agei inside the input string).
#decodeList, #decodeNumber and #decodeString are similar methods that decode a list, a number and a string respectively.
Here’s the full code, along with the Torrent class, for reference:
const fs = require("fs");
/**
 * Minimal Bencode encoder/decoder operating on JavaScript strings.
 *
 * Decoding: every private decoder takes the input string and a start index
 * and returns `{ nextPos, val }`, where `val` is the decoded value and
 * `nextPos` is the index of the first character after it.
 *
 * Encoding: supports safe integers, strings, arrays (lists) and non-null
 * objects (dictionaries). String lengths come from `String#length` (UTF-16
 * code units), which equals the byte length only when every character fits
 * in one byte, e.g. for text read with the "binary"/latin1 encoding.
 */
class Bencode {
  // private methods

  /**
   * Decodes a dictionary (d<key><value>...e) starting at `start`.
   * @param {string} s - bencoded input
   * @param {number} start - index of the leading "d"
   * @returns {{nextPos: number, val: object}}
   * @throws {Error} on a non-string key, a key without a value, or a
   *   missing terminator
   */
  static #decodeDictionary(s, start) {
    if (s.length <= start || s[start] !== "d")
      throw new Error("Not a dictionary");
    const answer = {};
    const n = s.length;
    // `undefined` means "the next token is a key". The comparison must be
    // against undefined (not truthiness) because "" is a valid key.
    let key;
    let i = start + 1;
    while (i < n) {
      if (s[i] === "e") {
        // a key that was read but never given a value is malformed input
        if (key !== undefined) throw new Error("Wrong dictionary format");
        return { nextPos: i + 1, val: answer };
      }
      if (key === undefined && ["i", "d", "l"].includes(s[i]))
        throw new Error("Key of dictionary must be a string");
      const ans = Bencode.#decode(s, i);
      if (key !== undefined) {
        // defineProperty creates an own property even for "__proto__",
        // so input data cannot replace the result object's prototype.
        Object.defineProperty(answer, key, {
          value: ans.val,
          writable: true,
          enumerable: true,
          configurable: true,
        });
        key = undefined;
      } else key = ans.val;
      i = ans.nextPos;
    }
    throw new Error("Wrong dictionary format");
  }

  /**
   * Decodes a list (l<item>...e) starting at `startPos`.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the leading "l"
   * @returns {{nextPos: number, val: Array}}
   * @throws {Error} if the list is not terminated by "e"
   */
  static #decodeList(s, startPos) {
    const n = s.length;
    if (n <= startPos || s[startPos] !== "l") throw new Error("Not a list");
    const list = [];
    let i = startPos + 1;
    while (i < n) {
      if (s[i] === "e") return { nextPos: i + 1, val: list };
      const ans = Bencode.#decode(s, i);
      list.push(ans.val);
      i = ans.nextPos;
    }
    throw new Error("Wrong list format");
  }

  /**
   * Decodes an integer (i<digits>e, optional leading "-") at `startPos`.
   * Leading zeroes are tolerated; at least one digit is required.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the leading "i"
   * @returns {{nextPos: number, val: number}}
   * @throws {Error} on missing digits, a non-digit character, or a missing
   *   terminator
   */
  static #decodeNumber(s, startPos) {
    const n = s.length;
    if (n <= startPos || s[startPos] !== "i")
      throw new Error("Not an integer");
    let i = startPos + 1;
    if (i >= n) throw new Error("Wrong integer format");
    let sign = 1;
    if (s[i] === "-") {
      sign = -1;
      i += 1;
    }
    const firstDigitPos = i;
    let val = 0;
    while (i < n) {
      if (s[i] === "e") {
        // "ie" and "i-e" carry no digits and are not valid integers
        if (i === firstDigitPos) throw new Error("Wrong integer format");
        return { nextPos: i + 1, val: val * sign };
      }
      if (s[i] < "0" || s[i] > "9") throw new Error("Wrong integer format");
      val = val * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }
    throw new Error("Wrong integer format");
  }

  /**
   * Decodes a string (<length>:<content>) starting at `startPos`.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the first length digit
   * @returns {{nextPos: number, val: string}}
   * @throws {Error} if the colon is missing or the content is truncated
   */
  static #decodeString(s, startPos) {
    const n = s.length;
    if (n <= startPos || s[startPos] < "0" || s[startPos] > "9")
      throw new Error("Not a string");
    let i = startPos;
    let len = 0;
    while (i < n && s[i] >= "0" && s[i] <= "9") {
      len = len * 10 + Number.parseInt(s[i], 10);
      i += 1;
    }
    // `end` is the index just past the content. It may equal n when the
    // string is the last thing in the input, so only end > n is an error.
    const end = i + len + 1;
    if (s[i] !== ":" || end > n) throw new Error("Wrong string format");
    return { nextPos: end, val: s.substring(i + 1, end) };
  }

  /**
   * Dispatches to the decoder matching the character at `startPos`.
   * @param {string} s - bencoded input
   * @param {number} startPos - index of the value's first character
   * @returns {{nextPos: number, val: *}}
   * @throws {Error} "Unknown type" if no decoder matches
   */
  static #decode(s, startPos) {
    switch (s[startPos]) {
      case "d":
        return Bencode.#decodeDictionary(s, startPos);
      case "i":
        return Bencode.#decodeNumber(s, startPos);
      case "l":
        return Bencode.#decodeList(s, startPos);
      default:
        if (s[startPos] >= "0" && s[startPos] <= "9")
          return Bencode.#decodeString(s, startPos);
        throw new Error("Unknown type");
    }
  }

  /**
   * Encodes an object as a dictionary: d<key><value>...e with sorted keys.
   * @param {object} d - non-null, non-array object
   * @returns {string} bencoded dictionary
   */
  static #encodeDictionary(d) {
    const keys = Object.keys(d);
    // Object.keys() only ever yields strings, so this guard is purely
    // defensive; it is kept to make the bencode key rule explicit.
    if (!keys.every((key) => typeof key === "string"))
      throw new Error("Dictionary keys must be strings");
    // Bencode requires dictionary keys to appear in sorted order.
    keys.sort();
    const body = keys
      .map((key) => `${Bencode.encode(key)}${Bencode.encode(d[key])}`)
      .join("");
    return `d${body}e`;
  }

  /**
   * Encodes an array as a list: l<item>...e
   * @param {Array} l
   * @returns {string} bencoded list
   */
  static #encodeList(l) {
    return `l${l.map((item) => Bencode.encode(item)).join("")}e`;
  }

  /**
   * Encodes an integer as i<integer>e.
   * @param {number} n
   * @returns {string} bencoded integer
   * @throws {Error} if n is not a safe integer
   */
  static #encodeNumber(n) {
    // Fractions, NaN, Infinity and values that stringify in exponent form
    // (e.g. 1e21) cannot be represented as a bencoded integer.
    if (!Number.isSafeInteger(n))
      throw new Error("Only safe integers can be bencoded");
    return `i${n}e`;
  }

  /**
   * Encodes a string as <length>:<content>.
   * @param {string} s
   * @returns {string} bencoded string
   */
  static #encodeString(s) {
    return `${s.length}:${s}`;
  }

  // public methods

  /**
   * Decodes the first bencoded value found at the start of `s`.
   * Characters after that value are not inspected.
   * @param {string} s - bencoded input
   * @returns {*} decoded number, string, array or object
   * @throws {Error} if the input is malformed
   */
  static decode(s) {
    return Bencode.#decode(s, 0).val;
  }

  /**
   * Encodes a value in Bencode format.
   * @param {number|string|Array|object} obj - value to encode
   * @returns {string} the bencoded representation
   * @throws {Error} "Unknown type" for null and any unsupported type
   */
  static encode(obj) {
    switch (typeof obj) {
      case "object":
        // typeof null is "object"; reject it explicitly instead of letting
        // Object.keys(null) fail with an unrelated TypeError.
        if (obj === null) throw new Error("Unknown type");
        if (Array.isArray(obj)) return Bencode.#encodeList(obj);
        return Bencode.#encodeDictionary(obj);
      case "number":
        return Bencode.#encodeNumber(obj);
      case "string":
        return Bencode.#encodeString(obj);
      default:
        throw new Error("Unknown type");
    }
  }
}
/**
 * Thin wrapper around Bencode for torrent payloads, whose top-level value
 * must be a dictionary.
 */
class Torrent {
  /**
   * Bencodes `obj` by delegating to Bencode.encode.
   * @param {*} obj - value to encode
   * @returns {string} bencoded representation
   */
  static encode(obj) {
    const encoded = Bencode.encode(obj);
    return encoded;
  }

  /**
   * Decodes a bencoded torrent string after checking that it is framed as
   * a dictionary (first character "d", last character "e").
   * @param {string} s - bencoded torrent content
   * @returns {*} whatever Bencode.decode returns for `s`
   * @throws {Error} if the input does not start with "d" and end with "e"
   */
  static decode(s) {
    const looksLikeDictionary = s[0] === "d" && s[s.length - 1] === "e";
    if (!looksLikeDictionary) {
      throw new Error(
        "Wrong torrent format. A torrent file must be a dictionary."
      );
    }
    return Bencode.decode(s);
  }
}
/**
 * Demo driver: round-trips a sample object through Torrent.encode and
 * Torrent.decode, then tries to decode ./sample.torrent from disk.
 * Any error raised while reading or decoding the file is reported by
 * printing its message.
 */
const test = () => {
  const sample = {
    name: "John",
    age: 18,
    hobbies: ["reading", "cycling"],
    address: { state: "CA", country: "US" },
  };

  const encoded = Torrent.encode(sample);
  const decoded = Torrent.decode(encoded);
  console.log("Encoded string:", encoded);
  console.log("Decoded object: ", JSON.stringify(decoded, null, 2));

  // test on sample torrent file
  try {
    // "binary" encoding yields one character per byte of the file.
    const readOptions = { encoding: "binary", flag: "r" };
    const raw = fs.readFileSync("./sample.torrent", readOptions);
    const parsed = Bencode.decode(raw);
    console.log(JSON.stringify(parsed, null, 2));
  } catch (err) {
    console.log(err.message);
  }
};

test();
You can also test these functions on actual torrent files. You can download sample torrent files from here
If you have any questions or concerns about this code, please let me know.
Thank you for reading :)
References