Parsing JSON in Forty Lines of Awk
# The function takes two parameters: the JSON object/array and the desired key
# (which may be a dotted path). The remaining parameters are local variables,
# since awk only allows local variables to be declared as extra function
# parameters.
function get_json_value( \
s, key,
type, all, rest, isval, i, c, j, k \
) {
# Get the type of object by its first character
type = substr(s, 1, 1)
# If it's neither an object, nor an array, throw an error
if (type != "" && type != "[") error("invalid json array/object " s)
# This variable is needed for when we recursively call the function
# It will be true if the key argument is omitted, since undefined
# variables in awk can behave as either a string or a number
all = key == "" && key == 0
# Get the first part of the key (which we will be looking for)
# if the path is dotted and save the rest for now
if (!all && (j = index(key, ".")))
rest = substr(key, j+1)
key = substr(key, 1, j-1)
# k is the current key
# If this is an array, it is the index, which starts at 0
if (type == "[") k = 0
# isval keeps track of whether we are looking at a JSON key or value
# In an array, all items are values
isval = type == "["
# Loop over the characters in the provided JSON
# Skip the opening brace or bracket (to avoid infinite recursion) and
# increment the index by the length of the token
for (i = 2; i < length(s); i += length(c))
# If this is a value, and the key matches, we've found our
# desired object, so return it
if (!all && isval && k == key) return c
# If we see a colon in an object, the next token is a value
# This needs to be after the previous statement to not capture
# the colon itself
if (type == "" && c == ":") isval = 1
# If this is an array and we see a comma, increment the index
if (type == "[" && c == ",") ++k
# If we're here, it means we didn't find the value we're looking for
# so only return something if the whole array or object was requested
if (all) return substr(s, 1, i)