Parsing JSON in Forty Lines of Awk


# The function takes two parameters, the JSON object/array and the desired key
# The rest are local variables (awk only allows local variables in the form
# of function parameters)
function get_json_value( \
	s, key,
	type, all, rest, isval, i, c, j, k \
) {
	# Extract a value from the JSON object/array text in s.
	#
	#   s    JSON text whose first character is "{" or "["; anything after
	#        the matching closing bracket is ignored
	#   key  dotted path such as "a.b.0" (array indices start at 0); when
	#        omitted, the text of the whole leading object/array is returned
	#
	# Returns the raw JSON token for the requested value (strings keep their
	# surrounding quotes and escape sequences), or nothing if the path is
	# not found. Calls error() on malformed input.

	# Get the type of object by its first character
	type = substr(s, 1, 1)

	# If it's neither an object, nor an array, throw an error
	if (type != "{" && type != "[") error("invalid json array/object " s)

	# This variable is needed for when we recursively call the function
	# It will be true if the key argument is omitted, since undefined
	# variables in awk can behave as either a string or a number
	all = key == "" && key == 0

	# Get the first part of the key (which we will be looking for)
	# if the path is dotted and save the rest for now
	if (!all && (j = index(key, "."))) {
		rest = substr(key, j+1)
		key = substr(key, 1, j-1)
	}

	# k is the current key
	# If this is an array, it is the index, which starts at 0
	if (type == "[") k = 0

	# isval keeps track of whether we are looking at a JSON key or value
	# In an array, all items are values
	isval = type == "["

	# Loop over the characters in the provided JSON
	# Skip the opening brace or bracket (to avoid infinite recursion) and
	# increment the index by the length of the token
	for (i = 2; i < length(s); i += length(c)) {
		# c holds the current token; start with a single character and
		# widen it below once we know what kind of token this is
		c = substr(s, i, 1)

		if (c == "\"") {
			# A string: match up to the closing unescaped quote
			if (!match(substr(s, i), /^"(\\.|[^\\"])*"/))
				error("invalid json string " substr(s, i))
			c = substr(s, i, RLENGTH)

			# In key position, remember the key without its quotes
			if (!isval) k = substr(c, 2, RLENGTH - 2)
		}
		else if (c == "{" || c == "[") {
			# A nested object/array: descend with the remaining path if
			# this is the value we want and there is more path to follow,
			# otherwise just capture the whole nested text as one token
			if (!all && isval && k == key && !(rest == "" && rest == 0))
				c = get_json_value(substr(s, i), rest)
			else
				c = get_json_value(substr(s, i))
		}
		else if (c == "}" || c == "]") {
			# End of the object/array we were asked to scan
			break
		}
		else if (c == ",") {
			# Next token is a key again (objects) or a value (arrays)
			isval = type == "["
		}
		else if (c == ":") {
			# Handled after the match check below
		}
		else if (c ~ /[ \t\n\r]/) {
			# Insignificant whitespace: skip it without running the
			# match check, so it is never returned as a value
			continue
		}
		else {
			# A bare scalar: take everything up to the next delimiter
			# and make sure it is a number, true, false or null
			match(substr(s, i), /^[^][{}:," \t\n\r]+/)
			c = substr(s, i, RLENGTH)
			if (c !~ /^(true|false|null|-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)$/)
				error("invalid json token " substr(s, i))
		}

		# If this is a value, and the key matches, we've found our
		# desired object, so return it
		if (!all && isval && k == key) return c

		# If we see a colon in an object, the next token is a value
		# This needs to be after the previous statement to not capture
		# the colon itself
		if (type == "{" && c == ":") isval = 1

		# If this is an array and we see a comma, increment the index
		if (type == "[" && c == ",") ++k
	}

	# If we're here, it means we didn't find the value we're looking for
	# so only return something if the whole array or object was requested
	if (all) return substr(s, 1, i)
}



Source link