Parsing a JSON string that is bigger than the memory
I spent some more time thinking about how this could be accomplished and finally managed to pull it off. Retrieving values and iterating over arrays/objects works like a charm. If you know of a better way to do it, please tell me. (I'm not too happy with the code; it seems like it could be a lot cleaner.) But hey it works.
If you want to try it here's a fiddle: https://repl.it/HfwS/31
json = {}setmetatable(json, { __call = function(filePath) local jsonFile = _file.open(filePath) local fileLen = jsonFile:stat().size local jsonPath = {} -- Would store `{'aa',2,'gg'}` for `example['aa'][2]['gg']()` local fakeJson = {} setmetatable(fakeJson, { __index = function (t, k) jsonPath[#jsonPath+1] = k return fakeJson end; __call = function() -- THIS IS WHERE THE JSON PARSING WOULD HAPPEN -- -- The jsonPath contains {'aa',2,'gg'} at this point local brcStack = {} -- will be used to push/pop braces/brackets local jsonPathDim = 1 -- table dimension (['a'] == 1; ['a']['b'] == 2; ...) -- Loop through the json file char by char local valueToReturn local filePos = 0 local nextChar = function() jsonFile:seek("set", filePos) filePos = filePos + 1 local char = jsonFile:read(1) --print(char) return char end local jsonValid = true for o=1, fileLen do -- infinite if jsonPathDim > #jsonPath then -- jsonPath followed. Now we can extract the value. while true do local currentChar = nextChar() if currentChar == '"' then -- string valueToReturn = '' for i=1, fileLen do currentChar = nextChar() if currentChar == '"' then break elseif currentChar == nil then jsonValid = false break else valueToReturn = valueToReturn .. currentChar end end break elseif string.find(currentChar,'[%d.]') then -- numbers 0.3, .3, 99 etc local rawValue = '' if currentChar == '.' then rawValue = '0' end for i=1, fileLen do if string.find(currentChar, '[%s,\r\n%]%}]') then break elseif filePos > fileLen then jsonValid = false break else rawValue = rawValue .. currentChar end currentChar = nextChar() end valueToReturn = tonumber(rawValue) break elseif currentChar == 't' then -- true valueToReturn = true break elseif currentChar == 'f' then -- false valueToReturn = false break elseif currentChar == 'n' then -- null valueToReturn = nil -- ? break elseif currentChar == '{' then -- null valueToReturn = {} brcStack[#brcStack+1] = '{' local origBrcLvl = #brcStack while true do currentChar = nextChar() if filePos > fileLen then jsonValid = false break elseif currentChar == '\\' then nextChar() -- Continue elseif origBrcLvl == #brcStack and currentChar == '"' then local keyToPush = '' while true do currentChar = nextChar() if currentChar == '"' then while true do currentChar = nextChar() if currentChar == ':' then valueToReturn[keyToPush] = 0 break elseif filePos > fileLen then break end end break elseif filePos > fileLen then jsonValid = false break else keyToPush = keyToPush .. currentChar end end break elseif currentChar == '[' or currentChar == '{' then brcStack[#brcStack+1] = currentChar elseif currentChar == ']' then if brcStack[#brcStack] == ']' then brcStack[#brcStack] = nil else jsonValid = false break end elseif currentChar == '}' then if brcStack[#brcStack] == '}' then brcStack[#brcStack] = nil else jsonValid = false break end end end break elseif currentChar == '[' then brcStack[#brcStack+1] = '[' valueToReturn = {} local origBrcLvl = #brcStack while true do currentChar = nextChar() if origBrcLvl == #brcStack and #valueToReturn == 0 and not string.find(currentChar, '[%s\r\n%]]') then valueToReturn[#valueToReturn+1] = 0 end if filePos > fileLen then jsonValid = false break elseif currentChar == '\\' then nextChar() -- Continue elseif origBrcLvl == #brcStack and currentChar == ',' then valueToReturn[#valueToReturn+1] = 0 elseif currentChar == '[' or currentChar == '{' then brcStack[#brcStack+1] = currentChar elseif currentChar == ']' then if brcStack[#brcStack] == ']' then brcStack[#brcStack] = nil else jsonValid = false break end elseif currentChar == '}' then if brcStack[#brcStack] == '}' then brcStack[#brcStack] = nil else jsonValid = false break end end end break end end break end local currentKey = jsonPath[jsonPathDim] local currentKeyLen = string.len(currentKey) if type(jsonPath[jsonPathDim]) == 'string' then -- Parsing { object while true do local currentChar = nextChar() if currentChar == '{' then brcStack[#brcStack+1] = '{' local origBrcLvl = #brcStack local keyFound = true for z=1, fileLen do -- loop over keys until we find it currentChar = nextChar() if currentChar == '\\' then nextChar() -- Continue elseif origBrcLvl == #brcStack and currentChar == '"' then local keyMatched = false for i=1, fileLen do local expectedChar = string.sub(currentKey,i,i) if nextChar() == expectedChar then if i == currentKeyLen and nextChar() == '"' then keyMatched = true while true do currentChar = nextChar() if currentChar == ':' then break elseif currentChar == nil then jsonValid = false break end end break end -- Continue else keyMatched = false break end end if keyMatched then keyFound = true break end elseif currentChar == '[' or currentChar == '{' then brcStack[#brcStack+1] = currentChar elseif currentChar == ']' then if brcStack[#brcStack] == ']' then brcStack[#brcStack] = nil else jsonValid = false break end elseif currentChar == '}' then if brcStack[#brcStack] == '}' then brcStack[#brcStack] = nil else jsonValid = false break end end end if keyFound then jsonPathDim = jsonPathDim+1 end break elseif currentChar == nil then jsonValid = false break end end elseif type(jsonPath[jsonPathDim]) == 'number' then -- Parsing [ array while true do local currentChar = nextChar() if currentChar == '[' then brcStack[#brcStack+1] = '[' local origBrcLvl = #brcStack local currentIndex = 1 -- currentKey local keyMatched = true for i=1, fileLen do currentChar = nextChar() if currentChar == '\\' then nextChar() -- Continue elseif origBrcLvl == #brcStack and currentChar == ',' then currentIndex = currentIndex +1 if currentIndex == currentKey then jsonPathDim = jsonPathDim+1 break end elseif currentChar == '[' or currentChar == '{' then brcStack[#brcStack+1] = currentChar elseif currentChar == ']' then if brcStack[#brcStack] == ']' then brcStack[#brcStack] = nil else jsonValid = false break end elseif currentChar == '}' then if brcStack[#brcStack] == '}' then brcStack[#brcStack] = nil else jsonValid = false break end else -- Continue end end break elseif currentChar == nil then jsonValid = false break end end else jsonValid = false break -- Invalid json end end jsonPath = {} -- Reset the jsonPath return valueToReturn end; }) return fakeJson end;})local example = json('example.json')-- Read a valuelocal value = example["aa"][2]['k1']()print(value)-- Loop over a key value table and print the keys and valuesfor key, value in pairs(example["aa"][2]()) do print('key: ' .. key, 'value: ' .. example["aa"][2][key]())end
JSON validation could be better, but if you supply invalid json data then you shouldn't expect anything anyways.
If you want to decode single JSON element (object, array, etc.) instead of decoding the whole JSON, you need JSON library having two features:
- "traverse" functionality (dry-run-decoding without creating Lua objects)
- ability to pass JSON as sequence of small parts (instead of preloading whole JSON as huge Lua string).
Example:
How to partially decode JSON using this module:
-- This is content of data.txt file:-- {"aa":["qq",{"k1":23,"gg":"YAY","Fermat_primes":[3, 5, 17, 257, 65537]}]}-- We want to extract as Lua values only "Fermat_primes" array and "gg" stringlocal json = require('json')-- Open filelocal file = assert(io.open('data.txt', 'r'))-- Define loader function which will read the file in 64-byte chunkslocal function my_json_loader() return file:read(64)endlocal FP, gg-- Prepare callback function for traverse with partial decodelocal function my_callback (path, json_type, value) path = table.concat(path, '/') if path == "aa/2/Fermat_primes" then FP = value return true -- we want to decode this array instead of traverse through it elseif path == "aa/2/gg" then gg = value endendjson.traverse(my_json_loader, my_callback)-- Close filefile:close()-- Display the resultsprint('aa.2.gg = '..gg)print('aa.2.Fermat_primes:')for k, v in ipairs(FP) do print(k, v) end
Output:
aa.2.gg = YAY aa.2.Fermat_primes: 1 3 2 5 3 17 4 257 5 65537