I have a large collection of documents scanned into PDF format, and I wish to write a shell script that will convert each document to DjVu format. Some documents were scanned a
Here are the elements of this answer:
- pdfimages will extract images so that the number of dots can be discovered.
- identify will give the size of the image in units of PostScript points (72 to the inch).

Below is a Lua script that solves the problem. I probably could have used a plain shell script, but capturing the width and height would have been a greater nuisance.
#!/usr/bin/env lua
require 'osutil'
require 'posixutil'
require 'mathutil'
--- Build a shell command printf-style and run it.
-- Every argument is forwarded to string.format; the formatted string is
-- passed to os.execute and whatever os.execute returns is returned as-is.
local function runf(...)
  local command = string.format(...)
  return os.execute(command)
end
-- Fail fast when no file name was supplied on the command line.
assert(arg[1], "no file on command line")
--- Ask ImageMagick's `identify` for the width and height of a file.
-- Only the first line of output is used (`sed 1q`), so for multi-page
-- input just the first reported size is returned.
-- @param filename path of the file to inspect
-- @return width, height as numbers, in whatever unit `identify` reports
--         for `%w` and `%h`
-- Raises an error if the output cannot be parsed as two integers.
local function dimens(filename)
  -- Concatenate the quoted name instead of substituting it with gsub:
  -- a '%' in the file name would otherwise be interpreted as an escape
  -- in the gsub replacement string.
  local cmd = [[identify -format "%w %h\n" ]] .. os.quote(filename) .. ' | sed 1q'
  local output = os.capture(cmd)
  -- Parse the numbers directly rather than executing the command's
  -- output as Lua code; this also avoids the 5.1-only loadstring.
  local w, h = tostring(output):match('^%s*(%d+)%s+(%d+)')
  w, h = tonumber(w), tonumber(h)
  assert(w and h, 'cannot determine dimensions of ' .. tostring(filename))
  return w, h
end
-- Main script: estimate the scan resolution of the single PDF named on
-- the command line.  The area reported by `dimens` for the PDF (in points)
-- is compared with the summed pixel area of the images that `pdfimages`
-- extracts from page 1; dpi = sqrt(dots^2 / inches^2).  Diagnostics go to
-- stderr and the rounded dpi is printed on stdout.
assert(#arg == 1, "dpi of just one file")

-- Set to false to silence the diagnostic output on stderr.
local VERBOSE = true

for _, pdf in ipairs(arg) do
  local w, h = dimens(pdf) -- units are points
  local insquared = w * h / (72.00 * 72.00)

  local imagedir = os.capture 'mktemp -d'
  assert(posix.isdir(imagedir))

  -- Extract and measure under pcall so that the temporary directory is
  -- removed even when a step raises (e.g. `dimens` cannot read an image).
  local ok, result = pcall(function()
    runf('pdfimages -f 1 -l 1 %s %s 1>&2', os.quote(pdf),
         os.quote(imagedir .. '/img'))
    local total = 0
    for file in posix.glob(imagedir .. '/img*') do
      local iw, ih = dimens(file) -- units are pixels
      total = total + iw * ih
    end
    return total
  end)
  os.execute('rm -rf ' .. os.quote(imagedir))
  if not ok then
    error(result, 0) -- re-raise the original error unchanged after cleanup
  end

  local dotsquared = result
  local dpi = math.sqrt(dotsquared / insquared)
  local rounded = math.round(dpi, 10)
  if VERBOSE then
    io.stderr:write(insquared, " square inches\n")
    io.stderr:write(dotsquared, " square dots\n")
    io.stderr:write(dpi, " exact dpi\n")
    io.stderr:write(rounded, " rounded dpi\n")
  end
  print(rounded)
end