I am converting hundreds of ODT files to PDF files, and it takes a long time doing them one after the other. I have a CPU with multiple cores. Is it possible to use bash or python to run several conversions in parallel?
I've written a program in Go to batch-convert thousands of doc/xls files.
Sometimes LibreOffice fails to convert a file; in that case you have to open the file and export it to PDF manually. Luckily, this affected only 10 of my 16,000 documents.
package main
import (
"os/exec"
"sync"
"path/filepath"
"os"
"fmt"
"strings"
)
var (
	// root is the top-level directory of the documents to convert. Replace
	// the placeholder with your own path before running.
	//
	// It is declared with var because the short form (root := ...) is only
	// permitted inside a function body, not at package scope.
	root = "/.../conversion-from-office/"

	// tasks carries prepared conversion commands from the directory walk to
	// the worker goroutines. The buffer of 64 lets the walk queue commands
	// ahead of the workers instead of blocking on every send.
	tasks = make(chan *exec.Cmd, 64)
)
// visit is the filepath.Walk callback. For every non-directory entry that is
// not itself a PDF and has no "<name>.pdf" next to it yet, it prints a
// progress line and queues a headless soffice conversion on the tasks channel.
//
// path is the entry being visited, f its file info, and err any error Walk
// encountered while reaching the entry. visit always returns nil so that a
// single unreadable entry does not abort the rest of the walk.
func visit(path string, f os.FileInfo, err error) error {
	if err != nil {
		// Walk may hand over a nil FileInfo together with the error, so stop
		// here before f is dereferenced.
		fmt.Fprintf(os.Stderr, "Skipping %s: %v\n", path, err)
		return nil
	}
	if f.IsDir() {
		return nil
	}

	ext := filepath.Ext(path)
	// filepath.Ext keeps the leading dot, so the comparison must include it.
	if strings.EqualFold(ext, ".pdf") {
		return nil
	}

	outfile := strings.TrimSuffix(path, ext) + ".pdf"
	if _, statErr := os.Stat(outfile); !os.IsNotExist(statErr) {
		// The PDF already exists (or its presence cannot be ruled out), so
		// there is nothing to queue.
		return nil
	}

	fmt.Printf("Converting %s\n", path)
	// Write the PDF next to its source document.
	outdir := filepath.Dir(path)
	tasks <- exec.Command("soffice", "--headless", "--convert-to", "pdf", path, "--outdir", outdir)
	return nil
}
// main starts a fixed pool of worker goroutines that run the commands queued
// on tasks, walks root with visit to fill the queue, then closes the queue and
// waits until the workers have drained it.
func main() {
	// workers is the number of goroutines pulling from tasks, i.e. how many
	// conversion commands run at the same time.
	const workers = 4

	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for cmd := range tasks {
				// Report failures instead of dropping them, so files that
				// still need attention can be identified afterwards.
				if err := cmd.Run(); err != nil {
					fmt.Fprintf(os.Stderr, "%q failed: %v\n", cmd.Args, err)
				}
			}
		}()
	}

	if err := filepath.Walk(root, visit); err != nil {
		fmt.Fprintf(os.Stderr, "filepath.Walk() returned %v\n", err)
	}

	// Nothing more will be queued; closing the channel ends the workers'
	// range loops once the remaining commands have been run.
	close(tasks)

	// wait for the workers to finish
	wg.Wait()
}