lopdf

Crates.io Build Status Docs

A Rust library for PDF document manipulation.

Example Code

```rust
use lopdf::dictionary;
use lopdf::{Document, Object, Stream};
use lopdf::content::{Content, Operation};

let mut doc = Document::withversion("1.5"); let pagesid = doc.newobjectid(); let fontid = doc.addobject(dictionary! { "Type" => "Font", "Subtype" => "Type1", "BaseFont" => "Courier", }); let resourcesid = doc.addobject(dictionary! { "Font" => dictionary! { "F1" => fontid, }, }); let content = Content { operations: vec![ Operation::new("BT", vec![]), Operation::new("Tf", vec!["F1".into(), 48.into()]), Operation::new("Td", vec![100.into(), 600.into()]), Operation::new("Tj", vec![Object::stringliteral("Hello World!")]), Operation::new("ET", vec![]), ], }; let contentid = doc.addobject(Stream::new(dictionary! {}, content.encode().unwrap())); let pageid = doc.addobject(dictionary! { "Type" => "Page", "Parent" => pagesid, "Contents" => contentid, }); let pages = dictionary! { "Type" => "Pages", "Kids" => vec![pageid.into()], "Count" => 1, "Resources" => resourcesid, "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()], }; doc.objects.insert(pagesid, Object::Dictionary(pages)); let catalogid = doc.addobject(dictionary! { "Type" => "Catalog", "Pages" => pagesid, }); doc.trailer.set("Root", catalog_id); doc.compress();

// Store file in current working directory. // Note: Line is exclude for when running tests if false { doc.save("example.pdf").unwrap(); } ```

```rust
use lopdf::dictionary;

use std::collections::BTreeMap;

use lopdf::content::{Content, Operation};
use lopdf::{Document, Object, ObjectId, Stream, Bookmark};

/// Builds a one-page in-memory PDF that shows "Hello World!" in Courier.
///
/// The returned [`Document`] has its trailer "Root" set to a fresh catalog but is
/// neither compressed nor saved; it only serves as input for the merge in `main`.
pub fn generatefakedocument() -> Document {
    let mut doc = Document::with_version("1.5");
    // Reserve the id of the "Pages" node first so the page can reference it as "Parent".
    let pages_id = doc.new_object_id();
    let font_id = doc.add_object(dictionary! {
        "Type" => "Font",
        "Subtype" => "Type1",
        "BaseFont" => "Courier",
    });
    let resources_id = doc.add_object(dictionary! {
        "Font" => dictionary! {
            "F1" => font_id,
        },
    });
    // Content stream: begin text, select font "F1" at size 48, move to (100, 600),
    // show the string, end text.
    let content = Content {
        operations: vec![
            Operation::new("BT", vec![]),
            Operation::new("Tf", vec!["F1".into(), 48.into()]),
            Operation::new("Td", vec![100.into(), 600.into()]),
            Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
            Operation::new("ET", vec![]),
        ],
    };
    let content_id = doc.add_object(Stream::new(dictionary! {}, content.encode().unwrap()));
    // Here "Resources" and "MediaBox" live on the page itself, not on the "Pages" node.
    let page_id = doc.add_object(dictionary! {
        "Type" => "Page",
        "Parent" => pages_id,
        "Contents" => content_id,
        "Resources" => resources_id,
        "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
    });
    let pages = dictionary! {
        "Type" => "Pages",
        "Kids" => vec![page_id.into()],
        "Count" => 1,
    };
    doc.objects.insert(pages_id, Object::Dictionary(pages));
    let catalog_id = doc.add_object(dictionary! {
        "Type" => "Catalog",
        "Pages" => pages_id,
    });
    doc.trailer.set("Root", catalog_id);

    doc
}

fn main() -> std::io::Result<()> { // Generate a stack of Documents to merge let documents = vec![ generatefakedocument(), generatefakedocument(), generatefakedocument(), generatefakedocument(), ];

// Define a starting max_id (will be used as start index for object_ids)
let mut max_id = 1;
let mut pagenum = 1;
// Collect all Documents Objects grouped by a map
let mut documents_pages = BTreeMap::new();
let mut documents_objects = BTreeMap::new();
let mut document = Document::with_version("1.5");

for mut doc in documents {
    let mut first = false;
    doc.renumber_objects_with(max_id);

    max_id = doc.max_id + 1;

    documents_pages.extend(
        doc
                .get_pages()
                .into_iter()
                .map(|(_, object_id)| {
                    if !first {
                        let bookmark = Bookmark::new(String::from(format!("Page_{}", pagenum)), [0.0, 0.0, 1.0], 0, object_id);
                        document.add_bookmark(bookmark, None);
                        first = true;
                        pagenum += 1;
                    }

                    (
                        object_id,
                        doc.get_object(object_id).unwrap().to_owned(),
                    )
                })
                .collect::<BTreeMap<ObjectId, Object>>(),
    );
    documents_objects.extend(doc.objects);
}

// Catalog and Pages are mandatory
let mut catalog_object: Option<(ObjectId, Object)> = None;
let mut pages_object: Option<(ObjectId, Object)> = None;

// Process all objects except "Page" type
for (object_id, object) in documents_objects.iter() {
    // We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects
    // All other objects should be collected and inserted into the main Document
    match object.type_name().unwrap_or("") {
        "Catalog" => {
            // Collect a first "Catalog" object and use it for the future "Pages"
            catalog_object = Some((
                if let Some((id, _)) = catalog_object {
                    id
                } else {
                    *object_id
                },
                object.clone(),
            ));
        }
        "Pages" => {
            // Collect and update a first "Pages" object and use it for the future "Catalog"
            // We have also to merge all dictionaries of the old and the new "Pages" object
            if let Ok(dictionary) = object.as_dict() {
                let mut dictionary = dictionary.clone();
                if let Some((_, ref object)) = pages_object {
                    if let Ok(old_dictionary) = object.as_dict() {
                        dictionary.extend(old_dictionary);
                    }
                }

                pages_object = Some((
                    if let Some((id, _)) = pages_object {
                        id
                    } else {
                        *object_id
                    },
                    Object::Dictionary(dictionary),
                ));
            }
        }
        "Page" => {}     // Ignored, processed later and separately
        "Outlines" => {} // Ignored, not supported yet
        "Outline" => {}  // Ignored, not supported yet
        _ => {
            document.objects.insert(*object_id, object.clone());
        }
    }
}

// If no "Pages" found abort
if pages_object.is_none() {
    println!("Pages root not found.");

    return Ok(());
}

// Iter over all "Page" and collect with the parent "Pages" created before
for (object_id, object) in documents_pages.iter() {
    if let Ok(dictionary) = object.as_dict() {
        let mut dictionary = dictionary.clone();
        dictionary.set("Parent", pages_object.as_ref().unwrap().0);

        document
                .objects
                .insert(*object_id, Object::Dictionary(dictionary));
    }
}

// If no "Catalog" found abort
if catalog_object.is_none() {
    println!("Catalog root not found.");

    return Ok(());
}

let catalog_object = catalog_object.unwrap();
let pages_object = pages_object.unwrap();

// Build a new "Pages" with updated fields
if let Ok(dictionary) = pages_object.1.as_dict() {
    let mut dictionary = dictionary.clone();

    // Set new pages count
    dictionary.set("Count", documents_pages.len() as u32);

    // Set new "Kids" list (collected from documents pages) for "Pages"
    dictionary.set(
        "Kids",
        documents_pages
                .into_iter()
                .map(|(object_id, _)| Object::Reference(object_id))
                .collect::<Vec<_>>(),
    );

    document
            .objects
            .insert(pages_object.0, Object::Dictionary(dictionary));
}

// Build a new "Catalog" with updated fields
if let Ok(dictionary) = catalog_object.1.as_dict() {
    let mut dictionary = dictionary.clone();
    dictionary.set("Pages", pages_object.0);
    dictionary.remove(b"Outlines"); // Outlines not supported in merged PDFs

    document
            .objects
            .insert(catalog_object.0, Object::Dictionary(dictionary));
}

document.trailer.set("Root", catalog_object.0);

// Update the max internal ID as wasn't updated before due to direct objects insertion
document.max_id = document.objects.len() as u32;

// Reorder all new Document objects
document.renumber_objects();

 //Set any Bookmarks to the First child if they are not set to a page
document.adjust_zero_pages();

//Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map.
if let Some(n) = document.build_outline() {
    if let Ok(x) = document.get_object_mut(catalog_object.0) {
        if let Object::Dictionary(ref mut dict) = x {
            dict.set("Outlines", Object::Reference(n));
        }
    }
}

document.compress();

// Save the merged PDF
// Store file in current working directory.
// Note: Line is exclude for when running tests
if false {
    document.save("merged.pdf").unwrap();
}

Ok(())

} ```

```rust
use lopdf::Document;

// For this example to work a parser feature needs to be enabled

[cfg(any(feature = "pomparser", feature = "nomparser"))]

{ let mut doc = Document::load("assets/example.pdf").unwrap();

doc.version = "1.4".to_string();
doc.replace_text(1, "Hello World!", "Modified text!");
// Store file in current working directory.
// Note: Line is exclude for when running tests
if false {
    doc.save("modified.pdf").unwrap();
}

} ```

FAQ