A Rust library for PDF document manipulation.
```rust
// Example: build a one-page "Hello World!" PDF from scratch and save it.
extern crate lopdf;
use lopdf::{Document, Object, Dictionary, Stream, StringFormat};
use lopdf::content::{Content, Operation};
use Object::Reference;
use std::iter::FromIterator;

let mut doc = Document::with_version("1.5");

// Reserve the id of the page tree root up front: the page dictionary below
// must reference its parent before the "Pages" dictionary itself is inserted.
let pages_id = doc.new_object_id();

// Font dictionary for the Courier Type1 font.
let font_id = doc.add_object(
    Dictionary::from_iter(vec![
        ("Type", "Font".into()),
        ("Subtype", "Type1".into()),
        ("BaseFont", "Courier".into()),
    ])
);

// Resource dictionary exposing that font to content streams under the name "F1".
let resources_id = doc.add_object(
    Dictionary::from_iter(vec![
        ("Font", Dictionary::from_iter(vec![
            ("F1", Reference(font_id)),
        ]).into()),
    ])
);

// Page content: a single text object (BT ... ET) that selects font F1 at
// size 48, moves to (100, 600) and shows the string.
let content = Content{operations: vec![
    Operation::new("BT", vec![]),
    Operation::new("Tf", vec!["F1".into(), 48.into()]),
    Operation::new("Td", vec![100.into(), 600.into()]),
    Operation::new("Tj", vec![Object::String(b"Hello World!".to_vec(), StringFormat::Literal)]),
    Operation::new("ET", vec![]),
]};
let content_id = doc.add_object(Stream::new(Dictionary::new(), content.encode().unwrap()));

// The page itself, pointing back at the (not yet inserted) page tree root.
let page_id = doc.add_object(
    Dictionary::from_iter(vec![
        ("Type", "Page".into()),
        ("Parent", Reference(pages_id)),
        ("Contents", vec![Reference(content_id)].into()),
    ])
);

// Page tree root; "Resources" and "MediaBox" are set here rather than on the page.
let pages = Dictionary::from_iter(vec![
    ("Type", "Pages".into()),
    ("Kids", vec![Reference(page_id)].into()),
    ("Count", 1.into()),
    ("Resources", Reference(resources_id)),
    ("MediaBox", vec![0.into(), 0.into(), 595.into(), 842.into()].into()),
]);
// Insert under the id reserved earlier so the page's "Parent" reference resolves.
doc.objects.insert(pages_id, Object::Dictionary(pages));

// Document catalog, registered as the trailer's "Root" entry.
let catalog_id = doc.add_object(
    Dictionary::from_iter(vec![
        ("Type", "Catalog".into()),
        ("Pages", Reference(pages_id)),
    ])
);
doc.trailer.set("Root", Reference(catalog_id));

doc.compress();
doc.save("example.pdf").unwrap();
```
rust
let mut doc = Document::load("example.pdf")?;
doc.version = "1.4".to_string();
if let Some(content_stream) = doc.objects.get_mut(&(3, 0)) {
match *content_stream {
Object::Stream(ref mut stream) => {
let mut content = stream.decode_content().unwrap();
content.operations[3].operands[0] = Object::String(
b"Modified text!".to_vec(),
StringFormat::Literal);
stream.set_content(content.encode().unwrap());
},
_ => ()
}
}
doc.save("modified.pdf")?;
Why keep everything in memory as high-level objects until finally serializing the entire document?
Normally a PDF document won't be very large, ranging from tens of KB to hundreds of MB, and memory size is not a bottleneck for today's computers. By keeping the whole document in memory, stream lengths can be pre-calculated, so there is no need to use a reference object for the Length entry. The resulting PDF file is smaller for distribution and faster for PDF consumers to process.
Producing a document is a one-time effort, while consuming it happens many times.