/* * PDFedit - free program for PDF document manipulation. * Copyright (C) 2006, 2007 PDFedit team: Michal Hocko, * Miroslav Jahoda, * Jozef Misutka, * Martin Petricek * * Project is hosted on http://sourceforge.net/projects/pdfedit */ // vim:tabstop=4:shiftwidth=4:noexpandtab:textwidth=80 /* * ===================================================================================== * Filename: cpage.cc * Description: CPage. * Created: 20/03/2006 11:46:14 AM CET * Author: jmisutka, mhocko (annotation stuff) * ===================================================================================== */ #ifndef _CPAGE_H #define _CPAGE_H // all basic includes #include "static.h" // CDict #include "cobject.h" // CContentstream #include "ccontentstream.h" // CAnnotation #include "cannotation.h" // Text output #include "textoutput.h" // Factories #include "factories.h" // State updater #include "stateupdater.h" //===================================================================================== namespace pdfobjects { //===================================================================================== //===================================================================================== // Display parameters (loose xpdf parameters put into a simple structure) // -- default values are in cpage.cc because we do not want to have global variables. //===================================================================================== /** * Graphical state parameters. * * These parameters are used by xpdf when updating bounding boxex of content stream operators, * displaying page etc. */ typedef struct DisplayParams { /** Paramaters */ double hDpi; /**< Horizontal DPI. */ double vDpi; /**< Vertical DPI. */ Rectangle pageRect; /**< Page rectangle. */ int rotate; /**< Page rotation. */ GBool useMediaBox;/**< Use page media box. */ GBool crop; /**< Crop the page. */ GBool upsideDown; /**< Upside down. */ /** Constructor. Default values are set. */ DisplayParams () : hDpi (DEFAULT_HDPI), vDpi (DEFAULT_VDPI), pageRect (Rectangle (DEFAULT_PAGE_LX, DEFAULT_PAGE_LY, DEFAULT_PAGE_RX, DEFAULT_PAGE_RY)), rotate (DEFAULT_ROTATE), useMediaBox (gTrue), crop (gFalse), upsideDown (gTrue) {} /** Equality operator. */ bool operator== (const DisplayParams& dp) const { return (hDpi == dp.hDpi && vDpi == dp.vDpi && pageRect == dp.pageRect && rotate == dp.rotate && useMediaBox == dp.useMediaBox && crop == dp.crop && upsideDown == dp.upsideDown); } /** Converting position from pixmap of viewed page to pdf position. * @param fromX X position on viewed page. * @param fromY Y position on viewed page. * * @param toX return X position in pdf page. * @param toY return Y position in pdf page. * * @see convertPdfPosToPixmapPos */ void convertPixmapPosToPdfPos( double fromX, double fromY, double & toX, double & toY ) const { double * ctm /*[6]*/; double h; PDFRectangle pdfRect ( pageRect.xleft, pageRect.yleft, pageRect.xright, pageRect.yright ); GfxState state (hDpi, vDpi, &pdfRect, rotate, upsideDown ); ctm = state.getCTM(); h = (ctm[0]*ctm[3] - ctm[1]*ctm[2]); assert( h != 0 ); toX = (fromX*ctm[3] - ctm[2]*fromY + ctm[2]*ctm[5] - ctm[4]*ctm[3]) / h; toY = (ctm[0]*fromY + ctm[1]*ctm[4] - ctm[0]*ctm[5] - ctm[1]*fromX) / h; } /** Converting pdf position to position on pixmap of viewed page. * @param fromX X position in pdf page. * @param fromY Y position in pdf page. * * @param toX return X position on viewed page. * @param toY return Y position on viewed page. * * @see convertPixmapPosToPdfPos */ void convertPdfPosToPixmapPos( double fromX, double fromY, double & toX, double & toY ) const { PDFRectangle pdfRect ( pageRect.xleft, pageRect.yleft, pageRect.xright, pageRect.yright ); GfxState state (hDpi, vDpi, &pdfRect, rotate, upsideDown ); state.transform( fromX, fromY, &toX, &toY ); } // // Default values // -- small hack to declare them as ints, to be able to init // them here (if double, we could not init them here because of the non // integral type compilator error)) // static const int DEFAULT_HDPI = 72; /**< Default horizontal dpi. */ static const int DEFAULT_VDPI = 72; /**< Default vertical dpi. */ static const int DEFAULT_ROTATE = 0; /**< No rotatation. */ static const int DEFAULT_PAGE_LX = 0; /**< Default x position of left upper corner. */ static const int DEFAULT_PAGE_LY = 0; /**< Default y position of right upper corner. */ static const int DEFAULT_PAGE_RX = 612; /**< Default A4 width on a device with 72 horizontal dpi. */ static const int DEFAULT_PAGE_RY = 792; /**< Default A4 height on a device with 72 vertical dpi. */ } DisplayParams; //===================================================================================== // Text search parameters (loose xpdf parameters put into a simple structure) // -- default values are in cpage.cc because we do not want to pollute global space. //===================================================================================== /** * Text search parameters. * * These parameters are used by xpdf when serching a text string. */ typedef struct TextSearchParams { /** Paramaters */ GBool startAtTop; /**< Start searching from the top. */ double xStart; /**< Start searching from x position. */ double yStart; /**< Start searching from y position. */ double xEnd; /**< Stop searching from x position. */ double yEnd; /**< Stop searching from y position. */ /** Constructor. Default values are set. */ TextSearchParams () : startAtTop (DEFAULT_START_AT_TOP), xStart (DEFAULT_X_START), yStart (DEFAULT_Y_START), xEnd (DEFAULT_X_END), yEnd (DEFAULT_Y_END) {} // // Default values // -- small hack to declare them as ints, to be able to init // them here (if double, we could not init them here because of the non // integral type compilator error)) // static const GBool DEFAULT_START_AT_TOP = gTrue; /**< Start at top. */ static const int DEFAULT_X_START = 0; /**< Default x position of left upper corner. */ static const int DEFAULT_Y_START = 0; /**< Default y position of left upper corner. */ static const int DEFAULT_X_END = 0; /**< Default x position of right upper corner. */ static const int DEFAULT_Y_END = 0; /**< Default y position of right upper corner. */ } TextSearchParams; //===================================================================================== // Comparators Point/Rectangle //===================================================================================== /** * Comparator that we can use to find out if another rectangle intersects * rectangle specified by this comparator. */ struct PdfOpCmpRc { /** * Consructor. * * @param rc Rectangle used when comparing. */ PdfOpCmpRc (const Rectangle& rc) : rc_(rc) {} /** * Is the intersection of supplied rectangle and rectangle specified in the * constructor not empty. * * Our rectangle does NOT contain another rectangle if * */ bool operator() (const Rectangle& rc) const { return Rectangle::isInitialized (libs::rectangle_intersect (rc_, rc)); } private: const Rectangle rc_; /**< Rectangle to be compared. */ }; /** * Comparator that we can use to find out if a rectange contains point specified * by this comparator. */ struct PdfOpCmpPt { /** * Consructor. * * @param pt Point that we use when comparing. */ PdfOpCmpPt (const Point& pt) : pt_(pt) {} /** * Is point in a rectangle. * * @param rc Rectangle. */ bool operator() (const Rectangle& rc) const { return (rc.contains (pt_.x, pt_.y)); } private: const Point pt_; /**< Point to be compared. */ }; /** Sets unitialized inheritable page attributes. * @param pageDict Page dictionary reference where to set values. * * Gets InheritedPageAttr structure for given pageDict (uses * fillInheritedPageAttr helper function) and sets all fields which are not * present in given dictionary to found values. */ void setInheritablePageAttr(boost::shared_ptr & pageDict); //===================================================================================== // CPage //===================================================================================== // // Forward declaration // class CPage; // // // typedef observer::ObserverHandler CPageObserverSubject; /** * This object represents page object from pdf specification v1.5. Pdf page object is a dictionary * reachable from page tree structure with several required properties. * It is responsible just for one single page. * * Every pdf page contains all information required for displaying the page * (e.g. page metrics, page contents etc.) Page properties can be inherited from * its parent in the page tree. The first encountered during page tree traversal * is used. This feature can * cause problems because it is no well defined what does it mean to change a * property that is inherited (it is not present in the page dictionary but in a * parent) * * We display a page using xpdf code. The argument to this function is an output * device which can draw graphical objects. The contents of a page is specified * by a "Contents" entry in the page dictionary. If empty the page is blank. * * CPage is a subject that can be observed. This is important when a change * leads to content stream reparsing (e.g. deleting an entry from "Contents" * property in the page dictionary) * * Content stream consists of a sequence of operators which should be processed * sequentially. The operators define what is really on a page. The pdf * specification is too general about pdf operators and that is why working with * operators is difficult. According to pdf specification text is split * neither to sentences nor words. Letters of a word can occur randomly in the content stream * because the position of a letter (text) is absolute. (e.g. it is very likely * that a word "humor" will be split into "hu" "m" "or" because of the "m" * beeing wider than other letters.) This makes searching and exporting page text a problem. * We use xpdf code to perform both actions. Xpdf parses a page to lines and * words with a rough approuch when a more letters are claimed as one word when * they are close enough. This algorithm is working ok for normal pdf files, but * if the pdf creator would like to disable text exporting it could produce such * sequence of pdfoperators, that hardly any program could export text correctly. * * Pdf operators are in one or more streams. Problem with this * approach is that these operators can be split * into streams at almost arbitrary place. * * Processing pdf operators can be very expensive so they are parsed only on demand. Each operator * can be placed in a bounding box. These bounding boxes are used when searching * the page for a text, selecting objects, drawing the page. * * Each page content stream is a selfcontained entity that can not * use resources defined in another page. It can use only inherited resources * from a parent in the page tree. Which means we can not simply change fonts * on a page to match another page, use images from another page etc. */ class CPage : public noncopyable, public CPageObserverSubject { public: /** Container of content streams. */ typedef std::vector > ContentStreams; /** Type for annotation storage. */ typedef std::vector > AnnotStorage; /** Position in content stream container. */ typedef size_t CcPosition; /** Type of page observer context. */ typedef observer::BasicChangeContext BasicObserverContext; private: /** Pdf dictionary representing a page. */ boost::shared_ptr dictionary; /** Class representing content stream. */ ContentStreams contentstreams; /** Actual display parameters. */ DisplayParams lastParams; /** Is page valid. */ bool valid; /** Keeps all annotations from this page. * * This structure is synchronized with page dictionary Annots field with * observer. */ AnnotStorage annotStorage; // // Constructors // public: /** * Constructor. * * @param pageDict Dictionary representing pdf page. */ CPage (boost::shared_ptr& pageDict); // // Annotation observer // private: /** * Consolidates annotStorage field according given change. * Works in two steps. First handles oldValue and second newValue. At first * checkes oldValue type and if it is reference, dereference indirect * objects and if it is annotation dictionary, it will invalidate * associated CAnnotation and removes it from annotStorage. *
* In second step, checks newValue type and if it is reference to * dictionary, it will create new CAnnotation instance and adds it to * annotStorage. * * @param oldValue Removed reference from annotStorage. * @param newValue Added reference to the annotStorage. */ void consolidateAnnotsStorage(boost::shared_ptr & oldValue, boost::shared_ptr & newValue); /** Observer for Annots property. * This observer is registered on page dictionary and if Annots property is * a reference also to this property. Any change which leads to change of * Annots array (either add, remove or change) is handled here. Note that * it doesn't handle array content change. */ class AnnotsPropWatchDog: public IIPropertyObserver { /** Page owner of this observer. */ CPage* page; public: /** Initialization constructor. * Sets page field according parameter. * * @param _page CPage instance. */ AnnotsPropWatchDog(CPage * _page):page(_page) { // given parameter must be non NULL // this is used only internaly by CPage, so assert is enough for // checking assert(_page); } /** Empty destructor. */ virtual ~AnnotsPropWatchDog() throw(){} /** Observer handler. * * Checks given context type: *
    *
  • BasicChangeContext means that Annots property in page dictionary * is reference and its value has changed. *
  • ComplexChangeContext means that page dictionary has changed. So * checks property id and if it not Annots, immediatelly returns, * because this change doesn't affect annotations. Otherwise checks * original value type. If it is reference, unregisters this observer * from it. If newValue is reference, registers observer to it. *
* In any case: *
    *
  • Tries to get array from oldValue and unregister observers from * it (uses page->unregisterAnnotsObservers). *
  • Invalidates and removes all annotations from * page->annotStorage. *
  • collects all current annotations (uses collectAnnotations). *
  • Tries to get current Annots array and registers observers to it * (uses page->registerAnnotsObservers) *
* * @param newValue New value of changed property. * @param context Context of the change. */ virtual void notify (boost::shared_ptr newValue, boost::shared_ptr context) const throw(); /** Returns observer priority. */ virtual priority_t getPriority()const throw() { // TODO some constant return 0; } }; /** * Observer for Annots array synchronization. * This observer is registered on Annots array property and all its * reference typed elements. It handles change in Annots array content - * this means either element is added, removed or replaced, or any of its * reference elements changes its value. */ class AnnotsArrayWatchDog: public IIPropertyObserver { /** Page owner of this observer. */ CPage* page; public: typedef observer::BasicChangeContext BasicObserverContext; typedef CDict::CDictComplexObserverContext ComplextObserverContext; /** Initialization constructor. * Sets page field according parameter. * * @param _page CPage instance. */ AnnotsArrayWatchDog(CPage * _page):page(_page) { // given parameter must be non NULL // this is used only internaly by CPage, so assert is enough for // checking assert(_page); } /** Empty destructor. */ virtual ~AnnotsArrayWatchDog() throw(){} /** Observer handler. * * Checks given context type: *
    *
  • BasicObserverContext means that Annots array reference element * has changed its value. *
  • ComplexChangeContext means that Annots array content has changed. * If original value is reference, then unregisters this obsever from * it. If newValue is reference registers this observer to it. *
  • Different context is not supported and so method immediatelly * returns. *
* In both situations calls consolidateAnnotsStorage with original and * new value parameters. * * @param newValue New value of changed property. * @param context Context of the change. */ virtual void notify (boost::shared_ptr newValue, boost::shared_ptr context) const throw(); /** Returns observer priority. */ virtual priority_t getPriority()const throw() { // TODO some constant return 0; } }; /** Watchdog for Annots property. * @see AnnotsPropWatchDog */ boost::shared_ptr annotsPropWatchDog; /** Watchdog for Annotation array. * @see AnnotsArrayWatchDog */ boost::shared_ptr annotsArrayWatchDog; /** Registers observers for annotations synchronization. * Checks type of given property and if it is reference, registers * annotsPropWatchDog observer to it and dereferences indirect object. If * annots or dereferenced object is array, registers annotsArrayWatchDog * observer to it and all its reference type elements. * * @param annots Annots property. */ void registerAnnotsObservers(boost::shared_ptr & annots); /** Unregisters obsevers from given Annots property. * This method works reversely to registerAnnotsObservers (observers are * unregistered but rest is same). * * @param annots Annots property. */ void unregisterAnnotsObservers(boost::shared_ptr & annots); // // CStream observer // private: /** * Observer implementation for content stream synchronization. */ class ContentsWatchDog: public IIPropertyObserver { /** * Owner of this observer. */ CPage* page; public: /** Initialization constructor. * Sets page field according parameter. * * @param _page CPage instance. */ ContentsWatchDog (CPage* _page) : page(_page) { assert(_page); } /** Empty destructor. */ virtual ~ContentsWatchDog() throw() {} /** * Observer handler. * * @param * @param context Context of the change. */ virtual void notify (boost::shared_ptr, boost::shared_ptr) const throw(); /** Returns observer priority. */ virtual priority_t getPriority() const throw() { return 0; } }; /** * Contents observer. */ boost::shared_ptr contentsWatchDog; protected: /** * Register observer on all cstreams that content stream consists of. */ void registerContentsObserver () const; /** * Unregister observer from all cstreams that this object consists of. * * This function is called when saving consten stream consisting of * more streams. If we do not unregister observers, we would be notified * that a stream has changed after the first save (when the content stream * is invalid) and our observer would want to reparse an invalid stream. */ void unregisterContentsObserver () const; // // Destructor // public: /** Destructor. */ ~CPage () { kernelPrintDbg (debug::DBG_INFO, "Page destroyed."); // Unregister all observers if (valid) { // Unregister contents observer unregisterContentsObserver (); // unregisters annotation observers - if annotation array present in // page dictionary if(dictionary->containsProperty("Annots")) { boost::shared_ptr annotsDict=dictionary->getProperty("Annots"); unregisterAnnotsObservers(annotsDict); } } } // // Comparable interface // public: /** * Equality operator. * * @param page Another page object. */ bool operator== (const CPage& page) { return (this == &page) ? true : false; } // // Invalidate page // public: /** * Inform all obsevers that this page is not valid. */ void invalidate () { assert (valid); // unregisters annotation observers - if annotation array present in // page dictionary if(dictionary->containsProperty("Annots")) { boost::shared_ptr annotsDict=dictionary->getProperty("Annots"); unregisterAnnotsObservers(annotsDict); } // Unregister contents observer unregisterContentsObserver (); _objectChanged (true); valid = false; } // // Get methods // public: /** * Get the dictionary representing this object. * * @return Dictionary. */ boost::shared_ptr getDictionary () const { return dictionary; } /** * Get pdf operators at specified position. * This call will be delegated to content stream object. * * @param opContainer Operator container where operators in specified are * wil be stored. * @param rc Rectangle around which we will be looking. */ template void getObjectsAtPosition (OpContainer& opContainer, const Rectangle& rc) { kernelPrintDbg (debug::DBG_DBG, " at rectangle (" << rc << ")"); // Get the objects with specific comparator getObjectsAtPosition (opContainer, PdfOpCmpRc (rc)); } /** * Get pdf operators at specified position. * This call will be delegated to content stream object. * * @param opContainer Operator container where operators in specified are * wil be stored. * @param pt Point around which we will be looking. */ template void getObjectsAtPosition (OpContainer& opContainer, const Point& pt) { kernelPrintDbg (debug::DBG_DBG, " at point (" << pt << ")"); // Get the objects with specific comparator getObjectsAtPosition (opContainer, PdfOpCmpPt (pt)); } /** * Get pdf operators at specified position. * This call will be delegated to content stream object. * * @param opContainer Operator container where operators in specified are wil be stored. * @param cmp Null if default kernel area comparator should be used otherwise points * to an object which will decide whether an operator is "near" a point. */ template void getObjectsAtPosition (OpContainer& opContainer, PositionComparator cmp) { kernelPrintDbg (debug::DBG_DBG, ""); // Are we in valid pdf assert (hasValidPdf (dictionary)); assert (hasValidRef (dictionary)); if (!hasValidPdf(dictionary) || !hasValidRef(dictionary)) throw CObjInvalidObject (); // If not parsed if (contentstreams.empty()) parseContentStream (); // Get the objects with specific comparator for (ContentStreams::iterator it = contentstreams.begin (); it != contentstreams.end(); ++it) (*it)->getOperatorsAtPosition (opContainer, cmp); } /** * Get contents streams. * * @param container Output container of all contentstreams. * * @return Content stream. */ template void getContentStreams (Container& container) { kernelPrintDbg (debug::DBG_DBG, ""); assert (valid); // If not parsed if (contentstreams.empty()) parseContentStream (); container.clear(); std::copy (contentstreams.begin(), contentstreams.end(), std::back_inserter(container)); } /** * Returns plain text extracted from a page using xpdf code. * * This method uses xpdf TextOutputDevice that outputs a page to a text device. * Text in a pdf is stored neither word by word nor letter by letter. It is not * easy not decide whether two letters form a word. Xpdf uses insane * algorithm that works most of the time. * * @param text Output string where the text will be saved. * @param encoding Encoding format. * @param rc Rectangle from which to extract the text. */ void getText (std::string& text, const std::string* encoding = NULL, const Rectangle* rc = NULL) const; /** * Get text source of a page. */ template void convert (textoutput::OutputBuilder& out) { typedef textoutput::PageTextSource TextSource; kernelPrintDbg (debug::DBG_INFO, ""); // If not parsed if (contentstreams.empty()) parseContentStream (); // Create gfx resource and state boost::shared_ptr gfxres; boost::shared_ptr gfxstate; createXpdfDisplayParams (gfxres, gfxstate); assert (gfxres && gfxstate); // Create page text class with parametrized parts TextSource text_source; // Get text from all content streams for (ContentStreams::iterator it = contentstreams.begin(); it != contentstreams.end(); ++it) { // Get operators and build text representation if not empty CContentStream::Operators ops; (*it)->getPdfOperators (ops); if (!ops.empty()) { PdfOperator::Iterator itt = PdfOperator::getIterator (ops.front()); StateUpdater::updatePdfOperators (itt, gfxres, *gfxstate, text_source); } } // Create lines, columns... text_source.format (); // Build the output if (hasValidPdf(dictionary)) text_source.output (out, getPagePosition()); else text_source.output (out, 0); } // // Annotations // public: /** * Fills given container with all page's annotations. * * Copies annotStorage content to given container (which is cleared at * first). *
* Given container must support clear and insert operations and store * shared_ptr<CAnnotation$gt; elements. * * @param container Container which is filled in. */ template void getAllAnnotations(T & container)const { assert (valid); container.clear(); container.insert(container.begin(), annotStorage.begin(), annotStorage.end()); } /** * Adds new annotation to this page. * * Inserts deep copy of given annotation and stores its reference to Annots * array in page dictionary (if this doesn't exist, it is created). * User has to call getAllAnnotations to get current annotations state (we * don't have identifier for annotations - there are some mechanisms how to * do it according pdf specification, but there is no explicit identifier). *
* Given annotation may come from different CPdf or may belong to nowhere. *
* As a result annotStorage is updated. New indirect object representing * annotation dictionary is added to same pdf (dictionary is same as given * one except P field is updated to contain correct reference to this page). *
* Note that this page must belong to pdf and has to have valid indirect * reference. This is neccessary, because annotation is indirect object page * keeps reference to it. Reference without pdf doesn't make sense. * * @param annot Annotation to add. * @throw CObjInvalidObject if this page doesn't have valid pdf or indirect * reference. * @throw ElementBadTypeException if Annots field from page dictionary is * not an array (or reference with array indirect target). */ void addAnnotation(boost::shared_ptr annot); /** Removes given annotation from page. * @param annot Annotation to remove. * * Tries to find given annotation in annotStorage and if found, removes * reference from Annots array. *
* As a result, removed annotation is invalidated and not accessible. User * has to call getAllAnnotations method to get current state (same way as * in addAnnotation case). * * @return true if annotation was removed. */ bool delAnnotation(boost::shared_ptr annot); // // Font // public: /** * Get all font ids and base names that are in the resource dictionary of a page. * * The resource can be inherited from a parent in the page tree dictionary. * Base names should be human readable or at least standard system fonts * defined in the pdf specification. We * must choose from these items to make a font change valid. Otherwise, we * have to add standard system font or manually a font object. * * @param cont Output container of font id and basename pairs. */ template void getFontIdsAndNames (Container& cont) const; /** * Add new simple type 1 font item to the page resource dictionary. * * The id of this font is arbitrary but it has to be unique. It will be generated as follows: * PdfEditor for the first item, PdfEditorr for the second, PdfEditorrr for * the third etc. * * We supposed that the font name is a standard system font avaliable to all viewers. * * @param fontname Output container of pairs of (Id,Name). * @param winansienc Set encoding to standard WinAnsiEnconding. */ void addSystemType1Font (const std::string& fontname, bool winansienc = true); // // Helper methods // public: /** * Get page position. */ size_t getPagePosition () const; /** * Set display params. */ void setDisplayParams (const DisplayParams& dp) { lastParams = dp; // set rotate to positive integer lastParams.rotate -= ((int)(lastParams.rotate / 360) -1) * 360; // set rotate to range [ 0, 360 ) lastParams.rotate -= ((int)lastParams.rotate / 360) * 360; // Use mediabox if (lastParams.useMediaBox) lastParams.pageRect = getMediabox (); // Change bbox etc... reparseContentStream (); } /** * Draw page on an output device. * * We use xpdf code to draw a page. It uses insane global parameters and * many local parameters. * * @param out Output device. * @param params Display parameters. */ void displayPage (::OutputDev& out, const DisplayParams& params, int x = -1, int y = -1, int w = -1, int h = -1); /** * Draw page on an output device. * Use old display params. */ void displayPage (::OutputDev& out, int x = -1, int y = -1, int w = -1, int h = -1); /** * Draw page on an output device with last used display parameters. * * @param out Output device. * @param dict If not null, page is created from dict otherwise * this page dictionary is used. But still some information is gathered from this page dictionary. */ void displayPage (::OutputDev& out, boost::shared_ptr dict = boost::shared_ptr (), int x = -1, int y = -1, int w = -1, int h = -1) const; /** * Parse content stream. * Content stream is an optional property. When found it is parsed, * nothing is done otherwise. * * @return True if content stream was found and was parsed, false otherwise. */ bool parseContentStream (); /** * Reparse content stream using actual display parameters. */ void reparseContentStream (); /** * Add new content stream to the front. This function adds new entry in the "Contents" * property of a page. The container of provided operators must form a valid * contentstream. * This function should be used when supplied operators * should be handled at the beginning end e.g. should be drawn first which means * they will appear the "below" other object. * * This function can be used to separate our changes from original content stream. * * @param cont Container of operators to add. */ template void addContentStreamToFront (const Container& cont); template inline void addContentStream (const Container& cont) { {int THIS_FUNCTION_SHOULD_NOT_BE_USED_USE__addContentStreamToFront_OR_addContentStreamToBack__INSTEAD;} addContentStreamToFront (cont); } /** * Add new content stream to the back. This function adds new entry in the "Contents" * property of a page. The container of provided operators must form a valid * contentstream. * This function should be used when supplied operators * should be handled at the end e.g. should be drawn at the end which means * they will appear "above" other objects. * * This function can be used to separate our changes from original content stream. * * @param cont Container of operators to add. */ template void addContentStreamToBack (const Container& cont); /** * Remove content stream. * This function removes all objects from "Contents" entry which form specified contentstream. * * @param csnum Number of content stream to remove. */ void removeContentStream (size_t csnum); // // Page translation // public: /** * Set transform matrix of a page. This operator will be preceding first cm * operator (see pdf specification), if not found it will be the first operator. * * @param tm Six number representing transform matrix. */ void setTransformMatrix (double tm[6]); // // Media box // public: /** * Return media box of this page. * * It is a required item in page dictionary (spec p.119) but can be * inherited from a parent in the page tree. * * @return Rectangle specifying the box. */ Rectangle getMediabox () const; /** * Set media box of this page. * * @param rc Rectangle specifying the page metrics. */ void setMediabox (const Rectangle& rc); // // Rotation // public: /** * Get rotation. * * @return Rotation degree measurment. */ int getRotation () const; /** * Set rotation. * * @param rot Set rotation degree measurment. */ void setRotation (int rot); // // Text search/find // public: /** * Find all occurences of a text on this page. * * It uses xpdf TextOutputDevice to get the bounding box of found text. * * @param text Text to find. * @param recs Output container of rectangles of all occurences of the text. * @param params Search parameters. * * @return Number of occurences found. */ template size_t findText (std::string text, RectangleContainer& recs, const TextSearchParams& params = TextSearchParams()) const; // // Helper functions // public: /** * Return shared pointer to the content stream. * @param cc Raw ccontentstream pointer. */ boost::shared_ptr getContentStream (CContentStream* cc) { for (ContentStreams::iterator it = contentstreams.begin(); it != contentstreams.end(); ++it) if ((*it).get() == cc) return *it; assert (!"Contentstream not found"); throw CObjInvalidOperation (); } private: /** * Create xpdf's state and resource parameters. * * @param res Gfx resource parameter. * @param state Gfx state parameter. */ void createXpdfDisplayParams (boost::shared_ptr& res, boost::shared_ptr& state); private: /** * Save changes and indicate that the object has changed by calling all * observers. * * @param invalid If true indicate that this page has been invalidated. */ void _objectChanged (bool invalid = false); // // PdfEdit changes // public: /** * Get n-th change. * Higher change means older change. */ boost::shared_ptr getChange (size_t nthchange = 0) const; /** * Get our changes sorted. * The first change is the last change. If there are no changes * container is empty. */ template void getChanges (Container& cont) const; /** * Get count of our changes. */ size_t getChangeCount () const; /** * Draw nth change on an output device with last used display parameters. * * @param out Output device. * @param cont Container of content streams to display */ template void displayChange (::OutputDev& out, const Container& cont) const; void displayChange (::OutputDev& out, const std::vector cs) const { ContentStreams css; for (std::vector::const_iterator it = cs.begin(); it != cs.end(); ++it) { if (static_cast(*it) >= contentstreams.size()) throw CObjInvalidOperation (); css.push_back (contentstreams[*it]); } displayChange (out, css); } /** * Move contentstream up one level. Which means it will be repainted by less objects. */ void moveAbove (boost::shared_ptr ct); void moveAbove (CcPosition pos) { if (pos >= contentstreams.size()) throw OutOfRange(); moveAbove (contentstreams[pos]); } /** * Move contentstream below one level. Which means it will be repainted by more objects. */ void moveBelow (boost::shared_ptr ct); void moveBelow (CcPosition pos) { if (pos >= contentstreams.size()) throw OutOfRange(); moveBelow (contentstreams[pos]); } }; //========================================================== // Helper functions //========================================================== /** * Check whether iproperty claimed to be a page is conforming to the pdf specification. * (p.118) * * @param ip IProperty. */ bool isPage (boost::shared_ptr ip); //===================================================================================== } // namespace pdfobjects //===================================================================================== #endif // _CPAGE_H