Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
40  public:
/**
 * @brief A callback function used to check whether to cancel the searching process or not.
 *
 * Pure virtual: implementers must override it.
 * NOTE(review): the enclosing class header is not present in this listing; presumably
 * this is SearchCancelCallback, the type taken by the TextSearch constructors - confirm.
 */
47  virtual bool NeedToCancelNow() = 0;
48 };
49 
/**
 * Value class describing one character of a text page: its font, character flag,
 * font size, origin position, glyph/typographic bounding boxes and matrix.
 *
 * NOTE(review): this listing omits several original lines (signature openings, some
 * enumerators and the declarations of font, flag, char_box, char_outbox and matrix);
 * they are referenced by the code below and by the member index at the end of the page.
 */
51 class TextPageCharInfo FS_FINAL : public Object {
52  public:
    /**
     * @brief Enumeration for PDF textpage character flag.
     *
     * NOTE(review): the enumerators at listing lines 64, 66 and 70 (described in the
     * index as Generated, UnUnicode and ComboWord) are not shown here.
     */
58  typedef enum _TextCharFlag {
    /** @brief Character flag: Unknown. */
60  e_Unknown = -1,
    /** @brief Character flag: Normal. */
62  e_Normal = 0,
    /** @brief Character flag: Hyphen. */
68  e_Hyphen = 3,
71  } TextCharFlag;
72 
73 
    /**
     * @brief Constructor, with parameters.
     *
     * Stores each argument in the member of the same name.
     * NOTE(review): the opening of this signature (listing line 89: font, flag,
     * font_size, origin_x, origin_y) is missing from the listing.
     */
90  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
91  this->font = font;
92  this->flag = flag;
93  this->font_size = font_size;
94  this->origin_x = origin_x;
95  this->origin_y = origin_y;
96  this->char_box = char_box;
97  this->char_outbox = char_outbox;
98  this->matrix = matrix;
99  }
100 
     /**
      * @brief Constructor.
      *
      * Zero-initializes font_size, origin_x and origin_y.
      * NOTE(review): the start of this constructor (listing lines 102-103) is missing
      * from the listing, so any earlier initializers are not visible here.
      */
104  , font_size(0)
105  , origin_x(0)
106  , origin_y(0) {}
107 
     /**
      * @brief Constructor, with another character information object.
      *
      * @param[in] char_info  Another character information object; every field is copied from it.
      */
113  TextPageCharInfo(const TextPageCharInfo& char_info) {
114  this->font = char_info.font;
115  this->flag = char_info.flag;
116  this->font_size = char_info.font_size;
117  this->origin_x = char_info.origin_x;
118  this->origin_y = char_info.origin_y;
119  this->char_box = char_info.char_box;
120  this->char_outbox = char_info.char_outbox;
121  this->matrix = char_info.matrix;
122  }
123 
     /**
      * @brief Assign operator.
      *
      * Copies every field from char_info and returns a reference to the current object.
      * NOTE(review): the signature line (listing line 131) is missing from the listing.
      */
132  this->font = char_info.font;
133  this->flag = char_info.flag;
134  this->font_size = char_info.font_size;
135  this->origin_x = char_info.origin_x;
136  this->origin_y = char_info.origin_y;
137  this->char_box = char_info.char_box;
138  this->char_outbox = char_info.char_outbox;
139  this->matrix = char_info.matrix;
140  return *this;
141  }
142 
     /**
      * @brief Equal operator.
      *
      * The float fields (font_size, origin_x, origin_y) are treated as equal when their
      * absolute difference does not exceed FLT_EPSILON; all other fields are compared
      * with their own != operator.
      *
      * @param[in] char_info  Another character information object to compare with.
      *
      * @return true if no field differs, false otherwise.
      */
150  bool operator == (const TextPageCharInfo& char_info) const {
151  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
152  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
153  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
154  return false;
155 
156  return true;
157  }
158 
     /**
      * @brief Not equal operator.
      *
      * Evaluates the same field-by-field condition as the equal operator and returns
      * the opposite result.
      * NOTE(review): the condition is duplicated from operator==; it could be written
      * as !(*this == char_info) so the two cannot drift apart.
      *
      * @param[in] char_info  Another character information object to compare with.
      *
      * @return true if any field differs, false otherwise.
      */
166  bool operator != (const TextPageCharInfo& char_info) const{
167  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
168  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
169  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
170  return true;
171 
172  return false;
173  }
174 
     /**
      * @brief Set value.
      *
      * @param[in] font         A font for character.
      * @param[in] flag         Character flag; one of the TextCharFlag values.
      * @param[in] font_size    Font size for character.
      * @param[in] origin_x     The x-coordinate of the origin position.
      * @param[in] origin_y     The y-coordinate of the origin position.
      * @param[in] char_box     The glyph bounding box in page space.
      * @param[in] char_outbox  The typographic (display and printing) bounding box in page space.
      * @param[in] matrix       The matrix of the character.
      */
192  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
193  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
194  this->font = font;
195  this->flag = flag;
196  this->font_size = font_size;
197  this->origin_x = origin_x;
198  this->origin_y = origin_y;
199  this->char_box = char_box;
200  this->char_outbox = char_outbox;
201  this->matrix = matrix;
202  }
203 
208 
216 
     /** @brief Font size for character. */
222  float font_size;
223 
     /** @brief The x-coordinate of the origin position. */
227  float origin_x;
228 
     /** @brief The y-coordinate of the origin position. */
232  float origin_y;
233 
238 
243 
248 };
249 
/**
 * Text page of a PDF page: gives access to the page's characters, text content and
 * text rectangles.
 *
 * NOTE(review): this listing omits the enumerators of both enums and at least two
 * member declarations (blank listing lines 473 and 497); the index at the end of the
 * page also lists GetTextUnderAnnot and GetTextRectArrayByRect, which presumably belong here.
 */
265 class TextPage FS_FINAL : public Base {
266  public:
     /** @brief Enumeration for parsing flags used for text page. */
272  typedef enum _TextParseFlags {
279  } TextParseFlags;
280 
     /** @brief Enumeration for text order flag which is used when getting text content of a PDF page. */
286  typedef enum _TextOrderFlag {
291  } TextOrderFlag;
292 
293 
     /**
      * @brief Constructor, from a parsed PDF page.
      *
      * @param[in] page   A PDF page object.
      * @param[in] flags  Parsing flags; defaults to foxit::pdf::TextPage::e_ParseTextNormal.
      */
301  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
302 
     /** @brief Destructor. */
304  ~TextPage();
     /** @brief Constructor, with another text page object. */
310  TextPage(const TextPage& other);
     /** @brief Assign operator. */
318  TextPage& operator = (const TextPage& other);
319 
     /** @brief Equal operator. */
327  bool operator == (const TextPage& other) const;
     /** @brief Not equal operator. */
335  bool operator != (const TextPage& other) const;
336 
     /** @brief Check whether current object is empty or not. */
344  bool IsEmpty() const;
345 
     /** @brief Get the count of all the characters. */
351  int GetCharCount() const;
352 
     /** @brief Get character information of a specific character. */
362  TextPageCharInfo GetCharInfo(int char_index);
363 
     /** @brief Get all the characters within a range specified by a start index and count. */
378  WString GetChars(int start = 0, int count = -1) const;
379 
     /** @brief Get the character index at or around a specified position on the page, in PDF coordinate system. */
392  int GetIndexAtPos(float x, float y, float tolerance) const;
393 
     /** @brief Get the text within a rectangle, in PDF coordinate system. */
401  WString GetTextInRect(const RectF& rect) const;
402 
     /** @brief Get the page text. */
411  WString GetText(TextOrderFlag flag) const;
412 
     /** @brief Get the character range of a word at or around a specified position on the page. */
427  common::Range GetWordAtPos(float x, float y, float tolerance) const;
428 
     /** @brief Count the text rectangles within a range specified by a start index and count. */
440  int GetTextRectCount(int start = 0, int count = -1);
441 
     /** @brief Get the text rectangle by the index. */
451  RectF GetTextRect(int rect_index) const;
452 
     /** @brief Get the text trend (as rotation) of a specified rectangle. */
463  common::Rotation GetBaselineRotation(int rect_index);
464 
473 
     /** @brief Get the character index range of all text rectangles within the specified rectangle region. */
481  common::Range GetCharRange(const RectF& rect);
482 
497 
498  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
499  explicit TextPage(FS_HANDLE handle = NULL);
500 };
501 
/**
 * Text search: searches for a pattern in a PDF document, an XFA document, a text page
 * or an annotation, and reports the position of each match.
 *
 * NOTE(review): this listing omits the SearchFlags enumerators and three member
 * declarations (blank listing lines 746, 757 and 768); the index at the end of the page
 * also lists GetMatchSentence, GetMatchSentenceStartIndex and GetMatchSentenceEndIndex,
 * which presumably belong here.
 */
513 class TextSearch FS_FINAL : public Base {
514  public:
     /** @brief Enumeration for searching flags. */
520  typedef enum _SearchFlags {
531  } SearchFlags;
532 
533 
     /**
      * @brief Constructor, for a PDF document.
      *
      * @param[in] document  A PDF document object.
      * @param[in] cancel    Optional cancel callback; defaults to NULL.
      * @param[in] flags     Text parsing flags; defaults to foxit::pdf::TextPage::e_ParseTextNormal.
      */
549  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
550 
551  #ifndef _FX_NO_XFA_
552 
     /**
      * @brief Constructor, for an XFA document. Only declared when _FX_NO_XFA_ is not defined.
      *
      * @param[in] xfa_document  An XFA document object.
      * @param[in] cancel        Optional cancel callback; defaults to NULL.
      */
563  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
564 #endif // #ifndef _FX_NO_XFA_
565 
     /** @brief Constructor, for a text page. */
570  explicit TextSearch(const foxit::pdf::TextPage& text_page);
571 
     /** @brief Constructor, for an annotation. */
580  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
581 
     /** @brief Destructor. */
583  ~TextSearch();
     /** @brief Constructor, with another text search object. */
589  TextSearch(const TextSearch& other);
     /** @brief Assign operator. */
597  TextSearch& operator = (const TextSearch& other);
598 
     /** @brief Equal operator. */
606  bool operator == (const TextSearch& other) const;
     /** @brief Not equal operator. */
614  bool operator != (const TextSearch& other) const;
615 
     /** @brief Check whether current object is empty or not. */
623  bool IsEmpty() const;
624 
     /**
      * @brief Set keywords to search.
      *
      * @param[in] key_words        The text to search for.
      * @param[in] is_regex_search  Defaults to false. NOTE(review): presumably selects
      *                             regular-expression matching - confirm against the SDK reference.
      */
635  bool SetPattern(const wchar_t* key_words, bool is_regex_search = false);
636 
     /** @brief Set starting page index. */
652  bool SetStartPage(int page_index);
653 
     /** @brief Set ending page index. */
669  bool SetEndPage(int page_index);
670 
     /** @brief Set starting character index, from where the search process is to be started. */
692  bool SetStartCharacter(int char_index);
693 
     /** @brief Set search flags. */
705  bool SetSearchFlags(uint32 search_flags);
706 
     /** @brief Search for next matched pattern. */
713  bool FindNext();
714 
     /** @brief Search for previous matched pattern. */
721  bool FindPrev();
722 
     /** @brief Get the rectangles of current match pattern. */
728  RectFArray GetMatchRects() const;
729 
     /** @brief Get the page index, to which current match belongs. */
738  int GetMatchPageIndex() const;
739 
746 
757 
768 
     /** @brief Get the index of the first character of current match pattern, based on current match page. */
775  int GetMatchStartCharIndex() const;
776 
     /** @brief Get the index of the last character of current match pattern, based on current match page. */
783  int GetMatchEndCharIndex() const;
784 
785  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
786  explicit TextSearch(FS_HANDLE handle = NULL);
787 };
788 
/**
 * A text link, exposing a URI and a start/end character index.
 *
 * NOTE(review): the original documentation for this class is not present in this
 * listing; the member descriptions below are derived from the signatures only and
 * should be confirmed against the SDK reference.
 */
797 class TextLink FS_FINAL : public Base{
798  public:
     /** @brief Destructor. */
800  ~TextLink();
     /** @brief Constructor, with another text link object. */
806  TextLink(const TextLink& other);
     /** @brief Assign operator. */
814  TextLink& operator = (const TextLink& other);
815 
     /** @brief Equal operator. */
823  bool operator == (const TextLink& other) const;
     /** @brief Not equal operator. */
831  bool operator != (const TextLink& other) const;
832 
     /** @brief Check whether current object is empty or not. */
840  bool IsEmpty() const;
841 
     /** @brief Get the URI, as a wide string. */
850  WString GetURI();
851 
     /** @brief Get the start character index. NOTE(review): presumably relative to the owning text page - confirm. */
857  int GetStartCharIndex();
858 
     /** @brief Get the end character index. NOTE(review): presumably relative to the owning text page - confirm. */
864  int GetEndCharIndex();
865 
872  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
873  explicit TextLink(FS_HANDLE handle = NULL);
874 
875 };
876 
/**
 * Collection of the text links of a text page: constructed from a TextPage and
 * exposing a count plus indexed access to TextLink objects.
 *
 * NOTE(review): the original documentation for this class is not present in this
 * listing; the member descriptions below are derived from the signatures only and
 * should be confirmed against the SDK reference.
 */
881 class PageTextLinks FS_FINAL : public Base{
882  public:
     /** @brief Constructor, from a text page object. */
888  explicit PageTextLinks(const TextPage& page);
     /** @brief Constructor, with another page text links object. */
894  PageTextLinks(const PageTextLinks& other);
     /** @brief Assign operator. */
902  PageTextLinks& operator = (const PageTextLinks& other);
     /** @brief Equal operator. */
910  bool operator == (const PageTextLinks& other) const ;
     /** @brief Not equal operator. */
918  bool operator != (const PageTextLinks& other) const ;
919 
     /** @brief Check whether current object is empty or not. */
927  bool IsEmpty() const;
     /** @brief Destructor. */
929  ~PageTextLinks();
930 
     /** @brief Get the count of text links. */
936  int GetTextLinkCount();
937 
     /** @brief Get a text link by index. NOTE(review): valid index range is presumably 0 to GetTextLinkCount()-1 - confirm. */
946  TextLink GetTextLink(int index);
947 
948  // Users are strongly advised NOT to use this constructor; otherwise unknown situations may occur.
949  explicit PageTextLinks(FS_HANDLE handle = NULL);
950 };
951 } // namespace pdf
952 } // namespace foxit
953 #endif // FS_SEARCH_H_
954 
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:286
bool IsEmpty() const
Check whether current object is empty or not.
If set, match the case of keyword when searching.
Definition: fs_search.h:524
Definition: fs_common.h:1368
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
Definition: fs_search.h:513
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:237
CFX_Object Object
Object type.
Definition: fs_basictypes.h:220
bool FindPrev()
Search for previous matched pattern.
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:58
Character flag: Normal.
Definition: fs_search.h:62
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:89
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate system.
~TextSearch()
Destructor.
Character flag: Hyphen.
Definition: fs_search.h:68
bool SetStartPage(int page_index)
Set starting page index.
WIDE STRING CLASS.
Definition: fx_string.h:1461
common::Font font
A font for character.
Definition: fs_search.h:207
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
Character flag: Generated.
Definition: fs_search.h:64
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:242
Definition: fs_pdfdoc.h:776
bool operator !=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:166
If set, match the whole word of keyword when searching.
Definition: fs_search.h:526
bool operator==(const TextSearch &other) const
Equal operator.
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:113
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:272
Character flag: UnUnicode.
Definition: fs_search.h:66
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:528
~TextPage()
Destructor.
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
Definition: fs_xfa.h:910
bool operator !=(const TextSearch &other) const
Not equal operator.
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
bool operator==(const TextPage &other) const
Equal operator.
TextSearch & operator=(const TextSearch &other)
Assign operator.
float font_size
Font size for character.
Definition: fs_search.h:222
Header file for annotation related definitions and classes.
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:215
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
WString GetText(TextOrderFlag flag) const
Get the page text.
int GetCharCount() const
Get the count of all the characters.
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:195
Definition: fs_pdfpage.h:412
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:213
Header file for common definitions and classes.
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:290
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF pa...
Definition: fs_search.h:274
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
TextPageCharInfo()
Constructor.
Definition: fs_search.h:102
bool IsEmpty() const
Check whether current object is empty or not.
Definition: fs_basictypes.h:451
Character flag: Unknown.
Definition: fs_search.h:60
Header file for XFA related definitions and functions.
Header file for PDF page related definitions and classes.
bool FindNext()
Search for next matched pattern.
bool SetPattern(const wchar_t *key_words, bool is_regex_search=false)
Set keywords to search.
Definition: fs_annot.h:1006
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
Definition: fs_common.h:1523
TextPage & operator=(const TextPage &other)
Assign operator.
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
Foxit namespace.
Definition: fs_taggedpdf.h:27
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:131
Definition: fs_search.h:51
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:232
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
Matrix matrix
The matrix of the character.
Definition: fs_search.h:247
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
bool operator !=(const TextPage &other) const
Not equal operator.
#define NULL
The null-pointer value.
Definition: fx_system.h:792
No special searching options.
Definition: fs_search.h:522
If set, to ignore full-width characters and treat all characters as standard ASCII or standard-width ...
Definition: fs_search.h:530
Definition: fx_coordinates.h:1076
WString GetMatchSentence()
Get the sentence that contains current match pattern.
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:192
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:278
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:150
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:520
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
Definition: fs_search.h:265
Definition: fs_search.h:39
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:288
Character flag: ComboWord.
Definition: fs_search.h:70
bool SetEndPage(int page_index)
Set ending page index.
bool SetSearchFlags(uint32 search_flags)
Set search flags.
Definition: fx_coordinates.h:771
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:276
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:227