Foxit PDF SDK
fs_search.h
Go to the documentation of this file.
1 
15 #ifndef FS_SEARCH_H_
16 #define FS_SEARCH_H_
17 
18 #include "common/fs_common.h"
19 #include "pdf/annots/fs_annot.h"
20 #include "pdf/fs_pdfpage.h"
21 #ifndef _FX_NO_XFA_
22 #include "addon/xfa/fs_xfa.h"
23 #endif // #ifndef _FX_NO_XFA_
24 
30 namespace foxit {
34 namespace pdf {
40  public:
47  virtual bool NeedToCancelNow() = 0;  // Pure-virtual callback used to check whether to cancel the searching process or not (presumably true means "cancel" - confirm).
48 };
49 
// Character information record of a text page: font, character flag, font size, origin
// position, glyph/typographic bounding boxes and matrix of one character.
// NOTE(review): this is a rendered listing. The leading numbers are the header's own line
// numbers; the gaps are source lines (doc comments and several declarations) not shown here.
51 class TextPageCharInfo FS_FINAL : public Object {
52  public:
    // Enumeration for PDF textpage character flag.
    // The index also lists e_Generated (line 64), e_UnUnicode (line 66) and e_ComboWord
    // (line 70); those enumerators and their values are not shown in this listing.
58  typedef enum _TextCharFlag {
60  e_Unknown = -1,  // Character flag: Unknown.
62  e_Normal = 0,  // Character flag: Normal.
68  e_Hyphen = 3,  // Character flag: Hyphen.
71  } TextCharFlag;
72 
73 
    // Constructor, with parameters. The first line of its signature (fs_search.h:89) is not
    // shown in this listing; the visible body copies each argument into the member of the
    // same name.
90  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
91  this->font = font;
92  this->flag = flag;
93  this->font_size = font_size;
94  this->origin_x = origin_x;
95  this->origin_y = origin_y;
96  this->char_box = char_box;
97  this->char_outbox = char_outbox;
98  this->matrix = matrix;
99  }
100 
     // Default constructor (begins at fs_search.h:102; its first lines are not shown).
     // The visible initializers set font_size, origin_x and origin_y to 0.
104  , font_size(0)
105  , origin_x(0)
106  , origin_y(0) {}
107 
     // Constructor, with another character information object: copies all eight fields
     // from char_info by assignment in the body.
113  TextPageCharInfo(const TextPageCharInfo& char_info) {
114  this->font = char_info.font;
115  this->flag = char_info.flag;
116  this->font_size = char_info.font_size;
117  this->origin_x = char_info.origin_x;
118  this->origin_y = char_info.origin_y;
119  this->char_box = char_info.char_box;
120  this->char_outbox = char_info.char_outbox;
121  this->matrix = char_info.matrix;
122  }
123 
     // Assign operator (its signature at fs_search.h:131 is not shown): copies all eight
     // fields from char_info and returns *this.
132  this->font = char_info.font;
133  this->flag = char_info.flag;
134  this->font_size = char_info.font_size;
135  this->origin_x = char_info.origin_x;
136  this->origin_y = char_info.origin_y;
137  this->char_box = char_info.char_box;
138  this->char_outbox = char_info.char_outbox;
139  this->matrix = char_info.matrix;
140  return *this;
141  }
142 
     // Equal operator. font, flag, char_box, char_outbox and matrix are compared with !=;
     // the float fields (font_size, origin_x, origin_y) count as different only when their
     // absolute difference exceeds FLT_EPSILON. Returns true when no field differs.
     // NOTE(review): FLT_EPSILON is an absolute tolerance here - confirm that is intended
     // for page-space coordinates of large magnitude.
150  bool operator == (const TextPageCharInfo& char_info) const {
151  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
152  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
153  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
154  return false;
155 
156  return true;
157  }
158 
     // Not equal operator. Evaluates exactly the same condition as operator== and returns
     // the opposite result (true when any field differs).
     // NOTE(review): the condition is duplicated from operator==; the two must be kept in sync.
166  bool operator != (const TextPageCharInfo& char_info) const{
167  if (font != char_info.font || flag != char_info.flag || fabs(font_size - char_info.font_size) > FLT_EPSILON ||
168  fabs(origin_x - char_info.origin_x) > FLT_EPSILON || fabs(origin_y - char_info.origin_y) > FLT_EPSILON ||
169  char_box != char_info.char_box || char_outbox != char_info.char_outbox || matrix != char_info.matrix)
170  return true;
171 
172  return false;
173  }
174 
     // Set value: overwrites all eight fields with the given arguments.
192  void Set(const common::Font& font, TextCharFlag flag, float font_size, float origin_x, float origin_y,
193  const RectF& char_box, const RectF& char_outbox, const Matrix& matrix) {
194  this->font = font;
195  this->flag = flag;
196  this->font_size = font_size;
197  this->origin_x = origin_x;
198  this->origin_y = origin_y;
199  this->char_box = char_box;
200  this->char_outbox = char_outbox;
201  this->matrix = matrix;
202  }
203 
     // common::Font font (fs_search.h:207, not shown in this listing): a font for character.
208 
     // TextCharFlag flag (fs_search.h:215, not shown in this listing): the character flag,
     // one of the TextCharFlag values.
216 
     // Font size for character.
222  float font_size;
223 
     // The x-coordinate of the origin position.
227  float origin_x;
228 
     // The y-coordinate of the origin position.
232  float origin_y;
233 
     // RectF char_box (fs_search.h:237, not shown in this listing): the glyph bounding box
     // in page space.
238 
     // RectF char_outbox (fs_search.h:242, not shown in this listing): the typographic
     // (display and printing) bounding box in page space.
243 
     // Matrix matrix (fs_search.h:247, not shown in this listing): the matrix of the character.
248 };
249 
// Text page object: constructed from a parsed PDF page and used to query that page's
// characters, text and text rectangles.
// NOTE(review): this is a rendered listing. The leading numbers are the header's own line
// numbers; doc comments, enumerators and a few declarations are not shown here.
265 class TextPage FS_FINAL : public Base {
266  public:
     // Enumeration for parsing flags used for text page. Enumerators (not shown in this
     // listing): e_ParseTextNormal (line 274), e_ParseTextOutputHyphen (line 276, output the
     // hyphen on a line feed), e_ParseTextUseStreamOrder (line 278, parse by the stream order).
272  typedef enum _TextParseFlags {
279  } TextParseFlags;
280 
     // Enumeration for text order flag which is used when getting text content of a PDF page.
     // Enumerators (not shown in this listing): e_TextStreamOrder (line 288) and
     // e_TextDisplayOrder (line 290).
286  typedef enum _TextOrderFlag {
291  } TextOrderFlag;
292 
293 
     // Constructor, from a parsed PDF page. flags defaults to e_ParseTextNormal
     // (presumably a combination of TextParseFlags values - confirm).
301  explicit TextPage(const PDFPage& page, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
302 
     // Destructor.
304  ~TextPage();
     // Copy constructor, with another text page object.
310  TextPage(const TextPage& other);
     // Assign operator.
318  TextPage& operator = (const TextPage& other);
319 
     // Equal operator.
327  bool operator == (const TextPage& other) const;
     // Not equal operator.
335  bool operator != (const TextPage& other) const;
336 
     // Check whether current object is empty or not.
344  bool IsEmpty() const;
345 
     // Get the count of all the characters.
351  int GetCharCount() const;
352 
     // Get character information of a specific character.
362  TextPageCharInfo GetCharInfo(int char_index);
363 
     // Get all the characters within a range specified by a start index and count.
     // count defaults to -1 (presumably "up to the last character" - confirm).
378  WString GetChars(int start = 0, int count = -1) const;
379 
     // Get the character index at or around a specified position on the page,
     // in PDF coordinate system.
392  int GetIndexAtPos(float x, float y, float tolerance) const;
393 
     // Get the text within a rectangle, in PDF coordinate system.
401  WString GetTextInRect(const RectF& rect) const;
402 
     // Get the page text, in the order selected by flag.
411  WString GetText(TextOrderFlag flag) const;
412 
     // Get the character range of a word at or around a specified position on the page.
427  common::Range GetWordAtPos(float x, float y, float tolerance) const;
428 
     // Count the text rectangles within a range specified by a start index and count.
440  int GetTextRectCount(int start = 0, int count = -1);
441 
     // Get the text rectangle by the index.
451  RectF GetTextRect(int rect_index) const;
452 
     // Get the text trend (as rotation) of a specified rectangle.
463  common::Rotation GetBaselineRotation(int rect_index);
464 
     // The index also lists RectFArray GetTextRectArrayByRect(const RectF &rect) ("Get the
     // array of all text rectangles within the specified rectangle region"); its declaration
     // is not shown in this listing (presumably in the gap above - confirm).
473 
     // Get the character index range of all text rectangles within the specified rectangle region.
481  common::Range GetCharRange(const RectF& rect);
482 
     // The index also lists WString GetTextUnderAnnot(annots::Annot &annot) const; its
     // declaration is not shown in this listing (presumably in the gap above - confirm).
497 
498  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
499  explicit TextPage(FS_HANDLE handle = NULL);
500 };
501 
// Text search object: configured with a pattern, page range, start character and search
// flags, then stepped with FindNext()/FindPrev(); the GetMatch* accessors describe the
// current match.
// NOTE(review): this is a rendered listing. The leading numbers are the header's own line
// numbers; doc comments, enumerators and a few declarations are not shown here.
513 class TextSearch FS_FINAL : public Base {
514  public:
     // Enumeration for searching flags. Enumerators (not shown in this listing):
     // e_SearchNormal (line 522, no special searching options), e_SearchMatchCase (line 524),
     // e_SearchMatchWholeWord (line 526), e_SearchConsecutive (line 528) and
     // e_SearchNotMatchFullWidth (line 530).
520  typedef enum _SearchFlags {
531  } SearchFlags;
532 
533 
     // Constructor, for a PDF document. cancel defaults to NULL and flags to
     // TextPage::e_ParseTextNormal.
549  explicit TextSearch(const PDFDoc& document, SearchCancelCallback* cancel = NULL, int flags = foxit::pdf::TextPage::e_ParseTextNormal);
550 
551  #ifndef _FX_NO_XFA_
552 
     // Constructor taking an XFA document; only declared when _FX_NO_XFA_ is not defined.
563  explicit TextSearch(const foxit::addon::xfa::XFADoc& xfa_document, foxit::pdf::SearchCancelCallback* cancel = NULL);
564 #endif // #ifndef _FX_NO_XFA_
565 
     // Constructor taking a text page object.
570  explicit TextSearch(const foxit::pdf::TextPage& text_page);
571 
     // Constructor taking an annotation object.
580  explicit TextSearch(const foxit::pdf::annots::Annot& annot);
581 
     // Destructor.
583  ~TextSearch();
     // Copy constructor, with another text search object.
589  TextSearch(const TextSearch& other);
     // Assign operator.
597  TextSearch& operator = (const TextSearch& other);
598 
     // Equal operator.
606  bool operator == (const TextSearch& other) const;
     // Not equal operator.
614  bool operator != (const TextSearch& other) const;
615 
     // Check whether current object is empty or not.
623  bool IsEmpty() const;
624 
     // Set keywords to search. is_regex_search defaults to false.
635  bool SetPattern(const wchar_t* key_words, bool is_regex_search = false);
636 
     // Set starting page index.
652  bool SetStartPage(int page_index);
653 
     // Set ending page index.
669  bool SetEndPage(int page_index);
670 
     // Set starting character index, from where the search process is to be started.
692  bool SetStartCharacter(int char_index);
693 
     // Set search flags (presumably a combination of SearchFlags values - confirm).
705  bool SetSearchFlags(uint32 search_flags);
706 
     // Search for next matched pattern.
713  bool FindNext();
714 
     // Search for previous matched pattern.
721  bool FindPrev();
722 
     // Get the rectangles of current match pattern.
728  RectFArray GetMatchRects() const;
729 
     // Get the page index, to which current match belongs.
738  int GetMatchPageIndex() const;
739 
     // The index also lists WString GetMatchSentence(), int GetMatchSentenceStartIndex() and
     // int GetMatchSentenceEndIndex(); their declarations are not shown in this listing
     // (presumably in the gaps around here - confirm).
746 
757 
768 
     // Get the index of the first character of current match pattern, based on current match page.
775  int GetMatchStartCharIndex() const;
776 
     // Get the index of the last character of current match pattern, based on current match page.
783  int GetMatchEndCharIndex() const;
784 
785  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
786  explicit TextSearch(FS_HANDLE handle = NULL);
787 };
788 
// Text link object. The visible interface exposes a URI and a start/end character index
// (presumably a hyperlink detected in page text - confirm against the SDK reference).
// NOTE(review): this is a rendered listing; the original doc comments are not shown.
797 class TextLink FS_FINAL : public Base{
798  public:
     // Destructor.
800  ~TextLink();
     // Copy constructor, with another text link object.
806  TextLink(const TextLink& other);
     // Assign operator.
814  TextLink& operator = (const TextLink& other);
815 
     // Equal operator.
823  bool operator == (const TextLink& other) const;
     // Not equal operator.
831  bool operator != (const TextLink& other) const;
832 
     // Check whether current object is empty or not.
840  bool IsEmpty() const;
841 
     // Returns the URI as a WString.
850  WString GetURI();
851 
     // Returns the start character index (presumably relative to the text page - confirm).
857  int GetStartCharIndex();
858 
     // Returns the end character index (presumably relative to the text page - confirm).
864  int GetEndCharIndex();
865 
872  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
873  explicit TextLink(FS_HANDLE handle = NULL);
874 
875 };
876 
// Collection of the TextLink objects of one text page: constructed from a TextPage and
// accessed by count and index.
// NOTE(review): this is a rendered listing; the original doc comments are not shown.
881 class PageTextLinks FS_FINAL : public Base{
882  public:
     // Constructor, from a text page object.
888  explicit PageTextLinks(const TextPage& page);
     // Copy constructor, with another page text links object.
894  PageTextLinks(const PageTextLinks& other);
     // Assign operator.
902  PageTextLinks& operator = (const PageTextLinks& other);
     // Equal operator.
910  bool operator == (const PageTextLinks& other) const ;
     // Not equal operator.
918  bool operator != (const PageTextLinks& other) const ;
919 
     // Check whether current object is empty or not.
927  bool IsEmpty() const;
     // Destructor.
929  ~PageTextLinks();
930 
     // Returns the number of text links.
936  int GetTextLinkCount();
937 
     // Returns the text link at index (presumably valid from 0 to GetTextLinkCount()-1 - confirm).
946  TextLink GetTextLink(int index);
947 
948  // User is strongly recommended NOT to use this method; otherwise unknown situation may occur.
949  explicit PageTextLinks(FS_HANDLE handle = NULL);
950 };
951 } // namespace pdf
952 } // namespace foxit
953 #endif // FS_SEARCH_H_
954 
foxit::pdf::TextPage::TextParseFlags
TextParseFlags
Enumeration for parsing flags used for text page.
Definition: fs_search.h:272
foxit::pdf::TextSearch::e_SearchMatchCase
If set, match the case of keyword when searching.
Definition: fs_search.h:524
foxit::pdf::TextPage::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
foxit::pdf::TextPage::e_TextStreamOrder
If this is set, that means to get text content of a PDF page by the stream order.
Definition: fs_search.h:288
foxit::pdf::TextPageCharInfo::origin_x
float origin_x
The x-coordinate of the origin position.
Definition: fs_search.h:227
foxit::pdf::TextPageCharInfo::char_box
RectF char_box
The glyph bounding box in page space.
Definition: fs_search.h:237
foxit::FS_HANDLE
void * FS_HANDLE
Handle type.
Definition: fs_basictypes.h:213
foxit::pdf::TextPage::e_ParseTextOutputHyphen
Parse the text content of a PDF page with outputting the hyphen on a line feed.
Definition: fs_search.h:276
foxit::pdf::TextPage::GetTextRectArrayByRect
RectFArray GetTextRectArrayByRect(const RectF &rect)
Get the array of all text rectangles within the specified rectangle region.
foxit::pdf::annots::Annot
Definition: fs_annot.h:1006
foxit::pdf::TextPageCharInfo::font
common::Font font
A font for character.
Definition: fs_search.h:207
foxit::pdf::TextPage::GetCharCount
int GetCharCount() const
Get the count of all the characters.
foxit::pdf::TextSearch::SetEndPage
bool SetEndPage(int page_index)
Set ending page index.
foxit::pdf::TextSearch::operator!=
bool operator!=(const TextSearch &other) const
Not equal operator.
foxit::Object
CFX_Object Object
Object type.
Definition: fs_basictypes.h:220
foxit::pdf::TextPageCharInfo::flag
TextCharFlag flag
Flags to indicate which properties of textpage character flag are meaningful.
Definition: fs_search.h:215
foxit::pdf::TextSearch::GetMatchStartCharIndex
int GetMatchStartCharIndex() const
Get the index of the first character of current match pattern, based on current match page.
foxit::pdf::TextPageCharInfo::matrix
Matrix matrix
The matrix of the character.
Definition: fs_search.h:247
foxit::pdf::TextPage::operator=
TextPage & operator=(const TextPage &other)
Assign operator.
foxit::pdf::SearchCancelCallback::NeedToCancelNow
virtual bool NeedToCancelNow()=0
A callback function used to check whether to cancel the searching process or not.
foxit::pdf::TextPageCharInfo::origin_y
float origin_y
The y-coordinate of the origin position.
Definition: fs_search.h:232
foxit::pdf::TextPage::GetTextRect
RectF GetTextRect(int rect_index) const
Get the text rectangle by the index.
foxit::pdf::TextPage::e_TextDisplayOrder
If this is set, that means to get text content of a PDF page by the display order.
Definition: fs_search.h:290
foxit::pdf::TextPage
Definition: fs_search.h:265
foxit::pdf::TextPage::GetWordAtPos
common::Range GetWordAtPos(float x, float y, float tolerance) const
Get the character range of a word at or around a specified position on the page, in PDF coordinate sy...
foxit::pdf::TextPageCharInfo::e_Hyphen
Character flag: Hyphen.
Definition: fs_search.h:68
fs_common.h
Header file for common definitions and classes.
foxit::pdf::TextSearch::GetMatchRects
RectFArray GetMatchRects() const
Get the rectangles of current match pattern.
CFX_ArrayTemplate< RectF >
foxit::pdf::TextPageCharInfo::e_Generated
Character flag: Generated.
Definition: fs_search.h:64
foxit::pdf::TextPageCharInfo::char_outbox
RectF char_outbox
The typographic (display and printing) bounding box in page space.
Definition: fs_search.h:242
foxit::pdf::TextPage::GetBaselineRotation
common::Rotation GetBaselineRotation(int rect_index)
Get the text trend (as rotation) of a specified rectangle.
foxit::pdf::TextPage::GetCharRange
common::Range GetCharRange(const RectF &rect)
Get the character index range of all text rectangles within the specified rectangle region.
foxit::pdf::TextSearch::GetMatchEndCharIndex
int GetMatchEndCharIndex() const
Get the index of the last character of current match pattern, based on current match page.
foxit::pdf::TextSearch::operator==
bool operator==(const TextSearch &other) const
Equal operator.
foxit::pdf::TextPageCharInfo
Definition: fs_search.h:51
foxit::pdf::TextPage::e_ParseTextUseStreamOrder
Parse the text content of a PDF page by the stream order.
Definition: fs_search.h:278
foxit::pdf::TextPage::GetText
WString GetText(TextOrderFlag flag) const
Get the page text.
foxit::pdf::TextSearch::SetStartCharacter
bool SetStartCharacter(int char_index)
Set starting character index, from where the search process is to be started.
foxit::pdf::TextSearch::FindPrev
bool FindPrev()
Search for previous matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const TextPageCharInfo &char_info)
Constructor, with another character information object.
Definition: fs_search.h:113
foxit::pdf::TextPageCharInfo::font_size
float font_size
Font size for character.
Definition: fs_search.h:222
fs_xfa.h
Header file for XFA related definitions and functions.
foxit::pdf::TextPageCharInfo::operator=
TextPageCharInfo & operator=(const TextPageCharInfo &char_info)
Assign operator.
Definition: fs_search.h:131
foxit::pdf::TextPageCharInfo::e_UnUnicode
Character flag: UnUnicode.
Definition: fs_search.h:66
foxit::pdf::TextPage::GetChars
WString GetChars(int start=0, int count=-1) const
Get all the characters within a range specified by a start index and count.
foxit::pdf::TextSearch::GetMatchPageIndex
int GetMatchPageIndex() const
Get the page index, to which current match belongs.
foxit::pdf::TextSearch::SetStartPage
bool SetStartPage(int page_index)
Set starting page index.
foxit::pdf::TextPage::GetIndexAtPos
int GetIndexAtPos(float x, float y, float tolerance) const
Get the character index at or around a specified position on the page, in PDF coordinate system.
foxit::common::Rotation
Rotation
Enumeration for rotation.
Definition: fs_common.h:57
foxit::pdf::TextPageCharInfo::e_Normal
Character flag: Normal.
Definition: fs_search.h:62
foxit::pdf::TextSearch::e_SearchMatchWholeWord
If set, match the whole word of keyword when searching.
Definition: fs_search.h:526
foxit::pdf::TextSearch::e_SearchConsecutive
If set, match the key word consecutively when searching. For example, "CC" will be matched twice in "...
Definition: fs_search.h:528
foxit::pdf::TextPageCharInfo::TextCharFlag
TextCharFlag
Enumeration for PDF textpage character flag.
Definition: fs_search.h:58
fs_pdfpage.h
Header file for PDF page related definitions and classes.
foxit
Foxit namespace.
Definition: fs_taggedpdf.h:27
foxit::pdf::TextPage::e_ParseTextNormal
Parse the text content of a PDF page by normalizing characters based on their positions in the PDF pa...
Definition: fs_search.h:274
foxit::pdf::TextPage::TextPage
TextPage(const PDFPage &page, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, from a parsed PDF page.
foxit::addon::xfa::XFADoc
Definition: fs_xfa.h:910
foxit::pdf::TextSearch::operator=
TextSearch & operator=(const TextSearch &other)
Assign operator.
foxit::pdf::TextPage::GetTextInRect
WString GetTextInRect(const RectF &rect) const
Get the text within a rectangle, in PDF coordinate system.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Constructor, with parameters.
Definition: fs_search.h:89
foxit::pdf::TextSearch::SetPattern
bool SetPattern(const wchar_t *key_words, bool is_regex_search=false)
Set keywords to search.
foxit::pdf::TextSearch::FindNext
bool FindNext()
Search for next matched pattern.
foxit::pdf::TextPageCharInfo::TextPageCharInfo
TextPageCharInfo()
Constructor.
Definition: fs_search.h:102
foxit::pdf::TextSearch::IsEmpty
bool IsEmpty() const
Check whether current object is empty or not.
NULL
#define NULL
The null-pointer value.
Definition: fx_system.h:792
foxit::pdf::TextSearch::e_SearchNormal
No special searching options.
Definition: fs_search.h:522
CFX_FloatRect
Definition: fx_coordinates.h:771
foxit::pdf::TextPageCharInfo::Set
void Set(const common::Font &font, TextCharFlag flag, float font_size, float origin_x, float origin_y, const RectF &char_box, const RectF &char_outbox, const Matrix &matrix)
Set value.
Definition: fs_search.h:192
foxit::pdf::TextSearch::SearchFlags
SearchFlags
Enumeration for searching flags.
Definition: fs_search.h:520
foxit::pdf::TextSearch
Definition: fs_search.h:513
foxit::pdf::PDFDoc
Definition: fs_pdfdoc.h:776
foxit::pdf::PDFPage
Definition: fs_pdfpage.h:412
foxit::pdf::SearchCancelCallback
Definition: fs_search.h:39
foxit::pdf::TextPageCharInfo::e_Unknown
Character flag: Unknown.
Definition: fs_search.h:60
foxit::pdf::TextSearch::TextSearch
TextSearch(const PDFDoc &document, SearchCancelCallback *cancel=0, int flags=foxit::pdf::TextPage::e_ParseTextNormal)
Constructor, for a PDF document.
foxit::pdf::TextPageCharInfo::e_ComboWord
Character flag: ComboWord.
Definition: fs_search.h:70
foxit::pdf::TextSearch::SetSearchFlags
bool SetSearchFlags(uint32 search_flags)
Set search flags.
foxit::pdf::TextPage::~TextPage
~TextPage()
Destructor.
foxit::common::Font
Definition: fs_common.h:1523
CFX_Matrix
Definition: fx_coordinates.h:1076
foxit::pdf::TextSearch::e_SearchNotMatchFullWidth
If set, to ignore full-width characters and treat all characters as standard ASCII or standard-width ...
Definition: fs_search.h:530
fs_annot.h
Header file for annotation related definitions and classes.
foxit::pdf::TextSearch::GetMatchSentenceEndIndex
int GetMatchSentenceEndIndex()
Get the index of the last character of current matched pattern, based on the matched sentence.
foxit::pdf::TextPageCharInfo::operator==
bool operator==(const TextPageCharInfo &char_info) const
Equal operator.
Definition: fs_search.h:150
CFX_WideString
WIDE STRING CLASS.
Definition: fx_string.h:1461
foxit::pdf::TextSearch::GetMatchSentence
WString GetMatchSentence()
Get the sentence that contains current match pattern.
foxit::common::Range
Definition: fs_common.h:1368
foxit::pdf::TextPage::GetTextUnderAnnot
WString GetTextUnderAnnot(annots::Annot &annot) const
Get the page text which intersects with a specified annotation.
foxit::pdf::TextPage::GetTextRectCount
int GetTextRectCount(int start=0, int count=-1)
Count the text rectangles within a range specified by a start index and count.
foxit::uint32
FX_UINT32 uint32
32-bit unsigned integer.
Definition: fs_basictypes.h:195
foxit::pdf::TextPage::operator!=
bool operator!=(const TextPage &other) const
Not equal operator.
foxit::pdf::TextSearch::GetMatchSentenceStartIndex
int GetMatchSentenceStartIndex()
Get the index of the first character of current matched pattern, based on the matched sentence.
foxit::pdf::TextSearch::~TextSearch
~TextSearch()
Destructor.
foxit::Base
Definition: fs_basictypes.h:451
foxit::pdf::TextPage::operator==
bool operator==(const TextPage &other) const
Equal operator.
foxit::pdf::TextPage::TextOrderFlag
TextOrderFlag
Enumeration for text order flag which is used when getting text content of a PDF page.
Definition: fs_search.h:286
foxit::pdf::TextPage::GetCharInfo
TextPageCharInfo GetCharInfo(int char_index)
Get character information of a specific character.
foxit::pdf::TextPageCharInfo::operator!=
bool operator!=(const TextPageCharInfo &char_info) const
Not equal operator.
Definition: fs_search.h:166