Foxit PDF Conversion SDK
fs_pdf2office.h
Go to the documentation of this file.
1 
15 #ifndef FS_CONVERT_H_
16 #define FS_CONVERT_H_
17 
18 #include "common/fs_common.h"
19 
25 namespace foxit {
26 using namespace common;
30 namespace conversion {
34 namespace pdf2office {
// Setting data used when converting PDF documents to Word format documents.
// NOTE: this listing elides the documented lines of the original header (doc
// comments, the default constructor's declarator, and the data members); each
// elided declaration is noted where it would appear.
36 class PDF2WordSettingData FS_FINAL : public Object{
37  public:
// Default constructor; its declarator `PDF2WordSettingData()` sits on elided
// source line 39. Every flag starts false except
// enable_generate_headers_and_footers (true), and
// max_blank_paragraphs_per_page_bottom starts at 0.
40  : enable_retain_page_layout(false)
41  , enable_generate_headers_and_footers(true)
42  , enable_generate_footnotes_and_endnotes(false)
43  , enable_generate_page_rendered_break(false)
44  , max_blank_paragraphs_per_page_bottom(0){ }
45 
// Constructor, with parameters: stores each argument in the data member of
// the same name. Members are assigned in the body (not in an initializer
// list), so `this->` is required to disambiguate them from the parameters.
66  PDF2WordSettingData(bool enable_retain_page_layout, bool enable_generate_headers_and_footers, bool enable_generate_footnotes_and_endnotes, bool enable_generate_page_rendered_break, int max_blank_paragraphs_per_page_bottom) {
67  this->enable_retain_page_layout = enable_retain_page_layout;
68  this->enable_generate_headers_and_footers = enable_generate_headers_and_footers;
69  this->enable_generate_footnotes_and_endnotes = enable_generate_footnotes_and_endnotes;
70  this->enable_generate_page_rendered_break = enable_generate_page_rendered_break;
71  this->max_blank_paragraphs_per_page_bottom = max_blank_paragraphs_per_page_bottom;
72  }
73 
// Copy assignment: copies all five setting fields from `data` and returns a
// reference to this object.
81  PDF2WordSettingData & operator = (const PDF2WordSettingData & data) {
82  enable_retain_page_layout = data.enable_retain_page_layout;
83  enable_generate_headers_and_footers = data.enable_generate_headers_and_footers;
84  enable_generate_footnotes_and_endnotes = data.enable_generate_footnotes_and_endnotes;
85  enable_generate_page_rendered_break = data.enable_generate_page_rendered_break;
86  max_blank_paragraphs_per_page_bottom = data.max_blank_paragraphs_per_page_bottom;
87  return (*this);
88  }
89 
// Set value: overwrites all five fields at once; same parameter list and
// same assignments as the parameterized constructor.
112  void Set(bool enable_retain_page_layout, bool enable_generate_headers_and_footers, bool enable_generate_footnotes_and_endnotes, bool enable_generate_page_rendered_break, int max_blank_paragraphs_per_page_bottom) {
113  this->enable_retain_page_layout = enable_retain_page_layout;
114  this->enable_generate_headers_and_footers = enable_generate_headers_and_footers;
115  this->enable_generate_footnotes_and_endnotes = enable_generate_footnotes_and_endnotes;
116  this->enable_generate_page_rendered_break = enable_generate_page_rendered_break;
117  this->max_blank_paragraphs_per_page_bottom = max_blank_paragraphs_per_page_bottom;
118  }
119 
// Elided (source line 126): bool enable_retain_page_layout
//   - whether to retain page layout for the conversion to Word.
127 
// Elided (source line 137): bool enable_generate_headers_and_footers
//   - whether to convert the recognized headers and footers.
138 
// Elided (source line 146): bool enable_generate_footnotes_and_endnotes
//   - whether to convert the recognized footnotes and endnotes.
147 
// Elided (source line 158): bool enable_generate_page_rendered_break
//   - whether to generate the "lastRenderedPageBreak" marker in the Word output.
159 
// Elided (source line 168): int max_blank_paragraphs_per_page_bottom
//   - maximum number of blank paragraphs that can be placed at the bottom of each page.
169 };
170 
// Setting data used when converting PDF documents to PowerPoint format
// documents.
// NOTE: this listing elides the documented lines of the original header (doc
// comments, two declarators, and the data members); each elided declaration
// is noted where it would appear.
172 class PDF2PowerPointSettingData FS_FINAL : public Object{
173  public:
// Default constructor; its declarator `PDF2PowerPointSettingData()` sits on
// elided source line 175. Both flags start false.
176  : enable_aggressively_split_sections(false)
177  , enable_adapt_to_largest_page(false) { }
178 
// Constructor, with parameters: stores each argument in the data member of
// the same name (`this->` disambiguates member from parameter).
195  PDF2PowerPointSettingData(bool enable_aggressively_split_sections, bool enable_adapt_to_largest_page) {
196  this->enable_aggressively_split_sections = enable_aggressively_split_sections;
197  this->enable_adapt_to_largest_page = enable_adapt_to_largest_page;
198  }
199 
// Body of the copy-assignment operator. Its declarator (source line 207) is
// elided from this listing; presumably
// `PDF2PowerPointSettingData& operator = (const PDF2PowerPointSettingData& data)`
// by analogy with the sibling setting classes - confirm against the header.
// Copies both fields from `data` and returns a reference to this object.
208  enable_aggressively_split_sections = data.enable_aggressively_split_sections;
209  enable_adapt_to_largest_page = data.enable_adapt_to_largest_page;
210  return (*this);
211  }
212 
// Set value: overwrites both fields at once; same parameter list and same
// assignments as the parameterized constructor.
231  void Set(bool enable_aggressively_split_sections, bool enable_adapt_to_largest_page) {
232  this->enable_aggressively_split_sections = enable_aggressively_split_sections;
233  this->enable_adapt_to_largest_page = enable_adapt_to_largest_page;
234  }
235 
// Elided (source line 244): bool enable_aggressively_split_sections
//   - whether to split sections aggressively during the conversion.
245 
// Elided (source line 256): bool enable_adapt_to_largest_page
//   - whether to adapt content of smaller pages to the largest page size.
257 };
258 
// Setting data used when converting PDF documents to Excel format documents.
// NOTE: this listing elides the documented lines of the original header (doc
// comments, the default constructor's declarator, and the data members); each
// elided declaration is noted where it would appear.
260 class PDF2ExcelSettingData FS_FINAL : public Object {
261  public:
// Enumeration for Excel workbook settings. The per-value doc comments (source
// lines 268/270/272) are elided; the exact meaning of each value should be
// taken from the SDK documentation rather than inferred from the names.
267  typedef enum _WorkbookSettings {
269  e_WorkbookSettingsDocument = 0,
271  e_WorkbookSettingsEachTable = 1,
273  e_WorkbookSettingsEachPage = 2
274  } WorkbookSettings;
275 
// Default constructor; its declarator `PDF2ExcelSettingData()` sits on elided
// source line 277. Defaults: both strings empty, workbook_settings =
// e_WorkbookSettingsEachPage, aggressive table repair on, watermarks excluded.
278  : decimal_symbol("")
279  , thousands_separator("")
280  , workbook_settings(PDF2ExcelSettingData::e_WorkbookSettingsEachPage)
281  ,enable_aggressive_table_repair(true)
282  ,include_watermarks(false){ }
283 
// Constructor, with parameters: stores each argument in the data member of
// the same name. The two String arguments are taken by value and then
// copy-assigned into the members.
297  PDF2ExcelSettingData(String decimal_symbol, String thousands_separator, WorkbookSettings workbook_settings, bool enable_aggressive_table_repair, bool include_watermarks) {
298  this->decimal_symbol = decimal_symbol;
299  this->thousands_separator = thousands_separator;
300  this->workbook_settings = workbook_settings;
301  this->enable_aggressive_table_repair = enable_aggressive_table_repair;
302  this->include_watermarks = include_watermarks;
303  }
304 
// Copy assignment: copies all five setting fields from `data` and returns a
// reference to this object.
312  PDF2ExcelSettingData& operator = (const PDF2ExcelSettingData& data) {
313  this->decimal_symbol = data.decimal_symbol;
314  this->thousands_separator = data.thousands_separator;
315  this->workbook_settings = data.workbook_settings;
316  this->enable_aggressive_table_repair = data.enable_aggressive_table_repair;
317  this->include_watermarks = data.include_watermarks;
318  return (*this);
319  }
320 
// Set value: overwrites all five fields at once; same parameter list and
// same assignments as the parameterized constructor.
336  void Set(String decimal_symbol, String thousands_separator, WorkbookSettings workbook_settings, bool enable_aggressive_table_repair, bool include_watermarks) {
337  this->decimal_symbol = decimal_symbol;
338  this->thousands_separator = thousands_separator;
339  this->workbook_settings = workbook_settings;
340  this->enable_aggressive_table_repair = enable_aggressive_table_repair;
341  this->include_watermarks = include_watermarks;
342  }
343 
// Elided (source line 357): String decimal_symbol
//   - string to be recognized as the decimal symbol during the conversion.
358 
// Elided (source line 373): String thousands_separator
//   - string to be recognized as the thousands separator during the conversion.
374 
// Elided (source line 381): WorkbookSettings workbook_settings
//   - the workbook setting used for the conversion.
382 
// Elided (source line 392): bool enable_aggressive_table_repair
//   - whether aggressive table repair is enabled during the conversion.
393 
// Elided (source line 403): bool include_watermarks
//   - whether to include watermarks during the conversion.
404 };
405 
// Top-level setting data passed to every PDF2Office::StartConvertTo* call.
// It aggregates the options common to all targets plus one nested setting
// object per target format (Word, PowerPoint, Excel).
// NOTE: this listing elides the documented lines of the original header (doc
// comments, the default constructor's declarator, and the data members); each
// elided declaration is noted where it would appear.
407 class PDF2OfficeSettingData FS_FINAL : public Object {
408  public:
// Default constructor; its declarator `PDF2OfficeSettingData()` sits on elided
// source line 410. Only the five bool flags are initialized explicitly here;
// metrics_data_folder_path, page_range and the three nested setting objects
// are left to their own default constructors.
411  : enable_ml_recognition(false)
412  , include_pdf_comments(true)
413  , enable_trailing_space(true)
414  , include_images(true)
415  , enable_matching_system_fonts(false) { }
416 
// Constructor, with parameters: stores each argument in the data member of
// the same name. The nested setting objects and page_range are copied via
// their assignment operators; metrics_data_folder_path is assigned from the
// raw wide-character pointer.
454  PDF2OfficeSettingData(const wchar_t* metrics_data_folder_path, bool enable_ml_recognition, const Range& page_range, bool include_pdf_comments,
455  const PDF2WordSettingData& word_setting_data, const PDF2PowerPointSettingData& power_point_setting_data,
456  const PDF2ExcelSettingData& excel_setting_data, bool enable_trailing_space, bool include_images, bool enable_matching_system_fonts) {
457  this->metrics_data_folder_path = metrics_data_folder_path;
458  this->enable_ml_recognition = enable_ml_recognition;
459  this->page_range = page_range;
460  this->include_pdf_comments = include_pdf_comments;
461  this->word_setting_data = word_setting_data;
462  this->power_point_setting_data = power_point_setting_data;
463  this->excel_setting_data = excel_setting_data;
464  this->enable_trailing_space = enable_trailing_space;
465  this->include_images = include_images;
466  this->enable_matching_system_fonts = enable_matching_system_fonts;
467  }
468 
// Copy assignment: copies all ten fields from `data` (including the three
// nested setting objects) and returns a reference to this object.
476  PDF2OfficeSettingData &operator = (const PDF2OfficeSettingData& data) {
477  metrics_data_folder_path = data.metrics_data_folder_path;
478  enable_ml_recognition = data.enable_ml_recognition;
479  page_range = data.page_range;
480  include_pdf_comments = data.include_pdf_comments;
481  word_setting_data = data.word_setting_data;
482  power_point_setting_data = data.power_point_setting_data;
483  excel_setting_data = data.excel_setting_data;
484  enable_trailing_space = data.enable_trailing_space;
485  include_images = data.include_images;
486  enable_matching_system_fonts = data.enable_matching_system_fonts;
487  return (*this);
488  }
489 
// Set value: overwrites all ten fields at once; same parameter list and same
// assignments as the parameterized constructor.
528  void Set(const wchar_t* metrics_data_folder_path, bool enable_ml_recognition, const Range &page_range, bool include_pdf_comments,
529  const PDF2WordSettingData& word_setting_data, const PDF2PowerPointSettingData& power_point_setting_data,
530  const PDF2ExcelSettingData& excel_setting_data, bool enable_trailing_space, bool include_images, bool enable_matching_system_fonts) {
531  this->metrics_data_folder_path = metrics_data_folder_path;
532  this->enable_ml_recognition = enable_ml_recognition;
533  this->page_range = page_range;
534  this->include_pdf_comments = include_pdf_comments;
535  this->word_setting_data = word_setting_data;
536  this->power_point_setting_data = power_point_setting_data;
537  this->excel_setting_data = excel_setting_data;
538  this->enable_trailing_space = enable_trailing_space;
539  this->include_images = include_images;
540  this->enable_matching_system_fonts = enable_matching_system_fonts;
541  }
542 
// Elided (source line 551): WString metrics_data_folder_path
//   - valid path of a folder which contains metrics data files; should not be
//     an empty string.
552 
// Elided (source line 565): bool enable_ml_recognition
//   - whether to enable machine learning-based recognition functionality.
566 
// Elided (source line 574): Range page_range
//   - range object that specifies the pages to be converted.
575 
// Elided (source line 583): bool include_pdf_comments
//   - whether to include PDF document comments in the conversion.
584 
// Elided (source line 591): PDF2WordSettingData word_setting_data
//   - setting data used for converting to Word format.
592 
// Elided (source line 599): PDF2PowerPointSettingData power_point_setting_data
//   - setting data used for converting to PowerPoint format.
600 
// Elided (source line 607): PDF2ExcelSettingData excel_setting_data
//   - setting data used for converting to Excel format.
608 
// Elided (source line 620): bool enable_trailing_space
//   - whether to add trailing spaces from lines in paragraphs.
621 
// Elided (source line 631): bool include_images
//   - whether to include images from the PDF document.
632 
// Elided (source line 642): bool enable_matching_system_fonts
//   - whether to accurately match system fonts during conversion.
643 };
644 
// Body of an abstract callback interface whose class header (source line 649)
// is elided from this listing. NOTE(review): presumably this is
// `class ConvertCallback`, the type of the `convert_callback` parameter of the
// PDF2Office::StartConvertTo* functions - confirm against the header.
// Both members are pure virtual, so a concrete subclass must implement them.
650  public:
// Pure virtual; returns a bool. NOTE(review): by its name this is presumably
// polled during conversion to ask whether the process should pause - confirm.
656  virtual bool NeedToPause() = 0;
657 
// Pure virtual progress notification carrying a converted count and a total
// count. NOTE(review): the unit of the counts (pages?) is not visible here.
666  virtual void ProgressNotify(int converted_count, int total_count) = 0;
667 };
668 
// Entry points for PDF-to-Office conversion. Every function is static and
// returns a `Progressive` object. Each target format (Word, Excel, PowerPoint)
// has two overloads: one that takes file paths and one that takes a
// file::ReaderCallback source plus a file::StreamCallback destination.
// All overloads share these parameters:
//   src_pdf_password  - password for the source PDF (wide string).
//   setting_data      - conversion options (PDF2OfficeSettingData).
//   convert_callback  - optional callback object; defaults to NULL.
// NOTE(review): the "Start" prefix and the Progressive return type suggest the
// conversion is driven to completion through the returned object - confirm in
// the SDK documentation.
677 class PDF2Office FS_FINAL : public Base {
678  public:
// Word, path-based: reads from src_pdf_path and writes to saved_word_file_path.
706  static Progressive StartConvertToWord(const wchar_t* src_pdf_path, const wchar_t* src_pdf_password, const wchar_t* saved_word_file_path,
707  const PDF2OfficeSettingData& setting_data, ConvertCallback* convert_callback = NULL);
708 
// Word, stream-based: reads through src_pdf_reader and writes to saved_word_file_stream.
737  static Progressive StartConvertToWord(file::ReaderCallback* src_pdf_reader, const wchar_t* src_pdf_password, file::StreamCallback* saved_word_file_stream,
738  const PDF2OfficeSettingData& setting_data, ConvertCallback* convert_callback = NULL);
739 
// Excel, path-based: reads from src_pdf_path and writes to saved_excel_file_path.
766  static Progressive StartConvertToExcel(const wchar_t* src_pdf_path, const wchar_t* src_pdf_password, const wchar_t* saved_excel_file_path,
767  const PDF2OfficeSettingData& setting_data, ConvertCallback* convert_callback = NULL);
768 
// Excel, stream-based: reads through src_pdf_reader and writes to saved_excel_file_stream.
796  static Progressive StartConvertToExcel(file::ReaderCallback* src_pdf_reader, const wchar_t* src_pdf_password, file::StreamCallback* saved_excel_file_stream,
797  const PDF2OfficeSettingData& setting_data, ConvertCallback* convert_callback = NULL);
798 
// PowerPoint, path-based: reads from src_pdf_path and writes to saved_ppt_file_path.
825  static Progressive StartConvertToPowerPoint(const wchar_t* src_pdf_path, const wchar_t* src_pdf_password, const wchar_t* saved_ppt_file_path,
826  const PDF2OfficeSettingData& setting_data, ConvertCallback* convert_callback = NULL);
827 
// PowerPoint, stream-based: reads through src_pdf_reader and writes to saved_ppt_file_stream.
855  static Progressive StartConvertToPowerPoint(file::ReaderCallback* src_pdf_reader, const wchar_t* src_pdf_password, file::StreamCallback* saved_ppt_file_stream,
856  const PDF2OfficeSettingData& setting_data, ConvertCallback* convert_callback = NULL);
857 };
858 } // namespace pdf2office
859 } // namespace conversion
860 } // namespace foxit
861 
862 #endif
PDF2WordSettingData(bool enable_retain_page_layout, bool enable_generate_headers_and_footers, bool enable_generate_footnotes_and_endnotes, bool enable_generate_page_rendered_break, int max_blank_paragraphs_per_page_bottom)
Constructor, with parameters.
Definition: fs_pdf2office.h:66
PDF2PowerPointSettingData()
Constructor.
Definition: fs_pdf2office.h:175
Definition: fs_common.h:271
CFX_Object Object
Object type.
Definition: fs_basictypes.h:145
int max_blank_paragraphs_per_page_bottom
Specifies the maximum number of blank paragraphs that can be placed at the bottom of each page when c...
Definition: fs_pdf2office.h:168
File reading interface.
Definition: fx_stream.h:566
bool enable_trailing_space
A boolean value which indicates whether to add trailing spaces from lines in paragraphs for convertin...
Definition: fs_pdf2office.h:620
WIDE STRING CLASS.
Definition: fx_string.h:1452
PDF2WordSettingData word_setting_data
A setting data object that is used for converting PDF documents to Word format documents.
Definition: fs_pdf2office.h:591
bool enable_aggressively_split_sections
A boolean value which indicates whether to split sections aggressively for the conversion of PDF documen...
Definition: fs_pdf2office.h:244
bool enable_generate_headers_and_footers
A boolean value which indicates whether to convert the recognized headers and footers from PDF documents...
Definition: fs_pdf2office.h:137
PDF2ExcelSettingData(String decimal_symbol, String thousands_separator, WorkbookSettings workbook_settings, bool enable_aggressive_table_repair, bool include_watermarks)
Constructor, with parameters.
Definition: fs_pdf2office.h:297
PDF2OfficeSettingData(const wchar_t *metrics_data_folder_path, bool enable_ml_recognition, const Range &page_range, bool include_pdf_comments, const PDF2WordSettingData &word_setting_data, const PDF2PowerPointSettingData &power_point_setting_data, const PDF2ExcelSettingData &excel_setting_data, bool enable_trailing_space, bool include_images, bool enable_matching_system_fonts)
Constructor, with parameters.
Definition: fs_pdf2office.h:454
String thousands_separator
A string value to be recognized as the thousands separator during the conversion of PDF d...
Definition: fs_pdf2office.h:373
bool enable_generate_page_rendered_break
A boolean value which indicates whether to generate the "lastRenderedPageBreak" marker in word format...
Definition: fs_pdf2office.h:158
PDF2ExcelSettingData excel_setting_data
A setting data object that is used for converting PDF documents to Excel format documents.
Definition: fs_pdf2office.h:607
WString metrics_data_folder_path
A valid path of a folder which contains metrics data files. This should not be an empty string.
Definition: fs_pdf2office.h:551
void Set(bool enable_aggressively_split_sections, bool enable_adapt_to_largest_page)
Set value.
Definition: fs_pdf2office.h:231
File stream interface, reading & writing.
Definition: fx_stream.h:669
bool enable_adapt_to_largest_page
A boolean value that indicates whether to adapt content of smaller pages to the largest page size during...
Definition: fs_pdf2office.h:256
PDF2PowerPointSettingData power_point_setting_data
A setting data object that is used for converting PDF documents to PowerPoint format documents.
Definition: fs_pdf2office.h:599
bool enable_aggressive_table_repair
A boolean value that indicates whether aggressive table repair is enabled during the conversion of PD...
Definition: fs_pdf2office.h:392
Definition: fs_common.h:208
WorkbookSettings
Enumeration for Excel workbook settings.
Definition: fs_pdf2office.h:267
Header file for common definitions and classes.
void Set(const wchar_t *metrics_data_folder_path, bool enable_ml_recognition, const Range &page_range, bool include_pdf_comments, const PDF2WordSettingData &word_setting_data, const PDF2PowerPointSettingData &power_point_setting_data, const PDF2ExcelSettingData &excel_setting_data, bool enable_trailing_space, bool include_images, bool enable_matching_system_fonts)
Set value.
Definition: fs_pdf2office.h:528
PDF2WordSettingData()
Constructor.
Definition: fs_pdf2office.h:39
bool enable_retain_page_layout
A boolean value which indicates whether to retain page layout for the conversion of PDF documents to Wor...
Definition: fs_pdf2office.h:126
String decimal_symbol
A string value to be recognized as the decimal symbol during the conversion of PDF documents to ...
Definition: fs_pdf2office.h:357
bool include_images
A boolean value which indicates whether to include images in PDF documents for converting PDF documen...
Definition: fs_pdf2office.h:631
Definition: fs_basictypes.h:232
PDF2ExcelSettingData()
Constructor.
Definition: fs_pdf2office.h:277
Foxit namespace.
Definition: fs_basictypes.h:124
WorkbookSettings workbook_settings
An enumeration value which indicates the setting of the workbook for the conversion of PDF documents ...
Definition: fs_pdf2office.h:381
Definition: fs_pdf2office.h:677
void Set(String decimal_symbol, String thousands_separator, WorkbookSettings workbook_settings, bool enable_aggressive_table_repair, bool include_watermarks)
Set value.
Definition: fs_pdf2office.h:336
bool include_watermarks
A boolean value that indicates whether to include watermarks during the conversion of PDF documents t...
Definition: fs_pdf2office.h:403
BYTE STRING CLASS.
Definition: fx_string.h:317
bool include_pdf_comments
A boolean value which indicates whether to include PDF document comments for the conversion of PDF d...
Definition: fs_pdf2office.h:583
Definition: fs_pdf2office.h:649
bool enable_generate_footnotes_and_endnotes
A boolean value which indicates whether to convert the recognized footnotes and endnotes from PDF docume...
Definition: fs_pdf2office.h:146
void Set(bool enable_retain_page_layout, bool enable_generate_headers_and_footers, bool enable_generate_footnotes_and_endnotes, bool enable_generate_page_rendered_break, int max_blank_paragraphs_per_page_bottom)
Set value.
Definition: fs_pdf2office.h:112
#define NULL
The null-pointer value.
Definition: fx_system.h:785
PDF2OfficeSettingData()
Constructor.
Definition: fs_pdf2office.h:410
bool enable_matching_system_fonts
A boolean value that indicates whether to accurately match system fonts when converting PDF documents...
Definition: fs_pdf2office.h:642
Range page_range
A range object that specifies some pages. These pages will be used to convert PDF documents to office...
Definition: fs_pdf2office.h:574
bool enable_ml_recognition
A boolean value which indicates whether to enable machine learning-based recognition functionality....
Definition: fs_pdf2office.h:565
PDF2PowerPointSettingData(bool enable_aggressively_split_sections, bool enable_adapt_to_largest_page)
Constructor, with parameters.
Definition: fs_pdf2office.h:195