mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2024-11-21 07:13:32 +01:00
update nodejs_v18_compatibility (#62)
This commit is contained in:
parent
ed59a3f24a
commit
12e5e9a383
4
.babelrc
4
.babelrc
@ -1,8 +1,8 @@
|
||||
{
|
||||
"env": {
|
||||
"testing": {
|
||||
"presets": ["es2015"]
|
||||
"presets": ["@babel/preset-env", "@babel/preset-react"]
|
||||
}
|
||||
},
|
||||
"presets": [ ["es2015", { "loose": false, "modules": false}], "react", "stage-0", "stage-2"]
|
||||
"presets": ["@babel/preset-env", "@babel/preset-react"]
|
||||
}
|
||||
|
22718
package-lock.json
generated
22718
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -27,6 +27,7 @@
|
||||
"dependencies": {
|
||||
"bootstrap": "^3.3.7",
|
||||
"enumify": "^1.0.4",
|
||||
"mini-css-extract-plugin": "^2.7.6",
|
||||
"pdfjs-dist": "^2.8.335",
|
||||
"rc-progress": "^2.0.6",
|
||||
"react": "^15.4.2",
|
||||
@ -38,11 +39,12 @@
|
||||
"remarkable": "^1.7.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/preset-env": "^7.23.3",
|
||||
"@babel/preset-react": "^7.23.3",
|
||||
"babel-core": "^6.22.1",
|
||||
"babel-eslint": "^10.1.0",
|
||||
"babel-loader": "^7.1.1",
|
||||
"babel-loader": "^8.x",
|
||||
"babel-plugin-transform-runtime": "^6.22.0",
|
||||
"babel-preset-es2015": "^6.22.0",
|
||||
"babel-preset-react": "^6.22.0",
|
||||
"babel-preset-stage-0": "^6.22.0",
|
||||
"babel-preset-stage-2": "^6.24.1",
|
||||
@ -52,8 +54,7 @@
|
||||
"esformatter-jsx": "^7.4.1",
|
||||
"eslint": "^7.30.0",
|
||||
"eslint-plugin-jasmine": "^2.2.0",
|
||||
"eslint-plugin-react": "^6.9.0",
|
||||
"extract-text-webpack-plugin": "^3.0.2",
|
||||
"eslint-plugin-react": "^7.x",
|
||||
"file-loader": "^6.2.0",
|
||||
"html-webpack-plugin": "^5.3.2",
|
||||
"mocha": "^3.2.0",
|
||||
|
@ -172,7 +172,7 @@ export default class DebugView extends React.Component {
|
||||
</Checkbox> }
|
||||
</ButtonGroup>
|
||||
<ButtonGroup>
|
||||
<Checkbox onClick={ ::this.showStatistics }>
|
||||
<Checkbox onClick={() => this.showStatistics()}>
|
||||
Show Statistics
|
||||
</Checkbox>
|
||||
</ButtonGroup>
|
||||
|
@ -38,7 +38,7 @@ export default class AppState {
|
||||
}
|
||||
|
||||
// the uploaded pdf as file buffer
|
||||
storeFileBuffer(fileBuffer:Uint8Array) {
|
||||
storeFileBuffer(fileBuffer) {
|
||||
this.fileBuffer = fileBuffer;
|
||||
this.mainView = View.LOADING;
|
||||
this.render()
|
||||
|
@ -17,7 +17,7 @@ export default class LineConverter {
|
||||
}
|
||||
|
||||
// returns a CombineResult
|
||||
compact(textItems: TextItem[]) {
|
||||
compact(textItems) {
|
||||
// we can't trust the order of occurrence, esp. footnoteLinks like to come last
|
||||
sortByX(textItems);
|
||||
|
||||
|
@ -12,7 +12,7 @@ export default class LineItemBlock extends PageItem {
|
||||
}
|
||||
}
|
||||
|
||||
addItem(item:LineItem) {
|
||||
addItem(item) {
|
||||
if (this.type && item.type && this.type !== item.type) {
|
||||
throw `Adding item of type ${item.type} to block of type ${this.type}`
|
||||
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
import TextItem from './TextItem.jsx';
|
||||
import { sortByX } from '../pageItemFunctions.jsx'
|
||||
|
||||
//Groups all text items which are on the same y line
|
||||
@ -9,12 +8,12 @@ export default class TextItemLineGrouper {
|
||||
}
|
||||
|
||||
// returns a CombineResult
|
||||
group(textItems: TextItem[]) {
|
||||
group(textItems) {
|
||||
return this.groupItemsByLine(textItems);
|
||||
}
|
||||
|
||||
|
||||
groupItemsByLine(textItems:TextItem[]) {
|
||||
groupItemsByLine(textItems) {
|
||||
const lines = [];
|
||||
var currentLine = [];
|
||||
textItems.forEach(item => {
|
||||
|
@ -12,83 +12,83 @@ BlockType.initEnum({
|
||||
H1: {
|
||||
headline: true,
|
||||
headlineLevel: 1,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '# ' + linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
H2: {
|
||||
headline: true,
|
||||
headlineLevel: 2,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '## ' + linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
H3: {
|
||||
headline: true,
|
||||
headlineLevel: 3,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '### ' + linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
H4: {
|
||||
headline: true,
|
||||
headlineLevel: 4,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '#### ' + linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
H5: {
|
||||
headline: true,
|
||||
headlineLevel: 5,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '##### ' + linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
H6: {
|
||||
headline: true,
|
||||
headlineLevel: 6,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '###### ' + linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
TOC: {
|
||||
mergeToBlock: true,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return linesToText(block.items, true);
|
||||
}
|
||||
},
|
||||
FOOTNOTES: {
|
||||
mergeToBlock: true,
|
||||
mergeFollowingNonTypedItems: true,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return linesToText(block.items, false);
|
||||
}
|
||||
},
|
||||
CODE: {
|
||||
mergeToBlock: true,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return '```\n' + linesToText(block.items, true) + '```'
|
||||
}
|
||||
},
|
||||
LIST: {
|
||||
mergeToBlock: true,
|
||||
mergeFollowingNonTypedItemsWithSmallDistance: true,
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return linesToText(block.items, false);
|
||||
}
|
||||
},
|
||||
PARAGRAPH: {
|
||||
toText(block:LineItemBlock) {
|
||||
toText(block) {
|
||||
return linesToText(block.items, false);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
export function isHeadline(type: BlockType) {
|
||||
export function isHeadline(type) {
|
||||
return type && type.name.length == 2 && type.name[0] === 'H'
|
||||
}
|
||||
|
||||
export function blockToText(block: LineItemBlock) {
|
||||
export function blockToText(block) {
|
||||
if (!block.type) {
|
||||
return linesToText(block.items, false);
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ export default class ToLineItemBlockTransformation extends Transformation {
|
||||
showWhitespaces={ this.showWhitespaces } />;
|
||||
}
|
||||
|
||||
completeTransform(parseResult:ParseResult) {
|
||||
completeTransform(parseResult) {
|
||||
// The usual cleanup
|
||||
parseResult.messages = [];
|
||||
parseResult.pages.forEach(page => {
|
||||
|
@ -28,7 +28,7 @@ export default class ToLineItemTransformation extends Transformation {
|
||||
showWhitespaces={ this.showWhitespaces } />;
|
||||
}
|
||||
|
||||
completeTransform(parseResult:ParseResult) {
|
||||
completeTransform(parseResult) {
|
||||
// The usual cleanup
|
||||
parseResult.messages = [];
|
||||
parseResult.pages.forEach(page => {
|
||||
|
@ -13,7 +13,7 @@ export default class ToMarkdown extends Transformation {
|
||||
return <MarkdownPageView key={ page.index } page={ page } />;
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
parseResult.pages.forEach(page => {
|
||||
var text = '';
|
||||
page.items.forEach(block => {
|
||||
|
@ -14,7 +14,7 @@ export default class ToTextBlocks extends Transformation {
|
||||
return <TextPageView key={ page.index } page={ page } />;
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
parseResult.pages.forEach(page => {
|
||||
const textItems = [];
|
||||
page.items.forEach(block => {
|
||||
|
@ -28,7 +28,7 @@ export default class ToTextItemTransformation extends Transformation {
|
||||
showWhitespaces={ this.showWhitespaces } />;
|
||||
}
|
||||
|
||||
completeTransform(parseResult:ParseResult) {
|
||||
completeTransform(parseResult) {
|
||||
// The usual cleanup
|
||||
parseResult.messages = [];
|
||||
parseResult.pages.forEach(page => {
|
||||
|
@ -23,12 +23,12 @@ export default class Transformation {
|
||||
}
|
||||
|
||||
// Transform an incoming ParseResult into an outgoing ParseResult
|
||||
transform(parseResult: ParseResult) { // eslint-disable-line no-unused-vars
|
||||
transform(parseResult) { // eslint-disable-line no-unused-vars
|
||||
throw new TypeError("Do not call abstract method foo from child.");
|
||||
}
|
||||
|
||||
// Sometimes transform() only visualizes a change. This method then performs the actual change.
|
||||
completeTransform(parseResult: ParseResult) { // eslint-disable-line no-unused-vars
|
||||
completeTransform(parseResult) { // eslint-disable-line no-unused-vars
|
||||
parseResult.messages = [];
|
||||
return parseResult;
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ export default class CompactLines extends ToLineItemTransformation {
|
||||
super("Compact To Lines");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
const {mostUsedDistance, fontToFormats} = parseResult.globals;
|
||||
const foundFootnotes = [];
|
||||
const foundFootnoteLinks = [];
|
||||
|
@ -12,7 +12,7 @@ export default class DetectHeaders extends ToLineItemTransformation {
|
||||
super("Detect Headers");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
const {tocPages, headlineTypeToHeightRange, mostUsedHeight, mostUsedDistance, mostUsedFont, maxHeight} = parseResult.globals;
|
||||
const hasToc = tocPages.length > 0;
|
||||
var detectedHeaders = 0;
|
||||
|
@ -13,7 +13,7 @@ export default class DetectListItems extends ToLineItemTransformation {
|
||||
super("Detect List Items");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
var foundListItems = 0;
|
||||
var foundNumberedItems = 0;
|
||||
parseResult.pages.forEach(page => {
|
||||
|
@ -15,7 +15,7 @@ export default class DetectTOC extends ToLineItemTransformation {
|
||||
super("Detect TOC");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
const tocPages = [];
|
||||
const maxPagesToEvaluate = Math.min(20, parseResult.pages.length);
|
||||
const linkLeveler = new LinkLeveler();
|
||||
@ -289,7 +289,7 @@ class LinkLeveler {
|
||||
this.uniqueFonts = [];
|
||||
}
|
||||
|
||||
levelPageItems(tocLinks:TocLink[]) {
|
||||
levelPageItems(tocLinks) {
|
||||
if (!this.levelByMethod) {
|
||||
const uniqueX = this.calculateUniqueX(tocLinks);
|
||||
if (uniqueX.length > 1) {
|
||||
|
@ -30,7 +30,7 @@ export default class RemoveRepetitiveElements extends ToLineItemTransformation {
|
||||
// - For each page, collect all items of the first, and all items of the last line
|
||||
// - Calculate how often these items occur across all pages (hash ignoring numbers, whitespace, upper/lowercase)
|
||||
// - Delete items occurring on more than 2/3 of all pages
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
|
||||
// find first and last lines per page
|
||||
const pageStore = [];
|
||||
|
@ -11,7 +11,7 @@ export default class VerticalToHorizontal extends ToLineItemTransformation {
|
||||
super("Vertical to Horizontal Text");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
var foundVerticals = 0;
|
||||
parseResult.pages.forEach(page => {
|
||||
const stream = new VerticalsStream();
|
||||
|
@ -9,7 +9,7 @@ export default class CalculateGlobalStats extends ToTextItemTransformation {
|
||||
this.fontMap = fontMap;
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
// Parse heights
|
||||
const heightToOccurrence = {};
|
||||
const fontToOccurrence = {};
|
||||
|
@ -11,7 +11,7 @@ export default class DetectCodeQuoteBlocks extends ToLineItemBlockTransformation
|
||||
super("Detect Code/Quote Blocks");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
const {mostUsedHeight} = parseResult.globals;
|
||||
var foundCodeItems = 0;
|
||||
parseResult.pages.forEach(page => {
|
||||
|
@ -12,7 +12,7 @@ export default class DetectListLevels extends ToLineItemBlockTransformation {
|
||||
this.showWhitespaces = true;
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
var listBlocks = 0;
|
||||
var modifiedBlocks = 0;
|
||||
parseResult.pages.forEach(page => {
|
||||
|
@ -11,7 +11,7 @@ export default class GatherBlocks extends ToLineItemBlockTransformation {
|
||||
super("Gather Blocks");
|
||||
}
|
||||
|
||||
transform(parseResult:ParseResult) {
|
||||
transform(parseResult) {
|
||||
const {mostUsedDistance} = parseResult.globals;
|
||||
var createdBlocks = 0;
|
||||
var lineItemCount = 0;
|
||||
|
@ -1,7 +1,6 @@
|
||||
import PageItem from './models/PageItem.jsx';
|
||||
import LineItemBlock from './models/LineItemBlock.jsx';
|
||||
|
||||
export function minXFromBlocks(blocks:LineItemBlock[]) {
|
||||
export function minXFromBlocks(blocks) {
|
||||
var minX = 999;
|
||||
blocks.forEach(block => {
|
||||
block.items.forEach(item => {
|
||||
@ -14,7 +13,7 @@ export function minXFromBlocks(blocks:LineItemBlock[]) {
|
||||
return minX;
|
||||
}
|
||||
|
||||
export function minXFromPageItems(items:PageItem) {
|
||||
export function minXFromPageItems(items) {
|
||||
var minX = 999;
|
||||
items.forEach(item => {
|
||||
minX = Math.min(minX, item.x)
|
||||
@ -25,13 +24,13 @@ export function minXFromPageItems(items:PageItem) {
|
||||
return minX;
|
||||
}
|
||||
|
||||
export function sortByX(items:PageItem) {
|
||||
export function sortByX(items) {
|
||||
items.sort((a, b) => {
|
||||
return a.x - b.x;
|
||||
});
|
||||
}
|
||||
|
||||
export function sortCopyByX(items:PageItem) {
|
||||
export function sortCopyByX(items) {
|
||||
const copy = items.concat();
|
||||
sortByX(copy);
|
||||
return copy;
|
||||
|
@ -9,6 +9,7 @@ var BUILD_DIR = path.resolve(__dirname, 'build');
|
||||
var NODEMODULES_DIR = path.resolve(__dirname, 'node_modules');
|
||||
|
||||
module.exports = {
|
||||
mode: 'development',
|
||||
context: SOURCE_DIR,
|
||||
resolve: {
|
||||
modules: [
|
||||
|
Loading…
Reference in New Issue
Block a user