mirror of
https://github.com/jzillmann/pdf-to-markdown.git
synced 2025-01-24 06:29:06 +01:00
present pages in order
This commit is contained in:
parent
56425c5c5e
commit
7810d81792
@ -16,6 +16,9 @@
|
||||
<script>
|
||||
import store from '../store.js'
|
||||
import pdfjs from 'pdfjs-dist';
|
||||
import Page from '../models/Page.js';
|
||||
import TextItem from '../models/TextItem.js';
|
||||
|
||||
export default {
|
||||
props : {
|
||||
multiple : {
|
||||
@ -65,64 +68,31 @@ export default {
|
||||
reader.onload = (evt) => {
|
||||
console.debug("Loaded");
|
||||
const buffer = evt.target.result;
|
||||
// const lines = []
|
||||
const pages= []
|
||||
PDFJS.getDocument(buffer).then(function (pdfDocument) {
|
||||
//console.log('Number of pages: ' + pdfDocument.numPages);
|
||||
console.log('Number of pages: ' + pdfDocument.numPages);
|
||||
// console.debug(pdfDocument);
|
||||
for (var i = 0; i <= 3; i++) {
|
||||
const numPages = pdfDocument.numPages;
|
||||
// const numPages = 3;
|
||||
store.preparePageUpload(numPages);
|
||||
for (var i = 0; i <= numPages; i++) {
|
||||
pdfDocument.getPage(i).then(function(page){
|
||||
page.getTextContent().then(function(textContent) {
|
||||
var text = '';
|
||||
var line;
|
||||
var lineY;
|
||||
//console.debug(textContent);
|
||||
const pageTextContents = []
|
||||
textContent.items.map(function(item) {
|
||||
const textItems = textContent.items.map(function(item) {
|
||||
const transform = item.transform;
|
||||
const x = transform[4];
|
||||
const y = transform[5];
|
||||
const width = item.width;
|
||||
const height = item.height;
|
||||
pageTextContents.push({
|
||||
text: item.str,
|
||||
x: x,
|
||||
y: y,
|
||||
return new TextItem({
|
||||
x: transform[4],
|
||||
y: transform[5],
|
||||
width: item.width,
|
||||
height: item.height
|
||||
height: item.height,
|
||||
text: item.str
|
||||
});
|
||||
if(!line){
|
||||
console.debug("First line: "+item.str);
|
||||
lineY = y;
|
||||
line = item.str;
|
||||
} else {
|
||||
if (y === lineY){
|
||||
console.debug("Add to line: "+line +" / "+ item.str);
|
||||
line += item.str;
|
||||
} else {
|
||||
console.debug("Start line: "+line+ " / " +item.str);
|
||||
text += line + '\n';
|
||||
line = item.str;
|
||||
lineY = y;
|
||||
}
|
||||
}
|
||||
// console.debug('|'+item.str+'|');
|
||||
// lines.push(item.str);
|
||||
// lines.push(text)
|
||||
});
|
||||
text += line ;
|
||||
console.debug("Push Page ");
|
||||
console.debug(text);
|
||||
pages.push(text)
|
||||
console.debug(pageTextContents);
|
||||
store.uploadPage(page.pageIndex, textItems);
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
console.debug("Store all");
|
||||
console.debug(pages);
|
||||
store.upload(pages);
|
||||
console.debug("now:"+store.state.uploaded);
|
||||
};
|
||||
reader.readAsArrayBuffer(files[0]);
|
||||
},
|
||||
|
8
src/models/Page.js
Normal file
8
src/models/Page.js
Normal file
@ -0,0 +1,8 @@
|
||||
/**
 * Model of a single page.
 *
 * Stores the index it was created with and a list of text items, which
 * starts out empty and is expected to be filled in later by the owner of the
 * instance.
 */
export default class Page {

    /**
     * @param {Object} options
     * @param {number} options.index - stored as `this.index`
     */
    constructor(options) {
        this.index = options.index;
        // Always starts empty; any `textItems` passed in `options` is ignored.
        this.textItems = [];
    }

}
|
12
src/models/TextItem.js
Normal file
12
src/models/TextItem.js
Normal file
@ -0,0 +1,12 @@
|
||||
/**
 * Holds an individual text item of a page.
 *
 * A plain value object: the constructor copies the five known fields from
 * `options` and ignores everything else.
 */
export default class TextItem {

    /**
     * @param {Object} options
     * @param options.x - stored as `this.x`
     * @param options.y - stored as `this.y`
     * @param options.width - stored as `this.width`
     * @param options.height - stored as `this.height`
     * @param options.text - stored as `this.text`
     */
    constructor(options) {
        this.x = options.x;
        this.y = options.y;
        this.width = options.width;
        this.height = options.height;
        this.text = options.text;
    }

}
|
50
src/store.js
50
src/store.js
@ -1,9 +1,56 @@
|
||||
import Page from './models/Page.js';
|
||||
|
||||
// Holds the state of the application
|
||||
export default {
|
||||
|
||||
state: {
|
||||
uploaded: false,
|
||||
pages: []
|
||||
pagesToUpload: 0,
|
||||
uploadedPages: 0,
|
||||
rawPages: [],
|
||||
pages: [],
|
||||
},
|
||||
|
||||
preparePageUpload: function(numPages) {
|
||||
this.state.pagesToUpload = numPages;
|
||||
for (var i = 0; i <= numPages; i++) {
|
||||
this.state.rawPages.push(new Page({
|
||||
index: i
|
||||
}));
|
||||
}
|
||||
},
|
||||
|
||||
uploadPage: function(pageIndex, textItems) {
|
||||
this.state.rawPages[pageIndex].textItems = textItems;
|
||||
this.state.uploadedPages++;
|
||||
if (this.state.uploadedPages == this.state.pagesToUpload) {
|
||||
this.state.rawPages.map(rawPage => {
|
||||
var text = '';
|
||||
var line;
|
||||
var lineY;
|
||||
rawPage.textItems.forEach(textItem => {
|
||||
if (!line) {
|
||||
// console.debug("First line: "+item.str);
|
||||
lineY = textItem.y;
|
||||
line = textItem.text;
|
||||
} else {
|
||||
if (textItem.y === lineY) {
|
||||
//console.debug("Add to line: "+line +" / "+ item.str);
|
||||
line += textItem.text;
|
||||
} else {
|
||||
// console.debug("Start line: "+line+ " / " +item.str);
|
||||
text += line + '\n';
|
||||
line = textItem.text;
|
||||
lineY = textItem.y;
|
||||
}
|
||||
}
|
||||
});
|
||||
text += line;
|
||||
this.state.pages.push(text);
|
||||
});
|
||||
// this.state.pages = pages;
|
||||
this.state.uploaded = true;
|
||||
}
|
||||
},
|
||||
|
||||
upload: function(pages) {
|
||||
@ -11,4 +58,5 @@ export default {
|
||||
this.state.uploaded = true;
|
||||
this.state.pages = pages;
|
||||
},
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user