present pages in order

This commit is contained in:
Johannes Zillmann 2017-01-04 19:09:37 +01:00
parent 56425c5c5e
commit 7810d81792
4 changed files with 84 additions and 46 deletions

View File

@ -16,6 +16,9 @@
<script>
import store from '../store.js'
import pdfjs from 'pdfjs-dist';
import Page from '../models/Page.js';
import TextItem from '../models/TextItem.js';
export default {
props : {
multiple : {
@ -65,64 +68,31 @@ export default {
reader.onload = (evt) => {
console.debug("Loaded");
const buffer = evt.target.result;
// const lines = []
const pages= []
PDFJS.getDocument(buffer).then(function (pdfDocument) {
//console.log('Number of pages: ' + pdfDocument.numPages);
console.log('Number of pages: ' + pdfDocument.numPages);
// console.debug(pdfDocument);
for (var i = 0; i <= 3; i++) {
const numPages = pdfDocument.numPages;
// const numPages = 3;
store.preparePageUpload(numPages);
for (var i = 0; i <= numPages; i++) {
pdfDocument.getPage(i).then(function(page){
page.getTextContent().then(function(textContent) {
var text = '';
var line;
var lineY;
//console.debug(textContent);
const pageTextContents = []
textContent.items.map(function(item) {
const textItems = textContent.items.map(function(item) {
const transform = item.transform;
const x = transform[4];
const y = transform[5];
const width = item.width;
const height = item.height;
pageTextContents.push({
text: item.str,
x: x,
y: y,
return new TextItem({
x: transform[4],
y: transform[5],
width: item.width,
height: item.height
height: item.height,
text: item.str
});
if(!line){
console.debug("First line: "+item.str);
lineY = y;
line = item.str;
} else {
if (y === lineY){
console.debug("Add to line: "+line +" / "+ item.str);
line += item.str;
} else {
console.debug("Start line: "+line+ " / " +item.str);
text += line + '\n';
line = item.str;
lineY = y;
}
}
// console.debug('|'+item.str+'|');
// lines.push(item.str);
// lines.push(text)
});
text += line ;
console.debug("Push Page ");
console.debug(text);
pages.push(text)
console.debug(pageTextContents);
store.uploadPage(page.pageIndex, textItems);
});
});
}
});
console.debug("Store all");
console.debug(pages);
store.upload(pages);
console.debug("now:"+store.state.uploaded);
};
reader.readAsArrayBuffer(files[0]);
},

8
src/models/Page.js Normal file
View File

@ -0,0 +1,8 @@
/**
 * One page of an uploaded document: its index plus the text items
 * that get attached to it after construction.
 */
export default class Page {
  /**
   * @param {Object} options
   * @param {*} options.index - page index supplied by the caller; stored as-is.
   */
  constructor(options) {
    const { index } = options;
    this.index = index;
    // Always starts empty; the constructor takes no text items.
    this.textItems = [];
  }
}

12
src/models/TextItem.js Normal file
View File

@ -0,0 +1,12 @@
/**
 * Holds an individual text item of a page: its position, its size
 * and the text itself.
 */
export default class TextItem {
  /**
   * @param {Object} options
   * @param {*} options.x - stored as `x`
   * @param {*} options.y - stored as `y`
   * @param {*} options.width - stored as `width`
   * @param {*} options.height - stored as `height`
   * @param {*} options.text - stored as `text`
   */
  constructor(options) {
    const { x, y, width, height, text } = options;
    this.x = x;
    this.y = y;
    this.width = width;
    this.height = height;
    this.text = text;
  }
}

View File

@ -1,9 +1,56 @@
import Page from './models/Page.js';
// Holds the state of the application
export default {
state: {
uploaded: false,
pages: []
pagesToUpload: 0,
uploadedPages: 0,
rawPages: [],
pages: [],
},
preparePageUpload: function(numPages) {
this.state.pagesToUpload = numPages;
for (var i = 0; i <= numPages; i++) {
this.state.rawPages.push(new Page({
index: i
}));
}
},
uploadPage: function(pageIndex, textItems) {
this.state.rawPages[pageIndex].textItems = textItems;
this.state.uploadedPages++;
if (this.state.uploadedPages == this.state.pagesToUpload) {
this.state.rawPages.map(rawPage => {
var text = '';
var line;
var lineY;
rawPage.textItems.forEach(textItem => {
if (!line) {
// console.debug("First line: "+item.str);
lineY = textItem.y;
line = textItem.text;
} else {
if (textItem.y === lineY) {
//console.debug("Add to line: "+line +" / "+ item.str);
line += textItem.text;
} else {
// console.debug("Start line: "+line+ " / " +item.str);
text += line + '\n';
line = textItem.text;
lineY = textItem.y;
}
}
});
text += line;
this.state.pages.push(text);
});
// this.state.pages = pages;
this.state.uploaded = true;
}
},
upload: function(pages) {
@ -11,4 +58,5 @@ export default {
this.state.uploaded = true;
this.state.pages = pages;
},
}