I have successfully detected text elements in an image taken with react-native-camera: vision().textRecognizerProcessImage(photoUri)
returns the detected text as expected.
// Take a photo with the camera and store its URI and pixel dimensions in
// state; the pixel size is needed later to scale the detected-text bounding
// boxes to the on-screen image size.
takePicture = async () => {
  try {
    const options = {
      quality: 0.8,
      // NOTE(review): skipProcessing: true skips orientation/EXIF
      // normalization on Android, so the returned width/height can be
      // swapped relative to how the image is actually displayed — a common
      // cause of misplaced boxes. Let the library process the image so the
      // reported pixel size matches the displayed orientation.
      skipProcessing: false,
      // base64 removed: nothing in this component reads picture.base64,
      // and encoding it wastes memory for large photos.
    };
    const picture = await this.camera.takePictureAsync(options);
    console.log('picture', picture);
    const {uri, width, height} = picture;
    // Keep the pixel size of the captured image for the box
    // position/size calculation in drawBox.
    this.setState({
      photoUri: uri,
      imgPixelHeight: height,
      imgPixelWidth: width,
    });
  } catch (e) {
    console.warn(e);
  }
};
// Run ML Kit text recognition on the captured photo and hand the result to
// displayProcessResult. Uses await/try-catch instead of mixing an async
// function with a raw .then()/.catch() chain.
processImage = async () => {
  const {photoUri} = this.state;
  console.log('processing Image...');
  try {
    const result = await vision().textRecognizerProcessImage(photoUri);
    console.log('processImage response', result);
    this.displayProcessResult(result);
  } catch (error) {
    console.log('process error', error);
  }
};
// setting the elements
displayProcessResult(result) {
const textElements = this.getElements(result);
this.setState({displayRectangles: true, textElements}, () => {
console.log('my new state', this.state);
});
}
// getting only the elements objects from mlkit vision
getElements(result) {
let allElements = [];
const {blocks} = result;
blocks.forEach(block => {
const {lines} = block;
lines.forEach(line => {
const {elements} = line;
allElements = [...allElements, ...elements];
});
});
console.log('displayProcessResult response', allElements);
return allElements;
}
// drawing the boxes aronund the detected text elements
drawRectangles() {
const {textElements} = this.state;
return textElements.map((element, index) => this.drawBox(element, index));
}
drawBox = (e, i) => {
let [left, top, right, bottom] = e.boundingBox;
const {imageWidh, imageHeight, imgPixelWidth, imgPixelHeight} = this.state;
// calculating the absolute position of each box base on the pixel size of the
// of the image and the on screen sizes (imageWidh, imageHeight) given by react native
// (check the renderCameraOrImage funtion)
left = (left / imgPixelWidth) * imageWidh;
right = (right / imgPixelWidth) * imageWidh;
top = (top / imgPixelHeight) * imageHeight;
bottom = (bottom / imgPixelHeight) * imageHeight;
const boxWidth = Math.abs(left - right);
const boxHeight = Math.abs(top - bottom);
console.log('box', boxWidth, boxHeight);
return (
<TouchableOpacity
key={i}
style={{
position: 'absolute',
left,
top,
width: boxWidth,
height: boxHeight,
borderWidth: 1,
borderColor: Colors.green,
zIndex: 2000,
}}
/>
);
};
// rendering the image or the camera base on the photoUri definition in the state
renderCameraOrImage() {
// render cam or img
const {photoUri} = this.state;
return (
<View>
{!photoUri ? (
<RNCamera
ref={ref => {
this.camera = ref;
}}
style={styles.camera}
type={RNCamera.Constants.Type.back}
flashMode={RNCamera.Constants.FlashMode.off}
androidCameraPermissionOptions={{
title: 'Permission to use camera',
message: 'We need your permission to use your camera',
buttonPositive: 'Ok',
buttonNegative: 'Cancel',
}}
zoom={this.state.zoomValue}
/>
) : (
<Image
source={{uri: this.state.photoUri}}
style={styles.camera}
onLayout={event => {
var {x, y, width, height} = event.nativeEvent.layout;
this.setState({imageWidh: width, imageHeight: height});
}}
/>
)}
</View>
);
}
The result I am getting is not satisfactory, as the drawn boxes are offset from the text.
I need help defining a better approach so that the boxes are positioned correctly over the detected text elements.