first commit
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
node_modules/
|
||||
package-lock.json
|
||||
.env
|
||||
real.html
|
||||
67
config/index.js
Normal file
67
config/index.js
Normal file
@@ -0,0 +1,67 @@
|
||||
const commandLineArgs = require('command-line-args')
|
||||
, logger = require("logops");
|
||||
|
||||
/**
 * Process configuration read from the command line.
 *
 * Constructing an instance parses the arguments, checks that the required
 * ones are present and configures the logger. Invalid or missing arguments
 * terminate the process with exit code 1.
 */
class Config {

    constructor() {
        this.options = {};
        // Option names that must be supplied with a truthy value.
        this.requires = ['env', 'gateway', 'quix24'];
        this.init();
        this.validate();
        this.finalize();
    }

    /**
     * Parses the command line into `this.options`.
     * Exits the process with code 1 when parsing throws.
     */
    init() {
        try {
            this.options = commandLineArgs([
                // Scalar default: `env` is a single String option, not a list.
                { name: 'env', alias: 'e', type: String, defaultValue: 'production' },
                { name: 'host', type: String },
                { name: 'port', type: Number },
                { name: 'database', alias: 'd', type: String },
                { name: 'username', alias: 'u', type: String },
                { name: 'password', alias: 'p', type: String },
                { name: 'mongo', alias: 'm', type: String },
                { name: 'redis', alias: 'r', type: String },
                { name: 'level', alias: 'l', type: String },
                { name: 'gateway', alias: 'g', type: String },
                { name: 'quix24', alias: 'q', type: String }
            ]);
        } catch (e) {
            // Logged as errors: finalize() has not applied the requested log
            // level yet, so debug output would not reliably reach the operator.
            logger.error('Command line arguments interpret failed :', e.message);
            logger.error('expected arguments : ', JSON.stringify(this.requires));
            process.exit(1);
        }
    }

    /**
     * Picks the default log level for the environment ("DEBUG" for
     * development, "WARN" otherwise) unless one was given, then configures
     * the logger formatters and level.
     */
    finalize() {
        const isDevelopment = this.options.env === 'development';
        if (isDevelopment) {
            logger.formatters.dev.omit = ['pid', 'port', 'hostname', 'app'];
        }
        this.options.level = this.options.level || (isDevelopment ? 'DEBUG' : 'WARN');
        logger.formatters.json.omit = ['pid', 'port', 'hostname', 'app'];
        logger.setLevel(this.options.level);

        // Never write the password to the log; the real options stay untouched.
        const loggable = { ...this.options };
        if (loggable.password) {
            loggable.password = '***';
        }
        logger.debug(loggable, 'Environment setting options');
    }

    /**
     * Reduces `this.requires` to the required options that are still missing
     * (absent or falsy) and exits with code 1 when any remain.
     */
    validate() {
        this.requires = this.requires.filter((name) => !this.options[name]);
        if (this.requires.length) {
            // Error level for the same reason as in init(): this runs before finalize().
            logger.error('Process terminated invalid required arguments: ', JSON.stringify(this.requires));
            process.exit(1);
        }
    }

}
|
||||
|
||||
let config = new Config;
|
||||
module.exports = config.options
|
||||
65
config/website/Real.js
Normal file
65
config/website/Real.js
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
 * Selector set and extraction logic for the `Real` website configuration.
 *
 * `selectors` holds the CSS selectors used on a product page; `extract`
 * turns a loaded cheerio document into a plain product record.
 */
class Real {

    constructor() {
        const mainImage = '.rd-gallery__container--thumbnails .swiper-slide';
        const infoBox = 'div.rd-product-description__top-accordion-content > div.rd-product-description__top-accordion-content-attributes .rd-attribute-table > div';
        this.selectors = {
            title: '.rd-title',
            price: '.rd-buybox__price',
            main_image: mainImage,
            info_box: infoBox,
            // Relative selectors, resolved inside each info_box row.
            info_title: '> span',
            info_data: 'div > div',
            description: '#rd-product-description',
            // Selectors exposed to callers as `waitElements`; presumably the
            // elements the page loader waits for — confirm with the gateway API.
            waitElements: [mainImage, infoBox]
        };
    }

    /**
     * Builds the product record from a loaded document and hands it to `cb`.
     *
     * The record contains `title`, `price`, `main_img` (array of image URLs
     * or null), one key per attribute-table row, and `description` (the
     * description markup with all attributes stripped, wrapped in a div).
     *
     * @param {Function} $ cheerio instance loaded with the product page
     * @param {Function} cb receives the extracted record
     */
    extract($, cb) {
        // Drops hyphens/tabs/line breaks and pairs of whitespace; non-strings pass through.
        const cleanup = (txt) => (typeof txt === 'string'
            ? txt.replace(/[\-\t\n\r]+/g, '').replace(/[\s]{2}/g, '').trim()
            : txt);

        const attributes = {};
        $(this.selectors.info_box).each((index, row) => {
            const values = $(row).find(this.selectors.info_data)
                .map((i, cell) => cleanup($(cell).text()))
                .get();
            // A Set keeps the first occurrence of each value, in order.
            const uniqueValues = [...new Set(values)];
            attributes[$(row).find(this.selectors.info_title).text()] = cleanup(uniqueValues.join(' | '));
        });

        const priceNode = $(this.selectors.price);
        const cents = priceNode.data('cents');
        const summary = {
            title: cleanup($(this.selectors.title).text()),
            price: cleanup(priceNode.text()).replace(',', '.') + (cents ? cleanup(cents) : ''),
            main_img: this.extractImages($)
        };
        // Scraped attributes are applied last, exactly as before.
        const info = Object.assign(summary, attributes);

        // Strip attributes only after everything else was read, because the
        // selectors above depend on class names.
        const description = $(this.selectors.description);
        description.find('*').each((index, elem) => this.removeAllAttributes($, elem));
        // html() yields null when the element is missing; emit an empty
        // wrapper instead of the literal text "null".
        info.description = '<div>' + (description.html() || '') + '</div>';
        cb(info);
    }

    /**
     * Collects the gallery image URLs, falling back to the single large
     * gallery image when no thumbnails are present.
     *
     * @param {Function} $ cheerio instance loaded with the product page
     * @returns {string[]|null} image URLs, or null when none were found
     */
    extractImages($) {
        const thumbnails = $(this.selectors.main_image);
        let images = [];
        if (thumbnails.length) {
            images = thumbnails.map((index, slide) => $(slide).find('img').attr('src')).get();
        } else {
            const largeImage = $('.rd-product-image-gallery__slide-large-image');
            if (largeImage.length) {
                images = [largeImage.attr('src')];
            }
        }
        return images.length ? images : null;
    }

    /**
     * Removes every attribute from a parsed element.
     *
     * Reads the parser's `attribs` map directly. The previous
     * `node.attributes || …` guard skipped the cleanup whenever the node
     * exposed a truthy `attributes` value, which newer parser versions do.
     *
     * @param {Function} $ cheerio instance owning the node
     * @param {Object} node parsed element
     * @returns {Object} the same node
     */
    removeAllAttributes($, node) {
        Object.keys(node.attribs || {}).forEach((name) => $(node).removeAttr(name));
        return node;
    }

}
|
||||
|
||||
module.exports = new Real
|
||||
53
index.js
Normal file
53
index.js
Normal file
@@ -0,0 +1,53 @@
|
||||
const express = require("express")
|
||||
, expressLogging = require("express-logging")
|
||||
, logger = require("logops")
|
||||
, HttpStatus = require('http-status-codes')
|
||||
, port = 32102
|
||||
, productController = require('./src/controller/ProductController')
|
||||
, controllers = {};
|
||||
|
||||
|
||||
// NOTE(review): this turns off TLS certificate verification for every
// outgoing HTTPS request of the process. Kept so behavior is unchanged —
// confirm it is still needed and prefer trusting the specific CA instead.
process.env["NODE_TLS_REJECT_UNAUTHORIZED"] = 0;

/**
 * Express web server starting
 *
 * Tags log records with the application name, installs JSON body parsing and
 * request logging, and starts listening on `port`.
 */
logger.getContext = function getContext() {
    return {
        app: 'Extracter'
    };
};

const app = express();
app.use(express.json());
app.use(expressLogging(logger));
app.use((req, res, next) => {
    // Merge into a fresh object: logging must not write query/route values
    // into the request body that later handlers read.
    logger.debug(Object.assign({}, req.body, req.query, req.params), 'Request params');
    next();
});
app.listen(port, () => {
    logger.info('Startup');
});
|
||||
|
||||
/**
|
||||
* Request handlers
|
||||
*
|
||||
*/
|
||||
// Services that can be dispatched, keyed by the first URL segment.
const availableControllers = { product: productController };

/**
 * Catch-all dispatcher: `/<service>/<what>/<args...>` is routed to the method
 * named `<service>` on that service's controller. The controller's result is
 * wrapped in `{ code, message, data }` unless it already carries a `code`.
 * Unknown services get a 404 JSON reply.
 */
app.all('*', (req, res, next) => {
    // Route on the path only; req.url would drag the query string into the last segment.
    const urlSegments = req.path.split('/');
    req.params.service = urlSegments[1];
    req.params.what = urlSegments.length > 2 ? urlSegments[2] : '';
    req.params.args = urlSegments.length > 3 ? urlSegments.slice(3) : [];

    const service = req.params.service;
    if (!Object.prototype.hasOwnProperty.call(controllers, service)) {
        // Own-property lookup keeps names such as "constructor" or
        // "__proto__" from being treated as services.
        const isKnown = Object.prototype.hasOwnProperty.call(availableControllers, service)
            && typeof availableControllers[service][service] === 'function';
        if (!isKnown) {
            res.status(HttpStatus.NOT_FOUND).json({
                code: HttpStatus.NOT_FOUND,
                message: HttpStatus.getStatusText(HttpStatus.NOT_FOUND)
            });
            return;
        }
        const controllerName = service.charAt(0).toUpperCase() + service.slice(1) + 'Controller';
        controllers[service] = availableControllers[service];
        logger.info('%s has been loaded', controllerName);
    }

    controllers[service][service](req, (result) => {
        // A result that already carries a code is sent as the whole body.
        const hasOwnCode = result != null && result.code;
        const body = hasOwnCode
            ? result
            : { code: HttpStatus.OK, message: HttpStatus.getStatusText(HttpStatus.OK), data: result };
        res.json(body);
    });
});
|
||||
|
||||
22
package.json
Normal file
22
package.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "spd-app-spider",
|
||||
"version": "1.0.0",
|
||||
"description": "IBP Corp Tracking service",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1",
|
||||
"start": "node index.js"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@k3rn31p4nic/google-translate-api": "^1.1.0",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"command-line-args": "^5.1.1",
|
||||
"express": "^4.17.1",
|
||||
"express-logging": "^1.1.1",
|
||||
"http-status-codes": "^1.4.0",
|
||||
"logops": "^2.1.1",
|
||||
"node-fetch": "^2.6.0"
|
||||
}
|
||||
}
|
||||
53
src/component/RestClientComp.js
Normal file
53
src/component/RestClientComp.js
Normal file
@@ -0,0 +1,53 @@
|
||||
const fetch = require("node-fetch")
|
||||
, logger = require("logops")
|
||||
, TokenExpiredException = require('../exception/TokenExpiredException')
|
||||
, config = require('../../config');
|
||||
|
||||
|
||||
/**
 * Small REST helper that obtains a client-credentials token from a gateway
 * and retries a request once when the token is reported as expired (406).
 */
class RestClientComp {

    constructor() {
        this.token = null;
        this.gateway = 'http://' + config.quix24 + '/api/v1/';
        // NOTE(review): credentials are committed in source; consider loading
        // them from configuration or the environment and rotating these values.
        this.clientId = 'F8Nbz9gGUvLgdn8T';
        this.clientSecret = 'Fesmx7hHCCMx7Cby7FhV2fAtf2NYEFzP5fCKuwEEdpf8hpnsnnQn9qYhNHt6SsDD';
    }

    /**
     * Requests a token from `<gateway>oauth/token` and stores it in `this.token`.
     *
     * @returns {Promise<string>} the `access_token` field of the response
     */
    auth() {
        // URLSearchParams encodes the values, so credentials with reserved
        // characters cannot break the query string.
        const query = new URLSearchParams({
            client_id: this.clientId,
            grant_type: 'client_credentials',
            client_secret: this.clientSecret
        });
        const url = this.gateway + 'oauth/token?' + query.toString();
        return fetch(url, { method: 'GET', headers: { 'content-type': 'application/json' } })
            .then((res) => res.json())
            .then((res) => {
                // Log the response shape without exposing the token itself.
                const loggable = res && res.access_token ? { ...res, access_token: '***' } : res;
                logger.debug(loggable, 'Token from %s', this.gateway);
                this.token = res.access_token;
                return res.access_token;
            });
    }

    /**
     * Builds the request headers carrying the current token.
     *
     * @param {Object} [additionalheaders] extra headers; they win on conflict
     * @returns {Object} header map
     */
    headers(additionalheaders) {
        return {
            "Authorization": "Bearer " + this.token,
            "Content-Type": "application/json",
            ...additionalheaders
        };
    }

    /**
     * Passes a response body through, throwing when it reports code 406.
     *
     * Accepts a parsed object or the raw text body. Text is parsed only to
     * look for the code (callers in this project pass `res.text()`), so a
     * JSON error body is detected while HTML passes through unchanged.
     *
     * @param {*} res parsed body or raw text
     * @returns {*} `res` unchanged
     * @throws {TokenExpiredException} when the body carries code 406
     */
    response(res) {
        let payload = res;
        if (typeof res === 'string') {
            try {
                payload = JSON.parse(res);
            } catch (e) {
                // Not JSON (e.g. an HTML page): nothing to inspect.
                payload = null;
            }
        }
        if (payload != null && payload.code == 406) {
            throw new TokenExpiredException("Token expired " + this.gateway);
        }
        return res;
    }

    /**
     * Runs `fetchable`, authenticating first when no token is held and
     * retrying once after re-authentication when it fails with status 406.
     *
     * Any other failure is logged and the promise resolves with `undefined`
     * (existing contract — callers must handle a missing result).
     *
     * @param {Function} fetchable returns a promise for the request result
     * @returns {Promise<*>} the request result, or undefined on failure
     */
    async request(fetchable) {
        if (!this.token) {
            await this.auth();
        }
        try {
            return await fetchable();
        } catch (error) {
            logger.error({ message: error.message }, 'Fetchable failed with error');
            if (error.status == 406) {
                logger.info('Requesting token %s', this.gateway);
                await this.auth();
                return fetchable();
            }
            return undefined;
        }
    }

}
|
||||
|
||||
|
||||
module.exports = RestClientComp;
|
||||
24
src/controller/ProductController.js
Normal file
24
src/controller/ProductController.js
Normal file
@@ -0,0 +1,24 @@
|
||||
const fetch = require("node-fetch")
|
||||
, logger = require("logops")
|
||||
, productService = require('../service/ProductService');
|
||||
|
||||
/**
 * Controller for the `product` service. Requests are dispatched to the method
 * named after the HTTP verb plus the camel-cased `what` segment
 * (e.g. POST with an empty `what` → `post`).
 */
class ProductController {

    /**
     * Derives the handler name for a request.
     *
     * @param {Object} req request with `method` and `params.what`
     * @returns {string} lower-cased verb followed by `what` in CamelCase
     */
    getMethodName(req) {
        const what = req.params.what || '';
        const suffix = what.split('_').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join('');
        return req.method.toLowerCase() + suffix;
    }

    /**
     * Entry point for the service: merges body, query and route params
     * (later sources win) and invokes the matching handler.
     * Replies with `{ code: 404 }` when no handler exists for the request.
     *
     * @param {Object} req express request
     * @param {Function} callback receives the handler result or an error body
     * @returns {*} whatever the handler returns (a promise for async handlers)
     */
    product(req, callback) {
        const fn = this.getMethodName(req);
        // Fresh object: the request body must not be rewritten as a side effect.
        const params = Object.assign({}, req.body, req.query, req.params);

        // Helpers of this class are not request handlers even when a URL spells their name.
        const notRoutable = ['constructor', 'getMethodName', 'product'];
        const isHandler = !notRoutable.includes(fn)
            && !(fn in Object.prototype)
            && typeof this[fn] === 'function';
        if (!isHandler) {
            logger.info('No method ProductController.%s', fn);
            callback({ code: 404, message: 'Not Found' });
            return undefined;
        }

        logger.info('Invoking method ProductController.%s', fn);
        return this[fn](params, callback);
    }

    /**
     * Handles POST: extracts the product found at `params.uri`.
     * The callback is invoked exactly once, with the extracted data or an
     * error body (`{ code, message }`).
     *
     * @param {Object} params merged request parameters; `uri` is required
     * @param {Function} callback receives the result
     */
    async post(params, callback) {
        // Guard against a second reply when a failure happens after the first one.
        let replied = false;
        const reply = (result) => {
            if (replied) {
                return;
            }
            replied = true;
            callback(result);
        };

        if (!params.uri) {
            reply({ code: 400, message: 'Bad Request' });
            return;
        }
        try {
            await productService.get(params.uri, reply);
        } catch (error) {
            logger.error({ message: error.message }, 'Product extraction failed');
            reply({ code: 500, message: 'Internal Server Error' });
        }
    }

}
|
||||
|
||||
module.exports = new ProductController;
|
||||
16
src/exception/TokenExpiredException.js
Normal file
16
src/exception/TokenExpiredException.js
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Error signalling an expired token. Instances carry `status` 406 and take
 * their `name` from the concrete class.
 */
class TokenExpiredException extends Error {
    constructor (message) {
        super(message);

        const errorClass = this.constructor;

        // Name the error after its class so subclasses report their own name.
        this.name = errorClass.name;

        // Start the stack trace at the caller instead of inside this constructor.
        Error.captureStackTrace(this, errorClass);

        // Status that callers compare against.
        this.status = 406;
    }
}
|
||||
|
||||
module.exports = TokenExpiredException
|
||||
36
src/service/ProductService.js
Normal file
36
src/service/ProductService.js
Normal file
@@ -0,0 +1,36 @@
|
||||
const config = require("../../config")
|
||||
, logger = require("logops")
|
||||
, configWebsiteReal = require('../../config/website/Real')
|
||||
, RestClient = require('../component/RestClientComp')
|
||||
, cheerio = require('cheerio')
|
||||
, fetch = require('node-fetch')
|
||||
|
||||
|
||||
/**
 * Fetches a product page through the gateway's `browse` endpoint and extracts
 * the product data with the `Real` website configuration.
 */
class ProductService {

    constructor() {
        this.restClient = new RestClient();
        this.restClient.gateway = 'http://' + config.gateway + '/api/v1/';
        // NOTE(review): credentials are committed in source; consider loading
        // them from configuration or the environment and rotating these values.
        this.restClient.clientId = '7tmK4rF8As8CtRw5';
        this.restClient.clientSecret = 'cgzLcVFku5zEvXkCKNZsAbxXQENFqPDx2kfjnMGd3m9BczehmFt2pw9r9MdASSFt';
    }

    /**
     * Loads the page at `uri` via the gateway and passes the extracted
     * product to `callback`. When the gateway request fails or yields no
     * page, `callback` receives `{ code: 502, message }` instead, so the
     * caller always gets an answer.
     *
     * @param {string} uri address of the product page
     * @param {Function} callback receives the product data or an error body
     * @returns {Promise<void>}
     */
    async get(uri, callback) {
        let html;
        try {
            html = await this.restClient.request(() => this.browse(uri));
        } catch (error) {
            logger.error({ message: error.message }, 'Browse request failed');
            callback({ code: 502, message: 'Bad Gateway' });
            return;
        }
        // The rest client resolves with undefined after a failure it only logged.
        if (typeof html !== 'string') {
            callback({ code: 502, message: 'Bad Gateway' });
            return;
        }
        this.extract(html, callback);
    }

    /**
     * Posts the browse request. Headers are built at call time so a retry
     * after re-authentication carries the new token.
     *
     * @param {string} uri address of the product page
     * @returns {Promise<string>} response body text
     */
    browse(uri) {
        return fetch(this.restClient.gateway + 'browse', {
            method: 'POST',
            headers: this.restClient.headers(),
            body: JSON.stringify({
                uri: uri, waitElements: configWebsiteReal.selectors.waitElements
            })
        }).then((res) => res.text()).then((text) => this.restClient.response(text));
    }

    /**
     * Parses the HTML and delegates extraction to the website configuration.
     *
     * @param {string} html page markup
     * @param {Function} callback receives the extracted product
     */
    extract(html, callback) {
        const $ = cheerio.load(html);
        configWebsiteReal.extract($, callback);
    }

}
|
||||
|
||||
const productService = new ProductService
|
||||
module.exports = productService
|
||||
Reference in New Issue
Block a user