Validate CSV files in a Grunt build
How can I validate CSV files (encoding, headline, delimiter, column count) in my Grunt build? I had a look at CSVLint, but I could neither get it to work nor figure out how to include it in Grunt.
Edit: PapaParse looks promising, but has no Grunt integration either.
Although Grunt integration for PapaParse does not exist, its API can be used by configuring a custom Function Task inside your Gruntfile.js.
Install papaparse via npm
Firstly, cd to your project directory, install papaparse via npm, and add it to the devDependencies section of your project's package.json. To do this, run the following command via your CLI tool:
$ npm i -D papaparse
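Once the install completes, papaparse should appear under devDependencies in your package.json, similar to the snippet below (the version number shown is only illustrative; it will be whatever npm resolved at install time):
{
  "devDependencies": {
    "papaparse": "^4.0.0"
  }
}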
Gruntfile.js
The following gist shows how to configure a custom Function Task named validateCSV in your Gruntfile.js.
module.exports = function(grunt) {

  // Requirements
  var fs = require('fs');
  var Papa = require('papaparse');

  // Other project configuration tasks.
  grunt.initConfig({
    // ...
  });

  /**
   * Register a custom Function task to validate .csv files using Papa Parse.
   */
  grunt.registerTask('validateCSV', 'Lint .csv files via Papa Parse', function() {
    var glob = './csv/*.csv'; // <-- Note: Edit glob pattern as required.
    var success = true;

    // Create an Array of all .csv files using the glob pattern provided.
    var csvFiles = grunt.file.expand(glob).map(function(file) {
      return file;
    });

    // Report if no .csv files were found and return early.
    if (csvFiles.length === 0) {
      grunt.log.write('No .csv files were found');
      return;
    }

    // Loop over each .csv file in the csvFiles Array.
    csvFiles.forEach(function(csvFile) {
      // Read the contents of the .csv file.
      var csvString = fs.readFileSync(csvFile, {
        encoding: 'utf8'
      });

      // Parse the .csv contents via Papa Parse.
      var papa = Papa.parse(csvString, {
        delimiter: ',',
        newline: '',
        quoteChar: '"',
        header: true,
        skipEmptyLines: true
        // For additional config options visit:
        // http://papaparse.com/docs#config
      });

      // Basic error and success logging.
      if (papa.errors.length > 0) {
        grunt.log.error('Error(s) in file: '['red'] + csvFile['red']);
        // Report each error for a single .csv file.
        // For additional Papa Parse errors visit:
        // http://papaparse.com/docs#errors
        papa.errors.forEach(function(error) {
          grunt.log.write('\n type: ' + error.type);
          grunt.log.write('\n code: ' + error.code);
          grunt.log.write('\n message: ' + error.message);
          grunt.log.write('\n row: ' + error.row + '\n\n');
        });
        // Indicate that a .csv file failed validation.
        success = false;
      } else {
        grunt.log.ok('No errors found in file: ' + csvFile);
      }
    });

    // If errors are found in any of the .csv files this will
    // prevent subsequent defined tasks from being processed.
    if (!success) {
      grunt.fail.warn('Error(s) were found when validating .csv files');
    }
  });

  // Register the custom Function task.
  grunt.registerTask('default', [
    'validateCSV'
    // ...
  ]);
};
Notes
The following line of code (taken from the Gruntfile.js above) that reads:
var glob = './csv/*.csv';
... will need to be changed according to your project requirements. Currently the globbing pattern assumes that all .csv files reside inside a folder named csv.
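For instance, if your .csv files were spread across nested sub-directories, a recursive globbing pattern could be used instead (the data folder name below is only an example):
// Matches .csv files in ./data and all of its sub-directories.
var glob = './data/**/*.csv';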
You may also need to set the config options as per your requirements.
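For example, if your files were semicolon-delimited and had no headline row, the Papa.parse configuration shown above might be adjusted along these lines (values are illustrative; see the Papa Parse config docs for the full list of options):
// Parse the .csv contents via Papa Parse.
var papa = Papa.parse(csvString, {
  delimiter: ';',   // semicolon-separated values
  newline: '',      // auto-detect line endings
  quoteChar: '"',
  header: false,    // no headline row expected
  skipEmptyLines: true
});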
The custom Function Task also includes some basic error and success reporting that will be logged to the CLI.
Running the Task
To run the grunt task simply execute the following via your CLI tool:
$ grunt validateCSV
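Because grunt.fail.warn aborts the run when validation fails, the Task can also be placed ahead of your other build steps so that a broken .csv file stops the build early. The concat and uglify names below are only placeholders for whatever tasks your project actually registers:
// Run CSV validation before the rest of the build.
grunt.registerTask('build', [
  'validateCSV',
  'concat', // <-- placeholder
  'uglify'  // <-- placeholder
]);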
EDIT: Updated Answer (based on the following comment...)
Would it also be possible to "configure" the task from within the
grunt.initConfig()? For example linting different CSV directories?
To achieve this you can create a separate JavaScript module that registers a MultiTask.
Let's call it papaparse.js and save it to a directory named custom-grunt-tasks, which resides in the same top-level directory as your Gruntfile.js.
Note: The .js file and the directory can be given any names you prefer; however, you will then need to update the references inside Gruntfile.js, as shown below.
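The reference in question is the grunt.loadTasks call in the Gruntfile.js further below; if you pick a different directory name, point the call there instead:
// Loads every task file found in the given directory.
grunt.loadTasks('./custom-grunt-tasks');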
papaparse.js
module.exports = function(grunt) {

  'use strict';

  // Requirements
  var fs = require('fs');
  var Papa = require('papaparse');

  grunt.registerMultiTask('papaparse', 'Misc Tasks', function() {

    // Default options. These are used when no options are
    // provided via the initConfig({...}) papaparse task.
    var options = this.options({
      quotes: false,
      delimiter: ',',
      newline: '',
      quoteChar: '"',
      header: true,
      skipEmptyLines: true
    });

    // Loop over each path provided via the src array.
    this.data.src.forEach(function(dir) {

      // Append a forward slash if a directory path
      // provided does not end with one.
      if (dir.slice(-1) !== '/') {
        dir += '/';
      }

      // Generate the globbing pattern.
      var glob = [dir, '*.csv'].join('');

      // Create an Array of all .csv files using the glob pattern.
      var csvFiles = grunt.file.expand(glob).map(function(file) {
        return file;
      });

      // Report if no .csv files were found and return early.
      if (csvFiles.length === 0) {
        grunt.log.write(
          '>> No .csv files found using the globbing '['yellow'] +
          'pattern: '['yellow'] + glob['yellow']
        );
        return;
      }

      // Loop over each .csv file in the csvFiles Array.
      csvFiles.forEach(function(csvFile) {
        var success = true;

        // Read the contents of the .csv file.
        var csvString = fs.readFileSync(csvFile, {
          encoding: 'utf8'
        });

        // Parse the .csv contents via Papa Parse.
        var papa = Papa.parse(csvString, options);

        // Basic error and success logging.
        if (papa.errors.length > 0) {
          grunt.log.error('Error(s) in file: '['red'] + csvFile['red']);
          // Report each error for a single .csv file.
          // For additional Papa Parse errors visit:
          // http://papaparse.com/docs#errors
          papa.errors.forEach(function(error) {
            grunt.log.write('\n type: ' + error.type);
            grunt.log.write('\n code: ' + error.code);
            grunt.log.write('\n message: ' + error.message);
            grunt.log.write('\n row: ' + error.row + '\n\n');
          });
          // Indicate that a .csv file failed validation.
          success = false;
        } else {
          grunt.log.ok('No errors found in file: ' + csvFile);
        }

        // If errors are found in any of the .csv files this will prevent
        // subsequent files and defined tasks from being processed.
        if (!success) {
          grunt.fail.warn('Error(s) found when validating .csv files');
        }
      });
    });
  });
};
Gruntfile.js
Your Gruntfile.js can then be configured something like this:
module.exports = function(grunt) {

  grunt.initConfig({
    // ...
    papaparse: {
      setOne: {
        src: ['./csv/', './csv2']
      },
      setTwo: {
        src: ['./csv3/'],
        options: {
          skipEmptyLines: false
        }
      }
    }
  });

  // Load the custom MultiTask named `papaparse` - which is defined in
  // `papaparse.js` stored in the directory named `custom-grunt-tasks`.
  grunt.loadTasks('./custom-grunt-tasks');

  // Register and add papaparse to the default Task.
  grunt.registerTask('default', [
    'papaparse' // <-- This runs Targets named setOne and setTwo
    // ...
  ]);

  // `papaparse.js` allows for multiple Targets to be defined, so
  // you can use the colon notation to just run one Target.
  // The following only runs the setTwo Target.
  grunt.registerTask('processOneTarget', [
    'papaparse:setTwo'
    // ...
  ]);
};
Running the Task
The papaparse Task has been added to the taskList Array of the default Task, so it can be executed by entering the following via your CLI tool:
$ grunt
Notes
Running the example gist by entering $ grunt via your CLI will process all .csv files inside the directories named csv, csv2, and csv3.
Running $ grunt processOneTarget via your CLI will process only .csv files inside the directory named csv3.
Because papaparse.js registers a MultiTask, the papaparse Task defined in Gruntfile.js can contain multiple Targets; here it includes two, named setOne and setTwo.
The setOne Target's src Array lists two directories to process (./csv/ and ./csv2). All .csv files found in these paths are processed using the default papaparse options defined in papaparse.js, because the Target does not define any custom options.
The setTwo Target's src Array lists a single directory (./csv3/). All .csv files found there are processed using the default options defined in papaparse.js, except that skipEmptyLines is overridden and set to false.
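In other words, this.options() merges each Target's options over the defaults declared in papaparse.js, so the effective configuration used for the setTwo Target would look roughly like this:
// Effective options for the setTwo Target (defaults + override).
{
  quotes: false,
  delimiter: ',',
  newline: '',
  quoteChar: '"',
  header: true,
  skipEmptyLines: false // <-- overridden via initConfig()
}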
You may find that simply defining one Target in Gruntfile.js, with multiple paths in the src Array and no custom options, meets your requirements. For example:
// ...
grunt.initConfig({
  // ...
  papaparse: {
    myTask: {
      src: ['./csv/', './csv2', './csv3']
    }
  }
  // ...
});
// ...
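With a single Target configured as above, running either of the following commands would process every directory listed in the src Array, since Grunt runs all Targets of a MultiTask when no specific Target is given:
$ grunt papaparse
$ grunt papaparse:myTask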
Hope this helps!