I have written an awk script that converts a distributor flatfile into a CSV importable into Magento. This file is semi-colon delimited.
It is not putting quotes around each field like the importer requires. It works fairly well, but is causing some issues on the data import without the enclosing double quotes. I spent a couple hours trying to figure out how to add this to the existing script, without much luck. Any help would be greatly appreciated - I am pretty new to AWK.
Current Output
store;websites;attribute_set;type;category_ids;sku;has_options;name;meta_title;meta_description;image;small_image;thumbnail;url_key;url_path;config_attributes;custom_design;page_layout;options_container;country_of_manufacture;msrp_enabled;msrp_display_actual_price_type;gift_message_available;rsr_pn;manufacturer_pn;price;special_price;cost;weight;msrp;status;visibility;manufacturer;enable_googlecheckout;tax_class_id;is_recurring;description;short_description;meta_keyword;custom_layout_update;news_from_date;news_to_date;special_from_date;special_to_date;custom_design_from;custom_design_to;qty;min_qty;use_config_min_qty;is_qty_decimal;backorders;use_config_backorders;min_sale_qty;use_config_min_sale_qty;max_sale_qty;use_config_max_sale_qty;is_in_stock;low_stock_date;notify_stock_qty;use_config_notify_stock_qty;manage_stock;use_config_manage_stock;stock_status_changed_auto;use_config_qty_increments;qty_increments;use_config_enable_qty_inc;enable_qty_increments;is_decimal_divided;stock_status_changed_automatically;use_config_enable_qty_increments;product_name;store_id;product_type_id;product_status_changed;product_changed_websites;gallery;related;upsell;crosssell;tier_prices;associated;bundle_options;grouped;group_price_price;downloadable_options;super_attribute_pricing;product_tags
admin;base;Default;simple;2,35,36;844802016148;0;5.11 HOLSTER SHIRT L WHITE;;;/5/1/511-40011-010-L_1.jpg;/5/1/511-40011-010-L_1.jpg;/5/1/511-40011-010-L_1.jpg;511-40011-010-L;511-40011-010-L.html;;;No layout updates;Block after Info Column;;Use config;Use config;No;511-40011-010-L;40011;74.99;;48.00;5;74.99;Enabled;Catalog, Search;5.11 Tactical;Yes;Taxable Goods;No;5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011;5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011;;;;;;;;;0;0;1;0;0;1;1;1;0;1;1;;;1;0;1;0;1;0;1;0;0;0;1;5.11 HOLSTER SHIRT L WHITE;0;simple;;;;;;;;;;;;;;
Desired Output
"store";"websites";"attribute_set";"type";"category_ids";"sku";"has_options";"name";"meta_title";"meta_description";"image";"small_image";"thumbnail";"url_key";"url_path";"config_attributes";"custom_design";"page_layout";"options_container";"country_of_manufacture";"msrp_enabled";"msrp_display_actual_price_type";"gift_message_available";"rsr_pn";"manufacturer_pn";"price";"special_price";"cost";"weight";"msrp";"status";"visibility";"manufacturer";"enable_googlecheckout";"tax_class_id";"is_recurring";"description";"short_description";"meta_keyword";"custom_layout_update";"news_from_date";"news_to_date";"special_from_date";"special_to_date";"custom_design_from";"custom_design_to";"qty";"min_qty";"use_config_min_qty";"is_qty_decimal";"backorders";"use_config_backorders";"min_sale_qty";"use_config_min_sale_qty";"max_sale_qty";"use_config_max_sale_qty";"is_in_stock";"low_stock_date";"notify_stock_qty";"use_config_notify_stock_qty";"manage_stock";"use_config_manage_stock";"stock_status_changed_auto";"use_config_qty_increments";"qty_increments";"use_config_enable_qty_inc";"enable_qty_increments";"is_decimal_divided";"stock_status_changed_automatically";"use_config_enable_qty_increments";"product_name";"store_id";"product_type_id";"product_status_changed";"product_changed_websites";"gallery";"related";"upsell";"crosssell";"tier_prices";"associated";"bundle_options";"grouped";"group_price_price";"downloadable_options";"super_attribute_pricing";"product_tags"
"admin";"base";"Default";"simple";"2,35,36";"844802016148";"0";"5.11 HOLSTER SHIRT L WHITE";"";"";"/5/1/511-40011-010-L_1.jpg";"/5/1/511-40011-010-L_1.jpg";"/5/1/511-40011-010-L_1.jpg";"511-40011-010-L";"511-40011-010-L.html";"";"";"No layout updates";"Block after Info Column";"";"Use config";"Use config";"No";"511-40011-010-L";"40011";"74.99";"";"48.00";"5";"74.99";"Enabled";"Catalog, Search";"5.11 Tactical";"Yes";"Taxable Goods";"No";"5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011";"5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011";"";"";"";"";"";"";"";"";"0";"0";"1";"0";"0";"1";"1";"1";"0";"1";"1";"";"";"1";"0";"1";"0";"1";"0";"1";"0";"0";"0";"1";"5.11 HOLSTER SHIRT L WHITE";"0";"simple";"";"";"";"";"";"";"";"";"";"";"";"";"";"
Script - rsrimport.awk
#!/bin/awk -f
# ----------------------------------------------------------------------------------------
# Copyright (c) 2012 - 2013 John Steensen <john.steensen#live.com>
# All rights reserved. No warranty, explicit or implicit, provided.
# ----------------------------------------------------------------------------------------
# AWK Processing
# Updated 03DEC2012#1552 MST
# ----------------------------------------------------------------------------------------
# Warnings/Dependancy Notes
# AWK
# ----------------------------------------------------------------------------------------
BEGIN {
FS=";";
OFS=";";
CATEGORY="47";
IMAGE="imagepathfail";
URLKEY="urlkeyfail";
URLPATH="urlpathfail";
print "store", "websites", "attribute_set", "type", "category_ids", "sku", "has_options", "name", "image", "small_image", "thumbnail", "url_key", "url_path", "page_layout", "options_container", "msrp_enabled", "msrp_display_actual_price_type", "gift_message_available", "rsr_pn", "manufacturer_pn", "price", "cost", "weight", "msrp", "manufacturer", "status", "is_recurring", "visibility", "enable_googlecheckout", "tax_class_id", "description", "short_description", "qty", "min_qty", "use_config_min_qty", "is_qty_decimal", "backorders", "use_config_backorders", "min_sale_qty", "use_config_min_sale_qty", "max_sale_qty", "use_config_max_sale_qty", "is_in_stock", "notify_stock_qty", "use_config_notify_stock_qty", "manage_stock", "use_config_manage_stock", "stock_status_changed_auto", "use_config_qty_increments", "qty_increments", "use_config_enable_qty_inc", "enable_qty_increments", "is_decimal_divided", "stock_status_changed_automatically", "use_config_enable_qty_increments", "product_name", "store_id", "product_type_id";
}
{
# DEFINE CATEGORY
if ($4=="1") CATEGORY="2,3,4";
else if ($4=="2") CATEGORY="2,3,7";
else if ($4=="3") CATEGORY="2,3,8";
else if ($4=="4") CATEGORY="2,3,22,23";
else if ($4=="5") CATEGORY="2,3,5";
else if ($4=="7") CATEGORY="2,3,6";
else if ($4=="8") CATEGORY="2,27,28";
else if ($4=="9") CATEGORY="2,27,29";
else if ($4=="10") CATEGORY="2,9,13";
else if ($4=="11") CATEGORY="2,9,14";
else if ($4=="12") CATEGORY="2,35,38";
else if ($4=="13") CATEGORY="2,9,16";
else if ($4=="14") CATEGORY="2,35,37";
else if ($4=="15") CATEGORY="2,19,21";
else if ($4=="16") CATEGORY="2,9,15";
else if ($4=="17") CATEGORY="2,9,16";
else if ($4=="18") CATEGORY="2,19,20";
else if ($4=="20") CATEGORY="2,27,33";
else if ($4=="21") CATEGORY="2,9,17";
else if ($4=="22") CATEGORY="2,3,22,24";
else if ($4=="23") CATEGORY="2,3,22,25";
else if ($4=="24") CATEGORY="2,9,13";
else if ($4=="25") CATEGORY="2,40,43";
else if ($4=="26") CATEGORY="2,40,44";
else if ($4=="27") CATEGORY="2,3,22,26";
else if ($4=="28") CATEGORY="2,27,31";
else if ($4=="29") CATEGORY="2,27,32";
else if ($4=="30") CATEGORY="2,27,30";
else if ($4=="31") CATEGORY="2,27,34";
else if ($4=="32") CATEGORY="2,9,11";
else if ($4=="33") CATEGORY="2,35,36";
else if ($4=="34") CATEGORY="2,9,10";
else if ($4=="35") CATEGORY="2,9,18";
else if ($4=="36") CATEGORY="2,40,42";
else if ($4=="38") CATEGORY="2,40,41";
else if ($4=="39") CATEGORY="2,40,45";
else if ($4=="40") CATEGORY="2,35,39";
else if ($4=="41") CATEGORY="2,9,12";
else if ($4=="43") CATEGORY="2,9,12";
else if ($4=="01") CATEGORY="2,3,4";
else if ($4=="02") CATEGORY="2,3,7";
else if ($4=="03") CATEGORY="2,3,8";
else if ($4=="04") CATEGORY="2,3,22,23";
else if ($4=="05") CATEGORY="2,3,5";
else if ($4=="07") CATEGORY="2,3,6";
else if ($4=="08") CATEGORY="2,27,28";
else if ($4=="09") CATEGORY="2,27,29";
else CATEGORY="47";
# DEFINE IMAGE WITH PATH.
IMAGE="/5/1/"$1"_1.jpg";
# DEFINE URL KEY
URLKEY=$1;
# DEFINE URL PATH
URLPATH=$1".html";
print "admin", "base", "Default", "simple", CATEGORY, $1, "0", $3, IMAGE, IMAGE, IMAGE, URLKEY, URLPATH, "No layout updates", "Block after Info Column", "Use config", "Use config", "No", $1, $12, $6, $7, $8, $6, $11, "Enabled", "No", "Catalog, Search", "Yes", "Taxable Goods", $14, $14, $9, "0", "1", "0", "0", "1", "1", "1", "0", "1", "1", "0", "1", "0", "1", "0", "1", "0", "1", "0", "0", "0", "1", $3, "0", "simple";
}
END {}
If you want:
add this to the existing script.
You can insert additional \"\" in each argument of print like this:
print "\"admin\"", "\"base\"", ...
Edited:
Yes, perhaps seting OFS is better solution:
BEGIN { OFS="\";\""; } ... print "\"admin", ...., "simple\"";
awk '{for (i=1;i<=NF;i++) $i="\""$i"\""}1' FS=";" OFS=";" input
To add quotes around the entries you could use a simple AWK loop:
Script - simple_loop.awk
BEGIN {FS=";"}
{
for(i=1;i<NF;i++){
printf("\"%s\";", $i);
}
printf("\"%s\"\n",$NF);
}
For instance
echo "admin;base;5.11 HOLSTER SHIRT L WHITE;;" | awk -f simple_loop.awk
Should output
"admin";"base";"5.11 HOLSTER SHIRT L WHITE";"";""
In this case I would use a sed expression instead of AWK.
If your data is in a file called data.txt, you can get it writing:
sed "s/;/\";\"/g;s/^/\"/;s/$/\"/" data.txt
That will print the result to the std output, but if you want to replace the content of the file just use sed -i this way:
sed -i "s/;/\";\"/g;s/^/\"/;s/$/\"/" data.txt
And that is all !!
Explanation:
The sed expression consists in three sed commands separated by ";" that you can run separately:
sed "s/;/\";\"/g
It makes a substitution (that is what means the first "s"), then the "/" (the default separator), ";" that is what we want to replace. Then the second separator "/", and the replacement: \";\" It is a sequence: escaped quote, a semicolon and a escaped quote. So with this command we will replace semicolons ; by ";". The last /g means that each ; will be replaced (not only the first smicolon).
If the input was a;b;c after this running the first command it will be a";"b";"c.
Now we need to add quotes in the beginning (^ in a regular expression) and in the end ($). So that is what it means:
sed "s/^/\"/" // the first quote
And
sed "s/$/\"/" // the last quote
Getting the desired output:
"a";"b";"c"
Let me refactor your program a bit:
/#!/bin/awk -f
BEGIN {
FS=";";
OFS="\";\"";
IMAGE="imagepathfail";
URLKEY="urlkeyfail";
URLPATH="urlpathfail";
# DEFINE CATEGORY
CATEGORY["1"] ="2,3,4";
CATEGORY["2"] ="2,3,7";
CATEGORY["3"] ="2,3,8";
CATEGORY["4"] ="2,3,22,23";
CATEGORY["5"] ="2,3,5";
CATEGORY["7"] ="2,3,6";
CATEGORY["8"] ="2,27,28";
CATEGORY["9"] ="2,27,29";
CATEGORY["10"]="2,9,13";
CATEGORY["11"]="2,9,14";
CATEGORY["12"]="2,35,38";
CATEGORY["13"]="2,9,16";
CATEGORY["14"]="2,35,37";
CATEGORY["15"]="2,19,21";
CATEGORY["16"]="2,9,15";
CATEGORY["17"]="2,9,16";
CATEGORY["18"]="2,19,20";
CATEGORY["20"]="2,27,33";
CATEGORY["21"]="2,9,17";
CATEGORY["22"]="2,3,22,24";
CATEGORY["23"]="2,3,22,25";
CATEGORY["24"]="2,9,13";
CATEGORY["25"]="2,40,43";
CATEGORY["26"]="2,40,44";
CATEGORY["27"]="2,3,22,26";
CATEGORY["28"]="2,27,31";
CATEGORY["29"]="2,27,32";
CATEGORY["30"]="2,27,30";
CATEGORY["31"]="2,27,34";
CATEGORY["32"]="2,9,11";
CATEGORY["33"]="2,35,36";
CATEGORY["34"]="2,9,10";
CATEGORY["35"]="2,9,18";
CATEGORY["36"]="2,40,42";
CATEGORY["38"]="2,40,41";
CATEGORY["39"]="2,40,45";
CATEGORY["40"]="2,35,39";
CATEGORY["41"]="2,9,12";
CATEGORY["43"]="2,9,12";
CATEGORY["01"]="2,3,4";
CATEGORY["02"]="2,3,7";
CATEGORY["03"]="2,3,8";
CATEGORY["04"]="2,3,22,23";
CATEGORY["05"]="2,3,5";
CATEGORY["07"]="2,3,6";
CATEGORY["08"]="2,27,28";
CATEGORY["09"]="2,27,29";
# header
print "store", "websites", "attribute_set", "type", "category_ids", "sku", "has_options", "name", "image", "small_image", "thumbnail", "url_key", "url_path", "page_layout", "options_container", "msrp_enabled", "msrp_display_actual_price_type", "gift_message_available", "rsr_pn", "manufacturer_pn", "price", "cost", "weight", "msrp", "manufacturer", "status", "is_recurring", "visibility", "enable_googlecheckout", "tax_class_id", "description", "short_description", "qty", "min_qty", "use_config_min_qty", "is_qty_decimal", "backorders", "use_config_backorders", "min_sale_qty", "use_config_min_sale_qty", "max_sale_qty", "use_config_max_sale_qty", "is_in_stock", "notify_stock_qty", "use_config_notify_stock_qty", "manage_stock", "use_config_manage_stock", "stock_status_changed_auto", "use_config_qty_increments", "qty_increments", "use_config_enable_qty_inc", "enable_qty_increments", "is_decimal_divided", "stock_status_changed_automatically", "use_config_enable_qty_increments", "product_name", "store_id", "product_type_id";
}
function getCategory(val) {
return (val in CATEGORY) ? CATEGORY[val] : "47";
}
{
# DEFINE IMAGE WITH PATH.
IMAGE="/5/1/"$1"_1.jpg";
# DEFINE URL KEY
URLKEY=$1;
# DEFINE URL PATH
URLPATH=$1".html";
print "\" "admin", "base", "Default", "simple", getCategory($4), $1, "0", $3, IMAGE, IMAGE, IMAGE, URLKEY, URLPATH, "No layout updates", "Block after Info Column", "Use config", "Use config", "No", $1, $12, $6, $7, $8, $6, $11, "Enabled", "No", "Catalog, Search", "Yes", "Taxable Goods", $14, $14, $9, "0", "1", "0", "0", "1", "1", "1", "0", "1", "1", "0", "1", "0", "1", "0", "1", "0", "1", "0", "0", "0", "1", $3, "0", "simple" "\"";
}
In my opinion, we could use printf (formated output) and double quote is obtain with \" into format string.
e.g.
gawk 'BEGIN{print "WKT,punto";}{printf "\"LINESTRING Z (%f %f 0,%f %f 0)\",\"%d"\n",$3,$2,$4,$5,$1}' Frecce_geoloc_12-24.txt
output:
$3 $2 $4 $5 $1
"LINESTRING Z (-72.319686 -50.609328 0,-50.609309 -72.319499 0)","6582"
"LINESTRING Z (-72.319245 -50.609215 0,-50.609195 -72.319052 0)","6583"
"LINESTRING Z (-72.318799 -50.609101 0,-50.609081 -72.318607 0)","6584"
"LINESTRING Z (-72.318366 -50.608990 0,-50.608969 -72.318169 0)","6585"