SQL ==> PIG
How to map the SQL query in PIG script
SQL Function | SQL | PIG |
SELECT | SELECT column_name,column_name FROM table_name; | FOREACH alias GENERATE column_name, column_name; |
SELECT * | SELECT * FROM table_name; | FOREACH alias GENERATE *; |
DISTINCT | SELECT DISTINCT column_name,column_name FROM table_name; | DISTINCT(FOREACH alias GENERATE column_name, column_name); |
WHERE | SELECT column_name,column_name FROM table_name WHERE column_name operator value; | FOREACH (FILTER alias BY column_name operator value) GENERATE column_name, column_name; |
AND/OR | ... WHERE (column_name operator value1 AND column_name operator value2) OR column_name operator value3; | FILTER alias BY (column_name operator value1 AND column_name operator value2) OR column_name operator value3; |
ORDER BY | ... ORDER BY column_name ASC|DESC, column_name ASC|DESC; | ORDER alias BY column_name ASC|DESC, column_name ASC|DESC; |
TOP/LIMIT | SELECT TOP number column_nameFROM table_name ORDER BY column_name ASC|DESC; SELECT column_name FROM table_name ORDER BY column_name ASC|DESC LIMIT number; |
FOREACH (GROUP alias BY column_name) GENERATELIMIT alias number; TOP(number, column_index, alias); |
GROUP BY | SELECT function(column_name) FROM table GROUPBY column_name; | FOREACH (GROUP alias BY column_name) GENERATE function(alias.column_name); |
LIKE | ... WHERE column_name LIKE pattern; | FILTER alias BYREGEX_EXTRACT(column_name, pattern, 1) IS NOT NULL; |
IN | ... WHERE column_name IN (value1,value2,...); | FILTER alias BY column_name IN(value1, value2,...); |
JOIN | SELECT column_name(s) FROM table1 JOIN table2 ON table1.column_name=table2.column_name; | FOREACH (JOIN alias1 BY column_name, alias2 BY column_name) GENERATE column_name(s); |
LEFT/RIGHT/FULL OUTER JOIN | SELECT column_name(s) FROM table1 LEFT|RIGHT|FULL OUTER JOIN table2 ON table1.column_name=table2.column_name; | FOREACH (JOIN alias1 BY column_name LEFT|RIGHT|FULL, alias2 BY column_name) GENERATE column_name(s); |
UNION ALL | SELECT column_name(s) FROM table1 UNION ALL SELECT column_name(s) FROM table2; | UNION alias1, alias2; |
AVG | SELECT AVG(column_name) FROM table_name; | FOREACH (GROUP alias ALL) GENERATE AVG(alias.column_name); |
COUNT | SELECT COUNT(column_name) FROM table_name; | FOREACH (GROUP alias ALL) GENERATE COUNT(alias.column_name); |
COUNT DISTINCT | SELECT COUNT(DISTINCT column_name) FROM table_name; | FOREACH alias { unique_column = DISTINCT column_name; GENERATE COUNT(unique_column); }; |
MAX | SELECT MAX(column_name) FROM table_name; | FOREACH (GROUP alias ALL) GENERATE MAX(alias.column_name); |
MIN | SELECT MIN(column_name) FROM table_name; | FOREACH (GROUP alias ALL) GENERATE MIN(alias.column_name); |
SUM | SELECT SUM(column_name) FROM table_name; | FOREACH (GROUP alias ALL) GENERATE SUM(alias.column_name); |
HAVING | ... HAVING aggregate_function(column_name) operator value; | FILTER alias BY aggregate_function(column_name) operator value; |
UCASE/UPPER | SELECT UCASE(column_name) FROM table_name; | FOREACH alias GENERATE UPPER(column_name); |
LCASE/LOWER | SELECT LCASE(column_name) FROM table_name; | FOREACH alias GENERATE LOWER(column_name); |
SUBSTRING | SELECT SUBSTRING(column_name,start,length) AS some_name FROM table_name; | FOREACH alias GENERATE SUBSTRING(column_name, start, start+length) as some_name; |
LEN | SELECT LEN(column_name) FROM table_name; | FOREACH alias GENERATE SIZE(column_name); |
ROUND | SELECT ROUND(column_name, 0) FROM table_name; | FOREACH alias GENERATE ROUND(column_name); |
No comments:
Post a Comment