Why am I getting this error when I try to use writerows?
db.writerows(loanList[i])
TypeError: writerows() argument must be iterable
'''
import csv

header = ['Name', 'Results']
file = open("loanResults.csv", "w", newline="")
a = 10000
b = [0.032, 0.043, 0.037, 0.043, 0.044, 0.029, 0.028, 0.030]
c = [6, 7, 4, 3, 4, 6, 7, 9]
loanList = []

def monthlyRepayment(principalAmount, interest, year):
    for i in range(len(b)):
        finalAmount = principalAmount * ((1 + interest[i]) ** year[i])
        monthlyAmount = finalAmount / (year[i] * 12)
        loanList.append(round(monthlyAmount))
        print(round(monthlyAmount))

testCase1 = monthlyRepayment(a, b, c)

db = csv.writer(file)
db.writerow(header)
for i in range(len(loanList)):
    db.writerows(loanList[i])
file.close()
'''
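writerows() expects an iterable of rows (for example, a list of lists), but loanList[i] is a single int, which is why the TypeError is raised. A minimal sketch of the likely fix, reusing the names from the question (the loanList values below are stand-ins for what monthlyRepayment() produces): write one row at a time with writerow(), wrapping each value in a list, or hand writerows() a list of rows.

import csv

loanList = [300, 280, 260]  # stand-in values; the question fills this via monthlyRepayment()

with open("loanResults.csv", "w", newline="") as file:
    db = csv.writer(file)
    db.writerow(["Name", "Results"])
    for amount in loanList:
        db.writerow([amount])  # one single-column row per loan
    # or, equivalently, all at once:
    # db.writerows([[amount] for amount in loanList])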
* def myvariable = 1
* def schema =
"""
{
myvariable : '#number',
2: '#number',
3: '#number',
4: '#number',
5: '#number',
6: '#number',
}
"""
I need to use 'myvariable' as a key. How can I do this?
Here you go:
* def schema = {}
* schema.myvariable = 1
* match schema == { myvariable: 1 }
# dynamic key name
* def name = 'myvariable'
* def schema = {}
* schema[name] = 1
* match schema == { myvariable: 1 }
I am working on parsing logs (JSON format) in Scala, and I am not sure how to proceed. I may get different kinds of logs to process.
How do I write/design my code to handle different types of JSON structures?
Can I give my Scala program a schema and let it parse?
I wrote some code using ObjectMapper and read through the nodes, but I want a more structure-agnostic approach.
I am not sure where to start, so please point me to some reading or examples. I tried Google and Stack Overflow, but there are too many examples and it is confusing, as I am also still learning Scala.
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Calendar;
import org.apache.spark.sql.hive.HiveContext
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import org.apache.spark.rdd.RDD;
sc.setLogLevel("OFF");
val args = sc.getConf.get("spark.driver.args").split("\\s+")
args.foreach(println);
var envStr = "dev";
var srcStr = "appm"
val RootFolderStr = "/source_folder/";
val DestFolderStr = "/dest_folder/";
val dateformatter = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.SSS'Z'");
val formatter = new SimpleDateFormat("yyyy-MM-dd");
val theMonthFormatter = new SimpleDateFormat("yyyy-MM");
var fromDay: Date = formatter.parse("2018-04-29");
var toDay: Date = formatter.parse("2018-05-01");
if (args.length < 2) {
  printf("usage: need at least 2 parameters in spark.driver.args");
  sys.exit(2);
}
envStr = args(0).toLowerCase();
srcStr = args(1).toLowerCase();
if (args.length == 4) {
  fromDay = formatter.parse(args(2));
  toDay = formatter.parse(args(3));
}
if (args.length == 2) {
  // default to be yesterday to today
  toDay = formatter.parse(formatter.format(Calendar.getInstance().getTime()));
  val previousDay = Calendar.getInstance();
  previousDay.add(Calendar.DATE, -1);
  fromDay = formatter.parse(formatter.format(previousDay.getTime()));
}
// get the sub-folder for the monthly partition
val monthFolder = theMonthFormatter.format(fromDay);
var rootFolder = RootFolderStr.replaceAll("ENV", envStr) + monthFolder;
rootFolder = rootFolder.replaceAll("SRC", srcStr);
val destFolder = DestFolderStr.replaceAll("ENV", envStr);
var toCalendar = Calendar.getInstance();
toCalendar.setTime(toDay);
toCalendar.add(Calendar.DATE, 1);
// need to consider the case across the month boundary
val toDay2 = formatter.parse(formatter.format(toCalendar.getTime()));
// filter out .tmp files and 0-size files
// .tmp files are not safe to read from, it's possible that the files are under updating by Flume job and the message data is incomplete
// when the Spark job starts to read from it.
val pathInfos = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(rootFolder));
// filter out 0-length files, and .tmp files from today
val allfiles = pathInfos.filter(fileStatus => {
  if (fileStatus.getLen == 0)
    false
  else {
    val aPath = fileStatus.getPath().getName();
    // using the modification time is more accurate
    val lastTime = fileStatus.getModificationTime();
    val aDate = new Date(lastTime);
    // keep all files between fromDay and toDay2
    aDate.after(fromDay) && aDate.before(toDay2);
  }
}).map(_.getPath.toString);
case class event_log(
  time_stp: Long,
  msg_sze: Int,
  msg_src: String,
  action_path: String,
  s_code: Int,
  s_desc: String,
  p_code: String,
  c_id: String,
  m_id: String,
  c_ip: String,
  c_gp: String,
  gip: String,
  ggip: String,
  rbody: String
);
def readEvents(fileList: Array[String], msgSrc: String, fromTS: Long, toTS: Long): RDD[(event_log)] = {
  val records =
    sc.sequenceFile[Long, String](fileList.mkString(","))
      .filter((message) => {
        (message._1 >= fromTS && message._1 < toTS);
      })
  val eventLogs = records.map((message) => {
    val time_stp = message._1;
    var msg_sze = message._2.length();
    var c_id = ""
    var m_id = "";
    var p_code = "";
    var c_ip = "";
    var c_gp = "";
    var gip = "";
    var ggip = "";
    var rbody = "";
    var action_path = "";
    var s_code: Int = 200;
    var s_desc = "";
    try {
      // parse the message
      val mapper = new ObjectMapper();
      val aBuff = message._2.getBytes();
      val root = mapper.readTree(aBuff);
      var aNode = root.path("rbody");
      rbody = aNode.textValue();
      if (rbody != null && rbody.length() > 0) {
        val mapper_2 = new ObjectMapper();
        val aBuff_2 = rbody.getBytes();
        var root2 = mapper_2.readTree(aBuff_2);
        aNode = root2.path("p_code");
        if (aNode != null && aNode.isValueNode())
          p_code = String.valueOf(aNode.intValue());
        aNode = root2.path("mkay");
        if (aNode != null && aNode.isObject()) {
          root2 = aNode;
        }
        {
          aNode = root2.get("c_id");
          if (aNode != null && aNode.isValueNode())
            c_id = aNode.textValue();
          aNode = root2.get("m_id");
          if (aNode != null && aNode.isValueNode()) {
            m_id = String.valueOf(aNode.intValue());
          }
        }
      }
      aNode = root.path("c_ip");
      c_ip = aNode.textValue();
      aNode = root.path("c_gp");
      c_gp = aNode.textValue();
      aNode = root.path("gip");
      gip = aNode.textValue();
      aNode = root.path("ggip");
      ggip = aNode.textValue();
      aNode = root.path("action_path");
      action_path = aNode.textValue();
      aNode = root.path("s_code");
      val statusNodeValue = aNode.textValue().trim();
      s_code = Integer.valueOf(statusNodeValue.substring(0, 3));
      s_desc = statusNodeValue.substring(3).trim();
    }
    catch {
      // mark the record as malformed JSON by setting the message size to 0
      case jex: JsonParseException => {
        msg_sze = 0
      };
      case ioEx: java.io.IOException => {
        msg_sze = 0
      };
      case rtEx: JsonMappingException => {
        msg_sze = 0
      };
    }
    event_log(time_stp, msg_sze, msgSrc, action_path, s_code, s_desc,
      p_code, c_id, m_id,
      c_ip, c_gp, gip, ggip,
      rbody);
  });
  eventLogs;
}
val hiveContext = new HiveContext(sc)
if (allfiles.length == 0)
  sys.exit(3);
val fromTime = fromDay.getTime();
val toTime = toDay.getTime();
val events = readEvents(allfiles, srcStr, fromTime, toTime);
val df = hiveContext.createDataFrame(events).coalesce(1);
df.write.parquet(destFolder);
sys.exit(0);
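One schema-driven, structure-agnostic option, sketched here as an untested suggestion rather than a verified replacement for the job above: since the code already runs on Spark, you can hand Spark SQL an explicit StructType and let it parse the JSON strings, instead of walking Jackson nodes by hand. In the default permissive mode, fields missing from a record come back as null, so one schema can tolerate several log variants. The field names below are assumptions taken from the hand-parsed fields above, not a confirmed log layout; sc, hiveContext, and allfiles are reused from the script.

import org.apache.spark.sql.types._

// Hypothetical schema based on the fields read manually above; adjust to the real logs.
val eventSchema = StructType(Seq(
  StructField("action_path", StringType),
  StructField("s_code", StringType),
  StructField("c_ip", StringType),
  StructField("c_gp", StringType),
  StructField("gip", StringType),
  StructField("ggip", StringType),
  StructField("rbody", StringType)  // nested JSON can stay a raw string and be parsed separately
))

// The sequence-file value (message._2) is the raw JSON text; the key (the timestamp)
// can be carried along separately if needed.
val jsonStrings = sc.sequenceFile[Long, String](allfiles.mkString(",")).map(_._2)

// Spark parses each record against the schema; in the default permissive mode,
// malformed records yield null columns instead of throwing.
val eventsDF = hiveContext.read.schema(eventSchema).json(jsonStrings)
eventsDF.printSchema()

If the nested rbody payload also needs its own columns, the same idea can be applied to it with a second, smaller schema.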
In the MySQL CLI, I get the following result:
mysql> select * from words limit 1;
+----+------+--------------------+---------------------+---------------------+
| id | name | full | created_at | updated_at |
+----+------+--------------------+---------------------+---------------------+
| 30 | prpr | a full explanation | 2016-09-20 12:59:07 | 2016-09-20 12:59:07 |
+----+------+--------------------+---------------------+---------------------+
the "created_at" is 2016-09-20 12:59:07
but when i
static void main(String[] args) {
    def c = Sql.newInstance("jdbc:mysql://127.0.0.1:3306/ro_test", "root", "root")
    println c.rows("select * from words")[0]['created_at']
}
the output is
2016-09-21 05:30:58.0
I want the Groovy output to match the MySQL CLI. How can I do that?
These two dates probably refer to (roughly) the same instant in time. Given that the dates are 5.5 hours apart, my guess is that the MySQL CLI is showing the date in the UTC timezone, whereas the Groovy code is showing the date in the UTC+05:30 (Indian) time zone.
In other words
2016-09-20 12:59:07 + 5.5 hours ≈ 2016-09-21 05:30:58.0
When I force a specific timezone, it works:
static void main(String[] args) {
    def c = Sql.newInstance("jdbc:mysql://127.0.0.1:3306/ro_test", "root", "root")
    def tz = TimeZone.default
    def cal = Calendar.getInstance(TimeZone.getTimeZone("Asia/Shanghai"))
    c.query("select * from words") { ResultSetImpl rs ->
        while (rs.next()) {
            println rs.getTimestamp(4, cal)
        }
    }
}
I think the best way is to rewrite groovy.sql.Sql#rows using the code above; the full implementation is here:
List<LinkedHashMap> e2(String stmt) {
    def cal = Calendar.getInstance(Time.timezone)
    List<GroovyRowResult> rs = []
    c.query(stmt) { ResultSetImpl rs2 ->
        def md = rs2.metaData
        int cc = md.columnCount
        while (rs2.next()) {
            def attrs = [:]
            for (int i = 1; i <= cc; i++) {
                def key = md.getColumnLabel(i)
                def t = md.getColumnType(i)
                def v
                if (t == Types.TIMESTAMP) {
                    v = rs2.getTimestamp(i, cal)
                } else {
                    v = rs2.getObject(i)
                }
                attrs[key] = v
            }
            rs.add(attrs)
        }
    }
    rs
}
The following is an example of an adjacency list + inheritance. It works as expected, but if I try to use it in another model, Mammut, as a relationship, it throws this error:
Traceback (most recent call last):
  File "bin/py", line 73, in <module>
    exec(compile(__file__f.read(), __file__, "exec"))
  File "../adjacency_list.py", line 206, in <module>
    create_entries(IntTreeNode)
  File "../adjacency_list.py", line 170, in create_entries
    mut.nodes.append(node)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/dynamic.py", line 304, in append
    attributes.instance_dict(self.instance), item, None)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/dynamic.py", line 202, in append
    self.fire_append_event(state, dict_, value, initiator)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/dynamic.py", line 99, in fire_append_event
    value = fn(state, value, initiator or self._append_token)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/attributes.py", line 1164, in emit_backref_from_collection_append_event
    child_impl.append(
AttributeError: '_ProxyImpl' object has no attribute 'append'
The Code:
from sqlalchemy import (Column, ForeignKey, Integer, String, create_engine,
                        Float)
from sqlalchemy.orm import (Session, relationship, backref, joinedload_all)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.declarative import declared_attr, AbstractConcreteBase
Base = declarative_base()
class Mammut(Base):
    __tablename__ = "mammut"
    id = Column(Integer, primary_key=True)
    nodes = relationship(
        'TreeNode',
        backref='mammut',
        lazy='dynamic',
        cascade="all, delete-orphan",
        # viewonly=True
    )
class TreeNode(AbstractConcreteBase, Base):
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)
    depth = Column(Integer, default=0)
    data_type = Column(String(50))

    @declared_attr
    def mammut_id(cls):
        return Column(Integer, ForeignKey('mammut.id'))

    @declared_attr
    def __tablename__(cls):
        return cls.__name__.lower()

    @declared_attr
    def __mapper_args__(cls):
        ret = {}
        if cls.__name__ != "TreeNode":
            ret = {'polymorphic_identity': cls.__name__,
                   'concrete': True,
                   # XXX redundant, only makes sense if we use one table
                   'polymorphic_on': cls.data_type}
        return ret

    @declared_attr
    def parent_id(cls):
        _fid = '%s.id' % cls.__name__.lower()
        return Column(Integer, ForeignKey(_fid))

    @declared_attr
    def children(cls):
        _fid = '%s.id' % cls.__name__
        return relationship(cls.__name__,
                            # cascade deletions
                            cascade="all, delete-orphan",
                            # many to one + adjacency list - remote_side
                            # is required to reference the 'remote'
                            # column in the join condition.
                            backref=backref("parent", remote_side=_fid),
                            # children will be represented as a dictionary
                            # on the "name" attribute.
                            collection_class=attribute_mapped_collection(
                                'name'),
                            )

    def get_path(self, field):
        if self.parent:
            return self.parent.get_path(field) + [getattr(self, field)]
        else:
            return [getattr(self, field)]

    @property
    def name_path(self):
        # XXX there is no way to query for it unless we add a function with a
        # CTE (recursive query) to our database; see [1] for it
        # [1] https://stackoverflow.com/questions/14487386/sqlalchemy-recursive-hybrid-property-in-a-tree-node
        return '/'.join(self.get_path(field='name'))

    def __init__(self, name, value=None, parent=None):
        self.name = name
        self.parent = parent
        self.depth = 0
        self.value = value
        if self.parent:
            self.depth = self.parent.depth + 1

    def __repr__(self):
        ret = "%s(name=%r, id=%r, parent_id=%r, value=%r, depth=%r, " \
              "name_path=%s data_type=%s)" % (
                  self.__class__.__name__,
                  self.name,
                  self.id,
                  self.parent_id,
                  self.value,
                  self.depth,
                  self.name_path,
                  self.data_type
              )
        return ret

    def dump(self, _indent=0):
        return " " * _indent + repr(self) + \
            "\n" + \
            "".join([
                c.dump(_indent + 1)
                for c in self.children.values()]
            )
class IntTreeNode(TreeNode):
    value = Column(Integer)


class FloatTreeNode(TreeNode):
    value = Column(Float)
    miau = Column(String(50), default='zuff')

    def __repr__(self):
        ret = "%s(name=%r, id=%r, parent_id=%r, value=%r, depth=%r, " \
              "name_path=%s data_type=%s miau=%s)" % (
                  self.__class__.__name__,
                  self.name,
                  self.id,
                  self.parent_id,
                  self.value,
                  self.depth,
                  self.name_path,
                  self.data_type,
                  self.miau
              )
        return ret
if __name__ == '__main__':
    engine = create_engine('sqlite:///', echo=True)

    def msg(msg, *args):
        msg = msg % args
        print("\n\n\n" + "-" * len(msg.split("\n")[0]))
        print(msg)
        print("-" * len(msg.split("\n")[0]))

    msg("Creating Tree Table:")
    Base.metadata.create_all(engine)
    session = Session(engine)

    def create_entries(Cls):
        node = Cls('rootnode', value=2)
        Cls('node1', parent=node)
        Cls('node3', parent=node)
        node2 = Cls('node2')
        Cls('subnode1', parent=node2)
        node.children['node2'] = node2
        Cls('subnode2', parent=node.children['node2'])
        msg("Created new tree structure:\n%s", node.dump())
        msg("flush + commit:")
        # XXX this throws the error
        mut = Mammut()
        mut.nodes.append(node)
        session.add(mut)
        session.add(node)
        session.commit()
        msg("Tree After Save:\n %s", node.dump())
        Cls('node4', parent=node)
        Cls('subnode3', parent=node.children['node4'])
        Cls('subnode4', parent=node.children['node4'])
        Cls('subsubnode1', parent=node.children['node4'].children['subnode3'])
        # remove node1 from the parent, which will trigger a delete
        # via the delete-orphan cascade.
        del node.children['node1']
        msg("Removed node1. flush + commit:")
        session.commit()
        msg("Tree after save:\n %s", node.dump())
        msg("Emptying out the session entirely, "
            "selecting tree on root, using eager loading to join four levels deep.")
        session.expunge_all()
        node = session.query(Cls).\
            options(joinedload_all("children", "children",
                                   "children", "children")).\
            filter(Cls.name == "rootnode").\
            first()
        msg("Full Tree:\n%s", node.dump())
        # msg("Marking root node as deleted, flush + commit:")
        # session.delete(node)
        # session.commit()

    create_entries(IntTreeNode)
    create_entries(FloatTreeNode)

    nodes = session.query(TreeNode).filter(
        TreeNode.name == "rootnode").all()
    for idx, n in enumerate(nodes):
        msg("Full (%s) Tree:\n%s" % (idx, n.dump()))
Concrete inheritance can be very difficult, and AbstractConcreteBase itself has bugs in 0.9 that get in the way of elaborate mappings like this.
Using 1.0 (not yet released; use git master), I can get the major elements going as follows:
from sqlalchemy import Column, String, Integer, create_engine, ForeignKey, Float
from sqlalchemy.orm import Session, relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.declarative import declared_attr, AbstractConcreteBase
Base = declarative_base()
class Mammut(Base):
    __tablename__ = "mammut"
    id = Column(Integer, primary_key=True)
    nodes = relationship(
        'TreeNode',
        lazy='dynamic',
        back_populates='mammut',
    )
class TreeNode(AbstractConcreteBase, Base):
    id = Column(Integer, primary_key=True)
    name = Column(String)

    @declared_attr
    def __tablename__(cls):
        if cls.__name__ == 'TreeNode':
            return None
        else:
            return cls.__name__.lower()

    @declared_attr
    def __mapper_args__(cls):
        return {'polymorphic_identity': cls.__name__, 'concrete': True}

    @declared_attr
    def parent_id(cls):
        return Column(Integer, ForeignKey(cls.id))

    @declared_attr
    def mammut_id(cls):
        return Column(Integer, ForeignKey('mammut.id'))

    @declared_attr
    def mammut(cls):
        return relationship("Mammut", back_populates="nodes")

    @declared_attr
    def children(cls):
        return relationship(
            cls,
            back_populates="parent",
            collection_class=attribute_mapped_collection('name'),
        )

    @declared_attr
    def parent(cls):
        return relationship(
            cls, remote_side="%s.id" % cls.__name__,
            back_populates='children')
class IntTreeNode(TreeNode):
    value = Column(Integer)


class FloatTreeNode(TreeNode):
    value = Column(Float)
    miau = Column(String(50), default='zuff')
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
session = Session(e)
root = IntTreeNode(name='root')
IntTreeNode(name='n1', parent=root)
n2 = IntTreeNode(name='n2', parent=root)
IntTreeNode(name='n2n1', parent=n2)
m1 = Mammut()
m1.nodes.append(n2)
m1.nodes.append(root)
session.add(root)
session.commit()
session.close()
root = session.query(TreeNode).filter_by(name='root').one()
print(root.children)