I created a wizard on the account.invoice form that is displayed when the user clicks "Confirm bill".
This wizard contains two buttons: Discard and Confirm.
When the user clicks the Confirm button, the bill is created.
How can I save the fields of the "account.invoice" view from the wizard's Confirm button?
I hope you can help me.
This is the confirm function of the wizard:
@api.multi
def action_invoice_validation(self):
    """Confirm handler of the wizard: validate the invoice it was opened from.

    The invoice id is read from the ``parent_obj`` context key.  The invoice
    is processed only when it is not already in the ``open`` state.

    NOTE(review): this calls ``action_move_creation`` and
    ``invoice_validation`` on the ``account.invoice`` record, because both
    helpers iterate ``self`` and read invoice fields -- confirm that they are
    defined on (or inherited by) ``account.invoice``.

    :return: result of ``invoice_validation`` on the invoice(s) to open
    """
    # .get() keeps a missing context key from raising KeyError; searching on a
    # falsy id simply yields an empty recordset.
    invoice_id = self._context.get('parent_obj')
    invoice = self.env['account.invoice'].search([('id', '=', invoice_id)])
    # Filter the invoice itself (the original filtered the wizard recordset
    # with a lambda that ignored its argument).
    to_open_invoices = invoice.filtered(lambda inv: inv.state != 'open')
    to_open_invoices.action_move_creation()
    return to_open_invoices.invoice_validation()
Below are the two functions called from my main function.
The action_move_creation function:
@api.multi
def action_move_creation(self):
    """ Creates invoice related analytics and financial move lines

    For every invoice in ``self``: fill in missing invoice/due dates, build
    the move lines (invoice lines, tax lines and one or more counterpart
    ``dest`` lines), create and post the ``account.move`` and link it back to
    the invoice.

    :return: True
    """
    # Only the model handle is needed to call create(); the original searched
    # account.move with the *invoice* id taken from the context, which mixes
    # id spaces and raises KeyError when 'parent_obj' is absent.
    account_move = self.env['account.move']

    for inv in self:
        if not inv.date_invoice:
            inv.write({'date_invoice': fields.Date.context_today(self)})
        if not inv.date_due:
            inv.write({'date_due': inv.date_invoice})
        company_currency = inv.company_id.currency_id

        # create move lines (one per invoice line + eventual taxes and analytic lines)
        iml = inv.invoice_line_move_line_get()
        iml += inv.tax_line_move_line_get()

        diff_currency = inv.currency_id != company_currency
        # create one move line for the total and possibly adjust the other lines amount
        total, total_currency, iml = inv.compute_invoice_totals(company_currency, iml)

        name = inv.name or ''
        if inv.payment_term_id:
            # One counterpart line per payment-term instalment.
            totlines = inv.payment_term_id.with_context(
                currency_id=company_currency.id).compute(total, inv.date_invoice)[0]
            res_amount_currency = total_currency
            for i, t in enumerate(totlines):
                if inv.currency_id != company_currency:
                    amount_currency = company_currency._convert(
                        t[1], inv.currency_id, inv.company_id,
                        inv._get_currency_rate_date() or fields.Date.today())
                else:
                    amount_currency = False

                # last line: add the diff
                res_amount_currency -= amount_currency or 0
                if i + 1 == len(totlines):
                    amount_currency += res_amount_currency

                iml.append({
                    'type': 'dest',
                    'name': name,
                    'price': t[1],
                    'account_id': inv.account_id.id,
                    'date_maturity': t[0],
                    'amount_currency': diff_currency and amount_currency,
                    'currency_id': diff_currency and inv.currency_id.id,
                    'invoice_id': inv.id
                })
        else:
            # No payment term: a single counterpart line due on date_due.
            iml.append({
                'type': 'dest',
                'name': name,
                'price': total,
                'account_id': inv.account_id.id,
                'date_maturity': inv.date_due,
                'amount_currency': diff_currency and total_currency,
                'currency_id': diff_currency and inv.currency_id.id,
                'invoice_id': inv.id
            })
        part = self.env['res.partner']._find_accounting_partner(inv.partner_id)
        line = [(0, 0, self.line_get_convert(l, part.id)) for l in iml]
        line = inv.group_lines(iml, line)

        line = inv.finalize_invoice_move_lines(line)

        date = inv.date or inv.date_invoice
        move_vals = {
            'ref': inv.reference,
            'line_ids': line,
            'journal_id': inv.journal_id.id,
            'date': date,
            'narration': inv.comment,
        }
        move = account_move.create(move_vals)
        # Pass invoice in method post: used if you want to get the same
        # account move reference when creating the same invoice after a cancelled one:
        move.post(invoice=inv)
        # make the invoice point to that move
        vals = {
            'move_id': move.id,
            'date': date,
            'move_name': move.name,
        }
        inv.write(vals)
    return True
My invoice_validation function:
@api.multi
def invoice_validation(self):
    """Subscribe partners, set references and move the invoices to ``open``.

    For each invoice in ``self`` whose partner is not yet a follower, the
    partner is subscribed, a reference is computed for customer invoices that
    have none, and the reference is copied onto the linked move.  Afterwards
    the duplicate-supplier-reference check runs on all of ``self``.

    :return: result of writing ``{'state': 'open'}`` on ``self``
    """
    # Test each record itself; the original lambda ignored its argument and
    # looked at an unrelated invoice fetched from the context instead.
    not_following = self.filtered(
        lambda inv: inv.partner_id not in inv.message_partner_ids)
    for invoice in not_following:
        invoice.message_subscribe([invoice.partner_id.id])

        # Auto-compute reference, if not already existing and if configured on company
        if not invoice.reference and invoice.type == 'out_invoice':
            invoice.reference = invoice._get_computed_reference()

        # DO NOT FORWARD-PORT.
        # The reference is copied after the move creation because we need the move to get the invoice number but
        # we need the invoice number to get the reference.
        invoice.move_id.ref = invoice.reference
    self._check_duplicate_supplier_reference()

    return self.write({'state': 'open'})
Related
@client.command()
async def show(ctx, player, *args):  # General stats
    """Send an embed listing the other names known for ``player``.

    Queries ``<apiLink>/checkban`` for the player and, on HTTP 200, posts the
    entries of ``otherNames.usedNames`` one per line.  Nothing is sent for
    any other status code.

    :param ctx: command invocation context used to send the reply
    :param player: name to look up
    :param args: extra command arguments (unused)
    """
    # NOTE(review): requests.get blocks while the HTTP call is in flight;
    # consider an async HTTP client if this stalls the bot.
    # Passing the name through `params` lets requests URL-encode it.
    rs = requests.get(apiLink + "/checkban", params={"name": str(player)})
    if rs.status_code == 200:  # HTTP OK
        payload = rs.json()
        # Join the list's elements -- not the stringified list -- so there are
        # no surrounding brackets and every name lands on its own line.
        used_names = payload["otherNames"]["usedNames"]
        joined_array = ",\n".join(str(name) for name in used_names)
        embed = discord.Embed(title="Other users for" + str(player),
                              description="User is known as:\n" + joined_array)
        await ctx.send(embed=embed)
My goal here is to have every username on its own line after each comma, preferably without the [] at the start and end. I have tried adding
joined_array = ','.join({str(rs["otherNames"]['usedNames'])}) but the response from the bot is the same as shown in the image.
Any answer, tip, or suggestion is appreciated!
Try this:
array = ['user1', 'user2', 'user3', 'user4', 'user5', 'user6']  # your list
# A comma followed by a line break goes between entries, not after the last one.
separator = ",\n"
new = separator.join(array)
print(new)
Output:
user1,
user2,
user3,
user4,
user5,
user6
In your case, I think array should be replaced with rs["otherNames"]['usedNames'].
I am creating a Python script that can read scanned, tabular .pdf files, extract some important data, and insert it into a JSON file to later be loaded into a SQL database (I will also be developing the DB as a project for learning MongoDB).
Basically, my issue is that I have never worked with JSON files before, but that was the format I was recommended to output to. The scraping script works; the pre-processing could be a lot cleaner, but for now it works. The issue I run into is that the keys and values are in the same list, and some of the values are split into two different list items because they contained a decimal point.
I don't really know where to start. I suppose that since I know the indexes of the list I can easily assign keys and values, but then it may not be applicable to any .pdf; that is, the script cannot be coded explicitly.
import PyPDF2 as pdf2
import textract

# Path of the PDF to read (a plain string is not a context manager, so it is
# assigned instead of being used in a `with` statement).
filename = "TestSpec.pdf"

count = 0
text = ""
# `with open(...)` guarantees the file handle is closed even if parsing fails.
with open(filename, 'rb') as pdfFileObj:
    pdfReader = pdf2.PdfFileReader(pdfFileObj)
    num_pages = pdfReader.numPages
    while count < num_pages:
        # Read page `count`; the original always re-read page 0.
        pageObj = pdfReader.getPage(count)
        count += 1
        text += pageObj.extractText()

# No text layer was extracted: fall back to textract's tesseract method.
if text == "":
    text = textract.process(filename, method="tesseract", language="eng")
def cleanText(x, start_marker="sheet", stop_marker="od260"):
    '''
    Return the lower-cased words found between two marker words.

    The input is split on runs of non-word characters, lower-cased, and the
    words strictly between the first occurrence of ``start_marker`` and the
    first occurrence of ``stop_marker`` are returned.

    x            -- text to clean; ``bytes`` are decoded as UTF-8 instead of
                    being passed through ``str()``, which would leave literal
                    ``\\n`` escapes behind and produce words like ``n21``
    start_marker -- word after which the useful data begins
    stop_marker  -- word at which the useful data ends

    Raises ValueError when either marker is missing from the text.
    '''
    import re  # local import: the surrounding snippet never imports re

    if isinstance(x, (bytes, bytearray)):
        raw = bytes(x).decode("utf-8", errors="replace")
    else:
        raw = str(x)

    # Newlines are non-word characters, so splitting already separates lines;
    # stripping them first would glue the words on either side together.
    words = [word.lower() for word in re.split(r'\W+', raw) if word]
    # A lone 'n' is what remains of an escaped newline in str(bytes) input.
    words = [word for word in words if word != 'n']

    dexStart = words.index(start_marker.lower())
    dexStop = words.index(stop_marker.lower())
    return words[dexStart + 1:dexStop]
cleanText = cleanText(text)
This is the current output
['n21', 'feb', '2019', 'nsequence', 'lacz', 'rp', 'n5', 'gat', 'ctc', 'tac', 'cat', 'ggc', 'gca', 'cat', 'ttc', 'ccc', 'gaa', 'aag', 'tgc', '3', 'norder', 'no', '15775199', 'nref', 'no', '207335463', 'n25', 'nmole', 'dna', 'oligo', '36', 'bases', 'nproperties', 'amount', 'of', 'oligo', 'shipped', 'to', 'ntm', '50mm', 'nacl', '66', '8', 'xc2', 'xb0c', '11', '0', '32', '6', 'david', 'cook', 'ngc', 'content', '52', '8', 'd260', 'mmoles', 'kansas', 'state', 'university', 'biotechno', 'nmolecular', 'weight', '10', '965', '1', 'nnmoles']
and we want the output as a JSON setup like
{"Date | 21feb2019", "Sequence ID: | lacz-rp", "Sequence 5'-3' | gat..."}
and so on. Just not sure how to do that.
here is a screenshot of the data from my sample pdf
So, I have figured out some of this. I am still having issues with grabbing the last third of the data I need without explicitly programming it in, but here is what I have so far. Once I have everything working, I will worry about optimizing and condensing it.
# for PDF reading
import PyPDF2 as pdf2
import textract
# for data preprocessing
import re
from dateutil.parser import parse
# For generating the JSON file array
import json
# This finds and opens the pdf file, reads the data, and extracts the data.
filename = "*.pdf"
text = ""
# The context manager closes the pdf file even if PyPDF2 raises while parsing.
with open(filename, 'rb') as pdfFileObj:
    pdfReader = pdf2.PdfFileReader(pdfFileObj)
    pageObj = pdfReader.getPage(0)
    text += pageObj.extractText()

# If no text could be extracted (e.g. the page is a picture), textract reads
# the data via tesseract instead.
if text == "":
    text = textract.process(filename, method="tesseract", language="eng")

# Converts text to string from byte data for preprocessing
stringedText = str(text)
# Removed escaped lines and replaced them with actual new lines.
formattedText = stringedText.replace('\\n', '\n').lower()
# Slices the long string into a workable piece (only contains useful data)
slice1 = formattedText[(formattedText.index("sheet") + 10): (formattedText.index("secondary") - 2)]
clean = re.sub('\n', " ", slice1)
clean2 = re.sub(' +', ' ', clean)

# Creating the PrimerData dictionary
# NOTE(review): `clean` has already had its newlines replaced by spaces, so
# the "\n" alternative of the split below cannot match -- confirm intent.
primerDataSlice = clean[clean.index("molecular"): -1]
primerData = re.split(": |\n", primerDataSlice)
primerKeys = primerData[0::2]
primerValues = primerData[1::2]
primerDict = {"Primer Data": dict(zip(primerKeys, primerValues))}

# Generating the JSON array "Primer Data"
# The payload is built before the file is opened so a parsing error cannot
# leave an empty PrimerData.json behind; utf-8 matches ensure_ascii=False.
primerJSON = json.dumps(primerDict, ensure_ascii=False)
with open("PrimerData.json", 'w', encoding="utf-8") as file:
    file.write(primerJSON)

# Grabbing the date (this has just the date, so json will have to add date.)
# Raw string: the backslash escapes belong to the regex, not to Python.
date = re.findall(r'(\d{2}[\/\- ](\d{2}|january|jan|february|feb|march|mar|april|apr|may|june|jun|july|jul|august|aug|september|sep|october|oct|november|nov|december|dec)[\/\- ]\d{2,4})', clean2)
Without input data it is difficult to give you working code; a minimal working example with input would help. As for JSON handling, Python dictionaries can be dumped to JSON easily. See the examples here:
https://docs.python-guide.org/scenarios/json/
Get a JSON string from a dictionary and write it to a file. What remains is figuring out how to parse the text into a dictionary.
import json

# Plain dictionary holding the fields that should end up in the JSON output.
d = {
    "Date": "21feb2019",
    "Sequence ID": "lacz-rp",
    "Sequence 5'-3'": "gat",
}

# Serialise the dictionary to a JSON string and show it.
json_data = json.dumps(d)
print(json_data)
# Write that data to a file
So, I did figure this out. The problem was that pulling all the data into a single list during pre-processing was not a good idea, considering that the keys for the dictionary never change.
Here is the semi-finished result for making the dictionary and JSON file.
# Collect the sequence name: 8 characters starting 11 past "Sequence".
_seq_pos = clean2.index("Sequence")
name = clean2[_seq_pos + 11:_seq_pos + 19]

# Collecting Shipment info
ordered = input("Who placed this order? ")
received = input("Who is receiving this order? ")
_order_date_pattern = (
    r"(\d{2}[/\- ](\d{2}|January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sep|October|Oct|November|Nov|December|Dec)[/\- ]\d{2,4})"
)
dateOrder = re.findall(_order_date_pattern, clean2)
dateReceived = date.today()

# The two reference-number bounds are located with different needles
# ("ref.No. " with a trailing space, then "ref.No."), as before.
_ref_start = clean2.index("ref.No. ") + 8
_ref_stop = clean2.index("ref.No.") + 17
refNo = clean2[_ref_start:_ref_stop]

_order_pos = clean2.index("Order No.")
orderNo = clean2[_order_pos + 10:_order_pos + 18]

# Finding and grabbing the sequence data. Storing it and then finding the
# GC content and melting temp or TM
_bases_pos = clean2.index("bases")
bases = int(clean2[_bases_pos - 3:_bases_pos - 1])
# clean2 is iterated character by character; only A/G/C/T characters match.
_is_base = re.compile(r'^[AGCT]+$').match
seqList = [line for line in clean2 if _is_base(line)]
sequence = "".join(seqList[:bases])
def gc_content(x):
    """Return the percentage of 'G' and 'C' characters in ``x``.

    The percentage is taken over the length of ``x`` itself (the original
    divided by the module-level ``bases``, so the result depended on global
    state) and is rounded to one decimal place.  An empty sequence yields
    0.0 instead of raising ZeroDivisionError.
    """
    if not x:
        return 0.0
    gc_count = sum(1 for base in x if base in ('G', 'C'))
    return round((gc_count / len(x)) * 100, 1)
# Derived properties of the extracted sequence.
gc = gc_content(sequence)
tm = mt.Tm_GC(sequence, Na=50)
moleWeight = round(mw(Seq(sequence, generic_dna)), 2)

# Four characters starting 10 past "ug/OD260:" hold the weight figure.
_od_pos = clean2.index("ug/OD260:")
dilWeight = float(clean2[_od_pos + 10:_od_pos + 14])
dilution = dilWeight * 10

# Assemble the two sections separately, then nest them under their headings.
_primer_section = {
    "Sequence": sequence,
    "Bases": bases,
    "TM (50mM NaCl)": tm,
    "% GC content": gc,
    "Molecular weight": moleWeight,
    "ug/0D260": dilWeight,
    "Dilution volume (uL)": dilution,
}
_shipment_section = {
    "Ref. No.": refNo,
    "Order No.": orderNo,
    "Ordered by": ordered,
    "Date of Order": dateOrder,
    "Received By": received,
    "Date Received": str(dateReceived.strftime("%d-%b-%Y")),
}
primerDict = {
    "Primer Data": _primer_section,
    "Shipment Info": _shipment_section,
}

# Generating the JSON array "Primer Data"; the file is named after the sequence.
_json_path = "".join(name) + ".json"
with open(_json_path, 'w') as file:
    primerJSON = json.dumps(primerDict, ensure_ascii=False)
    file.write(primerJSON)
Instead of having the user type in the "Job Name", I want it to be a Tkinter drop-down menu.
I'm guessing the idea is to fill Jobname with the list of names from the table. How do I have it show in the Tkinter drop-down?
Jobname = []
...
# Build one display string per row, collect it and echo it.
for row in rows:
    data = "%s %s %s %s %s" % (row["Id"], row["Title"], row["Date"], row["Time"], row["Duration"])
    movieList.append(data)
    # Parenthesised so the line is valid on Python 3 as well as Python 2.
    print(data)
My Code
def reportdata():
    """Rebuild the report form: three labelled entries and a Generate button.

    Every widget created here is gridded into ``fr`` and appended to
    ``listlab`` in creation order (labels, then entries, then the button).
    """
    clear()

    # Column 0: captions for rows 1-3.
    captions = ("Job Name : ", "Job Start Date: ", "Job End Date: ")
    for row_no, caption in enumerate(captions, start=1):
        label = Label(fr, text=caption)
        label.grid(row=row_no, column=0)
        listlab.append(label)

    # Column 1: entries bound to the matching variables.
    for row_no, variable in enumerate((jjname, jjdates, jjdatee), start=1):
        entry = Entry(fr, textvariable=variable)
        entry.grid(row=row_no, column=1)
        listlab.append(entry)

    generate = Button(fr, text="Generate", command=savexl)
    generate.grid(row=4, column=3)
    listlab.append(generate)
Please see below for an example of how you can use a list to populate a tkinter OptionMenu:
from tkinter import *
root = Tk()

# List that stores the job names (filled with placeholder jobs for troubleshooting).
a = ["Job Name " + str(i) for i in range(20)]

var = StringVar()  # holds the currently selected option
var.set(a[0])  # sets the default option of options

# OptionMenu populated with every element of the list.
options = OptionMenu(root, var, *a)
# Button that prints the current value of options.
button = Button(root, text="Ok", command=lambda: print(var.get()))

options.pack()
button.pack()

# Start the Tk event loop; without it the window closes immediately when the
# example is run as a script.
root.mainloop()
This uses a list to fill an OptionMenu with all of its elements; then we create a button which prints the current value of the OptionMenu.
If you can get your SQL-imported data into a list, you can use the same logic as above.
I am wondering if there is an out-of-the-box way, or a plugin, that can achieve the following behaviour in Sublime Text 3.
I would like to put the caret on a certain line and then select all the text up to another line number. The number of lines should be variable.
For example, put the caret on line 10 and then expand the selection to line 21 or line 104.
I hate having to hold down a key or use the mouse for this action.
I wrote a simple plugin that lets you enter, via an input_panel, the line number to select up to:
Features:
works bidirectionally
respects the current selection
only executes if there is a single selection
Setup Info:
# GitHub
Code:
import sublime, sublime_plugin
class SelectToLineCommand( sublime_plugin.TextCommand ):
    """Extend the single current selection up to a line number typed by the user."""

    def run( self, edit ):
        """Remember the current selection and prompt for the target line.

        Does nothing unless there is exactly one selection.
        """
        window = self.view.window()
        selections = self.view.sel()

        if len( selections ) != 1:
            return

        self.currentSelection = selections[0]

        # Normalise so that .a <= .b regardless of the selection's direction.
        if self.currentSelection.a > self.currentSelection.b:
            self.currentSelection = sublime.Region( self.currentSelection.b, self.currentSelection.a )

        window.show_input_panel( "Select To Line Number", "", self.get_LineNumber, None, None )

    def get_LineNumber( self, userInput ):
        """Input-panel callback: select from the stored selection to the given line.

        A target before the selection selects backwards from the selection's
        end to the start of the target line; any other target selects from
        the selection's start to the end of the target line.  Input that is
        not a whole number is reported in the status bar and ignored.
        """
        lineToRow_Offset = 1

        try:
            row = int( userInput ) - lineToRow_Offset
        except ValueError:
            sublime.status_message( "Select To Line: not a line number" )
            return

        # Line numbers below 1 are treated as the first line.
        row = max( row, 0 )

        selectionEnd_Row = self.view.text_point( row, 0 )
        currentSelection = self.currentSelection

        if selectionEnd_Row < currentSelection.a:
            selectionStart = currentSelection.b
            selectionEnd = self.view.line( selectionEnd_Row ).a
        else:
            # Also covers a target inside the current selection, which
            # previously matched neither branch and left the bounds unset.
            selectionStart = currentSelection.a
            selectionEnd = self.view.line( selectionEnd_Row ).b

        newSelection = sublime.Region( selectionStart, selectionEnd )
        self.view.sel().clear()
        self.view.sel().add( newSelection )
I would like to scan an HBase table and see integers as strings (not their binary representation). I can do the conversion, but I have no idea how to write the scan statement using the Java API from the HBase shell:
org.apache.hadoop.hbase.util.Bytes.toString(
"\x48\x65\x6c\x6c\x6f\x20\x48\x42\x61\x73\x65".to_java_bytes)
org.apache.hadoop.hbase.util.Bytes.toString("Hello HBase".to_java_bytes)
I would be very happy to have examples of scan and get commands that search binary data (longs) and output normal strings. I am using the HBase shell, not Java.
HBase stores data as untyped byte arrays. Therefore, if you perform a table scan, the data is displayed in a common format (an escaped hexadecimal string), e.g.: "\x48\x65\x6c\x6c\x6f\x20\x48\x42\x61\x73\x65" -> Hello HBase
If you want to get the typed value back from the serialized byte array, you have to do this manually.
You have the following options:
Java code (Bytes.toString(...))
hack the to_string function in $HBASE_HOME/lib/ruby/hbase/table.rb :
replace toStringBinary with toInt for non-meta tables
write a get/scan JRuby function which converts the byte array to the appropriate type
Since you want it in the HBase shell, consider the last option:
Create a file get_result.rb :
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Result;
import java.util.ArrayList;
# Simple function equivalent to scan 'test', {COLUMNS => 'c:c2'}
# Scans column c:c2 of table "test" and prints every cell with its value
# decoded through Bytes.toInt instead of the escaped binary form.
# The scanner and the table are closed when the scan finishes or fails.
def get_result()
  htable = HTable.new(HBaseConfiguration.new, "test")
  begin
    rs = htable.getScanner(Bytes.toBytes("c"), Bytes.toBytes("c2"))
    begin
      output = ArrayList.new
      output.add "ROW\t\t\t\t\t\tCOLUMN\+CELL"
      rs.each { |r|
        r.raw.each { |kv|
          row = Bytes.toString(kv.getRow)
          fam = Bytes.toString(kv.getFamily)
          ql = Bytes.toString(kv.getQualifier)
          ts = kv.getTimestamp
          # NOTE(review): presumably every value in this column is a 4-byte
          # int -- confirm, or guard on the value's length.
          val = Bytes.toInt(kv.getValue)
          output.add " #{row} \t\t\t\t\t\t column=#{fam}:#{ql}, timestamp=#{ts}, value=#{val}"
        }
      }
      output.each {|line| puts "#{line}\n"}
    ensure
      # Release the scanner even when decoding a cell raises.
      rs.close
    end
  ensure
    htable.close
  end
end
load it in the HBase shell and use it:
require '/path/to/get_result'
get_result
Note: modify/enhance/fix the code according to your needs
Just for completeness' sake, it turns out that the call Bytes::toStringBinary gives the hex-escaped sequence you get in HBase shell:
\x0B\x2_SOME_ASCII_TEXT_\x10\x00...
Whereas, Bytes::toString will try to deserialize to a string assuming UTF8, which will look more like:
\u8900\u0710\u0115\u0320\u0000_SOME_UTF8_TEXT_\u4009...
You can add a scan_counter command to the HBase shell.
First:
Add the following to /usr/lib/hbase/lib/ruby/hbase/table.rb (after the scan function):
#----------------------------------------------------------------------------------------------
# Scans whole table or a range of keys and returns rows matching specific criterias with values as number
# Scans the table (optionally restricted by the scanner specification in
# +args+) and renders each cell through to_string_scan_counter.
#
# args - Hash of scanner options: LIMIT, MAXLENGTH, FILTER, STARTROW, STOPROW,
#        TIMESTAMP, COLUMNS/COLUMN, CACHE_BLOCKS, VERSIONS, TIMERANGE.
#        LIMIT and MAXLENGTH are removed from the hash that is passed in.
#
# With a block, yields (row key, "column=..., cell") per cell and returns the
# number of rows processed; without one, returns {row key => {column => cell}}.
# Raises ArgumentError when args is not a Hash or COLUMNS is neither a String
# nor an Array.
def scan_counter(args = {})
  unless args.kind_of?(Hash)
    raise ArgumentError, "Arguments should be a hash. Failed to parse #{args.inspect}, #{args.class}"
  end

  limit = args.delete("LIMIT") || -1
  maxlength = args.delete("MAXLENGTH") || -1

  if args.any?
    filter = args["FILTER"]
    startrow = args["STARTROW"] || ''
    stoprow = args["STOPROW"]
    timestamp = args["TIMESTAMP"]
    columns = args["COLUMNS"] || args["COLUMN"] || get_all_columns
    # `args["CACHE_BLOCKS"] || true` could never yield false; default to true
    # only when the option is absent.
    cache = args["CACHE_BLOCKS"].nil? ? true : args["CACHE_BLOCKS"]
    versions = args["VERSIONS"] || 1
    timerange = args[TIMERANGE]

    # Normalize column names
    columns = [columns] if columns.class == String
    unless columns.kind_of?(Array)
      raise ArgumentError.new("COLUMNS must be specified as a String or an Array")
    end

    scan = if stoprow
      org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes)
    else
      org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes)
    end

    columns.each { |c| scan.addColumns(c) }
    scan.setFilter(filter) if filter
    scan.setTimeStamp(timestamp) if timestamp
    scan.setCacheBlocks(cache)
    scan.setMaxVersions(versions) if versions > 1
    scan.setTimeRange(timerange[0], timerange[1]) if timerange
  else
    scan = org.apache.hadoop.hbase.client.Scan.new
  end

  # Start the scanner
  scanner = @table.getScanner(scan)
  count = 0
  res = {}

  begin
    iter = scanner.iterator

    # Iterate results
    while iter.hasNext
      if limit > 0 && count >= limit
        break
      end

      row = iter.next
      key = org.apache.hadoop.hbase.util.Bytes::toStringBinary(row.getRow)

      row.list.each do |kv|
        family = String.from_java_bytes(kv.getFamily)
        qualifier = org.apache.hadoop.hbase.util.Bytes::toStringBinary(kv.getQualifier)

        column = "#{family}:#{qualifier}"
        cell = to_string_scan_counter(column, kv, maxlength)

        if block_given?
          yield(key, "column=#{column}, #{cell}")
        else
          res[key] ||= {}
          res[key][column] = cell
        end
      end

      # One more row processed
      count += 1
    end
  ensure
    # Release the scanner whether the scan completed, hit LIMIT or raised.
    scanner.close
  end

  return ((block_given?) ? count : res)
end
#----------------------------------------------------------------------------------------
# Helper methods
# Returns a list of column names in the table
# Returns one "family:" entry per column family in the table descriptor.
# The receiver is the @table instance variable; its leading "@" had been
# garbled into "#", which turned the whole line into a comment.
def get_all_columns
  @table.table_descriptor.getFamilies.map do |family|
    "#{family.getNameAsString}:"
  end
end
# Checks if current table is one of the 'meta' tables
# True when the current table's name equals HConstants::META_TABLE_NAME or
# HConstants::ROOT_TABLE_NAME.
def is_meta_table?
  # Instance variable restored: "#table" commented out the right-hand side.
  tn = @table.table_name
  bytes = org.apache.hadoop.hbase.util.Bytes
  constants = org.apache.hadoop.hbase.HConstants
  bytes.equals(tn, constants::META_TABLE_NAME) || bytes.equals(tn, constants::ROOT_TABLE_NAME)
end
# Returns family and (when has it) qualifier for a column name
# Splits "family:qualifier" with KeyValue.parseColumn and returns
# [family, qualifier]; the qualifier is nil when the name has only one part.
def parse_column_name(column)
  parts = org.apache.hadoop.hbase.KeyValue.parseColumn(column.to_java_bytes)
  family = parts[0]
  qualifier = parts.length > 1 ? parts[1] : nil
  return family, qualifier
end
# Make a String of the passed kv
# Intercept cells whose format we know such as the info:regioninfo in .META.
# Renders a KeyValue as "timestamp=..., value=...".
# On the meta tables, region-info and server-start-code cells are decoded
# specially and returned untruncated; every other cell is shown with
# Bytes.toStringBinary and cut to +maxlength+ characters unless it is -1.
def to_string(column, kv, maxlength = -1)
  bytes = org.apache.hadoop.hbase.util.Bytes

  if is_meta_table?
    case column
    when 'info:regioninfo', 'info:splitA', 'info:splitB'
      hri = org.apache.hadoop.hbase.util.Writables.getHRegionInfoOrNull(kv.getValue)
      return "timestamp=%d, value=%s" % [kv.getTimestamp, hri.toString]
    when 'info:serverstartcode'
      # An empty value cannot be read as a long, so it is shown as binary.
      str_val = kv.getValue.length > 0 ? bytes.toLong(kv.getValue) : bytes.toStringBinary(kv.getValue)
      return "timestamp=%d, value=%s" % [kv.getTimestamp, str_val]
    end
  end

  rendered = "timestamp=#{kv.getTimestamp}, value=#{bytes.toStringBinary(kv.getValue)}"
  maxlength == -1 ? rendered : rendered[0, maxlength]
end
# Like to_string, but renders ordinary cell values as longs (counters).
#
# The special meta-table columns are formatted exactly as in to_string, so
# they are delegated to it instead of repeating the same branches here.
# A value shorter than 8 bytes cannot be read by Bytes.toLong; it falls back
# to the escaped binary form rather than aborting the whole scan.
# The result is cut to +maxlength+ characters unless maxlength is -1.
def to_string_scan_counter(column, kv, maxlength = -1)
  meta_columns = ['info:regioninfo', 'info:splitA', 'info:splitB', 'info:serverstartcode']
  if is_meta_table? && meta_columns.include?(column)
    return to_string(column, kv, maxlength)
  end

  bytes = org.apache.hadoop.hbase.util.Bytes
  raw = kv.getValue
  value = if raw.length >= bytes::SIZEOF_LONG
    bytes.toLong(raw)
  else
    bytes.toStringBinary(raw)
  end

  val = "timestamp=#{kv.getTimestamp}, value=#{value}"
  (maxlength != -1) ? val[0, maxlength] : val
end
Second:
Add the following file, named scan_counter.rb, to
/usr/lib/hbase/lib/ruby/shell/commands/ :
#
# Copyright 2010 The Apache Software Foundation
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
module Shell
  module Commands
    # Shell command "scan_counter": scans a table and prints each cell as
    # rendered by the table's scan_counter method.
    class ScanCounter < Command
      # Usage text shown by the shell's help.
      def help
        <<-EOF
Scan a table with cell value that is long; pass table name and optionally a dictionary of scanner
specifications. Scanner specifications may include one or more of:
TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, TIMESTAMP, MAXLENGTH,
or COLUMNS. If no columns are specified, all columns will be scanned.
To scan all members of a column family, leave the qualifier empty as in
'col_family:'.
Some examples:
hbase> scan_counter '.META.'
hbase> scan_counter '.META.', {COLUMNS => 'info:regioninfo'}
hbase> scan_counter 't1', {COLUMNS => ['c1', 'c2'], LIMIT => 10, STARTROW => 'xyz'}
hbase> scan_counter 't1', {FILTER => org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)}
hbase> scan_counter 't1', {COLUMNS => 'c1', TIMERANGE => [1303668804, 1303668904]}
For experts, there is an additional option -- CACHE_BLOCKS -- which
switches block caching for the scanner on (true) or off (false). By
default it is enabled. Examples:
hbase> scan_counter 't1', {COLUMNS => ['c1', 'c2'], CACHE_BLOCKS => false}
EOF
      end

      # Prints a header, one formatted line per yielded cell, and a footer
      # with the start time and the count returned by scan_counter.
      def command(table, args = {})
        started_at = Time.now
        formatter.header(["ROW", "COLUMN+CELL"])

        row_count = table(table).scan_counter(args) { |row, cells|
          formatter.row([ row, cells ])
        }

        formatter.footer(started_at, row_count)
      end
    end
  end
end
Finally:
Register the scan_counter command in /usr/lib/hbase/lib/ruby/shell.rb.
Replace the existing command group with the one below (you can identify it by 'DATA MANIPULATION COMMANDS'):
# Registers the 'dml' command group (full name 'DATA MANIPULATION COMMANDS')
# with the shell. The :commands list names every command in the group;
# scan_counter is listed here alongside scan.
# NOTE(review): presumably each name must match a file under
# shell/commands/ (e.g. scan_counter.rb) -- confirm against shell.rb.
Shell.load_command_group(
'dml',
:full_name => 'DATA MANIPULATION COMMANDS',
:commands => %w[
count
delete
deleteall
get
get_counter
incr
put
scan
scan_counter
truncate
]
)